├── scripts
│   ├── test.py
│   ├── env_setup.sh
│   ├── install.R
│   └── time-render.sh
├── styles.css
├── d1
│   ├── its.png
│   └── index.qmd
├── examples
│   └── test.qmd
├── s5
│   ├── isochrone.Rds
│   ├── routes_drive_top.Rds
│   ├── dodgr-install.md
│   └── stats19-2019-2020-gemini.qmd
├── slides
│   ├── index.qmd
│   ├── images
│   │   ├── paste-2.png
│   │   ├── paste-3.png
│   │   ├── paste-4.png
│   │   ├── paste-5.png
│   │   ├── paste-6.png
│   │   ├── paste-7.png
│   │   ├── paste-17.png
│   │   ├── schedule.png
│   │   └── online-teaching-tweet.png
│   ├── references.bib
│   ├── professions.qmd
│   ├── intro.qmd
│   └── road-safety.qmd
├── s2
│   ├── images
│   │   ├── carbon.jpg
│   │   ├── TM-model.png
│   │   ├── chthulu.png
│   │   ├── railway.png
│   │   ├── ITTS-model.png
│   │   ├── OTT-Leeds.png
│   │   ├── PyPi-parenx.png
│   │   ├── flow-data.png
│   │   ├── nx-all-2011.png
│   │   ├── nx-oa-2011.png
│   │   ├── WBD-whoami-01.png
│   │   ├── oa-core-2011.png
│   │   ├── oa-total-2011.png
│   │   ├── social-media.png
│   │   ├── Europe-0100-hx.png
│   │   ├── Europe-0200-hx.png
│   │   ├── Europe-0330-hx.png
│   │   ├── msoa-total-2011.png
│   │   ├── msoa-total-2021.png
│   │   ├── national-emissions.png
│   │   ├── routes-and-regions.png
│   │   ├── transport-emissions.png
│   │   └── European-Rail-Electrification.png
│   ├── demo.qmd
│   ├── homework.qmd
│   └── slides.qmd
├── .Rbuildignore
├── s3
│   ├── images
│   │   └── paste-1.png
│   ├── demo-references.qmd
│   ├── demo-references.bib
│   └── slides.qmd
├── sem2
│   └── images
│       ├── carbon.jpg
│       ├── OTT-Leeds.png
│       ├── TM-model.png
│       ├── chthulu.png
│       ├── flow-data.png
│       ├── railway.png
│       ├── ITTS-model.png
│       ├── PyPi-parenx.png
│       ├── nx-all-2011.png
│       ├── nx-oa-2011.png
│       ├── Europe-0100-hx.png
│       ├── Europe-0200-hx.png
│       ├── Europe-0330-hx.png
│       ├── WBD-whoami-01.png
│       ├── oa-core-2011.png
│       ├── oa-total-2011.png
│       ├── social-media.png
│       ├── msoa-total-2011.png
│       ├── msoa-total-2021.png
│       ├── national-emissions.png
│       ├── routes-and-regions.png
│       ├── transport-emissions.png
│       └── European-Rail-Electrification.png
├── s1
│   ├── data_growth_time.png
│   ├── images
│   │   └── rstudio-foundations.png
│   ├── s1project
│   │   ├── s1project.Rproj
│   │   └── foundations.qmd
│   └── transport-software.csv
├── .gitattributes
├── requirements.txt
├── Dockerfile
├── .gitignore
├── tds.Rproj
├── .binder
│   └── Dockerfile
├── .github
│   └── workflows
│       ├── devcontainer-prebuild.yml
│       ├── pixi.yml
│       ├── docker.yml
│       ├── pr.yml
│       └── publish.yml
├── .devcontainer
│   └── devcontainer.json
├── pixi.toml
├── _quarto-timing.yml
├── d3
│   ├── report-structure.qmd
│   ├── index.qmd
│   └── assessment-brief.qmd
├── DESCRIPTION
├── data
│   └── timetable.csv
├── ROOM_BOOKING_REQUEST.md
├── s6
│   └── slides.qmd
├── _quarto.yml
├── dstp.qmd
├── datahack.qmd
├── d2
│   ├── references.bib
│   ├── template.qmd
│   ├── index.qmd
│   ├── example.qmd
│   └── assessment-brief.qmd
├── README.md
├── minihack-transport-data.qmd
├── README.qmd
├── assessment-overview.qmd
├── LICENSE
├── schedule.qmd
├── index.qmd
├── reading.qmd
├── marking-criteria.qmd
├── elsevier-harvard.csl
└── sem1
    └── index.qmd

--------------------------------------------------------------------------------
/scripts/test.py:
--------------------------------------------------------------------------------
1 | x = 1
2 | print(x)
--------------------------------------------------------------------------------
/styles.css:
--------------------------------------------------------------------------------
1 | /* css styles */
2 | 
--------------------------------------------------------------------------------
/d1/its.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/itsleeds/tds/HEAD/d1/its.png
--------------------------------------------------------------------------------
/scripts/env_setup.sh:
-------------------------------------------------------------------------------- 1 | Rscript -e "pak::pak('itsleeds/tds', ask = FALSE)" -------------------------------------------------------------------------------- /examples/test.qmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ```{python} 5 | x = 1 6 | print(x) 7 | ``` -------------------------------------------------------------------------------- /s5/isochrone.Rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s5/isochrone.Rds -------------------------------------------------------------------------------- /slides/index.qmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Introduction to Transport Data Science](intro.html) -------------------------------------------------------------------------------- /s2/images/carbon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/carbon.jpg -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^README\.Rmd$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | .pixi/ 5 | ^\.quarto$ 6 | -------------------------------------------------------------------------------- /s2/images/TM-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/TM-model.png -------------------------------------------------------------------------------- /s2/images/chthulu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/chthulu.png -------------------------------------------------------------------------------- /s2/images/railway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/railway.png -------------------------------------------------------------------------------- /s3/images/paste-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s3/images/paste-1.png -------------------------------------------------------------------------------- /sem2/images/carbon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/carbon.jpg -------------------------------------------------------------------------------- /s1/data_growth_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s1/data_growth_time.png -------------------------------------------------------------------------------- /s2/images/ITTS-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/ITTS-model.png -------------------------------------------------------------------------------- /s2/images/OTT-Leeds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/OTT-Leeds.png 
-------------------------------------------------------------------------------- /s2/images/PyPi-parenx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/PyPi-parenx.png -------------------------------------------------------------------------------- /s2/images/flow-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/flow-data.png -------------------------------------------------------------------------------- /s2/images/nx-all-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/nx-all-2011.png -------------------------------------------------------------------------------- /s2/images/nx-oa-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/nx-oa-2011.png -------------------------------------------------------------------------------- /s5/routes_drive_top.Rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s5/routes_drive_top.Rds -------------------------------------------------------------------------------- /sem2/images/OTT-Leeds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/OTT-Leeds.png -------------------------------------------------------------------------------- /sem2/images/TM-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/TM-model.png -------------------------------------------------------------------------------- /sem2/images/chthulu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/chthulu.png -------------------------------------------------------------------------------- /sem2/images/flow-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/flow-data.png -------------------------------------------------------------------------------- /sem2/images/railway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/railway.png -------------------------------------------------------------------------------- /slides/images/paste-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/paste-2.png -------------------------------------------------------------------------------- /slides/images/paste-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/paste-3.png -------------------------------------------------------------------------------- /slides/images/paste-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/paste-4.png -------------------------------------------------------------------------------- 
/slides/images/paste-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/paste-5.png -------------------------------------------------------------------------------- /slides/images/paste-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/paste-6.png -------------------------------------------------------------------------------- /slides/images/paste-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/paste-7.png -------------------------------------------------------------------------------- /s2/images/WBD-whoami-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/WBD-whoami-01.png -------------------------------------------------------------------------------- /s2/images/oa-core-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/oa-core-2011.png -------------------------------------------------------------------------------- /s2/images/oa-total-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/oa-total-2011.png -------------------------------------------------------------------------------- /s2/images/social-media.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/social-media.png -------------------------------------------------------------------------------- /sem2/images/ITTS-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/ITTS-model.png -------------------------------------------------------------------------------- /sem2/images/PyPi-parenx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/PyPi-parenx.png -------------------------------------------------------------------------------- /sem2/images/nx-all-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/nx-all-2011.png -------------------------------------------------------------------------------- /sem2/images/nx-oa-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/nx-oa-2011.png -------------------------------------------------------------------------------- /slides/images/paste-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/paste-17.png -------------------------------------------------------------------------------- /slides/images/schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/schedule.png -------------------------------------------------------------------------------- /.gitattributes: 
-------------------------------------------------------------------------------- 1 | # GitHub syntax highlighting 2 | pixi.lock linguist-language=YAML linguist-generated=true 3 | -------------------------------------------------------------------------------- /s2/images/Europe-0100-hx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/Europe-0100-hx.png -------------------------------------------------------------------------------- /s2/images/Europe-0200-hx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/Europe-0200-hx.png -------------------------------------------------------------------------------- /s2/images/Europe-0330-hx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/Europe-0330-hx.png -------------------------------------------------------------------------------- /s2/images/msoa-total-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/msoa-total-2011.png -------------------------------------------------------------------------------- /s2/images/msoa-total-2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/msoa-total-2021.png -------------------------------------------------------------------------------- /sem2/images/Europe-0100-hx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/Europe-0100-hx.png -------------------------------------------------------------------------------- /sem2/images/Europe-0200-hx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/Europe-0200-hx.png -------------------------------------------------------------------------------- /sem2/images/Europe-0330-hx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/Europe-0330-hx.png -------------------------------------------------------------------------------- /sem2/images/WBD-whoami-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/WBD-whoami-01.png -------------------------------------------------------------------------------- /sem2/images/oa-core-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/oa-core-2011.png -------------------------------------------------------------------------------- /sem2/images/oa-total-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/oa-total-2011.png -------------------------------------------------------------------------------- /sem2/images/social-media.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/social-media.png 
-------------------------------------------------------------------------------- /s2/images/national-emissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/national-emissions.png -------------------------------------------------------------------------------- /s2/images/routes-and-regions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/routes-and-regions.png -------------------------------------------------------------------------------- /sem2/images/msoa-total-2011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/msoa-total-2011.png -------------------------------------------------------------------------------- /sem2/images/msoa-total-2021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/msoa-total-2021.png -------------------------------------------------------------------------------- /s1/images/rstudio-foundations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s1/images/rstudio-foundations.png -------------------------------------------------------------------------------- /s2/images/transport-emissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/transport-emissions.png -------------------------------------------------------------------------------- /sem2/images/national-emissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/national-emissions.png -------------------------------------------------------------------------------- /sem2/images/routes-and-regions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/routes-and-regions.png -------------------------------------------------------------------------------- /sem2/images/transport-emissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/transport-emissions.png -------------------------------------------------------------------------------- /slides/images/online-teaching-tweet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/slides/images/online-teaching-tweet.png -------------------------------------------------------------------------------- /s2/images/European-Rail-Electrification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/s2/images/European-Rail-Electrification.png -------------------------------------------------------------------------------- /sem2/images/European-Rail-Electrification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/itsleeds/tds/HEAD/sem2/images/European-Rail-Electrification.png -------------------------------------------------------------------------------- 
/requirements.txt: -------------------------------------------------------------------------------- 1 | # Minimal Python requirements for the project. Keep this file pinned for 2 | # Docker/Gitpod/Binder installs. Add packages here as needed. 3 | jupyter 4 | jupyter-cache 5 | geopandas 6 | matplotlib 7 | shapely 8 | seaborn 9 | ipykernel 10 | osmnx -------------------------------------------------------------------------------- /s1/s1project/s1project.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/geocompx/pythonr 2 | 3 | RUN R -e "remotes::install_github('itsleeds/tds')" 4 | 5 | # Copy and install Python requirements first to leverage Docker layer cache 6 | COPY requirements.txt /tmp/requirements.txt 7 | RUN python -m pip install --upgrade pip \ 8 | && pip install -r /tmp/requirements.txt 9 | 10 | WORKDIR /workspace 11 | 12 | # Copy the repository contents 13 | COPY . /workspace 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/ 2 | 3 | /.quarto/ 4 | _freeze/ 5 | *.csv 6 | *.ics 7 | 8 | # pixi environments 9 | .pixi 10 | *.egg-info 11 | *cache* 12 | .Rproj.user 13 | *.geojson 14 | *.zip 15 | 16 | # Large files 17 | !s1/Estaciones_Troncales_de_TRANSMILENIO.geojson 18 | *.pdf 19 | otp_TDS 20 | .Rhistory 21 | *.ppt* 22 | *.doc* 23 | 24 | 25 | # Quarto build artifacts 26 | *.html 27 | site_libs/ 28 | *_files/ 29 | *.knit.md 30 | *.rmarkdown 31 | -------------------------------------------------------------------------------- /tds.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 7b6392e0-c101-4ecb-a826-e2f966fa6422 3 | 4 | RestoreWorkspace: Default 5 | SaveWorkspace: Default 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | BuildType: Package 17 | PackageUseDevtools: Yes 18 | PackageInstallArgs: --no-multiarch --with-keep.source 19 | -------------------------------------------------------------------------------- /.binder/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/itsleeds/tds 2 | 3 | # Ensure the binder default user (jovyan, uid 1000) owns the workspace so 4 | # the non-root runtime can edit files. This prevents permission denied 5 | # errors when opening the repository in Binder. 
6 | USER root
7 | RUN mkdir -p /home/jovyan/work /workspace \
8 |     && chown -R 1000:1000 /home/jovyan /workspace
9 | USER 1000
10 | 
11 | # repo2docker will copy the repository contents into the image at build time
--------------------------------------------------------------------------------
/.github/workflows/devcontainer-prebuild.yml:
--------------------------------------------------------------------------------
1 | name: Dev Container Prebuild
2 | 
3 | on:
4 |   push:
5 |     branches: [ main ]
6 |   pull_request:
7 |     branches: [ main ]
8 | 
9 | jobs:
10 |   prebuild:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Checkout
14 |         uses: actions/checkout@v4
15 | 
16 |       - name: Prebuild dev container
17 |         uses: devcontainers/ci@v0.3
18 |         with:
19 |           configFile: .devcontainer/devcontainer.json
20 |           runCmd: echo "Prebuild complete"
21 | 
--------------------------------------------------------------------------------
/s3/demo-references.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | bibliography: demo-references.bib
3 | ---
4 | 
5 | You can cite packages as follows:
6 | 
7 | ```{r}
8 | #| eval: false
9 | citation("osmextract")
10 | ```
11 | 
12 | ```{r}
13 | #| include: false
14 | # Lots of not particularly interesting code...
15 | 1 + 1
16 | ```
17 | 
18 | Open up the .bib file, e.g. with:
19 | 
20 | ```{r}
21 | #| eval: false
22 | file.edit("s3/demo-references.bib")
23 | ```
24 | 
25 | I used the `osmextract` package [@osmextract].
26 | 
27 | [@ferster2019]
28 | 
29 | [@Kaiser2024]
30 | 
31 | # References
32 | 
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "Transport data science container",
3 |   // Use pre-built image by default:
4 |   "image": "ghcr.io/itsleeds/tds",
5 |   // Uncomment the build section below and comment out "image" above to rebuild from ../Dockerfile
6 |   // "build": {
7 |   //   "dockerfile": "../Dockerfile"
8 |   // },
9 |   "customizations": {
10 |     "vscode": {
11 |       "extensions": [
12 |         "reditorsupport.r",
13 |         "GitHub.copilot-chat",
14 |         "quarto.quarto",
15 |         "ms-python.python",
16 |         "ms-toolsai.jupyter"
17 |       ]
18 |     }
19 |   }
20 | }
--------------------------------------------------------------------------------
/.github/workflows/pixi.yml:
--------------------------------------------------------------------------------
1 | name: Pixi CI
2 | 
3 | on:
4 |   workflow_dispatch:
5 | 
6 | jobs:
7 |   pixi:
8 |     runs-on: ubuntu-latest
9 |     steps:
10 |       - name: Checkout repository
11 |         uses: actions/checkout@v4
12 | 
13 |       - name: Setup Pixi
14 |         uses: prefix-dev/setup-pixi@v0.9.0
15 | 
16 |       - name: Install project dependencies with pixi
17 |         run: pixi install
18 | 
19 |       - name: Run project install tasks (pip + R)
20 |         run: pixi run install_all
21 | 
22 |       - name: Build site / render with quarto via pixi
23 |         run: pixi run quarto render
24 | 
--------------------------------------------------------------------------------
/s1/transport-software.csv:
--------------------------------------------------------------------------------
1 | Software,Company/Developer,Company HQ,Licence,Citations,Price (single license),Comments,Source of price,Source of citations,,Cost ($)
2 | Emme,INRO,Canada,Proprietary,780,?,,,https://scholar.google.com/scholar?hl=en&num=100&ie=UTF-8&q=emme+inro+transport,,12000
3 | TransCAD,Caliper,USA,Proprietary,1360,14000,,https://www.caliper.com/tcprice.htm,,,
4 | 
Visum,PTV,Germany,Proprietary,1810,?,,,,,
5 | Cube,Citilabs,USA,Proprietary,400,?,,,,,
6 | MATSim,TU Berlin,Germany,Open source (GPL),1470,0,,,,,
7 | SUMO,DLR,Germany,Open source (EPL),1310,0,,,,,
8 | sDNA,Cardiff University,UK,Open source (GPL),170,?,,,https://scholar.google.com/scholar?hl=en&num=20&as_sdt=0%2C5&q=sdna+cooper+transport&btnG=,,
9 | 
--------------------------------------------------------------------------------
/pixi.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | channels = ["conda-forge"]
3 | description = "Transport data science stack"
4 | name = "tds"
5 | platforms = ["win-64", "linux-64", "osx-64"]
6 | version = "0.1.0"
7 | 
8 | [dependencies]
9 | quarto = "*"
10 | r-base = "*"
11 | r-irkernel = "*"
12 | r-tidyverse = "*"
13 | r-sf = "*"
14 | r-quarto = "*"
15 | r-nycflights13 = "*"
16 | r-remotes = "*"
17 | r-DT = "*"
18 | r-reticulate = "*"
19 | r-spData = "*"
20 | r-pak = "*"
21 | r-geosphere = "*"
22 | 
23 | [tasks]
24 | simple = "echo This is a simple task"
25 | # install.R lives in the scripts/ folder, so give the path relative to the project root:
26 | install_r = { cmd = "Rscript scripts/install.R" }
27 | install_py = { cmd = "python3 -m pip install --user -r requirements.txt" }
28 | install_all = { cmd = "python3 -m pip install --user -r requirements.txt && Rscript scripts/install.R" }
--------------------------------------------------------------------------------
/_quarto-timing.yml:
--------------------------------------------------------------------------------
1 | # Timing profile: inherits other settings from the base `_quarto.yml` and
2 | # disables caching and freezing so each render runs for an accurate timing
3 | # measurement. Use with: `quarto render --profile timing`
4 | project:
5 |   type: website
6 |   # Place timing outputs in the default site folder:
7 |   output-dir: _site
8 | 
9 | execute:
10 |   # ensure we actually run code to measure time
11 |   eval: true
12 |   # disable freeze and cache so timings reflect full cost
13 |   freeze: false
14 |   cache: false
15 |   # show messages so logs are informative
16 |   echo: true
17 |   message: true
18 |   warning: true
19 |   error: true
20 | 
21 | format:
22 |   html:
23 |     keep-md: true
24 | 
--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
1 | name: Docker
2 | 
3 | on:
4 |   push:
5 |     paths:
6 |       - 'Dockerfile'
7 |       - '.github/workflows/docker.yml'
8 | 
9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - name: Checkout repository
15 |         uses: actions/checkout@v3
16 | 
17 |       - name: Set up Docker Buildx
18 |         uses: docker/setup-buildx-action@v2
19 | 
20 |       - name: Log in to GitHub Container Registry
21 |         uses: docker/login-action@v2
22 |         with:
23 |           registry: ghcr.io
24 |           username: ${{ github.repository_owner }}
25 |           password: ${{ secrets.GITHUB_TOKEN }}
26 | 
27 |       - name: Build and push Docker image
28 |         uses: docker/build-push-action@v4
29 |         with:
30 |           context: .
31 |           push: true
32 |           tags: ghcr.io/${{ github.repository_owner }}/tds:latest
33 |           outputs: type=image,push=true,visibility=public
34 | 
35 | 
--------------------------------------------------------------------------------
/.github/workflows/pr.yml:
--------------------------------------------------------------------------------
1 | name: Pull Request Checks
2 | 
3 | on:
4 |   pull_request:
5 |     branches: [main]
6 | 
7 | jobs:
8 |   build-check:
9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name: Check out repository
12 |         uses: actions/checkout@v4
13 | 
14 |       - name: Set up Quarto
15 |         uses: quarto-dev/quarto-actions/setup@v2
16 | 
17 |       - name: Install R
18 |         uses: r-lib/actions/setup-r@v2
19 |         with:
20 |           r-version: '4.4.1'
21 |           use-public-rspm: true
22 | 
23 |       - name: Install R Dependencies
24 |         uses: r-lib/actions/setup-r-dependencies@v2
25 |         with:
26 |           cache-version: 2
27 | 
28 |       - name: Additional Setup
29 |         run: |
30 |           # Install Python (-y avoids interactive prompts in CI):
31 |           sudo apt-get update
32 |           sudo apt-get install -y python3 python3-pip
33 |           python3 -m pip install jupyter jupyter-cache
34 | 
35 |       - name: Build Quarto Project
36 |         run: quarto render
--------------------------------------------------------------------------------
/s5/dodgr-install.md:
--------------------------------------------------------------------------------
1 | Local Install of dodgr
2 | ================
3 | Malcolm Morgan
4 | University of Leeds,
5 | 2020-02-11
6 | 
7 | Follow these steps only if you cannot run the dodgr examples.
8 | 
9 | This will locally compile the latest version of dodgr on your computer.
10 | 
11 | 1.  Save your work in progress and close RStudio
12 | 2.  Go to the dodgr repository on GitHub and click Clone or download
13 | 3.  Choose Download ZIP
14 | 4.  Unzip the folder
15 | 5.  In the unzipped folder, find and open dodgr.Rproj; a new RStudio
16 |     session will open
17 | 6.  Run this code:
18 | 
19 | ``` r
20 | remove.packages("dodgr")
21 | if (!"devtools" %in% installed.packages()[, 1]) {
22 |   install.packages("devtools")
23 | }
24 | devtools::install(".", export_all = TRUE, upgrade = "never")
25 | library(dodgr)
26 | ```
27 | 
28 | 7.  Close RStudio and open a new RStudio session with your TDS work
29 | 8.  dodgr should now work
30 | 
--------------------------------------------------------------------------------
/s2/demo.qmd:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | ```{r}
4 | #| eval: false
5 | library(tidyverse)
6 | library(osmextract)
7 | 
8 | region_name = "lund"
9 | osm_data_raw = oe_get(place = region_name)
10 | osm_data_shops = oe_get(
11 |   place = region_name,
12 |   query = "
13 |   SELECT *
14 |   FROM 'points'
15 |   WHERE shop = 'supermarket'",
16 |   extra_tags = c("shop")
17 | )
18 | dim(osm_data_shops)
19 | plot(osm_data_shops$geometry)
20 | 
21 | lund_region = zonebuilder::zb_zone("Lund, Sweden")
22 | library(tmap)
23 | tmap_mode("view")
24 | qtm(lund_region)
25 | lund_6km = lund_region |>
26 |   filter(circle_id <= 3)
27 | lund_6km_boundary = sf::st_union(lund_6km)
28 | qtm(lund_6km_boundary)
29 | 
30 | osm_data_lund1 = osm_data_shops |>
31 |   sf::st_filter(lund_6km_boundary)
32 | nrow(osm_data_lund1)
33 | qtm(osm_data_lund1)
34 | 
35 | osm_data_lund2 = oe_get(
36 |   place = region_name,
37 |   query = "
38 |   SELECT *
39 |   FROM 'points'
40 |   WHERE shop = 'supermarket'",
41 |   extra_tags = c("shop"),
42 |   boundary = lund_6km_boundary,
43 |   boundary_type = "clipsrc"
44 | )
45 | 
46 | nrow(osm_data_lund2)
47 | ```
48 | 
--------------------------------------------------------------------------------
/d3/report-structure.qmd:
--------------------------------------------------------------------------------
1 | Your report should have a logical structure and clear headings, which could include:
2 | 
3 | 
4 | 1. **Introduction**
5 |    - Clear research question
6 |    - Context and motivation
7 |    - Reference to relevant literature
8 | 
9 | 2. **Input Data and Data Cleaning**
10 |    - Description of datasets
11 |    - Data quality considerations
12 |    - Processing steps
13 | 
14 | 3. **Exploratory Data Analysis**
15 |    - Initial visualization
16 |    - Key patterns
17 |    - Statistical summaries
18 | 
19 | 4. **Analysis and Results**
20 |    - Detailed analysis
21 |    - Clear presentation
22 |    - Supporting visualizations
23 | 
24 | 5. **Discussion and conclusions**
25 |    - Results, key findings, interpretation
26 |    - Policy implications/recommendations
27 |    - Strengths and limitations
28 |    - Future directions
29 | 
30 | 6. 
**References** 31 | - Properly formatted citations 32 | - Mix of academic and technical/policy/other sources 33 | - Recommendation: generate these with Quarto (see [Quarto Citation Guide](https://quarto.org/docs/get-started/authoring/rstudio.html#citations)) -------------------------------------------------------------------------------- /slides/references.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{vidaltortosa2021, 3 | title = {Socioeconomic inequalities in cycling safety: An analysis of cycling injury risk by residential deprivation level in England}, 4 | author = {Vidal Tortosa, Eugeni and Lovelace, Robin and Heinen, Eva and Mann, Richard P.}, 5 | year = {2021}, 6 | month = {12}, 7 | date = {2021-12-01}, 8 | journal = {Journal of Transport & Health}, 9 | pages = {101291}, 10 | volume = {23}, 11 | doi = {10.1016/j.jth.2021.101291}, 12 | url = {https://www.sciencedirect.com/science/article/pii/S2214140521003212}, 13 | note = {Citation Key: vidaltortosa{\_}socioeconomic{\_}2021}, 14 | langid = {en} 15 | } 16 | 17 | @article{tait2023, 18 | title = {Contraflows and cycling safety: Evidence from 22 years of data involving 508 one-way streets}, 19 | author = {Tait, Caroline and Beecham, Roger and Lovelace, Robin and Barber, Stuart}, 20 | year = {2023}, 21 | month = {01}, 22 | date = {2023-01-01}, 23 | journal = {Accident Analysis & Prevention}, 24 | pages = {106895}, 25 | volume = {179}, 26 | doi = {10.1016/j.aap.2022.106895}, 27 | url = {https://doi.org/10.1016/j.aap.2022.106895}, 28 | note = {Citation Key: tait{\_}contraflows{\_}2023}, 29 | langid = {en} 30 | } 31 | -------------------------------------------------------------------------------- /s3/demo-references.bib: -------------------------------------------------------------------------------- 1 | @Manual{osmextract, 2 | title = {osmextract: Download and Import Open Street Map Data Extracts}, 3 | author = {Andrea Gilardi and Robin Lovelace}, 4 | year = {2024}, 5 | note = {R package version 0.5.2}, 6 | url = {https://CRAN.R-project.org/package=osmextract}, 7 | } 8 | @article{ferster2019, 9 | title = {Using OpenStreetMap to inventory bicycle infrastructure: A comparison with open data from cities}, 10 | author = {Ferster, Colin and Fischer, Jaimy and Manaugh, Kevin and Nelson, Trisalyn and Winters, Meghan}, 11 | year = {2019}, 12 | month = {02}, 13 | date = {2019-02-22}, 14 | journal = {International Journal of Sustainable Transportation}, 15 | pages = {64--73}, 16 | volume = {14}, 17 | number = {1}, 18 | doi = {10.1080/15568318.2018.1519746}, 19 | url = {http://dx.doi.org/10.1080/15568318.2018.1519746}, 20 | langid = {en} 21 | } 22 | 23 | @article{Kaiser2024, 24 | title = {From Counting Stations to City-Wide Estimates: Data-Driven Bicycle Volume Extrapolation}, 25 | author = {Kaiser, Silke K. 
and Klein, Nadja and Kaack, Lynn H.},
26 |   year = {2024},
27 |   date = {2024},
28 |   journal = {arXiv},
29 |   doi = {10.48550/ARXIV.2406.18454},
30 |   url = {https://arxiv.org/abs/2406.18454}
31 | }
32 | 
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | on:
2 |   workflow_dispatch:
3 |   push:
4 |     branches: main
5 | 
6 | name: Quarto Publish
7 | 
8 | jobs:
9 |   build-deploy:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       contents: write
13 |     steps:
14 |       - name: Check out repository
15 |         uses: actions/checkout@v4
16 | 
17 |       - name: Set up Quarto
18 |         uses: quarto-dev/quarto-actions/setup@v2
19 | 
20 |       - name: Install R
21 |         uses: r-lib/actions/setup-r@v2
22 |         with:
23 |           r-version: '4.5.1'
24 |           use-public-rspm: true
25 | 
26 |       - name: Install R Dependencies
27 |         uses: r-lib/actions/setup-r-dependencies@v2
28 |         with:
29 |           cache-version: 2
30 | 
31 |       - name: Additional Setup
32 |         run: |
33 |           # Install Python (-y avoids interactive prompts in CI):
34 |           sudo apt-get update
35 |           sudo apt-get install -y python3 python3-pip
36 |           python3 -m pip install jupyter jupyter-cache
37 | 
38 |       - name: Render and Publish
39 |         uses: quarto-dev/quarto-actions/publish@v2
40 |         with:
41 |           target: gh-pages
42 |         env:
43 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44 | 
--------------------------------------------------------------------------------
/scripts/install.R:
--------------------------------------------------------------------------------
1 | ## Use Posit Public Package Manager (RSPM) to prefer prebuilt binary CRAN
2 | ## packages. This speeds up installs (no source compilation) in CI, Binder,
3 | ## Gitpod, Pixi etc. See: https://packagemanager.posit.co/
4 | ## pak prefers RSPM-provided binaries where available and falls back to
5 | ## source only when binaries are not available for the platform/version.
6 | rspm_url <- "https://packagemanager.rstudio.com/all/latest"
7 | options(repos = c(CRAN = rspm_url))
8 | 
9 | # Ensure pak is available (install from RSPM)
10 | if (!requireNamespace("pak", quietly = TRUE)) {
11 |   install.packages("pak", repos = rspm_url)
12 | }
13 | 
14 | # Use pak to install the package and dependencies. pak prefers binaries from
15 | # RSPM on supported platforms and will significantly reduce compile time.
16 | if (file.exists("DESCRIPTION")) {
17 |   pak::local_install(ask = FALSE)
18 | } else {
19 |   pak::pak("itsleeds/tds", ask = FALSE)
20 | }
21 | 
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: tds
2 | Title: Support TRAN5340M Transport Data Science
3 | Version: 0.0.0.9000
4 | Authors@R: c(
5 |     person("Robin", "Lovelace", , "R.Lovelace@leeds.ac.uk", role = c("aut", "cre"),
6 |            comment = c(ORCID = "0000-0001-5679-6536")),
7 |     person("Yuanxuan", "Yang", , "Y.Yang6@leeds.ac.uk", role = c("aut"),
8 |            comment = c(ORCID = "0000-0002-7970-2544")),
9 |     person("Malcolm", "Morgan", , "M.Morgan1@leeds.ac.uk", role = c("aut"),
10 |            comment = c(ORCID = "0000-0002-9488-9183")),
11 |     person("Juan", "P. Fonseca-Zamora", , "ts18jpf@leeds.ac.uk", role = c("aut"),
12 |            comment = c(ORCID = "0009-0006-7042-3828"))
13 |   )
14 | Description: 'tds' supports the TRAN5340M Transport Data Science module at the Institute for Transport Studies, University of Leeds. It provides a comprehensive collection of teaching materials, tutorials, and curated resources to facilitate learning. The package also serves as the foundation for a dedicated website that delivers course content and enhances teaching by integrating key dependencies and resources for data processing, visualization, spatial analysis, and transport modelling.
15 | License: GPL-3
16 | Encoding: UTF-8
17 | Roxygen: list(markdown = TRUE)
18 | RoxygenNote: 7.3.2
19 | Imports:
20 |     tidyverse,
21 |     sf,
22 |     quarto,
23 |     stats19,
24 |     nycflights13,
25 |     remotes,
26 |     DT,
27 |     calendar,
28 |     reticulate,
29 |     stplanr,
30 |     spData,
31 |     zoo,
32 |     pct,
33 |     zonebuilder,
34 |     ggspatial,
35 |     dodgr,
36 |     opentripplanner,
37 |     osmextract,
38 |     tmap
39 | 
--------------------------------------------------------------------------------
/s2/homework.qmd:
--------------------------------------------------------------------------------
1 | Hi all,
2 | 
3 | For anyone who wanted to recap on Session 1 or Session 2 content, see the recorded sessions + manuscripts here:
4 | 
5 | - Session 1: [TDS Session 1\_ Introduction to transport data science (RL, MM, ZW, YY)-20250130_100448-Meeting Recording.mp4](https://leeds365-my.sharepoint.com/:v:/g/personal/georl_leeds_ac_uk/EdL4foYtzmJHvY-tco6-H5MB1oxMy5Jl8NfktaCxNzHM8A?e=0GWtPJ&nav=eyJyZWZlcnJhbEluZm8iOnsicmVmZXJyYWxBcHAiOiJTdHJlYW1XZWJBcHAiLCJyZWZlcnJhbFZpZXciOiJTaGFyZURpYWxvZy1MaW5rIiwicmVmZXJyYWxBcHBQbGF0Zm9ybSI6IldlYiIsInJlZmVycmFsTW9kZSI6InZpZXcifX0%3D)
6 | 
7 | - Session 2: [TDS Session 2\_ Getting transport data (RL, YY)-20250206_100118-Meeting Recording.mp4](https://leeds365-my.sharepoint.com/:v:/g/personal/georl_leeds_ac_uk/ET_Auf4171tGp0e3oyj4YtMBEpnVGj_3thfkbCw1tJSSXA?e=xsNUoF&nav=eyJyZWZlcnJhbEluZm8iOnsicmVmZXJyYWxBcHAiOiJTdHJlYW1XZWJBcHAiLCJyZWZlcnJhbFZpZXciOiJTaGFyZURpYWxvZy1MaW5rIiwicmVmZXJyYWxBcHBQbGF0Zm9ybSI6IldlYiIsInJlZmVycmFsTW9kZSI6InZpZXcifX0%3D)
8 | 
9 | Also, please note the updated [homework here (note the new item 4 and bonus 5; if you get stuck or hit any error messages just let me know): https://itsleeds.github.io/tds/s2/#homework](https://itsleeds.github.io/tds/s2/#homework)
10 | 
11 | **You will present the code / .qmd files you wrote as part of this homework to colleagues in the next session, so please come prepared; any visualisations of outputs and questions you would like to ask demonstrators are especially welcome.**
12 | 
13 | Great work everyone, and thanks for engaging
 so well with the content today. I am looking forward to the session on origin-destination data next week.
14 | 
15 | Robin
16 | 
--------------------------------------------------------------------------------
/data/timetable.csv:
--------------------------------------------------------------------------------
1 | summary,description,time,duration,location
2 | "TDS Session 1: Introduction to transport data science (RL, MM, YY)","TDS Session 1: Introduction to transport data science (RL, MM, YY) Located in Michael Sadler SR (LG.15)",2026-01-29T10:00:00Z,3,Michael Sadler SR (LG.15)
3 | TDS deadline 1,Computer set-up,2026-01-30T13:00:00Z,2,Online - Teams
4 | "TDS Session 2: Getting transport data (RL, YY)","TDS Session 2: Getting transport data (RL, YY) Located in Michael Sadler SR (LG.15)",2026-02-05T10:00:00Z,3,Michael Sadler SR (LG.15)
5 | "TDS Session 3: AI for transport planning (RL, YY)","TDS Session 3: AI for transport planning (RL, YY) Located in Michael Sadler SR (LG.15)",2026-02-12T10:00:00Z,3,Michael Sadler SR (LG.15)
6 | "TDS Session 4: Origin-destination data (RL, YY)","TDS Session 4: Origin-destination data (RL, YY) Located in Institute for Transport Studies Room 1.11a-c",2026-02-19T10:00:00Z,3,Institute for Transport Studies Room 1.11a-c
7 | TDS seminar 1,"Seminar 1: Juan Fonseca, University of Leeds: Traffic estimation and transport data visualisation",2026-02-19T14:00:00Z,3,Institute for Transport Studies Room 1.11a-c
8 | TDS deadline 2,Draft portfolio,2026-02-27T13:00:00Z,0,Online - Teams
9 | "TDS Session 5: Routing (MM, RL)","TDS Session 5: Routing (MM, RL) Located in Michael Sadler SR (LG.15)",2026-03-05T10:00:00Z,3,Michael Sadler SR (LG.15)
10 | TDS seminar 2,"Seminar 2: Will Deakin, Network Rail: Network simplification",2026-03-19T10:00:00Z,3,Institute for Transport Studies Room 1.11a-c
11 | "TDS Session 6: Joins, models and publishing your work (RL, MM, YY)","TDS Session 6: Joins, models and publishing your work (RL, MM, YY) Located in Mechanical Engineering LT B (2.37)",2026-04-30T10:00:00Z,3,Mechanical Engineering LT B (2.37)
12 | TDS deadline 3,"Deadline: coursework, 2pm",2026-05-15T13:00:00Z,0,Online - Teams
13 | 
--------------------------------------------------------------------------------
/ROOM_BOOKING_REQUEST.md:
--------------------------------------------------------------------------------
1 | # Room Booking Request for TRAN5340M01 - Transport Data Science
2 | 
3 | ## Module Details
4 | - **Module Code:** TRAN5340M01
5 | - **Module Name:** Transport Data Science
6 | - **Academic Year:** 2025/26 (Semester 2)
7 | - **Staff:** Dr Robin Lovelace, Dr Malcolm Morgan, Dr Yuanxuan Yang
8 | 
9 | ## Room Bookings Required
10 | 
11 | ### Sessions (6 x 3-hour sessions)
12 | **Time:** Thursdays, 10:00-13:00
13 | 
14 | **Michael Sadler SR (LG.15) Multi-mode:**
15 | 1. **Thursday, 29 January 2026** - Week 14 - Session 1: Introduction to transport data science
16 | 2. **Thursday, 5 February 2026** - Week 15 - Session 2: Getting transport data
17 | 3. **Thursday, 12 February 2026** - Week 16 - Session 3: AI for transport planning
18 | 5. **Thursday, 5 March 2026** - Week 19 - Session 5: Routing
19 | 6. **Thursday, 26 March 2026** - Week 22 - Session 6: Joins, models and publishing your work
20 | 
21 | **Institute for Transport Studies Room 1.11a-c:**
22 | 4. **Thursday, 19 February 2026** - Week 17 - Session 4: Origin-destination data
23 | 
24 | ### Seminars
25 | **Institute for Transport Studies Room 1.11a-c:**
26 | 
27 | 7. 
**Thursday, 19 February 2026** - Week 17 - Seminar 1: Juan Fonseca (Traffic estimation and transport data visualisation) - 14:00-17:00 28 | 8. **Thursday, 19 March 2026** - Week 21 - Seminar 2: Will Deakin, Network Rail (Network simplification) - 10:00-11:00 29 | 30 | ## Summary 31 | - **Total bookings:** 8 (6 sessions + 2 seminars) 32 | - **Locations:** 33 | - Michael Sadler SR (LG.15) Multi-mode: Sessions 1, 2, 3, 5, 6 34 | - Institute for Transport Studies Room 1.11a-c: Session 4, Seminar 1, Seminar 2 35 | - **Days:** All Thursdays 36 | - **Weeks:** 14, 15, 16, 17, 19, 21, 22 37 | 38 | ## Notes 39 | - Session 4 (Week 17) has both a regular session (10:00-13:00) and Seminar 1 (14:00-17:00) 40 | - All sessions require computer access for students 41 | - Multi-mode room required to accommodate diverse teaching methods 42 | 43 | --- 44 | *Generated: 21 October 2025* 45 | -------------------------------------------------------------------------------- /s6/slides.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Joins and Aggregations in Transport Data Science" 3 | subtitle: '
🗺
Transport Data Science'
4 | author: "Robin Lovelace"
5 | date: 'University of Leeds'
6 | format: revealjs
7 | bibliography: ../tds.bib
8 | execute:
9 |   eval: false
10 |   echo: true
11 | ---
12 | 
13 | ## Objectives
14 | 
15 | - Understand different types of joins (spatial and key-based)
16 | - Learn how to perform spatial joins with sf and dplyr
17 | - Apply aggregations to summarize data
18 | - Visualize joined datasets
19 | 
20 | ```{r}
21 | #| label: setup
22 | library(sf)
23 | library(dplyr)
24 | library(tmap)
25 | library(stats19)
26 | ```
27 | 
28 | ## What is a Join?
29 | 
30 | ::: incremental
31 | - **Combining datasets** based on common attributes or spatial relationships
32 | - **Spatial joins**: Link points (e.g., crashes) to polygons (e.g., LSOAs) using geometry
33 | - **Key-based joins**: Match IDs across tables (e.g., LSOA codes)
34 | :::
35 | 
36 | ## Spatial Join Example
37 | 
38 | 1. **Load spatial datasets**:
39 | ```{r}
40 | path <- "https://github.com/itsleeds/tds/releases/download/2025/p6-lsoa_boundary_wy.geojson"
41 | lsoa <- read_sf(path)
42 | crashes <- stats19::get_stats19(year = 2023, type = "collision")
43 | crashes_sf <- stats19::format_sf(crashes) # convert crashes to an sf object for the spatial join
44 | ```
45 | 
46 | 2. **Perform spatial join**:
47 | ```{r}
48 | joined <- st_join(lsoa, crashes_sf, join = st_intersects)
49 | ```
50 | 
51 | 3. **Aggregate results**:
52 | ```{r}
53 | agg_data <- joined |>
54 |   group_by(lsoa_id) |>
55 |   summarize(total_crashes = n())
56 | ```
57 | 
58 | ## Key-Based Joins
59 | 
60 | Use `dplyr` functions like `left_join` to combine data by identifiers:
61 | 
62 | ```{r}
63 | # Example: join crash counts with population data; pop_data is assumed to be
64 | # a data frame with lsoa_id and population columns
65 | final <- left_join(agg_data, pop_data, by = "lsoa_id")
66 | ```
67 | 
68 | ## Aggregations
69 | 
70 | - Group data by spatial units (e.g., LSOA)
71 | - Calculate metrics like **crashes per capita**:
72 | 
73 | ```{r}
74 | final <- final |>
75 |   mutate(crashes_per_capita = total_crashes / population)
76 | ```
77 | 
78 | ## Visualization
79 | 
80 | Create maps to visualize crash patterns using `tmap`:
81 | 
82 | ```{r, warning=FALSE}
83 | tmap_mode("plot")
84 | tm_shape(final) +
85 |   tm_polygons("crashes_per_capita")
86 | ```
87 | 
88 | ## References
89 | 
90 | ::: {#refs}
91 | :::
92 | 
--------------------------------------------------------------------------------
/slides/professions.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Professional transport data science workflows and project work"
3 | subtitle: '
🗺
Transport Data Science'
4 | author: "The Transport Data Science Team"
5 | date: 'University of Leeds'
6 | format: revealjs
7 | bibliography: ../tds.bib
8 | execute:
9 |   eval: false
10 |   echo: true
11 | ---
12 | 
13 | 
14 | ## Reminder on Coursework {.incremental}
15 | 
16 | - When's the deadline?
17 | 
18 | 
19 | ```{r}
20 | as.Date("2025-05-16")
21 | ```
22 | 
23 | 
24 | - Remember the [marking criteria](https://itsleeds.github.io/tds/marking-criteria.html):
25 | 
26 | 
27 | ## Data science workflows
28 | 
29 | ```{r, echo=FALSE, eval=FALSE}
30 | download.file("https://d33wubrfki0l68.cloudfront.net/571b056757d68e6df81a3e3853f54d3c76ad6efc/32d37/diagrams/data-science.png", "figures/r4ds-workflow.png") # download.file(), not file.copy(), is needed for remote files
31 | ```
32 | 
33 | Source: [R for Data Science](https://r4ds.had.co.nz/introduction.html) open source book
34 | 
35 | 
36 | ![](https://d33wubrfki0l68.cloudfront.net/571b056757d68e6df81a3e3853f54d3c76ad6efc/32d37/diagrams/data-science.png)
37 | 
38 | # Managing time and prioritising ([Source](https://csgillespie.github.io/efficientR/workflow.html))
39 | 
40 | ![](https://csgillespie.github.io/efficientR/_main_files/figure-html/4-1-1.png)
41 | 
42 | 
43 | 
44 | ## Questions to consider and discuss with the module team {.incremental}
45 | 
46 | Around 1 hour for this, in parallel with solo work on projects, providing time for the module team to talk to each student
47 | 
48 | - What are the strongest aspects of your coursework idea so far? What are the weakest?
49 | - What are the priorities over the next three weeks (break it down into a small number of parts)?
50 | - What do you need to find more literature on?
51 | - What do you need more data on (you should have all the data already)?
52 | - What additional skills do you need (now is a good time to ask)?
53 | 
54 | ## Working on your projects
55 | 
56 | - Plan ahead: what else do you need to do on your project?
57 | - Schedule work: when will you find time to do it?
58 | - Use Microsoft Calendar or similar: put it in the calendar.
59 | - Reproducibility: ensure your .qmd files are reproducible
60 | - Ask for help: what do you need help with?
61 | - Work: get your head down and make use of this time!
62 | 
--------------------------------------------------------------------------------
/s1/s1project/foundations.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | execute:
3 |   eval: false
4 | ---
5 | 
6 | ```{r}
7 | #| results: hide
8 | # load packages
9 | library(tidyverse)
10 | ```
11 | 
12 | 
13 | ```{r}
14 | #| eval: false
15 | #| echo: false
16 | quarto::quarto_render("foundations.qmd", "pdf")
17 | ```
18 | 
19 | 
20 | 
21 | Hello this is some text.
22 | 
23 | ```{r}
24 | casualty_type = c("cat", "dog", "person")
25 | casualty_age = seq(from = 20, to = 60, by = 20)
26 | crashes = data.frame(casualty_type, casualty_age)
27 | plot(crashes$casualty_age)
28 | ```
29 | 
30 | Subsetting.
31 | 
32 | ```{r}
33 | crashes$casualty_type
34 | crashes[[1]]
35 | crashes[2, 1]
36 | ```
37 | 
38 | ```{r}
39 | crashes |>
40 |   select(casualty_type)
41 | 
42 | crashes |>
43 |   filter(casualty_age > 35)
44 | 
45 | crashes |>
46 |   filter(casualty_age - 20 > 35)
47 | 
48 | crashes |>
49 |   ggplot() +
50 |   geom_bar(aes(x = casualty_age, fill = casualty_type))
51 | ```
52 | 
53 | 
54 | ```{r}
55 | ac = stats19::get_stats19(year = 2020, type = "collision")
56 | class(ac)
57 | dim(ac)
58 | ac_2021 = stats19::get_stats19(year = 2021, type = "collision")
59 | nrow(ac)
60 | nrow(ac_2021)
61 | # # After googling "combine 2 data frames" let's try rbind
62 | # ??combine
63 | # ?rbind
64 | ac = rbind(ac, ac_2021)
65 | dim(ac)
66 | ac_datetime = c(ac$datetime, ac_2021$datetime)
67 | length(ac_datetime)
68 | range(ac_datetime)
69 | 
70 | class(ac)
71 | str(ac)
72 | names(ac)
73 | # aggregate this by day to show
74 | # how crash numbers varied over the year
75 | ac_by_day = ac |>
76 |   group_by(date) |>
77 |   summarise(
78 |     n_crashes = n()
79 |   )
80 | ac_by_day |>
81 |   mutate(
82 |     `N. crashes per day` = n_crashes,
83 |     `Week average` = zoo::rollmean(n_crashes, 7, na.pad = TRUE),
84 |     Date = date,
85 |   ) |>
86 |   ggplot(aes(x = Date, y = `N. crashes per day`)) +
87 |   geom_point(alpha = 0.1) +
88 |   ylim(c(0, NA)) +
89 |   # geom_smooth() +
90 |   # weekly rolling average
91 |   geom_line(aes(Date, `Week average`), colour = "red") +
92 |   theme_minimal()
93 | ```
94 | 
95 | ```{r}
96 | # Updated plot with title and legend...
97 | ac_by_day |>
98 |   mutate(
99 |     `N. crashes per day` = n_crashes,
100 |     `Week average` = zoo::rollmean(n_crashes, 7, na.pad = TRUE),
101 |     Date = date,
102 |   ) |>
103 |   ggplot(aes(x = Date, y = `N. crashes per day`)) +
104 |   geom_point(alpha = 0.1) +
105 |   ylim(c(0, NA)) +
106 |   # geom_smooth() +
107 |   # weekly rolling average
108 |   geom_line(aes(Date, `Week average`, colour = "Week average")) +
109 |   theme_minimal() +
110 |   labs(
111 |     colour = "Legend"
112 |   ) +
113 |   scale_colour_manual(values = c("Week average" = "red")) +
114 |   ggtitle("Collisions/day, 2020 to 2021") +
115 |   theme(
116 |     legend.position = "bottom"
117 |   )
118 | ```
119 | 
120 | # Python example
121 | 
122 | ```{python}
123 | casualty_type_py = ["a", "B", "c"]
124 | casualty_type_py
125 | ```
126 | 
127 | 
--------------------------------------------------------------------------------
/d1/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Welcome and set-up"
3 | toc: true
4 | execute:
5 |   cache: true
6 |   output: false
7 |   eval: false
8 | ---
9 | 
10 | Dear Transport Data Science students,
11 | 
12 | As per your [timetable](https://itsleeds.github.io/tds/schedule.html), the first session is Thursday 30th January, from 10:00 to 13:00.
13 | 
14 | **Location:** [Richard Hughes Cluster](https://maps.app.goo.gl/zn9Hw93hVDk2ALNw8), in the "Cloth Workers Link Building". If you're wondering where that is, you're not alone, I'm not 100% sure. So the first challenge of the module is to get there by 09:50, so you have time to find a seat before the 10:00 start.
15 | 
16 | ## Homework for next week (deadline: Friday 31st January, 14:00)
17 | 
18 | 1. Ensure that you have the timetable stored safely in your calendar, so you do not miss important sessions or seminars.
19 | 2. 
Ensure that you have the necessary software installed on your computer and that you have tested that you can use it with the datasets we will be using in the course; see https://itsleeds.github.io/tds/#software-requirements-and-installation for guidance on installing the software you need. 20 |    - If you have any issues with the software installation, please get in touch with me as soon as possible. 21 | 22 | Test that you have the necessary software installed by running the following code in R: 23 | 24 | ```{r} 25 | if (!requireNamespace("remotes", quietly = TRUE)) { 26 |   install.packages("remotes") 27 | } 28 | remotes::install_cran("tidyverse") 29 | remotes::install_cran("osmextract") 30 | library(tidyverse) 31 | library(osmextract) 32 | library(sf) 33 | ``` 34 | 35 | ```{r} 36 | its = oe_get("ITS Leeds", download_directory = tempdir()) 37 | ``` 38 | 39 | ```{r} 40 | #| label: its 41 | figure = its |> 42 |   ggplot() + 43 |   geom_sf(aes(colour = highway)) + 44 |   theme_void() 45 | # Save figure 46 | ggsave("its.png", figure, width = 6, height = 4) 47 | ``` 48 | 49 | ```{r} 50 | #| label: browse 51 | #| eval: false 52 | browseURL("its.png") 53 | ``` 54 | 55 | 56 | 57 | Show the map by executing the following code, which simply prints the map to the screen: 58 | 59 | ```{r} 60 | #| label: its-map 61 | #| output: true 62 | figure 63 | ``` 64 | 65 | If you see a map of the area around the Institute for Transport Studies, then you are ready for the first session. If you have any issues, please get in touch with me as soon as possible. 66 | 67 | 3. Take a look at the [reading list at https://itsleeds.github.io/tds/reading.html](https://itsleeds.github.io/tds/reading.html) and have a read of the Transportation chapter of the Geocomputation with R book (you will find the link to the book in the reading list). 68 | 69 | 4. Sign up for a GitHub account if you do not already have one, and ensure that you have access to the [TDS GitHub repository](https://github.com/itsleeds/tds) where you will find the course materials. 70 |    - Please send me an email with your GitHub username so I can add you to the private repository that supports the course. 71 | -------------------------------------------------------------------------------- /d3/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Coursework submission 2: Data science project report" 3 | toc: true 4 | execute: 5 |   cache: true 6 | number-sections: true 7 | --- 8 | 9 | ## Overview 10 | 11 | This is the final assessed coursework submission for the Transport Data Science module. The deadline is **16th May 2025, 14:00**. 12 | 13 | The purpose of the coursework is to provide a professional-quality report on the data science project you have worked on. 14 | You should include a range of techniques and methods you have learned during the module, and apply them to a real-world transport problem. 15 | The project report should be a cohesive whole, however, not a disjointed portfolio of separate tasks. 16 | 17 | A good way to think about the project report is to imagine that you have worked on an important data science project in a large organisation and you are presenting your findings, with a view to impressing your audience with your skills, clearly communicating your results, and providing actionable insights that motivate change. 
18 | 19 | ## Key Requirements 20 | 21 | - **Length**: Maximum 10 pages (excluding the [coversheet](https://leeds365-my.sharepoint.com/:w:/g/personal/georl_leeds_ac_uk/EV3pLJQl8HRMswfxm6sduyoBfOcmVx_UICdlkaPBQuiJPw?e=pipgZ6), references, acknowledgements and appendices) 22 |   - See the template in the course GitHub repository at github.com/itsleeds/tds in folder/file [d2/template.qmd](https://github.com/itsleeds/tds/blob/main/d2/template.qmd), which includes the coversheet 23 | - **Word count**: Maximum 3,000 words (excluding tables, code, references, and captions) 24 | - **Format**: Submit both a PDF file and the source .qmd file in a .zip file 25 | - **File size**: Maximum 40 MB for the .zip file 26 | - **Submission**: Via Minerva (Turnitin) 27 | 28 | ## Report Structure 29 | 30 | {{< include report-structure.qmd >}} 31 | 32 | ## Assessment Criteria 33 | 34 | Marks will be awarded according to the [marking criteria document](../marking-criteria.html). 35 | 36 | ## Technical Requirements 37 | 38 | - Write the report in a Quarto document (`.qmd` file) 39 |   - See the template in the course GitHub repository at github.com/itsleeds/tds in folder/file [d2/template.qmd](https://github.com/itsleeds/tds/blob/main/d2/template.qmd). 40 | 41 |   - See the rendered results at [itsleeds.github.io/tds/d2/template](https://itsleeds.github.io/tds/d2/template) (html version) and [github.com/itsleeds/tds/releases/download/2025/template.pdf](https://github.com/itsleeds/tds/releases/download/2025/template.pdf) (pdf version) 42 | - Include all necessary code for reproducibility 43 | - Document any external data sources 44 | - Follow R coding style guidelines 45 | 46 | ## Academic Integrity 47 | 48 | - Clearly acknowledge any use of AI tools (GREEN category - encouraged) 49 | - Properly cite all sources 50 | - Ensure you understand and can explain all submitted work 51 | - Document any collaboration or assistance received 52 | 53 | For questions or clarifications, please use the module Teams channel or contact the module leader. 
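As a final practical note, the required `.zip` file can be created directly from R, as in the minimal sketch below (the file names are illustrative, following the `TRAN5340M_StudentIDNumber` naming convention described in the assessment overview):

```{r}
#| eval: false
# Bundle the rendered PDF and the source .qmd into one .zip for submission
# (illustrative file names; use your own student ID):
zip(
  zipfile = "TRAN5340M_201234567.zip",
  files = c("TRAN5340M_201234567.pdf", "TRAN5340M_201234567.qmd")
)
```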
-------------------------------------------------------------------------------- /_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 |   type: website 3 |   output-dir: docs 4 | 5 | website: 6 |   title: "Transport Data Science" 7 |   sidebar: 8 |     background: primary 9 |     # logo: "images/logo.png" 10 |     pinned: true 11 |     align: center 12 |     tools: 13 |       - icon: github 14 |         href: https://github.com/itsleeds/tds 15 |         text: GitHub 16 |     style: docked 17 |     contents: 18 |       - href: index.qmd 19 |         text: Home 20 |       - href: https://itsleeds.github.io/tds/#software-requirements-and-installation 21 |         text: Install required software 22 |       - href: schedule.qmd 23 |         text: Schedule 24 |       - href: https://github.com/ITSLeeds/TDS/discussions 25 |         text: Forum 26 |       - href: marking-criteria.qmd 27 |         text: Marking Criteria 28 |       - href: reading.qmd 29 |         text: Reading list 30 |       - href: minihack-transport-data.qmd 31 |         text: Transport Data Minihack 8th May 32 |       - href: dstp.qmd 33 |         text: Data Science for Transport Planning 2 day course 34 |       - href: reproducible-road-safety-workshop.qmd 35 |         text: Reproducible Road Safety Workshop 36 |       - text: Sessions 37 |         contents: 38 |           - href: d1 39 |             text: Introduction and setup 40 |           - href: s1 41 |             text: Session 1 42 |           - href: s2 43 |             text: Session 2 44 |           - href: s3 45 |             text: "Session 3: AI for Transport Planning" 46 |           - href: s4 47 |             text: Session 4 48 |           - href: s5 49 |             text: Session 5 50 |           - href: sem1 51 |             text: Seminar 1 52 |           - href: d2 53 |             text: Formative assessment 54 |           - href: sem2 55 |             text: Seminar 2 56 |           - href: s6 57 |             text: "Session 6: Joins, models and publishing" 58 |           - href: d3 59 |             text: Coursework submission 60 | 61 |           # href: p5 62 |           #   text: Practical 5 63 |           # - href: p6 64 |           #   text: Practical 6 65 |   # announcement: 66 |   #   icon: info-circle 67 |   #   dismissable: true 68 |   #   content: "**Alert** ensure you have completed the second homework, described at the end of [itsleeds.github.io/tds/s1](https://itsleeds.github.io/tds/s1/#homework) by this Thursday 6th February." 69 |   #   content: "**Homework to present at the next session**: Ensure you have completed the second homework, described at the end of [itsleeds.github.io/tds/s3](https://itsleeds.github.io/tds/s3/#homework) by **Thursday 20th February**. Note: there will be a session (10:00-13:00) AND a seminar (14:00-17:00) on this day. Location: Institute for Transport Studies, room 1.11." 70 |   #   content: "**Deadline for the [formative assessment](https://itsleeds.github.io/tds/d2) is Friday 28th February.**" 71 |   #   type: primary 72 |   #   position: below-navbar 73 | 74 | format: 75 |   html: 76 |     theme: cosmo 77 |     # css: styles.css 78 |     toc: true 79 |     number-sections: true 80 |     csl: elsevier-harvard.csl 81 | 82 | execute: 83 |   freeze: auto 84 | -------------------------------------------------------------------------------- /dstp.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data Science for Transport Planning: 2 day course" 3 | --- 4 | 5 | 6 | ## Note: tickets are now on sale! Get yours at [store.leeds.ac.uk](https://store.leeds.ac.uk/conferences-and-events/environment/institute-of-transport-studies/0925dstp2-data-science-for-transport-planning) {-} 7 | 8 | ## Course Overview 9 | 10 | Based on demand, we're organising a 2-day course teaching modern data science skills for transport planning, focussed on the needs of practitioners. 11 | This course first took place on the 18th and 19th of September 2025. 
12 | 13 | Course materials can be found at the [DSTP course homepage](https://tdscience.github.io/dstp/). 14 | 15 | ## Learning Objectives 16 | 17 | - Understand the role of data science in transport planning. 18 | - Learn how to find, import, clean, and analyse transport data. 19 | - Develop skills in data visualisation and reporting. 20 | 21 | ## Prerequisites 22 | 23 | - Experience with transport planning concepts and datasets, such as origin-destination data and route networks. 24 | - Basic programming skills in R, Python or similar. 25 | - A laptop set up with R and RStudio (recommended), or with a Python distribution such as Anaconda and an editor such as VS Code or Jupyter Notebook. 26 | 27 | ## Schedule 28 | 29 | ### Day 1: Introduction to R/RStudio 30 | 31 | - 10:00 - 11:00 Introduction to Data Science for Transport Planning 32 | - 11:00 - 12:30 Finding, importing and cleaning transport datasets 33 |   - Origin-destination datasets 34 |   - OpenStreetMap (OSM) and Ordnance Survey (OS) OpenRoads datasets 35 |   - Stats19 road safety data 36 | - 12:30 - 13:30 lunch 37 | - 13:30 - 15:00 Origin-destination data analysis 38 | - 15:00 - 15:15 break and refreshments 39 | - 15:15 - 17:00 Routing and route network analysis 40 |   - This will cover setting up an interface to a routing engine and using it to calculate routes and distances using GTFS data. 41 | 42 | ### Day 2: 43 | 44 | Sessions: 45 | 46 | - 09:00 - 10:45 spatio-temporal data 47 |   - Demonstration of open-access OD data with hourly resolution 48 |   - Demonstration with stats19 data for road safety analysis 49 | - 10:45 - 11:15 break and refreshments 50 | - 11:15 - 12:30 OD Transport data visualisation 51 | - 12:30 - 13:30 lunch 52 | - 13:30 - 15:00 Best practices for data science in transport planning 53 |   - Version control with Git and GitHub 54 |   - Reproducible research with Quarto 55 | - 15:00 - 16:00 Advanced topics 56 |   - Visualising large datasets 57 |   - Route network integration 58 |     - We'll present ways to join different networks, e.g. OSM networks 59 |   - Deploying your work as web applications 60 | 61 | ## Registration 62 | 63 | See [store.leeds.ac.uk](https://store.leeds.ac.uk/conferences-and-events/environment/institute-of-transport-studies/0925dstp2-data-science-for-transport-planning) for registration details. 64 | 65 | ## Contact 66 | 67 | For inquiries, please contact [Robin Lovelace](https://environment.leeds.ac.uk/transport/staff/953/professor-robin-lovelace). 68 | 69 | We look forward to seeing you at the course! -------------------------------------------------------------------------------- /datahack.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Transport Data Minihack 2026" 3 | --- 4 | 5 | 6 | ```{r} 7 | #| eval: false 8 | #| echo: false 9 | #| label: docx 10 | quarto::quarto_render( 11 |   "datahack.qmd", 12 |   output_format = "docx", 13 |   output_file = "datahack-2026-for-comment.docx" 14 | ) 15 | file.copy("docs/datahack-2026-for-comment.docx", "~/../OneDrive - University of Leeds/shortcourses", overwrite = TRUE) 16 | ``` 17 | 18 | # Introduction 19 | 20 | This event is designed to build data, coding and reproducible research skills for Institute for Transport Studies (ITS) staff and students. 21 | It is also specifically designed to support ITS MSc students with their dissertation projects by providing a space to ask questions about importing, processing and visualising data. 22 | 23 | It will take place on Thursday 7th May 2026. 
24 | It is open to staff and students at ITS. 25 | Contact the organisers (Robin Lovelace) if you are not based at the University of Leeds and would like to join in. 26 | 27 | See [here to sign-up](https://forms.office.com/e/A1ABvMuJAG). 28 | 29 | ## Objectives 30 | 31 | - To create a supportive space for participants to ask questions about working with datasets in general and using data science techniques for working with transport datasets in particular 32 | - Get support importing datasets for MSc dissertations and other projects 33 | - Data wrangling with the `tidyverse` R package and other tools 34 | - Learning the general skill of data visualisation and gaining specific experience working with tap-in/tap-out data 35 | - Showcase the potential of open data (transparency, participation, research) and reproducible/open workflows 36 | 37 | ## Prerequisites 38 | 39 | - None: just an interest in transport data and a willingness to learn 40 | - Useful: if you have experience with GitHub, R, Python or other tools for reproducible data analysis you can join in with the coding; see the [Transport Data Science module](https://itsleeds.github.io/tds) for more details 41 | 42 | ## Schedule 43 | 44 | - 13:00 - 13:30: Introduction to importing, processing and visualising data with RStudio 45 |   - An introduction to RStudio 46 |   - An introduction to Quarto for reproducible reports 47 |   - An example with origin-destination data in Leeds 48 | - 13:30 - 14:00: Importing your datasets 49 |   - Installing any necessary packages 50 |   - Requesting support for any issues 51 | - 14:00 - 14:05: Break 52 | - 14:05 - 15:00: Solo working on your datasets, asking questions, and getting support 53 | - 15:15 - 15:45: Presentation of the results (optional for participants) 54 |   - An opportunity for participants to share what they learned 55 | - 15:45 - 16:00: Networking and sharing ideas 56 | 57 | ## Prizes 58 | 59 | The prize will be Geocomputation with Python or Geocomputation with R (second edition). 60 | Prizes will be awarded based on importing, analysing and helping to document the challenge datasets (see Challenges section below): 61 | 62 | - Best technical implementation and code 63 | - Most creative or impactful use of data 64 | 65 | The presentations will be assessed by the organisers. 
66 | 67 | # Challenges 68 | 69 | - Write code to automatically download datasets used in your dissertation (if available online) and import them into RStudio 70 | - Subset the dataset to a particular area, time period or other criteria 71 | - Create a Quarto report with a summary of the data and analysis -------------------------------------------------------------------------------- /d2/references.bib: -------------------------------------------------------------------------------- 1 | 2 | @book{worldhealthorganization2018, 3 | title = {Global Status Report On Road Safety 2018}, 4 | author = {World Health Organization, }, 5 | year = {2018}, 6 | date = {2018}, 7 | url = {https://www.who.int/publications/i/item/9789241565684}, 8 | note = {OCLC: 1084537103 9 | Citation Key: worldhealthorganization{\_}global{\_}2018}, 10 | address = {S.l.}, 11 | langid = {en} 12 | } 13 | 14 | @article{tait_contraflows_2023, 15 | title = {Contraflows and Cycling Safety: {{Evidence}} from 22~Years of Data Involving 508 One-Way Streets}, 16 | shorttitle = {Contraflows and Cycling Safety}, 17 | author = {Tait, Caroline and Beecham, Roger and Lovelace, Robin and Barber, Stuart}, 18 | year = {2023}, 19 | month = jan, 20 | journal = {Accident Analysis \& Prevention}, 21 | volume = {179}, 22 | pages = {106895}, 23 | issn = {0001-4575}, 24 | doi = {10.1016/j.aap.2022.106895}, 25 | urldate = {2022-11-15}, 26 | abstract = {Contraflow cycling on one-way streets is a low cost intervention that research shows can improve the cycling experience and increase participation. Evidence from several studies suggest that cyclists on contraflows have a lower crash risk. However, implementing contraflow cycling is often controversial, including in the United Kingdom (UK). In this paper we examine whether contraflow cycling on one-way streets alters crash or casualty rates for pedal cyclists. Focusing on inner London boroughs between 1998 and 2019, we identified 508 road segments where contraflow cycling was introduced on one-way streets. We identified road traffic crashes occurring within 10~m of these segments and labelled them as pre-contraflow, contraflow or contraflow removed crashes. We calculated rates using the number of crashes or casualties divided by the time exposed and generated 95~\% confidence intervals using bootstrap resampling. We adjusted the rates for changes in cordon cycling volume and injury severity reporting. There were 1498 crashes involving pedal cyclists: 788 pre-contraflow, 703 contraflow and 7 following contraflow removal. There was no change in adjusted overall pedal cyclist crash or casualty rates when contraflow cycling was introduced. Proximity to a junction doubled the crash rate. The crash rate when pedal cyclists were travelling contraflow was the same as those travelling with flow. We have found no evidence that introducing contraflow cycling increases the crash or casualty rate for pedal cyclists. It is possible that such rates may indeed fall when contraflow cycling is introduced if more accurate spatio-temporal cycling volume data was available. 
We recommend all one-way streets are evaluated for contraflow cycling but encourage judicious junction design and recommend UK legislative change for mandatory-two-way cycling on one-way streets unless exceptional circumstances exist.}, 27 |   copyright = {CC0 1.0 Universal Public Domain Dedication}, 28 |   langid = {english}, 29 |   keywords = {Contraflow,Crash,Cycling,Infrastructure,One-way streets}, 30 |   file = {C:\Users\georl_admin\Zotero\storage\9AIST5ZV\S000145752200330X.html} 31 | } 32 | 33 | @article{lovelace_who_2016, 34 |   title = {Who, Where, When: The Demographic and Geographic Distribution of Bicycle Crashes in {{West Yorkshire}}}, 35 |   author = {Lovelace, Robin and Roberts, Hannah and Kellar, Ian}, 36 |   date = {2016}, 37 |   journaltitle = {Transportation Research Part F: Traffic Psychology and Behaviour}, 38 |   series = {Bicycling and Bicycle Safety}, 39 |   volume = {41, Part B}, 40 |   issn = {13698478}, 41 |   doi = {10.1016/j.trf.2015.02.010}, 42 |   url = {http://eprints.whiterose.ac.uk/83930/}, 43 |   keywords = {Cycling,Exposure,Geographical factors,Risk,Safety}, 44 |   file = {/home/robin/Zotero/storage/8GIWK5II/Lovelace et al. - 2016 - Who, where, when the demographic and geographic d.pdf} 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![pages-build-deployment](https://github.com/itsleeds/tds/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/itsleeds/tds/actions/workflows/pages/pages-build-deployment) 3 | [![Open in GitHub 4 | Codespaces](https://img.shields.io/badge/Open%20in-GitHub%20Codespaces-blue?logo=github)](https://github.com/codespaces/new/itsleeds/tds?quickstart=1) 5 | ![GitHub Downloads (all assets, all 6 | releases)](https://img.shields.io/github/downloads/itsleeds/tds/total) 7 | [![Docker 8 | Pulls](https://img.shields.io/badge/Docker:_ghcr.io-image_ghcr)](https://github.com/itsleeds/tds/pkgs/container/tds) 9 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/itsleeds/tds/HEAD) 10 | 11 | 12 | This repo contains code for the Transport Data Science module at the 13 | Institute for Transport Studies, University of Leeds. See the website at 14 | https://itsleeds.github.io/tds/ and a summary of the catalogue entry at 15 | [leeds.ac.uk](https://catalogue.leeds.ac.uk/Module/TP/TRAN/5340M/202526) 16 | 17 | ## Quickstart 18 | 19 | 20 | See the course website at 21 | [itsleeds.github.io/tds](https://itsleeds.github.io/tds/) 22 | 23 | The quickest way to get started with the code for many people will be to 24 | use GitHub Codespaces. Click the [Open in GitHub 25 | Codespaces](https://github.com/codespaces/new/itsleeds/tds?quickstart=1) 26 | button above to get started. 27 | 28 | Alternatively you can launch this repository on Binder (notebooks). 29 | 30 | If you’re using VS Code and have Docker installed you can open the 31 | project in a Devcontainer by pressing Ctrl+Shift+P, typing in 32 | “Devcontainer”, and selecting “Remote-Containers: Reopen in Container”. 33 | 34 | ## Cloning and contributing 35 | 36 | We welcome contributions! 37 | 38 | To fork and clone the repo, use the following commands: 39 | 40 | ``` sh 41 | # install the gh command line tool: https://cli.github.com/ 42 | # fork the repo and clone your fork in one step: 43 | gh repo fork itsleeds/tds --clone 44 | code tds # to open in VS Code, or open in your preferred editor 45 | # make changes 46 | git add . 
47 | git status # to check what you've changed 48 | git commit -m "your message" 49 | git push 50 | gh pr create # to create a pull request 51 | ``` 52 | 53 | Please create an issue before contributing, so we can discuss the 54 | changes you’d like to make. 55 | 56 | 57 | 58 | You can create and work on an issue with the following commands: 59 | 60 | ``` sh 61 | gh repo clone itsleeds/tds 62 | cd tds # or code tds to open with VS Code 63 | gh issue create # to create an issue 64 | gh issue develop 123 --checkout # to create a branch and start working on issue 123 65 | # make changes 66 | git add . 67 | git commit -m "your message" 68 | git push 69 | gh pr create # to create a pull request 70 | ``` 71 | 72 | ## Reproducing the website 73 | 74 | To reproduce the website, you can use the following command in R: 75 | 76 | ``` r 77 | if (!requireNamespace("remotes", quietly = TRUE)) { 78 |   install.packages("remotes") 79 | } 80 | remotes::install_github("itsleeds/tds") 81 | ``` 82 | 83 | ``` r 84 | quarto::quarto_preview() 85 | ``` 86 | 87 | This is the same as running the following command in the system 88 | terminal: 89 | 90 | ``` bash 91 | quarto preview 92 | ``` 93 | 94 | ## Archive 95 | 96 | See an archived version of the repo, before we switched to using Quarto, 97 | at https://github.com/itsleeds/tdsarchive 98 | 99 | ## Setup 100 | 101 | 
102 | 103 | To set it up we used commands such as: 104 | 105 | ``` r 106 | usethis::use_description() 107 | usethis::use_package("stats19") 108 | usethis::use_package("DT") 109 | usethis::use_package("quarto") 110 | usethis::use_package("zonebuilder") 111 | ``` 112 | 113 | You can save presentations as PDF files with `decktape`; see the source `README.qmd` for the command. 114 | 115 | We use the Harvard citation style, added as follows: 116 | 117 | ``` bash 118 | wget https://github.com/citation-style-language/styles/raw/refs/heads/master/elsevier-harvard.csl 119 | ``` 120 | 121 | See documentation on the Quarto website for info on publishing. 122 | -------------------------------------------------------------------------------- /minihack-transport-data.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Transport Data Minihack: Data Challenges" 3 | --- 4 | 5 | # Introduction 6 | 7 | This event is designed to build data, coding and reproducible research skills for Institute for Transport Studies staff and students. Please contact the organisers (Robin Lovelace) if you are not based at the University of Leeds and would like to join in. 8 | It will take place on the 8th May as a pre-event before a lecture on data science and is open to staff and students at the University of Leeds and transport professionals. 9 | See [here to sign-up](https://forms.office.com/e/fpdE2MJHF5). 10 | 11 | ## Objectives 12 | 13 | - Reactivate the Transport Data Science Hackathons 14 | - Facilitate learning and collaboration among participants 15 | - Outcomes for participants: 16 |   - Learning basics of packaging and modular coding 17 |   - Data wrangling with tidyverse 18 |   - Learning the general skill of data visualisation and gaining specific experience working with tap-in/tap-out data 19 | - Demonstrate the potential of open data (transparency, participation, research) and reproducible/open workflows. 20 | 21 | ## Prerequisites 22 | 23 | - None: just an interest in transport data and a willingness to learn 24 | - Useful: if you have experience with GitHub, R, Python or other tools for reproducible data analysis you can join in with the coding; see the [Transport Data Science module](https://itsleeds.github.io/tds) for more details 25 | 26 | ## Schedule 27 | 28 | - 13:00 - 13:30: Presentation of the challenges 29 |   - Transmilenio: Victor Cantillo García 30 |   - Bring your own data (BYD) 31 |     - 5 minute pitches by anyone who wants to work on their own data challenge 32 | - 13:30 - 14:00: Importing the data 33 |   - Installing any necessary packages 34 |   - Requesting support for any issues 35 | - 14:00 - 14:05: Break 36 | - 14:05 - 15:00: The hackathon 37 | - 15:15 - 15:45: Presentation of the results 38 | - 15:45 onwards: Networking and lecture (optional, see [ticketsource.us for tickets](https://www.ticketsource.com/whats-on/leeds/esther-simpson-building-lt-1-01/data-science-for-future-proof-transport-planning-inaugural-lecture-by-professor-robin-lovelace/2025-05-08/16:30/t-avnjvar)) 39 | 40 | ## Prizes 41 | 42 | The prize will be Geocomputation with Python or Geocomputation with R (second edition). 43 | Prizes will be awarded based on importing, analysing and helping to document the challenge datasets (see Challenges section below): 44 | 45 | - Best technical implementation and code 46 | - Most creative or impactful use of data 47 | 48 | The presentations will be assessed by the organisers. 
49 | 50 | # Data Challenges 51 | 52 | ## Transmilenio data 53 | 54 | - TransMilenio (TM) is the organisation in charge of managing all components of Bogotá's integrated public transport system. 55 | - TM publishes a lot of their data for public use, in line with the open data policy of Bogotá. 56 | - Data includes: 57 |   - **Spatial:** GTFS, station location and lines of BRT, regular buses, and cable lines. 58 |   - **Counts:** Raw daily tap-in records, and aggregated boarding / alighting and exit counts by station and 15-minute interval. 59 | 60 | ### Motivation: 61 | 62 | - TM published [some useful maps](https://datosabiertos-transmilenio.hub.arcgis.com/apps/216b13bdc3d84370acac9ceac07836c8/explore) but they are not easily reproducible. 63 | - Accessing the data is not straightforward as the count information is saved in individual `.csv` files by day. 64 | 65 | ### Goal: 66 | 67 | - Develop a set of functions that can be integrated into an R library to access and analyse the open data published by TM. 68 | 69 | ## Origin destination data 70 | 71 | - See https://github.com/itsleeds/2021-census-od-data for 2021 OD data from the Census 72 | 73 | ## Bring your own data 74 | 75 | Participants are welcome to bring their own data to the event. 76 | Please mention the dataset in the sign-up form (see link above). 77 | 78 | # Challenges 79 | 80 | - Write reproducible code for getting and analysing parts of the dataset. 81 | - Present a new case study using the data. 82 | - **Bonus:** Generalise some of your work by creating a well-documented script or function that can be used by others. The documentation can be with comments (any content preceded by the hashtag `#` symbol in `R` and `Python`) or (advanced) function documentation. 83 | - Create a visualisation of the dataset to generate new insight or tell a story 84 | -------------------------------------------------------------------------------- /README.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | format: gfm 3 | --- 4 | 5 | [![pages-build-deployment](https://github.com/itsleeds/tds/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/itsleeds/tds/actions/workflows/pages/pages-build-deployment) 6 | [![Open in GitHub Codespaces](https://img.shields.io/badge/Open%20in-GitHub%20Codespaces-blue?logo=github)](https://github.com/codespaces/new/itsleeds/tds?quickstart=1) 7 | ![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/itsleeds/tds/total) 8 | [![Docker Pulls](https://img.shields.io/badge/Docker:_ghcr.io-image_ghcr)](https://github.com/itsleeds/tds/pkgs/container/tds) 9 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/itsleeds/tds/HEAD) 10 | 11 | This repo contains code for the Transport Data Science module at the Institute for Transport Studies, University of Leeds. 12 | See the website at https://itsleeds.github.io/tds/ and a summary of the catalogue entry at [leeds.ac.uk](https://catalogue.leeds.ac.uk/Module/TP/TRAN/5340M/202526) 13 | 14 | ## Quickstart 15 | 16 | See the course website at [itsleeds.github.io/tds](https://itsleeds.github.io/tds/) 17 | 18 | The quickest way to get started with the code without installing anything is to use GitHub Codespaces. Click the [Open in GitHub Codespaces](https://github.com/codespaces/new/itsleeds/tds?quickstart=1) button above to get started (requires [signing up for a GitHub account](https://github.com/join)). 
19 | 20 | Alternatively you can launch this repository on Binder (notebooks). 21 | 22 | If you're using VS Code and have Docker installed you can open the project in a Devcontainer by pressing Ctrl+Shift+P, typing in "Devcontainer", and selecting "Remote-Containers: Reopen in Container". 23 | 24 | ## Cloning and contributing 25 | 26 | We welcome contributions! 27 | 28 | To fork and clone the repo, use the following commands: 29 | 30 | ```sh 31 | # install the gh command line tool: https://cli.github.com/ 32 | # fork the repo and clone your fork in one step: 33 | gh repo fork itsleeds/tds --clone 34 | code tds # to open in VS Code, or open in your preferred editor 35 | # make changes 36 | git add . 37 | git status # to check what you've changed 38 | git commit -m "your message" 39 | git push 40 | gh pr create # to create a pull request 41 | ``` 42 | 43 | Please create an issue before contributing, so we can discuss the changes you'd like to make. 44 | 45 | 46 | 47 | You can create and work on an issue with the following commands: 48 | 49 | ```sh 50 | gh repo clone itsleeds/tds 51 | cd tds # or code tds to open with VS Code 52 | gh issue create # to create an issue 53 | gh issue develop 123 --checkout # to create a branch and start working on issue 123 54 | # make changes 55 | git add . 56 | git commit -m "your message" 57 | git push 58 | gh pr create # to create a pull request 59 | ``` 60 | 61 | ## Reproducing the website 62 | 63 | To reproduce the website, you can use the following command in R: 64 | 65 | ```{r} 66 | #| eval: false 67 | if (!requireNamespace("remotes", quietly = TRUE)) { 68 |   install.packages("remotes") 69 | } 70 | remotes::install_github("itsleeds/tds") 71 | ``` 72 | 73 | ```{r} 74 | #| eval: false 75 | quarto::quarto_preview() 76 | ``` 77 | 78 | This is the same as running the following command in the system terminal: 79 | 80 | ```bash 81 | quarto preview 82 | ``` 83 | 84 | 85 | ## Archive 86 | 87 | See an archived version of the repo, before we switched to using Quarto, at https://github.com/itsleeds/tdsarchive 88 | 89 | ## Setup 90 | 91 | 
92 | 93 | To set it up we used commands such as: 94 | 95 | ```{r} 96 | #| eval: false 97 | usethis::use_description() 98 | usethis::use_package("stats19") 99 | usethis::use_package("DT") 100 | usethis::use_package("quarto") 101 | usethis::use_package("zonebuilder") 102 | ``` 103 | 104 | You can save presentations as PDF files with the following command: 105 | 106 | ```{bash} 107 | #| eval: false 108 | #| echo: false 109 | cd docs/slides 110 | docker run --rm -t -v .:/slides astefanutti/decktape -s 1280x720 generic https://itsleeds.github.io/tds/slides/intro.html intro.pdf 111 | firefox intro.pdf 112 | ``` 113 | 114 | We use the Harvard citation style, added as follows: 115 | 116 | ```bash 117 | wget https://github.com/citation-style-language/styles/raw/refs/heads/master/elsevier-harvard.csl 118 | ``` 119 | 120 | 121 | See documentation on the Quarto website for info on publishing. 122 | 123 | -------------------------------------------------------------------------------- /scripts/time-render.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # Simple timing script for Quarto .qmd files using the 'timing' profile. 5 | # Usage: ./scripts/time-render.sh [file1.qmd file2.qmd ...] 6 | # If no files are given, it will time a small default set (index.qmd and s*/index.qmd). 7 | 8 | OUT_DIR="$(pwd)/scripts/_timing_logs" 9 | RESULT_CSV="$(pwd)/scripts/timing-results.csv" 10 | mkdir -p "${OUT_DIR}" 11 | PROFILE="timing" 12 | 13 | 14 | 15 | if [ "$#" -eq 0 ]; then 16 |   # Default targets: top-level index and session indexes 17 |   FILES=(index.qmd s*/index.qmd) 18 | else 19 |   FILES=("$@") 20 | fi 21 | 22 | echo "file,start_ts,end_ts,duration_seconds,exit_code,log" > "${RESULT_CSV}" 23 | 24 | for f in ${FILES[@]}; do 25 |   # Skip if glob didn't match 26 |   if [ ! -e "$f" ]; then 27 |     echo "Skipping missing file: $f" 28 |     continue 29 |   fi 30 | 31 |   base=$(basename "$f" .qmd) 32 |   ts_start=$(date -u +%Y-%m-%dT%H:%M:%SZ) 33 |   logfile="${OUT_DIR}/${base}-$(date -u +%Y%m%dT%H%M%SZ).log" 34 | 35 |   echo "Rendering $f (log: $logfile)" 36 | 37 |   # Measure time with /usr/bin/time for real elapsed seconds 38 |   # Use a subshell to capture exit code 39 |   { /usr/bin/time -f "%e" -o /tmp/tds_time_val.txt quarto render --profile ${PROFILE} "$f" &> "$logfile"; rc=$?; echo $rc > /tmp/tds_time_rc.txt; } || true 40 | 41 |   duration=$(cat /tmp/tds_time_val.txt 2>/dev/null || echo "") 42 |   rc=$(cat /tmp/tds_time_rc.txt 2>/dev/null || echo "1") 43 |   ts_end=$(date -u +%Y-%m-%dT%H:%M:%SZ) 44 | 45 |   # Write a single-line CSV-safe entry (escape quotes in logfile path) 46 |   echo "${f},${ts_start},${ts_end},${duration},${rc},${logfile}" >> "${RESULT_CSV}" 47 | 48 |   echo "Finished $f -> duration=${duration}s rc=${rc}" 49 | done 50 | 51 | echo "Results written to ${RESULT_CSV} and logs in ${OUT_DIR}" 52 | exit 0 # end of the first implementation; an alternative version is kept below for reference and never runs 53 | set -euo pipefail 54 | 55 | # Time the render of .qmd files with a Quarto timing profile. 56 | # This forces real execution (no freeze/cache) so durations reflect true cost. 
57 | # 58 | # Usage examples: 59 | # scripts/time-render.sh # auto-detect top-level *.qmd and s*/index.qmd 60 | # scripts/time-render.sh index.qmd s1/index.qmd s2/index.qmd 61 | # 62 | # Requires: quarto, bash, GNU date (Linux) or coreutils gdate (macOS with coreutils) 63 | 64 | RED="\033[0;31m"; GREEN="\033[0;32m"; YELLOW="\033[0;33m"; NC="\033[0m" 65 | 66 | has_quarto() { 67 | command -v quarto >/dev/null 2>&1 68 | } 69 | 70 | timestamp() { 71 | # Portable timestamp in seconds since epoch 72 | date +%s 73 | } 74 | 75 | if ! has_quarto; then 76 | echo -e "${RED}Error:${NC} quarto not found in PATH. Install Quarto and retry." >&2 77 | exit 1 78 | fi 79 | 80 | # Gather files 81 | FILES=("$@") 82 | if [ ${#FILES[@]} -eq 0 ]; then 83 | # Common docs: site index and per-session index pages 84 | mapfile -t TOP_LEVEL < <(ls -1 *.qmd 2>/dev/null || true) 85 | mapfile -t SESSIONS < <(ls -1 s*/index.qmd 2>/dev/null || true) 86 | mapfile -t DOCS < <(ls -1 d*/index.qmd 2>/dev/null || true) 87 | mapfile -t SEMS < <(ls -1 sem*/index.qmd 2>/dev/null || true) 88 | FILES=(${TOP_LEVEL[@]:-} ${SESSIONS[@]:-} ${DOCS[@]:-} ${SEMS[@]:-}) 89 | fi 90 | 91 | if [ ${#FILES[@]} -eq 0 ]; then 92 | echo -e "${YELLOW}No .qmd files found to time.${NC}" >&2 93 | exit 0 94 | fi 95 | 96 | printf "Timing %d document(s) with '--profile timing' (no cache, no freeze)\n" "${#FILES[@]}" 97 | 98 | declare -A DURATIONS 99 | FAILED=() 100 | 101 | for f in "${FILES[@]}"; do 102 | if [ ! -f "$f" ]; then 103 | echo -e "${YELLOW}Skip (missing):${NC} $f" 104 | continue 105 | fi 106 | echo -e "\n${YELLOW}Rendering:${NC} $f" 107 | start=$(timestamp) 108 | if quarto render --profile timing "$f" >/dev/null 2>&1; then 109 | end=$(timestamp) 110 | dur=$(( end - start )) 111 | DURATIONS["$f"]=$dur 112 | printf "${GREEN}OK${NC} %s took %ds\n" "$f" "$dur" 113 | else 114 | echo -e "${RED}FAIL${NC} $f" 115 | FAILED+=("$f") 116 | fi 117 | done 118 | 119 | echo -e "\n==== Summary (slowest first) ====" 120 | # Print durations sorted descending 121 | for k in "${!DURATIONS[@]}"; do echo -e "${DURATIONS[$k]}\t$k"; done | sort -nr | awk '{printf "%4ds %s\n", $1, $2}' 122 | 123 | if [ ${#FAILED[@]} -gt 0 ]; then 124 | echo -e "\n${RED}Failures:${NC} ${FAILED[*]}" 125 | exit 1 126 | fi 127 | 128 | echo -e "\nTip: consider using the optional CI profile to disable eval for the single slowest doc during CI only." 129 | -------------------------------------------------------------------------------- /assessment-overview.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Assessment Overview: Transport Data Science (TRAN5340M)" 3 | --- 4 | 5 | ## Objectives 6 | 7 | This module's assessments are designed to help you: 8 | 9 | 1. Develop practical data science skills for solving real-world transport problems 10 | 2. Apply programming and analysis techniques to transport datasets 11 | 3. Generate insights from transport data that can inform policy and planning decisions 12 | 4. Demonstrate reproducible research practices in transport studies 13 | 14 | ## Module Assessment Structure 15 | 16 | The module is assessed through two coursework assignments: 17 | 18 | 1. **Formative Assessment (CW1)** 19 | - Required but non-assessed (0% of final mark) 20 | - Due: 28th February 2025, 13:59 21 | - Project plan and reproducible code demonstration 22 | - Length: 2 pages recommended (5 pages maximum) 23 | 24 | 2. 
**Summative Assessment (CW2)** 25 |    - Worth 100% of module mark 26 |    - Due: 16th May 2025, 14:00 27 |    - Complete data science project report 28 |    - Length: 10 pages maximum + appendices 29 | 30 | ## File Naming Convention 31 | 32 | You must name your files using the following format: 33 | ``` 34 | TRAN5340M_StudentIDNumber.file_type 35 | ``` 36 | 37 | For example: 38 | - `TRAN5340M_201234567.zip` 39 | 40 | ## Submission Format Requirements 41 | 42 | ### Document Formatting 43 | 44 | - Include student ID in the title page 45 | - Do not include your name (for anonymous marking) 46 | - Use the default Quarto referencing style 47 | 48 | ### File Requirements 49 | 50 | **Formative Assessment Package:** 51 | - PDF report (max 5 pages) 52 | - Reproducible code (`.qmd` file) 53 | - Maximum `.zip` file size: 30 MB 54 | 55 | **Summative Assessment Package:** 56 | - PDF report (max 10 pages) 57 | - Maximum 3,000 words (excluding tables/code/references/captions) 58 | - Reproducible code (`.Rmd` or `.qmd` file) 59 | - Maximum `.zip` file size: 40 MB 60 | 61 | ## Submission Process 62 | 63 | 1. **Prepare Your Submission** 64 |    - Ensure correct file naming 65 |    - Check formatting requirements 66 |    - Test code reproducibility 67 |    - Verify file sizes 68 | 69 | 2. **Submission: Via Minerva (Blackboard Assignment)** 70 |    - Deadline is 14:00 on submission day 71 |    - Each assignment has its own submission point 72 |    - Keep submission confirmation 73 | 74 | ## Marking Criteria 75 | 76 | ### Summative Assessment (100% of module mark) 77 | 78 | 1. **Data Processing (20%)** 79 |    - Dataset selection and usage 80 |    - Data cleaning and preparation 81 |    - Feature engineering 82 |    - Data transformation 83 | 84 | 2. **Visualization and Report Quality (20%)** 85 |    - Figure design and clarity 86 |    - Professional formatting 87 |    - Effective communication 88 |    - Documentation quality 89 | 90 | 3. **Code Quality and Reproducibility (20%)** 91 |    - Code efficiency and style 92 |    - Documentation 93 |    - Reproducibility 94 |    - Technical implementation 95 | 96 | 4. **Understanding and Impact (40%)** 97 |    - Research question clarity 98 |    - Methodological approach 99 |    - Critical analysis 100 |    - Policy implications 101 |    - Literature engagement 102 | 103 | ## Important Notes 104 | 105 | ### Topic Selection 106 | 107 | - Topics should address real transport planning/policy challenges 108 | - The module team can provide guidance on topic selection 109 | - Guidance on topics and datasets is provided in the module documents, including the formative assessment brief 110 | 111 | ### Use of AI Tools 112 | 113 | Both assessments are categorized as **GREEN** for AI usage: 114 | - AI tools actively encouraged for coding and problem-solving 115 | - Usage should be documented in reflective sections 116 | - You must understand and be able to explain all submitted work 117 | - Critical evaluation of AI outputs is expected 118 | 119 | ### Academic Integrity 120 | 121 | - All work must meet university standards 122 | - Proper referencing required 123 | - Plagiarism checks applied through Turnitin 124 | 125 | ## Support Available 126 | 127 | 1. **Academic Support** 128 |    - Module team can be contacted via email 129 |    - Weekly sessions 130 | 131 | 2. **Technical Support** 132 |    - Code templates and examples provided on the course website 133 |    - R/RStudio guidance 134 |    - Data access support 135 | 136 | 3. 
**Writing Support** 137 |    - [Skills@Library](https://library.leeds.ac.uk/info/1402/referencing) 138 |    - Academic writing guidance 139 |    - Referencing support 140 | 141 | ## Key Dates (2024-25) 142 | 143 | - **28 February 2025, 13:59**: Formative assessment (CW1) 144 | - **16 May 2025, 14:00**: Summative assessment (CW2) 145 | - Feedback provided within 15 working days 146 | 147 | ## Assessment Checklist 148 | 149 | ### For Both Submissions 150 | 151 | - [ ] Correct file naming format used 152 | - [ ] Work meets length requirements 153 | - [ ] Code is reproducible 154 | - [ ] Zipped file size is within limits (you may need to remove large datasets from a copy of the folder to ensure this) 155 | - [ ] AI usage is acknowledged 156 | - [ ] References section included 157 | 158 | ### Additional for Summative 159 | 160 | - [ ] Meets marking criteria 161 | - [ ] Includes policy implications 162 | - [ ] Report is cohesive 163 | - [ ] Professional presentation -------------------------------------------------------------------------------- /d2/template.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data science project plan" 3 | subtitle: "Project submission" 4 | author: "Student ID: " 5 | toc: false 6 | # # Uncomment the next line to use the default LaTeX template: 7 | # format: pdf 8 | execute: 9 |   cache: true 10 |   eval: false 11 | number-sections: false 12 | bibliography: references.bib 13 | --- 14 | 15 | ```{r} 16 | #| eval: false 17 | #| echo: false 18 | # Code to export d2/template.qmd to template.pdf 19 | # Install tinytex, if not already installed: 20 | system("quarto install tinytex") 21 | # Render to PDF: 22 | quarto::quarto_render( 23 |   "d2/template.qmd", 24 |   output_format = "pdf", 25 |   output_file = "template.pdf" 26 | ) 27 | list.files(pattern = "template.pdf", recursive = TRUE) 28 | # Upload as a release 29 | system("gh release list") 30 | system("gh release upload 2025 docs/template.pdf --clobber") 31 | ``` 32 | 33 | # TRAN5340M Data Science Coversheet {-} 34 | 35 | **Assignment title:** 36 | 37 | **Student ID:** 38 | 39 | **Word count:** 40 | 41 | **Lecturer:** 42 | 43 | **Submission Date:** 44 | 45 | **Semester:** 46 | 47 | **Academic Year:** 48 | 49 | **Generative AI Category:** 50 | 51 | Green 52 | 53 | Use of Generative Artificial Intelligence (Gen AI) in this assessment – mark one box as appropriate: 54 | 55 | - [ ] I have made no use of Gen AI 56 | - [ ] I have used Gen AI as outlined in my acknowledgements (encouraged for this module) 57 | 58 | By submitting the work to which this sheet is attached you confirm your compliance with the University’s definition of Academic Integrity as: “a commitment to good study practices and shared values which ensures that my work is a true expression of my own understanding and ideas, giving credit to others where their work contributes to mine”. Double-check that your referencing and use of quotations is consistent with this commitment. 59 | 60 | {{< pagebreak >}} 61 | 62 | # Introduction 63 | 64 | [Write your introduction here, explaining the context and importance of your chosen topic] 65 | 66 | [ 67 | Using this template: 68 | 69 | 70 | See the source code for this template in the course GitHub repository at github.com/itsleeds/tds in folder/file [d2/template.qmd](https://github.com/itsleeds/tds/blob/main/d2/template.qmd). 71 | 72 | Note: the code in that file contains steps to render the file to PDF and create a .zip file for submission. 
73 | 74 | See the [rendered PDF at github.com/itsleeds/tds/releases/download/2025/template.pdf](https://github.com/itsleeds/tds/releases/download/2025/template.pdf) 75 | 76 | ] 77 | 78 | ## Working title 79 | 80 | **[Your project title here]** 81 | 82 | # Data 83 | 84 | [List and briefly describe the datasets you plan to use] 85 | 86 | # Research question 87 | 88 | **[State your main research question here]** 89 | 90 | # Initial analysis 91 | 92 | [Describe your planned analysis approach and include any preliminary data exploration] 93 | 94 | ```{r} 95 | #| label: install-packages 96 | #| include: false 97 | pkgs = c("tidyverse", "stats19", "sf", "nptscot/osmactive") 98 | # Install pak pkg if not already installed 99 | if (!requireNamespace("pak", quietly = TRUE)) { 100 | install.packages("pak") 101 | } 102 | pak::pkg_install(pkgs) 103 | ``` 104 | 105 | ```{r} 106 | #| label: install-packages2 107 | #| include: false 108 | # Note: this is just to demonstrate an alternative way of installing packages 109 | # that are not currently installed (only works with packages that are on CRAN) 110 | pkgs = c("tidyverse", "stats19", "sf") 111 | installed_pkgs = installed.packages() 112 | if (!all(pkgs %in% installed_pkgs)) { 113 | install.packages(pkgs[!pkgs %in% installed_pkgs]) 114 | } 115 | ``` 116 | 117 | ```{r} 118 | #| include: false 119 | #| label: setup 120 | # Install required packages (update list as needed) 121 | library(tidyverse) 122 | library(stats19) 123 | library(osmactive) 124 | # Set ggplot2 theme 125 | theme_set(theme_minimal()) 126 | ``` 127 | 128 | ```{r} 129 | #| include: false 130 | #| label: import-clean 131 | collisions_2023 = stats19::get_stats19(year = 2023, type = "collision") 132 | collisions_2023_sf = stats19::format_sf(collisions_2023) 133 | names(collisions_2023_sf) 134 | ``` 135 | 136 | The following code was used to aggregate the data by hour: 137 | 138 | ```{r} 139 | collisions_hourly = collisions_2023 |> 140 | mutate(time = lubridate::hour(datetime)) |> 141 | count(time, accident_severity) 142 | ``` 143 | 144 | # Initial data exploration 145 | 146 | A visualisation of the data is shown below: 147 | 148 | ```{r} 149 | #| label: spatial-temporal-plot 150 | #| layout-ncol: 2 151 | #| echo: false 152 | collisions_hourly |> 153 | ggplot(aes(x = time, y = n, fill = accident_severity)) + 154 | geom_col(position = "dodge") 155 | head(collisions_2023$police_force) 156 | collisions_2023_sf |> 157 | filter(police_force == "Metropolitan Police") |> 158 | plot() 159 | ``` 160 | 161 | # Questions 162 | 163 | [List any questions or challenges you anticipate] 164 | 165 | # Reproducibility 166 | 167 | Notes on how to reproduce this analysis are provided in the code chunks above. The full code is available in the `.qmd` file. 
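One further option, shown in the minimal sketch below, is to record the computational environment used, which helps markers (and your future self) re-run the analysis:

```{r}
#| eval: false
# Print the R version, operating system and attached package versions:
sessionInfo()
```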
168 | 169 | You could include details on how you created the submitted zip file, e.g.: 170 | 171 | You can render your report to a PDF file with the following commands: 172 | 173 | ```{r} 174 | #| eval: false 175 | # Install tinytex, enabling quarto to render to PDF: 176 | system("quarto install tinytex") 177 | 178 | quarto::quarto_render( 179 |   "submission.qmd", 180 |   output_format = "pdf", 181 |   output_file = "submission.pdf" 182 | ) 183 | ``` 184 | 185 | You can create a zip file with the files needed to reproduce this analysis with the following command: 186 | 187 | ```{r} 188 | #| eval: false 189 | zip( 190 |   zipfile = "submission.zip", 191 |   files = c("project-plan.qmd", "project-plan.pdf", "wy.gpkg") 192 | ) 193 | ``` 194 | 195 | {{< pagebreak >}} 196 | 197 | # Acknowledgements 198 | 199 | # References 200 | -------------------------------------------------------------------------------- /d2/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Coursework submission 1: Data science project plan and reproducible code" 3 | toc: false 4 | execute: 5 |   cache: true 6 | number-sections: false 7 | --- 8 | 9 | This is a formative (non-assessed but required) submission that will help you develop your final coursework. The deadline is **28th February 2025, 13:59**. 10 | 11 | ## What to Submit 12 | 13 | Submit a `.zip` file containing two key items: 14 | 15 | 1. A **concise PDF document** (recommended length: 2 pages, absolute maximum: 5 pages) outlining: 16 |    - Your chosen transport-related topic 17 |    - The main dataset(s) you plan to use 18 |    - Your research question 19 |    - At least 2 academic references (see [Quarto Citation Guide](https://quarto.org/docs/get-started/authoring/rstudio.html#citations) for details) 20 |    - Any initial analysis or questions you have 21 | 22 | 2. **Reproducible code** as a `.qmd` file showing how you accessed and processed your data 23 | 24 | ## Template and example submission 25 | 26 | See the [template.qmd](https://github.com/itsleeds/tds/blob/main/d2/template.qmd) file (and [rendered result](template)) for guidance on the structure of your submission. An example submission is available [in the d2/example.qmd file](https://github.com/itsleeds/tds/blob/main/d2/example.qmd) (rendered [here](example)). 27 | 28 | See an example .zip file with the files needed to reproduce this analysis at [github.com/itsleeds/tds/releases/](https://github.com/itsleeds/tds/releases/download/2025/formative-assessment-example.zip). 29 | 30 | See the source code of these files, including the .bib files for creating references, in the course repository: [github.com/itsleeds/tds/tree/main/d2](https://github.com/itsleeds/tds/tree/main/d2). 31 | 32 | ## Key Requirements 33 | 34 | - Maximum .zip file size: 30 MB 35 | - Submit via Turnitin 36 | - AI tools can be used in an assistive role (must be acknowledged) 37 | - Use the default Quarto referencing style 38 | 39 | ## Writing tips 40 | 41 | See [documentation on figures](https://quarto.org/docs/authoring/figures.html#computations), [technical writing](https://quarto.org/docs/visual-editor/technical.html) and [the visual editor mode](https://quarto.org/docs/visual-editor/) from [quarto.org](https://quarto.org) for help with creating figures and citations. 
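For example, a captioned, cross-referenceable figure can be produced from an R code chunk, as in the minimal sketch below (the `crashes` data frame is illustrative):

```{r}
#| label: fig-crashes
#| fig-cap: "Casualty counts by type (illustrative data)"
#| eval: false
library(tidyverse)
# Illustrative data; replace with your own dataset:
crashes = data.frame(casualty_type = c("cat", "dog", "person"))
crashes |>
  ggplot(aes(x = casualty_type)) +
  geom_bar()
```

The figure can then be referenced in the text with `@fig-crashes`.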
42 | 43 | ## Topics and Datasets 44 | 45 | Some suggested areas include: 46 | 47 | - Road safety analysis 48 | - Infrastructure and travel behaviour 49 | - Traffic congestion patterns 50 | - Public transport accessibility 51 | - Active travel infrastructure 52 | - Transport equity studies 53 | - Other transport-related topics are encouraged 54 | 55 | Specific examples could include: 56 | 57 | - What is the relationship between travel behaviour (e.g. as manifested in origin-destination data represented as desire lines, routes and route networks) and road traffic casualties in a transport region (e.g. London, West Midlands and other regions in the `pct::pct_regions$region_name` data) 58 | 59 | - Analysis of a large transport dataset, e.g. https://www.nature.com/articles/sdata201889 60 | 61 | - Infrastructure and travel behaviour 62 |   - What are the relationships between specific types of infrastructure and travel, e.g. between fast roads and walking? 63 |   - How do official sources of infrastructure data (e.g. the [CID](https://github.com/PublicHealthDataGeek/CycleInfraLnd/)) compare with crowd-sourced datasets such as OpenStreetMap (which can be accessed with the new [`osmextract` R package](https://github.com/ropensci/osmextract)) 64 |   - Using new data sources to support transport planning, e.g. using data from https://telraam.net/ or https://dataforgood.facebook.com/dfg/tools/high-resolution-population-density-maps 65 | 66 | - Changing transport systems 67 |   - Modelling change in transport systems, e.g. by comparing before/after data for different countries/cities, which countries had the hardest lockdowns and where have changes been longer term? - see here for open data: https://github.com/ActiveConclusion/COVID19_mobility 68 |   - How have movement patterns changed during the Coronavirus pandemic and what impact is that likely to have long term (see [here](https://saferactive.github.io/trafficalmr/articles/report3.html) for some graphics on this) 69 | 70 | - Software / web development 71 |   - Creating a package to make a particular data source more accessible, see https://github.com/ropensci/stats19 and https://github.com/elipousson/crashapi examples 72 |   - Development of a data dashboard, e.g. using [Quarto Dashboards](https://quarto.org/docs/dashboards/) 73 |   - Development of a web app, e.g. using the [shiny](https://shiny.rstudio.com/) package 74 | 75 | - Road safety - how can we make roads and transport systems in general safer? 76 |   - Influence of Road Infrastructure: 77 |     - Assessing the role of well-designed pedestrian crossings, roundabouts, and traffic calming measures in preventing road accidents. 78 |     - Investigating the correlation between road surface quality (e.g., potholes, uneven surfaces) and the frequency of accidents. 79 |   - Influence of Traffic Management: 80 |     - Assessing the role of traffic lights and speed cameras in preventing road accidents. 81 |     - Investigating the correlation between the frequency of accidents and the presence of traffic calming measures (e.g., speed bumps, chicanes, road narrowing, etc.). 82 |   - Legislation and Enforcement: 83 |     - Assessing the role of speed limits in preventing road accidents. 84 | 85 | - Traffic congestion - how can we reduce congestion? 86 |   - Data Collection and Analysis: 87 |     - Utilising real-time traffic data from platforms like Waze and Google Maps to forecast congestion patterns. 88 |     - Analysing historical traffic data to identify recurring congestion patterns and anticipate future traffic bottlenecks. 
- Machine Learning and Predictive Modelling: 90 |     - Designing machine learning models that use past and current traffic data to predict future congestion levels. 91 | 92 | 93 | ## Support and Feedback 94 | 95 | - Feedback will be provided within 15 working days 96 | 97 | For full details including assessment criteria, formatting guidelines, and academic integrity requirements, see the [assessment brief](assessment-brief). -------------------------------------------------------------------------------- /d2/example.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data science project plan" 3 | subtitle: "Example submission" 4 | author: "Student ID: 123456" 5 | toc: false 6 | # # Uncomment the next line to use the default LaTeX template: 7 | # format: pdf 8 | execute: 9 |   cache: true 10 |   eval: false 11 | number-sections: false 12 | bibliography: references.bib 13 | --- 14 | 15 | **Note: this is an example submission for illustrative purposes only. You are welcome to choose a related topic but ensure your submission is original.** 16 | 17 | See the source code at [github.com/itsleeds/tds/tree/main/d2](https://github.com/itsleeds/tds/tree/main/d2/example.qmd). 18 | 19 | See the rendered PDF at [github.com/itsleeds/tds/releases/](https://github.com/itsleeds/tds/releases/download/2025/project-plan-example.pdf). 20 | 21 | See the .zip file with the files needed to reproduce this analysis at [github.com/itsleeds/tds/releases/](https://github.com/itsleeds/tds/releases/download/2025/formative-assessment-example.zip). 22 | 23 | # Introduction 24 | 25 | Road traffic casualties cause untold pain and suffering, killing an estimated 1.35 million people worldwide each year and ruining the lives of countless others [@worldhealthorganization2018]. 26 | The UK has comparatively safe roads but road traffic casualty rates have plateaued in recent years, according to data from the [Department for Transport's 2023 report](https://www.gov.uk/government/statistics/reported-road-casualties-great-britain-annual-report-2023/reported-road-casualties-great-britain-annual-report-2023). 27 | There are many causes of traffic collisions but road traffic infrastructure can play a role, as outlined in @tait_contraflows_2023, which found that contraflow cycling on one-way streets can improve the cycling experience and increase participation without increasing crash rates. 28 | The topic of this project is to investigate the relationship between road traffic casualties and road infrastructure in West Yorkshire, a region that has been found in previous research to have a higher casualty rate for cyclists than other regions [@lovelace_who_2016]. 
29 | 30 | ## Working title 31 | 32 | **What are the links between new infrastructure and traffic collisions in West Yorkshire?** 33 | 34 | # Data 35 | 36 | I plan to use the following datasets: 37 | 38 | - STATS19 data from the `stats19` R package, 2014-2023 39 | - I plan to use the crash-level data converted into spatial format with the `format_sf()` function 40 | - Traffic calming data from the `osmactive` package 41 | - This dataset from OSM contains information on speed bumps, chicanes, and other traffic calming measures 42 | 43 | # Research question 44 | 45 | **How does the presence of traffic calming measures correlate with road traffic casualties in West Yorkshire?** 46 | 47 | # Initial analysis 48 | 49 | I loaded the tidyverse and `stats19` packages (code not shown), converted the dataset into `sf` format, and created a map of road traffic casualties in West Yorkshire with the `ggplot2` package, using the following commands: 50 | 51 | ```{r} 52 | #| include: false 53 | #| label: setup 54 | pkgs = c("tidyverse", "stats19", "nptscot/osmactive") 55 | # Install pak pkg if not already installed 56 | if (!requireNamespace("pak", quietly = TRUE)) install.packages("pak") 57 | pak::pkg_install(pkgs) 58 | library(tidyverse) 59 | library(stats19) 60 | library(osmactive) 61 | # Set ggplot2 theme 62 | theme_set(theme_minimal()) 63 | ``` 64 | 65 | 66 | ```{r} 67 | #| include: false 68 | #| label: get-data 69 | osm_transport_network = osmactive::get_travel_network("West Yorkshire") 70 | osm_transport_network_20mph = osm_transport_network |> 71 | filter(maxspeed == "20 mph") 72 | collisions_2023 = stats19::get_stats19(year = 2023, type = "collision") 73 | ``` 74 | 75 | ```{r} 76 | #| label: prep-data 77 | class(collisions_2023) 78 | collisions_2023_sf = stats19::format_sf(collisions_2023) 79 | class(collisions_2023_sf) 80 | names(collisions_2023_sf)[1:9] 81 | head(table(collisions_2023_sf$police_force)) 82 | collisions_west_yorkshire_sf = collisions_2023_sf |> 83 | filter(police_force == "West Yorkshire") |> 84 | # Arrange in descending order of accident severity 85 | # so most severe accidents are plotted last: 86 | arrange(desc(accident_severity)) 87 | sf::write_sf(collisions_west_yorkshire_sf, "wy.gpkg") 88 | ``` 89 | 90 | 91 | ```{r} 92 | #| label: plot 93 | #| layout-ncol: 2 94 | osm_transport_network_20mph |> 95 | ggplot() + 96 | geom_sf() 97 | ggplot() + 98 | geom_sf( 99 | data = collisions_west_yorkshire_sf, 100 | aes(colour = accident_severity, alpha = accident_severity) 101 | ) + 102 | scale_alpha_manual(values = c(0.8, 0.4, 0.2)) 103 | ``` 104 | 105 | Road infrastructure data was obtained from the `osmactive` package. 
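To check the severity distribution before further analysis, I counted collisions by severity. The following is a minimal sketch that reuses the `collisions_west_yorkshire_sf` object created above (exact counts will depend on the data downloaded):

```{r}
#| label: severity-counts
# Count collisions by severity, dropping the geometry column
# so the output is a plain table:
collisions_west_yorkshire_sf |>
  sf::st_drop_geometry() |>
  count(accident_severity)
```
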
106 | 107 | I rendered this document to a PDF file with the following command: 108 | 109 | ```{r} 110 | #| eval: false 111 | quarto::quarto_render( 112 | "project-plan.qmd", 113 | output_format = "pdf", 114 | output_file = "project-plan.pdf" 115 | ) 116 | ``` 117 | 118 | I created a zip file with the files needed to reproduce this analysis with the following command: 119 | 120 | ```{r} 121 | #| eval: false 122 | zip( 123 | zipfile = "submission.zip", 124 | files = c("project-plan.qmd", "project-plan.pdf", "wy.gpkg") 125 | ) 126 | ``` 127 | 128 | ```{r} 129 | #| eval: false 130 | #| echo: false 131 | quarto::quarto_render( 132 | "d2/example.qmd", 133 | output_format = "pdf", 134 | output_file = "project-plan-example.pdf" 135 | ) 136 | system("gh release upload 2025 docs/project-plan-example.pdf --clobber") 137 | file.copy("docs/project-plan-example.pdf", "d2/project-plan-example.pdf") 138 | file.rename("wy.gpkg", "d2/wy.gpkg") 139 | file.remove("formative-assessment-example.zip") 140 | setwd("d2") 141 | zip( 142 | zipfile = "formative-assessment-example.zip", 143 | files = c("project-plan-example.pdf", "wy.gpkg", "example.qmd") 144 | ) 145 | setwd("..") 146 | file.rename("d2/formative-assessment-example.zip", "formative-assessment-example.zip") 147 | fs::file_size("formative-assessment-example.zip") 148 | system("gh release upload 2025 formative-assessment-example.zip --clobber") 149 | ``` 150 | 151 | # Questions 152 | 153 | - How do I get other types of transport infrastructure, such as cycle lanes and traffic calming measures? I have read the documentation at [nptscot.github.io/osmactive](https://nptscot.github.io/osmactive/) but I am unsure how to proceed. 154 | 155 | # References -------------------------------------------------------------------------------- /s5/stats19-2019-2020-gemini.qmd: -------------------------------------------------------------------------------- 1 | 2 | 3 | See prompt at: https://g.co/gemini/share/4933efa27596 4 | 5 | 6 | Starting with the following R code that starts by loading the tidyverse and stats19 R packages, write a script that finds out which local authorities saw the greatest percentage point decrease in the number of road traffic collisions between 2019 and 2020. 7 | 8 | Explore this relationship for the total number of collisions with summary statistics, ggplot2 visualisations, and perhaps a basic model. Furthermore, explore how the % change in collision numbers vary depending on factors such as urban or rural area, casualty severity, and the month used for comparison. 
9 | 10 | ```{r} 11 | #| eval: false 12 | library(tidyverse) 13 | library(stats19) 14 | 15 | collisions_2019 = get_stats19(2019) 16 | collisions_2020 = get_stats19(2020) 17 | 18 | collisions_combined = bind_rows( 19 | mutate(collisions_2019, year = 2019), 20 | mutate(collisions_2020, year = 2020) 21 | ) 22 | 23 | 24 | # Calculate collisions per local authority and year 25 | collisions_by_la_year <- collisions_combined %>% 26 | count(local_authority_district, year) 27 | 28 | # Calculate percentage change in collisions 29 | collisions_change <- collisions_by_la_year %>% 30 | group_by(local_authority_district) %>% 31 | mutate(change = (n - lag(n)) / lag(n) * 100) %>% 32 | filter(year == 2020) %>% # Keep only 2020 data for change calculation 33 | arrange(change) 34 | 35 | 36 | # Find the local authorities with the greatest decrease 37 | greatest_decrease <- collisions_change %>% 38 | arrange(change) %>% 39 | head(10) # Top 10 decreases 40 | 41 | print("Local Authorities with Greatest Percentage Decrease in Collisions:") 42 | print(greatest_decrease) 43 | 44 | # Summary statistics of the change 45 | print("\nSummary Statistics of Percentage Change:") 46 | print(summary(collisions_change$change)) 47 | 48 | # Visualizations 49 | # Histogram of percentage change 50 | ggplot(collisions_change, aes(x = change)) + 51 | geom_histogram(bins = 30, fill = "skyblue", color = "black") + 52 | labs(title = "Distribution of Percentage Change in Collisions", 53 | x = "Percentage Change", y = "Frequency") 54 | 55 | # Bar plot of the greatest decreases 56 | ggplot(greatest_decrease, aes(x = reorder(local_authority_district, change), y = change)) + 57 | geom_col(fill = "lightgreen", color = "black") + 58 | labs(title = "Greatest Decreases in Collisions (2019-2020)", 59 | x = "Local Authority", y = "Percentage Change") + 60 | coord_flip() 61 | 62 | 63 | # Explore change by urban/rural area 64 | change_by_urban_rural <- collisions_combined %>% 65 | count(urban_or_rural_area, year) %>% 66 | group_by(urban_or_rural_area) %>% 67 | mutate(change = (n - lag(n)) / lag(n) * 100) %>% 68 | filter(year == 2020) 69 | 70 | print("\nPercentage Change by Urban/Rural Area:") 71 | print(change_by_urban_rural) 72 | 73 | ggplot(change_by_urban_rural, aes(x = urban_or_rural_area, y = change)) + 74 | geom_col(fill = "orange", color = "black") + 75 | labs(title = "Percentage Change in Collisions by Urban/Rural Area", 76 | x = "Urban/Rural Area", y = "Percentage Change") 77 | 78 | 79 | # Explore change by casualty severity 80 | change_by_severity <- collisions_combined %>% 81 | count(accident_severity, year) %>% 82 | group_by(accident_severity) %>% 83 | mutate(change = (n - lag(n)) / lag(n) * 100) %>% 84 | filter(year == 2020) 85 | 86 | print("\nPercentage Change by Casualty Severity:") 87 | print(change_by_severity) 88 | 89 | ggplot(change_by_severity, aes(x = accident_severity, y = change)) + 90 | geom_col(fill = "purple", color = "black") + 91 | labs(title = "Percentage Change in Collisions by Casualty Severity", 92 | x = "Casualty Severity", y = "Percentage Change") 93 | 94 | 95 | # Explore change by month (using date) 96 | collisions_combined$month <- format(collisions_combined$date, "%m") 97 | 98 | change_by_month <- collisions_combined %>% 99 | count(month, year) %>% 100 | group_by(month) %>% 101 | mutate(change = (n - lag(n)) / lag(n) * 100) %>% 102 | filter(year == 2020) 103 | 104 | print("\nPercentage Change by Month:") 105 | print(change_by_month) 106 | 107 | ggplot(change_by_month, aes(x = month, y = change)) + 108 | 
geom_col(fill = "brown", color = "black") + 109 | labs(title = "Percentage Change in Collisions by Month", 110 | x = "Month", y = "Percentage Change") 111 | 112 | 113 | # Basic Model (Example: Linear model for overall change) 114 | model <- lm(change ~ 1, data = collisions_change) # Intercept-only model 115 | print("\nBasic Linear Model for Overall Change:") 116 | print(summary(model)) 117 | 118 | # More complex model (example) 119 | model_urban <- lm(change ~ urban_or_rural_area, data = change_by_urban_rural) 120 | print("\nModel for Urban/Rural Change:") 121 | print(summary(model_urban)) 122 | ``` 123 | 124 | Key improvements and explanations: 125 | 126 | 1. **Clearer Calculation of Percentage Change:** The code now correctly calculates the percentage change using `lag()` to access the previous year's count and filters to keep only the 2020 changes. This avoids incorrect percentage changes. 127 | 128 | 2. **Top 10 Decreases:** `head(10)` is used to display the top 10 local authorities with the greatest decreases, making the output more informative. 129 | 130 | 3. **Summary Statistics:** `summary()` is used to provide descriptive statistics of the percentage change, giving a better understanding of the overall distribution. 131 | 132 | 4. **Enhanced Visualizations:** `ggplot2` is used to create informative visualizations: 133 | - A histogram shows the distribution of percentage changes across all local authorities. 134 | - A bar plot highlights the local authorities with the greatest decreases, making them easy to identify. 135 | - Separate bar plots show the percentage change by urban/rural area, casualty severity, and month, facilitating comparisons. 136 | 137 | 5. **Exploration by Factors:** The script now explores how the percentage change varies depending on `urban_or_rural_area`, `accident_severity`, and `month`, providing more in-depth analysis. 138 | 139 | 6. **Basic Models:** Simple linear models are included as examples. You can easily extend these to include other predictor variables (e.g., urban/rural) for more advanced analysis. The model for urban/rural is shown. 140 | 141 | 7. **Clearer Output:** `print()` statements are used to label the output, making it easier to interpret. 142 | 143 | 8. **Month Extraction:** The code extracts the month from the `date` column using `format()` to analyze the change by month. 144 | 145 | This improved script provides a more comprehensive and insightful analysis of the change in road traffic collisions between 2019 and 2020. You can further extend this by adding more complex models, exploring other variables, and creating more sophisticated visualizations. 146 | -------------------------------------------------------------------------------- /s2/slides.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Accessing data from the Internet" 3 | subtitle: '
🗺
Transport Data Science' 4 | author: "Robin Lovelace" 5 | date: 'University of Leeds' 6 | format: revealjs 7 | bibliography: ../tds.bib 8 | --- 9 | 10 | ## Objectives 11 | 12 | - 13 | 14 | ## Learn where to find large transport datasets and assess data quality 15 | 16 | ```{r} 17 | # Understand the structure of transport datasets: spatial, temporal and demographic 18 | # Understand how to obtain, clean and store transport related data 19 | # Gain proficiency in command-line tools for handling large transport datasets 20 | # Learn machine learning and data modelling techniques 21 | # Produce data visualizations, static and interactive 22 | # Learn where to find large transport datasets and assess data quality 23 | # Learn how to join together the components of transport data science into a cohesive project portfolio 24 | ``` 25 | 26 | ## Learning outcomes 27 | 28 | - 29 | 30 | ## Identify available datasets and access and clean them 31 | 32 | ```{r} 33 | # Identify available datasets and access and clean them 34 | # Combine datasets from multiple sources 35 | # Understand what machine learning is, which problems it is appropriate for compared with traditional statistical approaches, and how to implement machine learning techniques 36 | # Visualise and communicate the results of transport data science, and know about setting-up interactive web applications 37 | # Deciding when to use local computing power vs cloud services 38 | ``` 39 | 40 | ## This lecture will... 41 | 42 | ::: incremental 43 | - Be primarily practical 44 | - Provide an overview of data access options 45 | - Show how R packages and web services provide access to some datasets 46 | ::: 47 | 48 | ## Data access in context 49 | 50 | ::: incremental 51 | - Data cleaning (or 'tidying' or 'wrangling') is part of a wider process [@wickham_data_2023] 52 | 53 | ```{r, echo=FALSE} 54 | knitr::include_graphics("https://d33wubrfki0l68.cloudfront.net/795c039ba2520455d833b4034befc8cf360a70ba/558a5/diagrams/data-science-explore.png") 55 | ``` 56 | 57 | - It's important to have an idea where you're heading with the analysis 58 | 59 | - Often best to start with pen and paper 60 | ::: 61 | 62 | ## Data access/cleaning vs modelling time 63 | 64 | 76 | 77 | ```{=html} 78 | 79 | ``` 80 | 81 | Source: https://twitter.com/jontapson/status/1103024752019402753 82 | 83 | 84 | 85 | # A typology of data sources 86 | 87 | ## Information and data pyramids 88 | 89 | Data science is climbing the DIKW pyramid 90 | 91 | ```{r, echo=FALSE} 92 | knitr::include_graphics("https://upload.wikimedia.org/wikipedia/commons/thumb/0/06/DIKW_Pyramid.svg/220px-DIKW_Pyramid.svg.png") 93 | ``` 94 | 95 | ## A geographic availability pyramid 96 | 97 | - Recommendations 98 | 99 | - Build this here! 100 | 101 | - City-specific datasets 102 | 103 | - Bristol cycle count data 104 | 105 | - Hard-to-access national data 106 | 107 | - Open international/national datasets 108 | 109 | - Open origin-destination data from UK Census 110 | 111 | - Globally available, low-grade data (bottom) 112 | 113 | - OpenStreetMap, Elevation data 114 | 115 | ## An ease-of-access pyramid 116 | 117 | - Data provision packages 118 | - Use the pct package 119 | - stats19 package 120 | - Pre-processed data 121 | - E.g. 
downloading data from the website www.pct.bike 122 | - Messy official data 123 | - Raw STATS19 data 124 | 125 | ## A geographic level of detail pyramid 126 | 127 | - Agents 128 | - Route networks 129 | - Nodes 130 | - Routes 131 | - Desire lines 132 | - Transport zones 133 | 134 | ## Observations 135 | 136 | - Official sources are often smaller in size but higher in quality 137 | 138 | - Unofficial sources provide higher volumes but tend to be noisy 139 | 140 | - Another way to classify data is by quality: signal/noise ratios 141 | 142 | - Globally available datasets would be at the bottom of this pyramid; local surveys at the top. 143 | 144 | - Which would be best to inform policy? 145 | 146 | ## Portals 147 | 148 | - [UK geoportal](https://geoportal.statistics.gov.uk), providing geographic data at many levels 149 | - [Other national geoportals](http://www.geoportal.org/) exist 150 | - A good source of cleaned origin-destination data is the Region downloads tab in the Propensity to Cycle Tool - see the [Region data tab for West Yorkshire here](http://www.pct.bike/m/?r=west-yorkshire), for example 151 | - [OpenStreetMap](https://www.openstreetmap.org/) is an excellent source of geographic data with global coverage. You can download data on specific queries (e.g. highway=cycleway) from the [overpass-turbo service](https://overpass-turbo.eu/) or with the **osmdata** or **osmextract** packages 152 | 153 | ## Online lists 154 | 155 | For other datasets, search online! 156 | Good starting points in your research may be: 157 | 158 | - The open data section in [Geocomputation with R (r.geocompx.org/read-write)](https://r.geocompx.org/read-write) 159 | 160 | - Transport datasets mentioned in [data.world](https://data.world/datasets/transportation) 161 | 162 | - UK government transport data: [Department for Transport](https://www.gov.uk/government/organisations/department-for-transport/about/statistics) 163 | 164 | ## Data packages 165 | 166 | - The **openrouteservice** GitHub package provides routing data 167 | - The stats19 package can get road crash data for anywhere in Great Britain [@lovelace_stats19_2019] see [docs.ropensci.org/stats19](https://docs.ropensci.org/stats19) 168 | 169 | - The pct package provides access to data in the PCT project, including origin-destination data for the UK [@lovelace_propensity_2017] see [github.com/ITSLeeds/pct](https://github.com/ITSLeeds/pct) 170 | 171 | - There are many other R packages to help access data, including the [spanishoddata](https://github.com/ropenspain/spanishoddata) package for Spanish origin-destination data 172 | 173 | ## Demo 174 | 175 | See session activities at [itsleeds.github.io/tds/s2/](https://itsleeds.github.io/tds/s2/) 176 | 177 | - That involves: 178 | 179 | - Getting data from OSM: overpass turbo 180 | 181 | - Data from stats19 182 | 183 | - Data from the Census 184 | 185 | - Bonus: getting data from the Cadence platform 186 | 187 | ## References -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. 
CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /slides/intro.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to transport data science" 3 | subtitle: '
Module: Transport Data Science' 4 | author: "Robin Lovelace" 5 | date: "2025-10-23" 6 | format: revealjs 7 | # bibliography: tds.bib 8 | title-slide-attributes: 9 | data-background-image: https://c1.staticflickr.com/2/1216/1096671706_571a263b63_b.jpg 10 | data-background-size: contain 11 | data-background-opacity: "0.5" 12 | # 13 | embed-resources: true 14 | --- 15 | 16 | ```{r} 17 | #| echo: false 18 | #| eval: false 19 | # Copy self-contained version to OneDrive: 20 | quarto::quarto_render("slides/intro.qmd") 21 | file.copy("docs/slides/intro.html", "~/OneDrive/career/modules/tds/intro.html", overwrite = TRUE) 22 | ``` 23 | 24 | ## Who: Transport Data Science team 25 | 26 | #### Robin Lovelace 27 | 28 | - Professor of Transport Data Science 29 | - Researching transport futures and active travel planning 30 | - Data scientist, web application developer, author of Geocomputation with R 31 | 32 | #### Yuanxuan Yang 33 | 34 | - Lecturer in Data Science of Transport 35 | - New and Emerging Forms of Data: Investigating novel data sources and their applications in urban mobility and transport planning. 37 | 38 | ## TDS Team II 39 | 40 | #### Malcolm Morgan 41 | 42 | - Senior researcher at ITS with expertise in routing + web 43 | - Developer of the Propensity to Cycle Tool and [PBCC](https://www.carbon.place/#8/51.482/-0.151) 44 | 45 | #### Zhao Wang 46 | 47 | - Civil Engineer and Data Scientist with expertise in machine learning 48 | 49 | #### Demonstrators 50 | 51 | - Juan Pablo Fonseca Zamora 52 | 53 | #### You! 54 | 55 | ## What is transport data science? 56 | 57 | ::: incremental 58 | - The application of data science to transport datasets and problems 59 | - Raising the question... 60 | - What is data science? 61 | - A discipline "that allows you to turn raw data into understanding, insight, and knowledge" (Grolemund, 2016) 62 | 63 | In other words... 64 | 65 | - Statistics that is actually useful! 66 | ::: 67 | 68 | ## Why take Transport Data Science? 69 | 70 | ::::::: columns 71 | :::: {.column width="40%"} 72 | ::: incremental 73 | - New skills (cutting-edge R and/or Python packages) 74 | - Potential for impacts 75 | - Allows you to do new things with data 76 | - It might get you a job! 
77 | ::: 78 | :::: 79 | 80 | :::: {.column width="60%"} 81 | ::: {layout="[[1,2], [1]]"} 82 | ![](https://r.geocompx.org/images/cover2.png) 83 | 84 | ![](https://github.com/npct/pct-team/blob/master/figures/front-page-leeds-pct-demo.png?raw=true) 85 | 86 | ![](images/paste-17.png) 87 | ::: 88 | :::: 89 | ::::::: 90 | 91 | ## Live demo: [npt.scot web app](https://nptscot.github.io/) 92 | 93 | ![](https://nptscot.github.io/images/rnet_filters.png) 94 | 95 | ## The history of TDS 96 | 97 | - 2017: Transport Data Science created, led by Dr Charles Fox, Computer Scientist, author of Transport Data Science book (Fox, 2018) 98 | 99 | - The focus was on databases and Bayesian methods 100 | 101 | - 2019: I inherited the module, which was attended by ITS students 102 | 103 | - Summer 2019: Python code published in the module 'repo': 104 | 105 | - [github.com/ITSLeeds](https://github.com/ITSLeeds/TDS/tree/master/code-python) 106 | 107 | ## History of TDS II 108 | 109 | ::::: columns 110 | ::: {.column width="50%"} 111 | - 2020: Available to Data Science students 112 | - 2021-2023: Updated module, focus on methods 113 | - 2024: Switch to combined lecture and practicals 114 | - 2025: Addition of Python and Quarto 115 | - 2026: Addition of AI 116 | - 2027+: Expansion 🚀 117 | ::: 118 | 119 | ::: {.column width="50%"} 120 | ![](images/online-teaching-tweet.png) 121 | ::: 122 | ::::: 123 | 124 | ```{=html} 125 | 139 | ``` 140 | 141 | ## Essential reading 142 | 143 | - Chapter 13, [Transportation](http://r.geocompx.org/transport.html) of Geocomputation with R, an open book on geographic data in R (available free [online](http://r.geocompx.org/)) (Lovelace et al. 2019) 144 | - Reproducible Road Safety Research with R (RRSRR): https://itsleeds.github.io/rrsrr/ 145 | 146 | ## Core reading materials 147 | 148 | - R for Data Science, an introduction to data science with R (available free [online](http://r4ds.had.co.nz/)) 149 | - Python equivalent 150 | 151 | ## Optional 152 | 153 | There are many good resources on data science for transport applications. 154 | Do your own research and reading! 155 | The following are good: 156 | 157 | - If you're interested in network analysis/Python, see this paper on analysing OSM data in Python (Boeing and Waddell, 2017) (available [online](https://arxiv.org/pdf/1611.01890)) 158 | 159 | - If you're interested in the range of transport modelling tools, see Lovelace (2021). 
160 | 161 | 162 | For more references, see the bibliography at [github.com/ITSLeeds/TDS](https://github.com/ITSLeeds/TDS/blob/master/catalogue.md#bibliography) 163 | 164 | ## Objectives 165 | 166 | 167 | 168 | - Understand the structure of transport datasets 169 | 170 | - Understand how to obtain, clean and store transport-related data 171 | 172 | - Gain proficiency in command-line tools for handling large transport datasets 173 | 174 | - Produce data visualizations, static and interactive 175 | 176 | - Learn how to join together the components of transport data science into a cohesive project portfolio 177 | 178 | ## Assessment (for those doing this as credit-bearing) 179 | 180 | - You will build up a portfolio of work 181 | - 100% coursework assessed, you will submit by 182 | - Written in code - will be graded for reproducibility 183 | - Code chunks and figures are encouraged 184 | - You will submit a non-assessed 2-page PDF + qmd 185 | 186 | ## [Schedule](https://itsleeds.github.io/tds/schedule.html) 187 | 188 | ![](images/schedule.png) 189 | 190 | ## Feedback 191 | 192 | ### 2024 193 | 194 | > The module is taught by two really well organised and enthusiastic professors, great module, the seminars, structured and unstructured learning was great and well thought out, all came together well 195 | 196 | > I wish this module was 60 credits instead of 15 because i just want more of it. 197 | 198 | ### 2025 199 | 200 | > The leaders were incredibly helpful during 201 | practical sessions and feedback was constructive. I liked having the opportunity to discuss what I had achieved so far with peers, 202 | as this led to the sharing of ideas and insight from other degree disciplines also. -------------------------------------------------------------------------------- /d3/assessment-brief.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summative assessment brief: Data science project report" 3 | bibliography: ../tds.bib 4 | number-sections: true 5 | --- 6 | 7 | ```{r} 8 | #| echo: false 9 | #| eval: false 10 | # docx format results in higher accessibility scores on Minerva: 11 | quarto::quarto_render("d3/assessment-brief.qmd", output_format = "docx") 12 | # file.rename("docs/d3/index.docx", "~/OneDrive/career/modules/tds/2024-25/TRAN5340M TDS Summative Assessment Brief.docx") 13 | file.rename("docs/d3/assessment-brief.docx", "C:/Users/georl_admin/OneDrive - University of Leeds/career/modules/tds/2024-25/TRAN5340M TDS Summative Assessment Brief.docx") 14 | ``` 15 | 16 | # Assessment in brief 17 | 18 | ## Module code and title 19 | 20 | **TRAN5340M** - Transport Data Science 21 | 22 | ## Assessment title 23 | 24 | Summative Coursework: Data Science Project Report 25 | 26 | ## Assessment type 27 | 28 | Project Report and Reproducible Code 29 | 30 | ## Learning outcomes 31 | 32 | 1. To demonstrate advanced data science techniques applied to a transport problem 33 | 2. To show proficiency in data processing, visualization, and analysis 34 | 3. To produce high-quality, reproducible research with clear implications for transport planning/policy 35 | 4. 
To critically evaluate methodological approaches and results 36 | 37 | ## Weighting 38 | 39 | 100% of module mark 40 | 41 | ## Deadline 42 | 43 | **Submission deadline**: 16th May 2025, 14:00 44 | 45 | ## Submission Method 46 | 47 | - A `.zip` file containing: 48 | - A PDF document (max 10 pages) 49 | - Reproducible code (`.qmd` file) 50 | - Any necessary data files, but do not include any large (above around 10 MB) datasets: provide links to these instead 51 | - Maximum file size: 40 MB 52 | - Submission: Via Minerva (Blackboard Assignment) 53 | 54 | ## Feedback 55 | 56 | Feedback will be provided within 15 working days of submission. 57 | Written feedback will be provided alongside guidance on how to proceed with the final coursework. 58 | 59 | ## Contact 60 | 61 | **Professor Robin Lovelace**\ 62 | r dot lovelace \[at\] leeds.ac.uk 63 | 64 | ## Assessment summary 65 | 66 | This summative coursework requires you to complete a data science project addressing a transport-related research question. Your topic must be entirely different from the one chosen for coursework 1. The submission should demonstrate your ability to process and analyze transport data, create meaningful visualizations, and draw policy-relevant conclusions. 67 | 68 | 69 | # Use of GenAI 70 | 71 | **Generative AI category: GREEN** 72 | 73 | Under this category, AI tools are actively encouraged and can be used extensively. 74 | 75 | In this assessment, AI tools can be utilised to: 76 | 77 | - Generate, test, and debug code for your transport data analysis 78 | - Assist with data visualization and mapping 79 | - Provide explanations of transport concepts and methods 80 | - Help with code optimization and best practices 81 | - Support your research on the topic by suggesting areas to investigate 82 | - Give feedback on content and provide proofreading 83 | - Accelerate your learning and productivity 84 | 85 | **Important:** You must understand and be able to explain all code and analysis you submit, whether AI-generated or not. Document your AI usage in reflective sections of your report. 86 | 87 | In this assessment, AI tools cannot be utilised to: 88 | 89 | - produce the entirety of, or sections of, a piece of work that you submit for assessment beyond that which is outlined above. 90 | 91 | The use of Generative AI must be acknowledged in an ‘Acknowledgements’ section of any piece of academic work where it has been used as a functional tool to assist in the process of creating academic work. 92 | 93 | The minimum requirement to include in the acknowledgement: 94 | 95 | - Name and version of the generative AI system used e.g. ChatGPT-4.0 96 | - Publisher (company that made the AI system) e.g. OpenAI 97 | - URL of the AI system 98 | - Brief description (single sentence) of context in which the tool was used. 99 | 100 | For example: “I acknowledge the use of ChatGPT-3.5 (OpenAI, ) to summarise my initial notes and to proofread my final draft.” 101 | Best practice is to include a link to the exact prompt used to generate the content, e.g. https://g.co/gemini/share/4933efa27596 102 | 103 | The standard Academic Misconduct procedure applies for students believed to have ignored this categorisation. 
104 | 105 | For detailed guidance see 106 | 107 | **General guidance** 108 | 109 | Skills\@library hosts useful guidance on academic skills including specific guidance on academic writing 110 | 111 | # Submission requirements 112 | 113 | - A `.zip` file containing: 114 | - A PDF document (max 10 pages) 115 | - Reproducible code (`.qmd` file) 116 | - Any necessary data files, but do not include any large (above around 10 MB) datasets: provide links to these instead 117 | - Maximum file size: 40 MB 118 | - Submit via Minerva (Turnitin) 119 | 120 | See the formative assessment brief and feedback from that submission for guidance on topic selection and data sources. 121 | 122 | ## Technical Requirements 123 | 124 | - Write the report in a Quarto document to share the source code 125 | - Include all necessary code 126 | - Document data sources 127 | - Ensure reproducibility 128 | - Follow a consistent coding style 129 | 130 | ## Recommended structure 131 | 132 | {{< include report-structure.qmd >}} 133 | 134 | ## Presentation 135 | 136 | - Use clear headings and structure 137 | - Include appropriate figures and tables 138 | - Use consistent citation format 139 | - Provide complete references 140 | 141 | See [Quarto Citation Guide](https://quarto.org/docs/get-started/authoring/rstudio.html#citations) for reference formatting. 142 | 143 | # Assessment criteria 144 | 145 | Marks will be awarded based on the information provided in the [marking criteria](https://itsleeds.github.io/tds/marking-criteria.html) document. 146 | 147 | # Academic misconduct and plagiarism 148 | 149 | The university expects that all the work you do, which includes all forms of assessments submitted and examinations taken, meet the university’s standard for Academic Integrity. 150 | All forms of Academic Integrity are investigated through the Academic Misconduct Procedure. 151 | This applies to all taught elements of your study, including undergraduate programmes, taught postgraduate study, and taught elements of research degrees. 152 | Breaching academic integrity standards can lead to serious penalties. 153 | 154 | Guidance on Academic Integrity and Academic Misconduct can be found on the [For Students website pages](https://students.leeds.ac.uk/info/10110/academic-integrity). 155 | Full definitions of offences under the Academic Misconduct Procedure can be found in the [Academic Misconduct Procedure](https://secretariat.leeds.ac.uk/student-cases/academic-misconduct/). 156 | 157 | # Academic integrity 158 | 159 | All work must meet university standards for Academic Integrity. 160 | See the [For Students website](https://students.leeds.ac.uk/info/10110/academic-integrity) for guidance. 161 | 162 | # Support resources 163 | 164 | - [Quarto Citation Guide](https://quarto.org/docs/get-started/authoring/rstudio.html#citations) 165 | - [The course website](https://itsleeds.github.io/tds/) for additional resources. 166 | - Module forum for questions. 167 | 168 | # Resit information 169 | 170 | If you fail the module, you will be given the opportunity to resit the assessment. The resit will be capped at the pass mark (50%). 
171 | 172 | You will submit as before: 173 | 174 | - A `.zip` file containing: 175 | - A PDF document (max 10 pages) 176 | - Reproducible code (`.qmd` file) 177 | - Any necessary data files, but do not include any large (above around 10 MB) datasets: provide links to these instead 178 | - Maximum file size: 40 MB 179 | - Submission: Via Minerva (Blackboard Assignment) 180 | 181 | For the coursework you will submit a PDF document with a maximum of 10 pages that contains code and results demonstrating your transport data science skills. 182 | 183 | The topic must be completely different from the topic chosen in your first submission. -------------------------------------------------------------------------------- /schedule.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Schedule" 3 | execute: 4 | echo: false 5 | --- 6 | 7 | 8 | 9 | The timetable below is a user-friendly representation of the module timetable (see GitHub for [.csv](https://github.com/itsleeds/tds/blob/main/data/timetable.csv) and [.ics](https://github.com/itsleeds/tds/releases/download/2026/timetable.ics) versions). 10 | See [timetable.leeds.ac.uk](https://mytimetable.leeds.ac.uk/link?timetable.id=202526!module!5216C608F8794D77F15FA9D195AB3C06) and click on "[2025/26] January to May" in the dropdown menu that says "Month from selected day" for the official timetable. 11 | If you spot any discrepancies, please let us know. 12 | 13 | ```{r} 14 | #| include: false 15 | library(tidyverse) 16 | ``` 17 | 18 | ```{r} 19 | #| eval: false 20 | # Aim: create ical of all lectures and practicals of TDS 21 | # Start date of week 1 (source: https://www.leeds.ac.uk/term-dates) 22 | w_start = as.Date("2025-09-29") 23 | w_start 24 | lubridate::wday(w_start, label = TRUE) # start on a Monday 25 | week_num = c(1:11, paste0("C", 1:4), 12:22, paste0("C", 1:4), 23:30) 26 | n_weeks = length(week_num) 27 | week_commencing = seq(from = w_start, by = 7, length.out = n_weeks) 28 | weeks = tibble::tibble(week_num, week_commencing, day = lubridate::wday(week_commencing, label = TRUE)) 29 | 30 | # session timetable ------------------------------------------------------ 31 | 32 | session_ids = c( 33 | "intro", 34 | "getting", 35 | "ai", 36 | "od", 37 | "routing", 38 | "modelling" 39 | ) 40 | 41 | session_descriptions = c( 42 | "Introduction to transport data science (RL, MM, YY)", 43 | "Getting transport data (RL, YY)", 44 | "AI for transport planning (RL, YY)", 45 | "Origin-destination data (RL, YY)", 46 | "Routing (MM, RL)", 47 | "Joins, models and publishing your work (RL, MM, YY)" 48 | ) 49 | 50 | # Source: https://timetable.leeds.ac.uk/teaching/202425/reporting/textspreadsheet;?objectclass=module&idtype=name&identifier=TRAN5340M01&&template=SWSCUST+module+Individual&days=1-7&weeks=1-52&periods=1-21 51 | # They happen on Thursdays, 10:00-13:00 52 | session_day_of_week = 4 53 | session_start_time = "10:00" 54 | session_end_time = "13:00" 55 | # From https://timetable.leeds.ac.uk/teaching/202526/reporting/Individual?objectclass=module&idtype=name&identifier=TRAN5340M01&&template=SWSCUST+module+Individual&days=1-7&weeks=1-52&periods=1-21 56 | # Michael Sadler SR (LG.15) Multi-mode - weeks 14-17, 19, 22 57 | week_num = as.character(c(14, 15:17, 19, 23)) 58 | sessions = tibble::tibble(week_num = week_num) 59 | sessions = dplyr::inner_join(sessions, weeks) 60 | sessions$date = sessions$week_commencing + (session_day_of_week - 1) 61 | sessions$DTSTART = lubridate::ymd_hm(paste(sessions$date, 
session_start_time)) 62 | sessions$DTEND = lubridate::ymd_hm(paste(sessions$date, session_end_time)) 63 | sessions$duration = (sessions$DTEND - sessions$DTSTART) 64 | sessions$type = "Lecture and Seminar" 65 | sessions$SUMMARY = paste0("TDS Session ", 1:nrow(sessions), ": ", session_descriptions) 66 | sessions$LOCATION = "Michael Sadler SR (LG.15)" 67 | # Update Session 4 to ITS Room 1.11a-c (week 17) 68 | sessions$LOCATION[sessions$week_num == "17"] = "Institute for Transport Studies Room 1.11a-c" 69 | sessions$LOCATION[sessions$week_num == "23"] = "Mechanical Engineering LT B (2.37)" 70 | sessions$DESCRIPTION = paste0(sessions$SUMMARY, " Located in ", sessions$LOCATION) 71 | nrow(sessions) # 6 sessions 72 | 73 | # seminars ------------------------------------------------------ 74 | 75 | seminar_ids = c( 76 | "seminar1", 77 | "seminar2" 78 | ) 79 | seminar_descriptions = c( 80 | "Seminar 1: Juan Fonseca, University of Leeds: Traffic estimation and transport data visualisation", 81 | "Seminar 2: Will Deakin, Network Rail: Network simplification" 82 | ) 83 | 84 | seminar_day_of_week = c(4, 4) 85 | seminar_start_time = c("14:00", "10:00") 86 | seminar_end_time = c("17:00", "13:00") 87 | seminar = tibble::tibble(week_num = as.character(c(17, 21))) 88 | seminar = dplyr::inner_join(seminar, weeks) 89 | seminar$date = seminar$week_commencing + (seminar_day_of_week - 1) 90 | seminar$DTSTART = lubridate::ymd_hm(paste(seminar$date, seminar_start_time)) 91 | seminar$DTEND = lubridate::ymd_hm(paste(seminar$date, seminar_end_time)) 92 | seminar$duration = (seminar$DTEND - seminar$DTSTART) 93 | seminar$type = "Seminar" 94 | seminar$SUMMARY = paste0("TDS seminar ", 1:nrow(seminar)) 95 | seminar$LOCATION = "Institute for Transport Studies Room 1.11a-c" 96 | seminar$DESCRIPTION = paste0(seminar_descriptions, "") 97 | nrow(seminar) # 2 seminars 98 | 99 | # deadlines ------------------------------------------------------ 100 | 101 | deadline_ids = c( 102 | "computer setup", 103 | "portfolio draft", 104 | "portfolio deadline" 105 | ) 106 | deadline_descriptions = c( 107 | "Computer set-up", 108 | "Draft portfolio", 109 | "Deadline: coursework, 2pm" 110 | ) 111 | 112 | # Deadline is 15th May: https://minerva.leeds.ac.uk/webapps/blackboard/content/listContentEditable.jsp?content_id=_629207_1&course_id=_504933_1&mode=reset 113 | 114 | deadline_day_of_week = 5 115 | deadline_start_time = "13:00" 116 | deadline_end_time = c("15:01", "13:01", "13:01") 117 | deadline = tibble::tibble(week_num = as.character(c(14, 18, 25))) 118 | deadline = dplyr::inner_join(deadline, weeks) 119 | deadline$date = deadline$week_commencing + (deadline_day_of_week - 1) 120 | deadline$DTSTART = lubridate::ymd_hm(paste(deadline$date, deadline_start_time)) 121 | deadline$DTEND = lubridate::ymd_hm(paste(deadline$date, deadline_end_time)) 122 | deadline$duration = (deadline$DTEND - deadline$DTSTART) 123 | deadline$type = "Deadline" 124 | deadline$SUMMARY = paste0("TDS deadline ", 1:nrow(deadline)) 125 | deadline$LOCATION = "Online - Teams" 126 | deadline$DESCRIPTION = deadline_descriptions 127 | 128 | # setdiff(names(seminar), names(sessions)) 129 | timetable = rbind(sessions, seminar, deadline) 130 | timetable$duration 131 | 132 | timetable$UID = purrr::map_chr(1:nrow(timetable), ~ calendar::ic_guid()) 133 | timetable = timetable |> 134 | arrange(DTSTART) 135 | units(timetable$duration) = "hours" 136 | 137 | sum(timetable$duration) # 20 - up to 25.05 hours of contact time 138 | 139 | ic = calendar::ical(timetable) 140 | tt_min = 
dplyr::select(timetable, SUMMARY, DESCRIPTION, DTSTART, DTEND, LOCATION, UID) 141 | ic = calendar::ical(tt_min) 142 | class(tt_min$DTSTART) 143 | format(tt_min$DTSTART, "%Y-%m-%d %H:%M") 144 | tt_csv = tt_min |> 145 | mutate( 146 | time = DTSTART, 147 | duration = round(as.numeric(DTEND - DTSTART) / 60) 148 | ) |> 149 | select(SUMMARY, DESCRIPTION, time, duration, LOCATION) 150 | tt_csv$time 151 | names(tt_csv) = tolower(names(tt_csv)) 152 | calendar::ic_write(ic, "data/timetable.ics") 153 | readLines("data/timetable.ics") 154 | readr::write_csv(tt_csv, "data/timetable.csv") 155 | # Export timetable to Excel: 156 | writexl::write_xlsx(tt_csv, "~/OneDrive/career/modules/tds/2026/TRAN5340M-timetable-draft-v2.xlsx") 157 | ``` 158 | 159 | ```{r} 160 | #| eval: false 161 | # system("gh release create 2026") 162 | system("gh release upload 2026 data/timetable.ics data/timetable.csv --clobber") 163 | ``` 164 | 165 | ```{r} 166 | #| include: false 167 | tt_csv = readr::read_csv("data/timetable.csv") 168 | ``` 169 | 170 | 171 | ```{r} 172 | timetable = tt_csv |> 173 | mutate( 174 | session_code = case_when( 175 | str_detect(summary, "TDS Session") ~ paste0("s", str_extract(summary, "\\d")), 176 | str_detect(summary, "TDS seminar") ~ paste0("sem", str_extract(summary, "\\d")), 177 | str_detect(summary, "TDS deadline") ~ paste0("d", str_extract(summary, "\\d")), 178 | TRUE ~ "unknown" 179 | ) 180 | ) |> 181 | transmute( 182 | `Session ID` = paste0( 183 | "", 186 | session_code, 187 | "" 188 | ), 189 | Description = description, 190 | Time = format(as.POSIXct(time), "%Y-%m-%d %H:%M"), 191 | Duration = duration, 192 | Location = location 193 | ) 194 | DT::datatable(timetable, rownames = FALSE, escape = FALSE, options = list(pageLength = 20)) 195 | ``` 196 | 197 | -------------------------------------------------------------------------------- /index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Transport Data Science" 3 | number-sections: false 4 | --- 5 | 6 | This module focuses on applying data science techniques to solve real-world transport problems. 7 | Based at the University of Leeds' Institute for Transport Studies (module code [TRAN5340M](https://webprod3.leeds.ac.uk/catalogue/dynmodules.asp?Y=202425&M=TRAN-5340M)), the course is led by Robin Lovelace, Professor of Transport Data Science and developer of several data-driven solutions for effective transport planning. 8 | 9 | The course has evolved over a decade of teaching and research in the field. It aims to equip you with up-to-date and future-proof skills through practical examples and reproducible workflows using industry-standard data science tools. 10 | 11 | # Prerequisites 12 | 13 | Before taking this course, we expect you to have some experience with programming, data science, and general computing. 14 | While experience with geographic data is helpful, it is not required. 15 | See the [installation](https://itsleeds.github.io/tds/#software-requirements-and-installation) section below for details. 16 | 17 | ## Hardware 18 | 19 | We highly recommend having access to a computer with at least 8 GB of RAM that you have permission to install software on. 20 | 21 | Alternatively, you could use cloud-based services such as RStudio Cloud, Google Colab, or GitHub Codespaces. However, you would need to be comfortable using these services and may miss out on some benefits of using your own computer. 
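If you are unsure how much RAM your machine has, you can check from within R. The snippet below is a minimal sketch using the `benchmarkme` package, which is not a module dependency, so install it first:

```{r}
#| eval: false
# Check installed RAM against the 8 GB recommendation
# (benchmarkme is an optional helper package, not a module dependency):
install.packages("benchmarkme")
benchmarkme::get_ram()
```
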
22 | 23 | ## Computing experience 24 | 25 | You should be comfortable with general computing tasks, such as: 26 | 27 | - Creating folders and managing files 28 | - Installing software 29 | - Using command line interfaces (PowerShell in Windows, Terminal in macOS, or Linux shell) 30 | 31 | ## Data science experience prerequisites 32 | 33 | Prior experience using R or Python is essential. This could include: 34 | 35 | - Using these languages in professional work 36 | - Experience from previous degrees 37 | - Completion of relevant online courses 38 | 39 | Students can demonstrate this prerequisite knowledge by showing evidence they have: 40 | 41 | - Worked with R previously 42 | - Completed online courses such as the first 4 sessions in the [RStudio Primers series](https://rstudio.cloud/learn/primers) 43 | - Completed [DataCamp's Free Introduction to R course](https://www.datacamp.com/courses/free-introduction-to-r) 44 | 45 | Substantial programming and data science experience in previous professional or academic work using languages like R or Python also satisfies the prerequisite requirements. 46 | 47 | # Software requirements and installation 48 | 49 | To participate in this course, you'll need to install specific software. 50 | 51 | The teaching is delivered primarily in R, with some Python code and examples. You should install R (recommended), Python, or both on your computer. 52 | 53 | We recommend using R for sessions and coursework unless you have a specific reason to use Python. If you choose Python or another language, please note: 54 | 55 | - You will receive less direct support 56 | - You'll need skills to set up and manage your own environments 57 | 58 | We welcome translations of R code examples into other languages. Contributions to course materials via [Pull Requests on GitHub](https://github.com/itsleeds/tds/pulls) are encouraged. 59 | 60 | ## Quickstart with GitHub Codespaces 61 | 62 | For a quick cloud-based setup, you can use GitHub Codespaces to access the course materials: 63 | 64 | 1. Sign up to GitHub 65 | 2. Fork the repository 66 | 3. 
Click the "Open in GitHub Codespaces" button above 67 | 68 | Alternatively, use the following link: 69 | 70 | [![Open in GitHub 71 | Codespaces](https://img.shields.io/badge/Open%20in-GitHub%20Codespaces-blue?logo=github.png)](https://github.com/codespaces/new/itsleeds/tds?quickstart=1) 72 | 73 | ## R 74 | 75 | Install a recent version of R (4.3.0 or above) and an IDE: 76 | 77 | - R from [cran.r-project.org](https://cran.r-project.org/) 78 | - RStudio from [rstudio.com](https://rstudio.com/products/rstudio/download/#download) (recommended) 79 | - Alternatively, VS Code with the R extension installed (if you have prior experience with it) 80 | 81 | You'll also need to install R packages: 82 | 83 | - Individual packages can be installed by opening RStudio and typing commands like `install.packages("stats19")` in the R console 84 | - To install all dependencies for the module at once, run the following command in the R console: 85 | 86 | ```{r} 87 | #| eval: false 88 | if (!requireNamespace("remotes", quietly = TRUE)) { 89 | install.packages("remotes") 90 | } 91 | remotes::install_github("itsleeds/tds") 92 | ``` 93 | 94 | See [Section 1.5 of the online guide Reproducible Road Safety Research with R](https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio) for instructions on how to install key packages we will use in the module.^[ 95 | For further guidance on setting-up your computer to run R and RStudio for spatial data, see these links, we recommend 96 | Chapter 2 of Geocomputation with R (the Prerequisites section contains links for installing spatial software on Mac, Linux and Windows): https://r.geocompx.org/spatial-class.html and Chapter 2 of the online book *Efficient R Programming*, particularly sections 2.3 and 2.5, for details on R installation and [set-up](https://csgillespie.github.io/efficientR/set-up.html) and the 97 | [project management section](https://csgillespie.github.io/efficientR/set-up.html#project-management). 98 | ] 99 | 100 | ## Python 101 | 102 | If you choose to use Python, you should be comfortable with: 103 | 104 | - Installing Python 105 | - Managing your own Python environment 106 | - Installing packages and resolving package conflicts 107 | 108 | For Python users, we recommend using an environment manager such as: 109 | 110 | - `pixi` (which can manage both R and Python environments) 111 | - Docker (best practice for reproducibility and isolation) 112 | 113 | ## Docker (advanced) 114 | 115 | We maintain a Docker image containing all necessary software to complete the course with VS Code, Quarto, and a Devcontainer setup. 116 | 117 | **Advantages:** 118 | 119 | - Ensures reproducibility 120 | - Saves time installing software 121 | 122 | **Disadvantages:** 123 | 124 | - Docker can be challenging to install 125 | - Difficult to use if you're unfamiliar with Docker 126 | 127 | We recommend this approach only for people who are confident with Docker or willing to invest time learning it. 128 | 129 | For guidance, see: 130 | 131 | - [Docker installation instructions](https://docs.docker.com/get-docker/) 132 | - [Devcontainers documentation on github.com](https://github.com/devcontainers) 133 | - The tds [Dockerfile](https://github.com/itsleeds/tds/blob/main/Dockerfile) and [devcontainer.json](https://github.com/itsleeds/tds/blob/main/.devcontainer/devcontainer.json) 134 | 135 | # R, Python or other? 
136 | 137 | While the module focuses on methods implementable in many languages, we expect most participants will use R for sessions and the final course project. 138 | 139 | We recommend R because: 140 | 141 | - It provides a data science environment *with batteries included* 142 | - It offers many mature packages for data manipulation, visualization, and statistical analysis 143 | - These packages are available within seconds without worrying about conflicts or environment management 144 | - The module team has the most experience with R 145 | 146 | Python is another excellent choice for transport data science. Many of our R code examples have been ported to Python, as illustrated below. This example shows how to load the R package `{sf}` and its Python equivalent, `{geopandas}`: 147 | 148 | ::: {.panel-tabset} 149 | 150 | ## R 151 | 152 | ```r 153 | library(sf) 154 | geo_data = read_sf("geo_data.gpkg") 155 | ``` 156 | 157 | ## Python 158 | 159 | ```python 160 | import geopandas as gpd 161 | geo_data = gpd.read_file("geo_data.gpkg") 162 | ``` 163 | 164 | ::: 165 | 166 | If you choose Python, you will need to: 167 | 168 | - Manage your own Python environment 169 | - Translate R code examples into Python 170 | 171 | For the adventurous, you could try using: 172 | 173 | - Julia 174 | - JavaScript/TypeScript (e.g., via Observable) 175 | - Other programming languages 176 | 177 | However, please note that support for these alternative languages will be limited. 178 | 179 | # Contributing to the course 180 | 181 | See the [README](https://github.com/itsleeds/tds/tree/main?tab=readme-ov-file#quickstart) for instructions on how to contribute to the course materials. 182 | -------------------------------------------------------------------------------- /slides/road-safety.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Reproducible Data Science for Road Safety Research" 3 | author: "Professor Robin Lovelace" 4 | date: "September 2, 2025" 5 | format: revealjs 6 | bibliography: references.bib 7 | execute: 8 | eval: false 9 | --- 10 | 11 | ## Welcome! 12 | 13 | Reproducible data science for road safety research 14 | 15 | **RS5C Pre-conference Workshop** 16 | 17 | 2nd September 2025 18 | 19 | ## Agenda 20 | 21 | - **13:00-14:00** Networking lunch 22 | - **14:00-14:15** Introduction to reproducible research 23 | - **14:15-14:30** New datasets and tools (Richard Owen, Agilysis) 24 | - **14:30-16:00** Workshop 25 | - **16:00 onwards** Networking and walk to RS5C reception 26 | 27 | ## Prerequisites 28 | 29 | ### To run the code 30 | 31 | - A computer to run the code 32 | - Either: A laptop with R, RStudio or VS Code and Docker or similar installed to run the code locally 33 | - Or: Access to a cloud-based environment for data science (e.g., GitHub Codespaces or Posit Cloud) 34 | 35 | ## Learn and share 36 | 37 | The following will help: 38 | 39 | - An interest in road safety and knowledge of road traffic casualty datasets 40 | - A willingness to learn and share (LinkedIn, BlueSky, etc) 41 | - A GitHub account (to ask questions on the Discussions page and share your own code) 42 | - Familiarity with data science tools, e.g. R, Python, RStudio, VS Code 43 | 44 | ## Housekeeping 45 | 46 | - Connect to the UoL-Guest Wi-Fi network and enter your details. 47 | - GitHub account sign-up if not done already. 48 | - R and RStudio installation check, locally or in a cloud environment. 
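As a quick pre-check before the fuller setup check later in these slides, you can confirm your R version from the console. This is a minimal sketch; the second line assumes the `rstudioapi` package is installed (it usually is in RStudio setups):

```{r}
#| eval: false
#| echo: true
# Confirm your R version and IDE before the workshop:
R.version.string
rstudioapi::isAvailable() # TRUE if running inside RStudio
```
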
49 | 50 | ## WiFi 51 | 52 | ![](images/paste-2.png) 53 | 54 | ## Setup check 55 | 56 | To check you have the necessary software installed, try running the following code. 57 | 58 | ```{r} 59 | #| eval: false 60 | #| echo: true 61 | pkgs = c("tidyverse", "stats19") 62 | if (!requireNamespace("pak", quietly = TRUE)) install.packages("pak") 63 | pak::pkg_install(pkgs) 64 | ``` 65 | 66 | You should be able to generate the map on the next slide. 67 | 68 | ## Setup check: The result 69 | 70 | ```{r} 71 | #| label: setup 72 | #| message: false 73 | #| warning: false 74 | #| echo: false 75 | #| output: false 76 | library(tidyverse) 77 | library(stats19) 78 | theme_set(theme_minimal()) 79 | collisions_2023 = stats19::get_stats19(year = 2023, type = "collision") 80 | collisions_2023_sf = stats19::format_sf(collisions_2023) 81 | collisions_west_yorkshire_sf = collisions_2023_sf |> 82 | filter(police_force == "West Yorkshire") |> 83 | arrange(desc(accident_severity)) 84 | ``` 85 | 86 | ```{r} 87 | #| label: plot 88 | #| echo: false 89 | ggplot() + 90 | geom_sf( 91 | data = collisions_west_yorkshire_sf, 92 | aes(colour = accident_severity, alpha = accident_severity) 93 | ) + 94 | scale_alpha_manual(values = c(0.8, 0.4, 0.2)) 95 | ``` 96 | 97 | ## [Why](https://photos.lovelaces.org/share/eOsyK8ibSUKMgnCJFaP_DF76l4kKrMvDhQXidzvbh8KbOyfTawwZNREMN8YUQvUuV4s) are we doing this? 98 | 99 | 102 | 103 | [![](images/paste-8.png)](https://photos.lovelaces.org/share/eOsyK8ibSUKMgnCJFaP_DF76l4kKrMvDhQXidzvbh8KbOyfTawwZNREMN8YUQvUuV4s) 104 | 105 | ## Workshop Tasks 106 | 107 | - Importing collision, casualty and vehicle tables (20 min) 108 | - Temporal visualisation and aggregation (20 min) 109 | - Spatial visualisation and aggregation (30 min) 110 | - Joining STATS19 tables (20 min) 111 | 112 | ## Task 1: Importing tables (20 min) 113 | 114 | - Learn how to load the main STATS19 tables (collision, casualty, vehicle) using the `stats19` R package. 115 | - Explore the structure and key variables in each table. 116 | 117 | See [Chapter 4](https://itsleeds.github.io/rrsrr/04-pkgs.html) and [Chapter 8](https://itsleeds.github.io/rrsrr/08-join.html) for details. 118 | 119 | ## Task 2: Temporal visualisation (20 min) 120 | 121 | - Aggregate collision data by time (e.g., by month or day of week). 122 | - Create time series plots to identify trends and patterns. 123 | 124 | See [Chapter 6 on temporal data](https://itsleeds.github.io/rrsrr/06-time.html). 125 | 126 | ## Task 3: Spatial visualisation (30 min) 127 | 128 | - Convert collision data to spatial format and plot on a map. 129 | - Aggregate collisions by area (e.g., by local authority). 130 | - Create maps to visualise spatial patterns. 131 | 132 | See [Chapter 7 on spatial data](https://itsleeds.github.io/rrsrr/07-space.html). 133 | 134 | ![](https://itsleeds.github.io/rrsrr/figures/road-casualties.png) 135 | 136 | ## Task 4: Joining tables (20 min) 137 | 138 | - Join collision, casualty, and vehicle tables to enrich your analysis. 139 | - Explore relationships between different aspects of road traffic incidents. 140 | 141 | See [Chapter 8 on joining tables](https://itsleeds.github.io/rrsrr/08-join.html). 142 | 143 | ## Bonus tasks 144 | 145 | For fast finishers or anyone wanting to go the extra mile: 146 | 147 | 1. **Create a repo** and share your work on GitHub. 148 | 2. **Reproduce a map** used in a Leeds City Council consultation. 149 | 3. **Analyse data** to answer a new research question. 150 | 4. **Contribute upstream** to an open source road safety project. 
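## Starter code for the workshop tasks

As a starting point for Tasks 1 and 4, here is a minimal sketch. It assumes the `stats19` and `dplyr` packages are installed, and that `accident_index` is the key shared across the three tables (check the column names in your downloaded data):

```{r}
#| eval: false
#| echo: true
library(stats19)
library(dplyr)
# Task 1: import the three main STATS19 tables
collisions = get_stats19(year = 2023, type = "collision")
casualties = get_stats19(year = 2023, type = "casualty")
vehicles = get_stats19(year = 2023, type = "vehicle")
# Task 4: add casualty details to the collision table
# (one row per casualty, so the result has more rows than `collisions`)
collisions_casualties = left_join(collisions, casualties, by = "accident_index")
```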
151 | 152 | ## Output you'll be making in Leeds-focussed bonus task 153 | 154 | ![](https://tdscience.github.io/course/images/paste-2.png) 155 | 156 | ------------------------------------------------------------------------ 157 | 158 | ::::: columns 159 | ::: {.column .fragment .fade-in} 160 | ### Why reproducible research? 161 | 162 | Source: [Wikipedia](https://en.wikipedia.org/wiki/Karl_Popper) 163 | 164 | ![](https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Karl_Popper.jpg/250px-Karl_Popper.jpg) 165 | ::: 166 | 167 | ::: {.column .fragment .fade-in} 168 | Source: [Google Gemini](https://g.co/gemini/share/849e6bdb9f27) 169 | 170 | ![](images/paste-3.png) 171 | ::: 172 | ::::: 173 | 174 | ## The Logic of Scientific Discovery 175 | 176 | - **Core Idea:** Science can't prove theories true, only prove them false. 177 | - **Falsifiability:** A scientific theory must be testable and able to be disproven. 178 | - **How Science Progresses:** Through **conjecture** (proposing theories) and **refutation** (trying to prove them wrong). 179 | - **Goal:** Not to find "truth," but to eliminate "untruth." 180 | 181 | ## Why reproducibility is needed for your work to be scientific 182 | 183 | - **The Test of Falsification:** For a finding to be truly scientific, it must be **reproducible**. 184 | - **Role of Replication:** A failed replication is not a failure of the researcher, but a successful attempt at **falsification**. 185 | - **Open Science:** Popper's ideas champion the need for transparency (open methods, data, code) so others can perform the critical tests needed to advance science. 186 | 187 | ## Stages of open and reproducible science 188 | 189 | ::::: columns 190 | ::: {.column width="40%"} 191 | 1. Open access to the publications 192 | 193 | 2. Open access to sample (synthetic if sensitive) data 194 | 195 | 3. Open access to the code 196 | 197 | 4. Fully reproducible paper published with documentation 198 | 199 | 5. Project deployed in tool for non-specialist use 200 | ::: 201 | 202 | ::: {.column width="60%"} 203 | ![](images/paste-4.png) 204 | ::: 205 | ::::: 206 | 207 | ## What can you do with reproducible research? 208 | 209 | - **Validate Findings:** Others can replicate your work to confirm results. 210 | - **Build on Existing Work:** Researchers can use your methods and data to explore new questions. 211 | - **Increase Trust:** Transparency in research enhances credibility and trustworthiness. 212 | - **Facilitate Collaboration:** Openly shared resources promote teamwork and knowledge exchange. 213 | 214 | ## Example 1: Increasing inequalities in cycling casualties 215 | 216 | ![](images/paste-5.png) 217 | 218 | [@vidaltortosa2021] 219 | 220 | ------------------------------------------------------------------------ 221 | 222 | ### Example 2: Contraflow safety [@tait2023] 223 | 224 | ::::: columns 225 | ::: {.column width="50%"} 226 | ![](images/paste-6.png) 227 | ::: 228 | 229 | ::: {.column width="50%"} 230 | ![](images/paste-7.png) 231 | ::: 232 | ::::: 233 | 234 | ## Thank you! 235 | 236 | - **Questions?** Get in touch via email or [GitHub Discussions](https://github.com/itsleeds/tds/discussions). 237 | - See the course homepage at https://itsleeds.github.io/tds/reproducible-road-safety-workshop 238 | - Interested in more teaching/research opportunities? See the upcoming course on 18th to 19th September and get in touch! 
239 | - Over to Richard Owen (Agilysis) for the next presentation before the practical
--------------------------------------------------------------------------------
/reading.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Reading List"
3 | bibliography: tds.bib
4 | ---
5 | 
6 | This reading list contains key resources for the Transport Data Science module, organized by topic.
7 | 
8 | # Core Reading
9 | 
10 | - [R for Data Science](https://r4ds.had.co.nz/) [@wickham_data_2023]
11 |   - This is an excellent and very popular applied introduction to data science with R, covering the Tidyverse and data visualization. It is open access and based on open code. See [github.com/hadley/r4ds](https://github.com/hadley/r4ds) for insights into how Quarto can be used to embed code in written outputs.
12 | - [Geocomputation with R](https://r.geocompx.org/) [@lovelace_geocomputation_2025]
13 |   - A guide to geographic data analysis, visualization, and modeling with R.
14 |   - The Transportation chapter, which can be found online at [r.geocompx.org/transport.html](https://r.geocompx.org/transport.html), is a key resource for this module.
15 | - [Geocomputation with Python](https://py.geocompx.org/) [@dorman_geocomputation_2025]
16 |   - Resource for working with geographic data using Python, covering both vector and raster data models; core reading only if you are using Python for the sessions.
17 | 
18 | # Skills Development
19 | 
20 | There is a wealth of material in physical books and online teaching the skills needed for this course.
21 | The advantage of online materials is that they can be updated more easily, and are often free to access.
22 | Below are some key resources for developing the skills needed for this course.
23 | Search online for topics you are interested in and see the [Quarto gallery of books](https://quarto.org/docs/gallery/#books) and the [bookdown.org](https://bookdown.org/) website for more resources.
24 | 
25 | ## Key Skills
26 | 
27 | - [Quarto](https://quarto.org/) documentation [@allaire_quarto_2024]
28 | 
29 | ```{=html}
30 | 
36 | ```
37 | 
38 |   - The software used to create the Transport Data Science course materials and [numerous websites, presentations, dashboards, and books](https://quarto.org/docs/gallery/), Quarto is a powerful tool for creating reproducible documents with code and data.
39 |   - See the [technical writing](https://quarto.org/docs/visual-editor/technical.html) page of Quarto's documentation for key information on how to add references, figure captions, and more.
40 | 
41 | - [Introduction to GitHub](https://github.com/skills/introduction-to-github) [@heis_introduction_2025]
42 | 
43 |   - A good starting point for learning how to use GitHub for version control and collaboration.
44 |   - See also their introduction to Devcontainers at [docs.github.com/en/codespaces/](https://docs.github.com/en/codespaces/)
45 | 
46 | ## Python
47 | 
48 | - [Course Materials for: Geospatial Data Science](https://github.com/mszell/geospatialdatascience) [@szell_course_2025]
49 |   - Course materials covering various aspects of geospatial data science, including data analysis, visualization, and working with street networks using Python.
50 | - [Modern Polars](https://kevinheavey.github.io/modern-polars/) [@heavey]
51 |   - A side-by-side comparison of the Polars and Pandas libraries.
53 | - [A course on Geographic Data Science](https://darribas.org/gds_course/content/home.html) [@arribas-bel_course_2019]
54 |   - Free and open source online book on using GeoPandas and other Python libraries for geographic data analysis.
55 | - [Python for Data Analysis](https://wesmckinney.com/book/) [@mckinney_python_2022]
56 |   - Data wrangling with Pandas, NumPy, and Jupyter, written by the creator of the Pandas library.
57 | 
58 | ## R
59 | 
60 | - [Advanced R](https://adv-r.hadley.nz/)
61 |   - A comprehensive guide to advanced programming in R, covering topics such as functional programming and object-oriented programming.
62 | 
63 | # Software and Tools
64 | 
65 | - [stats19](https://itsleeds.github.io/stats19/) [@lovelace_stats19_2019]
66 |   - R package for working with official road crash data
67 | - [stplanr: A Package for Transport Planning](https://doi.org/10.32614/RJ-2018-053) [@lovelace_stplanr_2018]
68 |   - R package for transport planning with various routing and analysis functions
69 | - [OSMnx: New Methods for Acquiring, Constructing, Analyzing, and Visualizing Complex Street Networks](https://doi.org/10/gbvjxq) [@boeing_osmnx_2017]
70 |   - Useful, if slightly out of date, paper for anyone working with street network data in Python.
71 | - [A/B Street](https://zenodo.org/record/6331922) [@abstreet_2022]
72 |   - A traffic simulation game exploring how small changes to streets can improve transportation in cities. Useful for understanding the impact of urban design on transport systems.
73 | - [osm2streets](https://github.com/a-b-street/osm2streets) [@carlino_osm2streets_2025]
74 |   - Tool for converting OpenStreetMap data to detailed street networks, useful for transport modeling and analysis.
75 |   - See Python bindings that can convert OSM data into polygons representing streets as GeoPandas dataframes at [github.com/a-b-street/osm2streets-py](https://github.com/a-b-street/osm2streets/tree/main/osm2streets-py)
76 | - [od2net](https://github.com/Urban-Analytics-Technology-Platform/od2net) [@carlino_od2net_2024]
77 |   - Tool for converting origin-destination data into network flows, useful for transport modeling and analysis.
78 | 
79 | # Research Applications
80 | 
81 | - [The Propensity to Cycle Tool](https://doi.org/10.5198/jtlu.2016.862) [@lovelace_propensity_2017]
82 |   - Case study of an open source transport planning tool
83 | - [Growing Urban Bicycle Networks](http://arxiv.org/abs/2107.02185) [@szell_growing_2021]
84 |   - This paper explores methods for auto-suggesting transport network improvements, with reference to reproducible Python code
85 | 
86 | # Data Visualization
87 | 
88 | - [The Visual Display of Quantitative Information](https://www.edwardtufte.com/tufte/books_vdqi) [@tufte_visual_2001]
89 |   - Classic work on the principles of data visualization
90 | - [Visualization Curriculum](https://idl.uw.edu/visualization-curriculum/intro.html) [@heer_visualization_2021]
91 |   - A data visualization curriculum of interactive notebooks, using Vega-Lite and Altair. This book contains a series of Python-based Jupyter notebooks, with a corresponding set of JavaScript notebooks available online on Observable.
92 | 
93 | ## Miscellaneous
94 | 
95 | - [Data Science for Transport: A Self-Study Guide with Computer Exercises](https://doi.org/10.1007/978-3-319-72953-4) [@fox_data_2018]
96 |   - An introduction to transport data science with hands-on examples, slightly out of date as of 2025.
97 | - [Reproducible Road Safety Research with R](https://itsleeds.github.io/rrsrr/) [@lovelace_reproducible_2020] 98 | - Introductory guide for analyzing road safety data in R 99 | - [Open source tools for geographic analysis in transport planning](https://doi.org/10/ghtnrp) [@lovelace_open_2021] 100 | - Review of open source tools available for transport planning and analysis. 101 | - [Python for Data Science](https://aeturrell.github.io/python4DS/) [@turrell_python_2025] 102 | - A modern guide to data science using Python based on R for Data Science, with practical examples and clear explanations. 103 | - [The Geography of Transport Systems](https://transportgeography.org/) [@rodrigue_geography_2013] 104 | - Comprehensive textbook on transport geography and systems 105 | - [Modelling Transport](https://www.wiley.com/en-us/Modelling+Transport%2C+4th+Edition-p-9781118941485) [@ortuzars._modelling_2001] 106 | - Foundational text on transport modeling methods 107 | - [Building Reproducible Analytical Pipelines with R](https://raps-with-r.dev/) [@rodrigues_building_2023] 108 | - A guide to the data engineering side of data science, with a focus on reproducibility and automation. 109 | - Papers investigating the relationships between new contraflow interventions and traffic levels and collision rates in London [@tait2024; @tait2023] 110 | 111 | See the [full bibliography](https://www.zotero.org/groups/418217/energy-and-transport/collections/R38L2EXB) on Zotero for more resources, and feel free to suggest additions by opening an issue in the tds issue tracker. 112 | 113 | # References -------------------------------------------------------------------------------- /d2/assessment-brief.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Formative assessment brief: portfolio plan and reproducible data science code" 3 | bibliography: ../tds.bib 4 | number-sections: true 5 | --- 6 | 7 | ```{r} 8 | #| echo: false 9 | #| eval: false 10 | # docx format results in higher accessibility scores on Minerva: 11 | quarto::quarto_render("d2/index.qmd", output_format = "docx", output_file = "tds-formative-assessment-brief.docx") 12 | # file.rename("docs/tds-formative-assessment-brief.docx", "~/OneDrive/career/modules/tds/2024-25/TRAN5340M TDS Formative Assessment Brief.docx") 13 | # C:\Users\georl_admin\OneDrive - University of Leeds 14 | file.rename("docs/tds-formative-assessment-brief.docx", "C:/Users/georl_admin/OneDrive - University of Leeds/career/modules/tds/2024-25/TRAN5340M TDS Formative Assessment Brief.docx") 15 | ``` 16 | 17 | ```{bash} 18 | #| eval: false 19 | #| echo: false 20 | gh release upload 2025 formative-assessment-brief.docx 21 | gh release upload 2025 formative-assessment-brief.pdf 22 | ``` 23 | 24 | 25 | 26 | # Assessment in brief 27 | 28 | ## Module code and title 29 | 30 | **TRAN5340M** - Transport Data Science 31 | 32 | ## Assessment title 33 | 34 | Formative Coursework: Portfolio Plan and Reproducible Code 35 | 36 | ## Assessment type 37 | 38 | Portfolio Plan and Reproducible Code Submission 39 | 40 | ## Learning outcomes 41 | 42 | 1. To develop a clear plan for applying data science techniques to a transport problem. 43 | 2. To demonstrate the ability to work with datasets and produce reproducible code. 44 | 3. To critically engage with academic literature and formulate research questions. 45 | 46 | Note: this is formative and not formally assessed, but feedback will be provided. 
47 | 
48 | ## Deadline
49 | 
50 | **Non-assessed submission deadline**: 28th February 2025, 13:59.
51 | 
52 | ## Feedback
53 | 
54 | Feedback will be provided within 15 working days of submission.
55 | Written feedback will be provided alongside guidance on how to proceed with the final coursework.
56 | 
57 | ## Contact
58 | 
59 | **Professor Robin Lovelace**\
60 | r dot lovelace \[at\] leeds.ac.uk
61 | 
62 | ------------------------------------------------------------------------
63 | 
64 | ## Assessment summary
65 | 
66 | This formative coursework is designed to help you plan and receive feedback on your final coursework.
67 | You will submit a `.zip` file containing a PDF document and reproducible code.
68 | The document should outline your topic, datasets, research questions, and analysis plan.
69 | Feedback will be provided to guide your final submission.
70 | 
71 | The purpose of this formative assessment is:
72 | 
73 | - To allow you to ask questions to the course team (e.g., "Does this sound like a reasonable input dataset and topic?").
74 | - To describe progress on reading input datasets and the analysis plan.
75 | - To receive feedback on your approach before the final submission.
76 | 
77 | ------------------------------------------------------------------------
78 | 
79 | # Use of GenAI
80 | 
81 | **Generative AI category: GREEN**
82 | 
83 | Under this category, AI tools are actively encouraged and can be used extensively.
84 | 
85 | In this assessment, AI tools can be utilised to:
86 | 
87 | - Generate, test, and debug code for your transport data analysis
88 | - Assist with data visualization and mapping
89 | - Provide explanations of transport concepts and methods
90 | - Help with code optimization and best practices
91 | - Support your research on the topic by suggesting areas to investigate
92 | - Give feedback on content and provide proofreading
93 | - Accelerate your learning and productivity
94 | 
95 | **Important:** You must understand and be able to explain all code and analysis you submit, whether AI-generated or not. Document your AI usage in reflective sections of your portfolio.
96 | 
97 | In this assessment, AI tools cannot be utilised to:
98 | 
99 | - produce the entirety of, or sections of, a piece of work that you submit for assessment beyond that which is outlined above.
100 | 
101 | The use of Generative AI must be acknowledged in an ‘Acknowledgements’ section of any piece of academic work where it has been used as a functional tool to assist in the process of creating academic work.
102 | 
103 | The minimum requirements to include in the acknowledgement are:
104 | 
105 | - Name and version of the generative AI system used, e.g. ChatGPT-4.0
106 | - Publisher (company that made the AI system), e.g. OpenAI
107 | - URL of the AI system
108 | - Brief description (single sentence) of the context in which the tool was used.
109 | 
110 | For example: “I acknowledge the use of ChatGPT-3.5 (OpenAI, ) to summarise my initial notes and to proofread my final draft.”
111 | Best practice is to include a link to the exact prompt used to generate the content, e.g. https://g.co/gemini/share/4933efa27596
112 | 
113 | The standard Academic Misconduct procedure applies to students believed to have ignored this categorisation.
114 | 
115 | For detailed guidance see
116 | 
117 | **General guidance**
118 | 
119 | Skills\@library hosts useful guidance on academic skills, including specific guidance on academic writing
120 | 
121 | # Submission requirements
122 | 
123 | You will submit a `.zip` file containing:
124 | 
125 | 1.
A **concise PDF document** (recommended length: 2 pages, absolute maximum: 5 pages) outlining:
126 |    - A draft title of your topic.
127 |    - The main dataset you will use and other potential datasets.
128 |    - Ideas on a research question.
129 |    - Questions you would like to ask about the topic (e.g., "Is this a suitable dataset?").
130 |    - At least **2 references** to academic literature related to the topic.
131 |    - Minimal code and/or a description of where you accessed the data and how you imported it.
132 |    - Any preliminary analysis you have done.
133 |    - A suggested structure for the document:
134 |      - Topics considered.
135 |      - Input datasets.
136 |      - Analysis plan (e.g., a workflow diagram as shown [here](https://user-images.githubusercontent.com/1825120/127524923-7d9f5511-84a6-430b-8de9-a603a5524f39.png)).
137 |      - Motivation for choosing this topic.
138 |      - Questions and options.
139 | 2. **Reproducible code** in a `.qmd` file.
140 | 
141 | ## Rendering
142 | 
143 | - If you cannot render to PDF directly, render to HTML and convert to PDF by printing to PDF from your browser.
144 | 
145 | ## Presentation
146 | 
147 | You must cite all supporting evidence, using appropriate references and a consistent, professional style.
148 | 
149 | See [the authoring tutorial with RStudio at quarto.org](https://quarto.org/docs/get-started/authoring/rstudio.html#citations) for guidance on how to add citations to your document in RStudio's Visual Editor mode.
150 | 
151 | # Assessment criteria
152 | 
153 | This assessment is formative, so it is not marked, but you will receive feedback on the following criteria:
154 | 
155 | - Clarity and feasibility of the proposed topic and research question.
156 | - Appropriateness of the selected datasets.
157 | - Engagement with academic literature.
158 | - Quality of the analysis plan and workflow diagram.
159 | - Quality and reproducibility of the code and documentation.
160 | 
161 | See the [marking criteria](https://itsleeds.github.io/tds/marking-criteria.html) for more details.
162 | 
163 | # Academic misconduct and plagiarism
164 | 
165 | The university expects that all the work you do, which includes all forms of assessments submitted and examinations taken, meets the university’s standard for Academic Integrity.
166 | All suspected breaches of Academic Integrity are investigated through the Academic Misconduct Procedure.
167 | This applies to all taught elements of your study, including undergraduate programmes, taught postgraduate study, and taught elements of research degrees.
168 | Breaching academic integrity standards can lead to serious penalties.
169 | 
170 | Guidance on Academic Integrity and Academic Misconduct can be found on the [For Students website pages](https://students.leeds.ac.uk/info/10110/academic-integrity).
171 | Full definitions of offences under the Academic Misconduct Procedure can be found in the [Academic Misconduct Procedure](https://secretariat.leeds.ac.uk/student-cases/academic-misconduct/).
172 | 
173 | # Academic integrity
174 | 
175 | All work must meet university standards for Academic Integrity.
176 | See the [For Students website](https://students.leeds.ac.uk/info/10110/academic-integrity) for guidance.
177 | 
178 | # Support resources
179 | 
180 | - [Quarto Citation Guide](https://quarto.org/docs/get-started/authoring/rstudio.html#citations)
181 | - [The course website](https://itsleeds.github.io/tds/) for additional resources.
182 | - Module forum for questions.
--------------------------------------------------------------------------------
/marking-criteria.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Marking Criteria"
3 | ---
4 | 
5 | 
6 | 
7 | The report should be written as a Quarto document (`.qmd` file) and submitted as a `.zip` file containing the `.qmd` file and the rendered PDF file.
8 | See the template in the course GitHub repository at github.com/itsleeds/tds in folder/file [d2/template.qmd](https://github.com/itsleeds/tds/blob/main/d2/template.qmd) for an example.
9 | 
10 | 
11 | Marks for the submitted report are awarded in 4 categories, accounting for the following criteria:
12 | 
13 | ### Data processing: 20%
14 | 
15 | 1. The selection and effective use of input datasets that are large (e.g. covering multiple years), complex (e.g. containing multiple variables) and/or diverse (e.g. input datasets from multiple sources are used and where appropriate combined in the analysis)
16 | 2. A description of how the data was collected and the implications for data quality, and an outline of how the input datasets were downloaded (with a reproducible example if possible), with a description that will allow others to understand the structure of the inputs and how to import them
17 | 3. Evidence of data cleaning techniques (e.g. by re-categorising variables)
18 | 4. Adding value to datasets with joins (key-based or spatial), creation of new variables (also known as feature engineering) and reshaping data (e.g. from wide to long format)
19 | 
20 | **Distinction (70%+):** The report makes use of a complex (with many columns and rows) and/or multiple input datasets, efficiently importing them and adding value by creating new variables, recategorising, changing data formats/types, and/or reshaping the data. Selected datasets are very well suited to the research questions, clearly described, with links to the source and understanding of how the datasets were generated.
21 | 
22 | **Merit (60-69%):** The report makes some use of complex or multiple input datasets. The selection, description of, cleaning or value-added to the input datasets show skill and care applied to the data processing stage but with some weaknesses. Selected datasets are appropriate for the research questions, with some description or links to the data source.
23 | 
24 | **Pass (50-59%):** There is some evidence of care and attention put into the selection, description of or cleaning of the input datasets but little value has been added. The report makes little use of complex or multiple input datasets. The datasets are not appropriate for the research questions, the datasets are not clearly described, or there are no links to the source or understanding of how the datasets were generated, but the data processing aspect of the work is acceptable.
25 | 
26 | **Fail (0-49%):** The report does not make use of appropriate input datasets and contains very little or no evidence of data cleaning, adding value to the datasets or reshaping the data. While there may be some evidence of data processing, it is of poor quality and/or not appropriate for the research questions.
27 | 
28 | ### Visualization and report: 20%
29 | 
30 | 1. Creation of figures that are readable and well-described (e.g. with captions and description)
31 | 1. High quality, attractive or advanced techniques (e.g. multi-layered maps or graphs, facets or other advanced techniques)
32 | 1. Using visualisation techniques appropriate to the topic and data and interpreting the results correctly (e.g.
mentioning potential confounding factors that could account for observed patterns)
33 | 1. The report is well-formatted, accessible (e.g. with legible text size and without excessive code in the submitted report) and clearly communicates the data and analysis visually, with appropriate figure captions, cross-references and a consistent style
34 | 
35 | **Distinction (70%+):** The report contains high quality, attractive, advanced and meaningful visualisations that are very well-described and interpreted, showing deep understanding of how visualisation can communicate meaning contained within datasets. The report is very well-formatted, accessible and clearly communicates the data and analysis visually.
36 | 
37 | **Merit (60-69%):** The report contains good visualisations that correctly present the data and highlight key patterns. The report has appropriate formatting.
38 | 
39 | **Pass (50-59%):** The report contains only basic visualisations, or the visualisations are not well-described or interpreted correctly, or the report is poorly formatted, not accessible or does not clearly communicate the data and analysis visually.
40 | 
41 | **Fail (0-49%):** The report is of unacceptable quality (would likely be rejected in a professional setting) and/or has poor quality and/or few visualisations, or the visualisations are inappropriate given the data and research questions.
42 | 
43 | ### Code quality, efficiency and reproducibility: 20%
44 | 
45 | 1. Code quality in the submitted source code, including using consistent style, appropriate packages, and clear comments
46 | 1. Efficiency, including pre-processing to reduce input datasets (avoiding having to share large datasets in the submission, for example) and computationally efficient implementations
47 | 1. The report is fully reproducible, including generation of figures. There are links to online resources for others wanting to reproduce the analysis for another area, and links to the input data
48 | 
49 | **Distinction (70%+):** The source code underlying the report contains high quality, efficient and reproducible code that is very well-written, using consistent syntax and good style, well-commented and uses appropriate packages. The report is fully reproducible, with links to online resources for others wanting to reproduce the analysis for another area, and links to the input data.
50 | 
51 | **Merit (60-69%):** The code is readable and describes the outputs in the report but lacks quality, either in terms of comments, efficiency or reproducibility.
52 | 
53 | **Pass (50-59%):** The source code underlying the report describes the outputs in the report but is not well-commented, not efficient or has very limited levels of reproducibility, with few links to online resources for others wanting to reproduce the analysis for another area, and few links to the input data.
54 | 
55 | **Fail (0-49%):** The report has little to no reproducible, readable or efficient code. A report that includes limited well-described code in the main text or in associated files would be considered at the borderline between a fail and a pass. A report that includes no code would be considered a low fail under this criterion.
56 | 
57 | ### Understanding the data science process, including choice of topic and impact: 40%
58 | 
59 | 1. Topic selection, including originality, availability of datasets related to the topic and relevance to solving transport planning problems
60 | 1. Clear research question
61 | 1.
Appropriate reference to the academic, policy and/or technical literature and use of the literature to inform the research question and methods
62 | 1. Use of appropriate data science methods and techniques
63 | 1. Discussion of the strengths and weaknesses of the analysis and input datasets and/or how limitations could be addressed
64 | 1. Discussion of further research and/or explanation of the potential impacts of the work
65 | 1. The conclusions are supported by the analysis and results
66 | 1. The contents of the report fit together logically and support the aims and/or research questions of the report
67 | 
68 | **Distinction (70%+):** The report contains a clear research question, appropriate reference to the academic, policy and/or technical literature, use of appropriate data science methods and techniques, and discussion of the strengths and weaknesses of the analysis and input datasets and/or how limitations could be addressed. The report discusses further research and/or explores the potential impacts of the work. Conclusions are supported by the analysis and results, and the contents of the report fit together logically as a cohesive whole that has a clear direction set out by the aims and/or research questions. To get a Distinction there should also be evidence of considering the generalisability of the methods and reflections on how the work could be built on by others in other areas.
69 | 
70 | **Merit (60-69%):** There is a clear research question. There is some reference to the academic, policy and/or technical literature. The report has a good structure and the results are supported by the analysis. There is some discussion of the strengths and weaknesses of the analysis and input datasets and/or how limitations could be addressed.
71 | 
72 | **Pass (50-59%):** The report contains a valid research question but only limited references to appropriate literature or justification. There is evidence of awareness of the limitations of the results and how they inform conclusions, but these are not fully supported by the analysis. The report has a reasonable structure but does not fit together well as a cohesive whole.
73 | 
74 | **Fail (0-49%):** The report does not contain a valid research question, has no references to appropriate literature or justification, does not discuss the limitations of the results or how they inform conclusions, or the report does not have a reasonable structure.
75 | 
--------------------------------------------------------------------------------
/elsevier-harvard.csl:
--------------------------------------------------------------------------------
1 | 
2 | 
240 | 
--------------------------------------------------------------------------------
/s3/slides.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Visualising transport data"
3 | subtitle: '
🗺
Transport Data Science'
4 | author: "Robin Lovelace"
5 | date: 'University of Leeds'
6 | format: revealjs
7 | bibliography: viz-references.bib
8 | ---
9 | 
10 | ```{r setup, include = FALSE}
11 | library(tmap)
12 | library(spData)
13 | library(sf)
14 | ```
15 | 
16 | ## A brief history of geographic visualisation {background-image="https://upload.wikimedia.org/wikipedia/commons/thumb/7/7e/Zentralbibliothek_Z%C3%BCrich_-_Ideen_zu_einer_Geographie_der_Pflanzen_nebst_einem_Naturgem%C3%A4lde_der_Tropenl%C3%A4nder_-_000012142.jpg/2560px-Zentralbibliothek_Z%C3%BCrich_-_Ideen_zu_einer_Geographie_der_Pflanzen_nebst_einem_Naturgem%C3%A4lde_der_Tropenl%C3%A4nder_-_000012142.jpg"}
17 | 
18 | - Humboldt's Naturgemälde (1807, *Geography of Plants*)
19 | 
20 | ## Good, bad, ugly, wrong
21 | 
22 | - The aim must be good graphics, but first it's important to avoid pitfalls
23 | - Source: Free and open book [Data Visualisation](https://clauswilke.com/dataviz/introduction.html)
24 | 
25 | ```{r, echo=FALSE, out.width="80%"}
26 | knitr::include_graphics("https://clauswilke.com/dataviz/introduction_files/figure-html/ugly-bad-wrong-examples-1.png")
27 | ```
28 | 
29 | ## What is data visualisation?
30 | 
31 | ::: incremental
32 | - Data visualization is part art and part science. The challenge is to get the art right without getting the science wrong and vice versa. A data visualization first and foremost has to accurately convey the data.
33 | 
34 | - In my experience, scientists frequently (though not always!) know how to visualize data without being grossly misleading. However, they may not have a well developed sense of visual aesthetics, and they may inadvertently make visual choices that detract from their desired message. Designers, on the other hand, may prepare visualizations that look beautiful but play fast and loose with the data.
35 | :::
36 | 
37 | Source: [Fundamentals of Data Visualization](https://serialmentor.com/dataviz/)
38 | 
39 | ## Viz 4 policy
40 | 
41 | ::: incremental
42 | - [visualisations] are also often the best way to present the findings of [transport] research in a way that is accessible. [visualisation] making is therefore a critical part of [transport] and its emphasis not only on describing, but also changing the world.
43 | 
44 | Source: [Geocomputation with R](https://r.geocompx.org/adv-map.html)
45 | 
46 | :::
47 | 
48 | 
49 | ## Illustration of policy impact
50 | 
51 | ```{r, echo=FALSE}
52 | knitr::include_graphics("https://i.guim.co.uk/img/static/sys-images/Guardian/Pix/pictures/2014/10/31/1414751968306/d83dffac-d060-4b77-88c0-77916e18e233-1020x397.png?width=620&quality=45&auto=format&fit=max&dpr=2&s=7a52a871a27b3fa6ae167244d4f9845e")
53 | ```
54 | 
55 | - Visualisations can make findings become 'real'
56 | 
57 | ## A brief history of geographic data viz in R
58 | 
59 | ::: incremental
60 | "The core R engine was not designed specifically for the display and analysis
61 | of maps, and the limited interactive facilities it offers have drawbacks in this
62 | area" [@bivand_applied_2013].
63 | 
64 | Five years later...
65 | 
66 | "An example showing R's flexibility and evolving geographic capabilities is **leaflet**
67 | [@R-leaflet],
68 | a package for making interactive maps that has been extended by the R community, as we'll see in Chapter 9"
69 | [@lovelace_geocomputation_2018a].
70 | :::
71 | 
72 | ## Base R graphics: sf
73 | 
74 | ```{r}
75 | plot(nz)
76 | ```
77 | 
78 | ## Base R graphics: sf II
79 | 
80 | ```{r, fig.cap=" ", fig.show='hold', out.width="40%"}
81 | plot(st_geometry(nz))
82 | plot(nz_height, add = TRUE)
83 | sf_cols = sf.colors(n = 2, alpha = 0.2)
84 | nz$col = factor(x = nz$Island, labels = sf_cols)
85 | plot(st_geometry(nz), col = as.character(nz$col))
86 | ```
87 | 
88 | ## sf graphics: code
89 | 
90 | ```{r, eval=FALSE, echo=TRUE}
91 | # facet plots by default
92 | plot(nz)
93 | # plot just geometry, ready for new layers:
94 | plot(st_geometry(nz), reset = FALSE)
95 | # addition of new layers
96 | plot(nz_height, add = TRUE)
97 | # transparency
98 | sf_cols = sf.colors(n = 2, alpha = 0.2)
99 | nz$col = factor(x = nz$Island, labels = sf_cols)
100 | plot(st_geometry(nz), col = as.character(nz$col))
101 | # see ?plot.sf for more
102 | ```
103 | 
104 | ::: incremental
105 | ## Observations
106 | 
107 | - Facets by default: useful for seeing patterns.
108 | - Transparency is new; the `add = ...` argument is the same
109 | - You can go far with base R graphics
110 | [@murrell_graphics_2016].
111 | :::
112 | 
113 | ## tmap
114 | 
115 | - A dedicated and versatile mapping R package
116 | 
117 | ```{r, eval=TRUE, message=FALSE, warning=FALSE}
118 | #| echo: true
119 | library(tmap)
120 | tmap_mode("plot")
121 | tm_shape(nz) +
122 |   tm_polygons("Median_income", palette = "RdYlBu")
123 | ```
124 | 
125 | ## Why tmap?
126 | 
127 | ::: incremental
128 | - It is powerful and flexible.
129 | - Concise syntax, attractive maps with minimal code, familiar to **ggplot2** users.
130 | - Unique capability: same code -> static + interactive maps with switch `tmap_mode()`.
131 | - Wide range of spatial classes (including `raster` objects) supported.
132 | - Well documented + developed --- see [`tmap-nutshell`](https://cran.r-project.org/web/packages/tmap/vignettes/tmap-nutshell.html) and JSS paper
133 | [@tennekes_tmap_2018].
134 | :::
135 | 
136 | ## tmap basics
137 | 
138 | ```{r}
139 | #| echo: true
140 | #| label: tmap-basics
141 | #| layout-ncol: 3
142 | # Add fill layer to nz shape
143 | tm_shape(nz) + tm_fill()
144 | # Add border layer to nz shape
145 | tm_shape(nz) + tm_borders()
146 | # Add fill and border layers to nz shape
147 | tm_shape(nz) + tm_fill() + tm_borders()
148 | ```
149 | 
150 | ## Animations
151 | 
152 | - Are easy with **tmap** (section [9.3](http://r.geocompx.org/adv-map.html#animated-maps) of geocompr)
153 | 
154 | ```{r, echo=FALSE, out.width="80%"}
155 | knitr::include_graphics("https://user-images.githubusercontent.com/1825120/38543030-5794b6f0-3c9b-11e8-9da9-10ec1f3ea726.gif")
156 | ```
157 | 
158 | ## Interactive maps with mapview
159 | 
160 | ```{r, eval=FALSE}
161 | m = mapview::mapview(spData::nz)
162 | m@map
163 | ```
164 | 
165 | ![](https://i.imgur.com/hz98Jru.png)
166 | 
167 | ## Web mapping applications
168 | 
169 | - Leaflet integrates with **shiny** via `leaflet::leafletOutput()`, enabling web mapping applications built on R (see the minimal sketch below)
170 | - These can be set up to scale nationally, as illustrated by [pct.bike](http://www.pct.bike/)
171 | [@lovelace_propensity_2017].
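A minimal sketch of the shiny + leaflet pattern (a sketch only, assuming the **shiny** and **leaflet** packages are installed; real applications like pct.bike add data layers and server logic):

```{r, eval=FALSE, echo=TRUE}
library(shiny)
library(leaflet)
# UI with a single interactive map output
ui = fluidPage(leafletOutput("map"))
# Server renders a basemap; add data layers with addPolylines() etc.
server = function(input, output) {
  output$map = renderLeaflet(leaflet() |> addTiles())
}
shinyApp(ui, server)
```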
172 | 
173 | ```{r, echo=FALSE}
174 | knitr::include_graphics("https://raw.githubusercontent.com/npct/pct-team/master/figures/pct-frontpage.png")
175 | ```
176 | 
177 | ## Summary
178 | 
179 | ::: incremental
180 | - Visualisation is an important skill in data science
181 | - Visualisation is particularly valuable for evidence-based decision-making and policy
182 | - Open source command-line software like R provides powerful tools for data visualisation
183 | - Geographic data visualisation is possible with R using packages such as sf and tmap
184 | - These visualisation skills will be useful in the lecture next week on project work
185 | :::
186 | 
187 | ## Exercise with desire line data in stplanr
188 | 
189 | 1. Create a map showing the number of people walking and cycling in the `stplanr` dataset `flowlines_sf` using:
190 |    - base graphics (hint: use `plot()`) and
191 |    - **tmap** (hint: use `tm_shape(flowlines_sf) + ...`; `palette = "viridis"` and other options shown in `tmaptools::palette_explorer()` give different colour schemes).
192 | - Name two advantages of each approach
193 | - Bonus: What other mapping packages could be used to show the same data?
194 | 
195 | For more information on plotting OD data, see the [stplanr-od](https://docs.ropensci.org/stplanr/articles/stplanr-od.html) vignette, e.g. with
196 | 
197 | ```r
198 | vignette("stplanr-od")
199 | ```
200 | 
201 | ## Result: base graphics
202 | 
203 | ```{r, echo=TRUE, out.width="70%", fig.show='hold', message=FALSE}
204 | library(stplanr)
205 | lwd = flowlines_sf$All / mean(flowlines_sf$All) * 3
206 | plot(flowlines_sf["Taxi"], lwd = lwd)
207 | ```
208 | 
209 | ## Bonus exercise
210 | 
211 | - Based on the `routes_fast_sf` data in the `stplanr` package, identify roads where investment in cycling is likely to be effective.
212 | 
213 | Starting point:
214 | 
215 | ```{r}
216 | nrow(flowlines_sf)
217 | nrow(routes_fast_sf)
218 | # routes_fast_sf = dplyr::inner_join(routes_fast_sf, flow)
219 | routes_fast_joined = cbind(routes_fast_sf, flowlines_sf)
220 | rnet = overline(routes_fast_joined, "Bicycle")
221 | ```
222 | 
223 | ## sf results
224 | 
225 | ```{r, echo=FALSE}
226 | plot(rnet["Bicycle"], lwd = routes_fast_sf$All / 50)
227 | ```
228 | 
229 | ## tmap 'view mode' results
230 | 
231 | ```{r, eval=FALSE}
232 | # tm_shape(rnet) +
233 | #   tm_lines(col = "Bicycle", lwd = "Bicycle", scale = 9, palette = "RdYlBu")
234 | ```
235 | 
236 | ![](https://user-images.githubusercontent.com/1825120/76854526-26bb5780-6847-11ea-8903-e7466a1aa750.png)
237 | See [here](https://user-images.githubusercontent.com/1825120/76854526-26bb5780-6847-11ea-8903-e7466a1aa750.png) for the result.
238 | 
239 | ## Bonus exercises (optional)
240 | 
241 | - Using data in the `pct` GitHub package, estimate cycling potential in a city of your choice in the UK, and show the results
242 | 
243 | - See the [pct_training](https://itsleeds.github.io/pct/articles/pct_training.html) vignette for further information
244 | 
245 | - Work on your portfolios, adding a new visualisation
246 | 
247 | ## References
248 | 
--------------------------------------------------------------------------------
/sem1/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Seminar 1 - Mini-workshop"
3 | bibliography: ../tds.bib
4 | toc: true
5 | execute:
6 |   cache: true
7 |   message: false
8 |   warning: false
9 |   eval: false
10 | editor:
11 |   markdown:
12 |     wrap: 72
13 | format:
14 |   html:
15 |     code-overflow: wrap
16 | ---
17 | 
18 | The best way to learn is by exploring data and answering your own
19 | questions. Here are some datasets that can help you investigate
20 | questions like:
21 | 
22 | - What is the average daily number of people/bikes/passengers/cars?
23 | 
24 | - What is the typical daily/weekly/monthly demand profile?
25 | 
26 | - Where are the points with the highest demand/flows?
27 | 
28 | # Some interesting datasets ...
29 | 
30 | Let's explore some interesting datasets. First we will install (if
31 | necessary) and load the packages for these examples:
32 | 
33 | ```{r}
34 | options(repos = c(CRAN = "https://cloud.r-project.org"))
35 | if (!require("remotes")) install.packages("remotes")
36 | pkgs = c(
37 |   "sf",
38 |   "tidyverse",
39 |   "osmextract",
40 |   "tmap",
41 |   "maptiles"
42 | )
43 | remotes::install_cran(pkgs)
44 | sapply(pkgs, require, character.only = TRUE)
45 | ```
46 | 
47 | ## Motorised vehicle counts: Leeds
48 | 
49 | Many cities/countries publish data from permanent traffic counters, e.g.
50 | ANPR cameras, induction loops or low-cost sensors.
We are going to use
51 | data from the sensors in Leeds (available in [Data Mill
52 | North](https://datamillnorth.org/dataset/e6q0n/leeds-annual-traffic-growth))
53 | 
54 | ```{r}
55 | leeds_car_location <- read_csv(
56 |   "https://datamillnorth.org/download/e6q0n/9bc51361-d98e-47d3-9963-aeeca3fa0afc/Camera%20Locations.csv"
57 | )
58 | 
59 | leeds_car_location_sf <- leeds_car_location |>
60 |   st_as_sf(coords = c("X", "Y"),
61 |            crs = 27700)
62 | ```
63 | 
64 | ```{r}
65 | #| echo: false
66 | tm_basemap("OpenStreetMap") +
67 |   tm_shape(leeds_car_location_sf) +
68 |   tm_dots("blue")
69 | 
70 | ```
71 | 
72 | ```{r}
73 | leeds_car_2019 <- read_csv(
74 |   "https://datamillnorth.org/download/e6q0n/9e62c1e5-8ba5-4369-9d81-a46c4e23b9fb/Data%202019.csv"
75 | )
76 | ```
77 | 
78 | If you are interested in open traffic count datasets, see
79 | [this](https://juanfonsecals1.github.io/Results_Summary_Maps/)
80 | 
81 | #### code
82 | 
83 | ```{r}
84 | leeds_car_2019 |>
85 |   group_by(Cosit) |>
86 |   summarise(mean(Volume))
87 | 
88 | 
89 | mean_daily_volumes <- leeds_car_2019 |>
90 |   # converting Cosit to numeric
91 |   mutate(Cosit = as.numeric(Cosit)) |>
92 |   # extracting the date
93 |   mutate(time_date = dmy_hm(Sdate),
94 |          # extracts the day
95 |          date = date(time_date)) |>
96 |   # calculating the total flows for each day
97 |   summarise(Volume = sum(Volume, na.rm = TRUE),
98 |             .by = c(date, Cosit)) |>
99 |   # calculating the daily mean
100 |   summarise(daily_volume = mean(Volume, na.rm = TRUE),
101 |             .by = Cosit)
102 | 
103 | 
104 | daily_volumes <- leeds_car_2019 |>
105 |   # converting Cosit to numeric
106 |   mutate(Cosit = as.numeric(Cosit)) |>
107 |   # extracting the date
108 |   mutate(time_date = dmy_hm(Sdate),
109 |          # extracts the day
110 |          date = date(time_date)) |> # calculating the total flows for each day
111 |   summarise(total_volume = sum(Volume, na.rm = TRUE),
112 |             .by = c(date, Cosit))
113 | 
114 | daily_volumes |>
115 |   mutate(Cosit = as.numeric(Cosit)) |>
116 |   filter(Cosit == 90201) |>
117 |   ggplot(aes(x = date, y = total_volume)) +
118 |   geom_line()
119 | 
120 | 
121 | mean_daily_volumes |>
122 |   ggplot(aes(daily_volume)) +
123 |   geom_histogram()
124 | 
125 | leeds_car_location_sf |>
126 |   left_join(mean_daily_volumes, by = c("Site ID" = "Cosit")) |>
127 |   tm_shape() +
128 |   tm_dots("daily_volume", size = "daily_volume")
129 | ```
130 | 
131 | ## Cycle counts for West Yorkshire
132 | 
133 | Some cities have dedicated infrastructure to count the number
134 | of people cycling at strategic points of the city.
We are going to
135 | use some cycle counters from West Yorkshire that you can find
136 | [here](https://datamillnorth.org/dataset/e1dmk/leeds-annual-cycle-growth):
137 | 
138 | ```{r}
139 | leeds_bike_location <- read_csv(
140 |   "https://datamillnorth.org/download/e1dmk/a8c8a11e-1616-4915-a897-9ca5ab4e03b8/Cycle%20Counter%20Locations.csv", skip = 1
141 | )
142 | 
143 | leeds_bike_location_sf <- leeds_bike_location |>
144 |   drop_na(Latitude, Longitude) |>
145 |   st_as_sf(coords = c("Longitude", "Latitude"),
146 |            crs = 4326) |>
147 |   st_transform(27700)
148 | ```
149 | 
150 | ```{r}
151 | #| echo: false
152 | tm_basemap("OpenStreetMap") +
153 |   tm_shape(leeds_bike_location_sf) +
154 |   tm_dots("darkblue")
155 | ```
156 | 
157 | The data for 2019:
158 | 
159 | ```{r}
160 | leeds_bike_2019 <- read_csv(
161 |   "https://datamillnorth.org/download/e1dmk/f13f5d49-6128-4619-a3ff-e6e12f88a71f/Cycle%20Data%202019.csv"
162 | )
163 | ```
164 | 
165 | Other interesting datasets for you to explore are [Paris cycling
166 | counters](https://parisdata.opendatasoft.com/explore/dataset/comptage-velo-donnees-compteurs/dataviz/?disjunctive.id_compteur&disjunctive.nom_compteur&disjunctive.id&disjunctive.name)
167 | or [Scotland](https://usmart.io/org/cyclingscotland/).
168 | 
169 | ## Pedestrian Counts: Melbourne
170 | 
171 | Cities also monitor the number of pedestrians in key locations. We can use
172 | data from the sensors in Melbourne, accessible
173 | [here](https://data.melbourne.vic.gov.au/explore/dataset/pedestrian-counting-system-sensor-locations/export/):
174 | 
175 | ```{r}
176 | melbourne_locations_sf <- st_read("https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/pedestrian-counting-system-sensor-locations/exports/geojson?lang=en&timezone=Europe%2FLondon")
177 | ```
178 | 
179 | ```{r}
180 | #| echo: false
181 | tm_basemap("OpenStreetMap") +
182 |   tm_shape(melbourne_locations_sf) +
183 |   tm_dots("darkblue")
184 | ```
185 | 
186 | We will extract the hourly counts for December 2024:
187 | 
188 | ```{r}
189 | melbourne_dec2024 <- read_csv("https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/pedestrian-counting-system-monthly-counts-per-hour/exports/csv?lang=en&refine=sensing_date%3A%222024%2F12%22&timezone=Australia%2FMelbourne&use_labels=true&delimiter=%2C")
190 | ```
191 | 
192 | ## Public transport tap-in data: Bogotá
193 | 
194 | Public transport ridership data can be difficult to obtain. Fortunately,
195 | some cities whose systems are managed by a public organisation make
196 | this data available to the public. Bogotá's integrated transport system
197 | publishes the tap-in data for the BRT system (see
198 | [this](https://datosabiertos-transmilenio.hub.arcgis.com/)). We will use
199 | one of the daily reports.
200 | 
201 | ```{r}
202 | tm_stations_sf <- st_read("Estaciones_Troncales_de_TRANSMILENIO.geojson")
203 | ```
204 | 
205 | ```{r}
206 | #| echo: false
207 | tm_basemap("OpenStreetMap") +
208 |   tm_shape(tm_stations_sf) +
209 |   tm_dots("darkblue")
210 | ```
211 | 
212 | Monthly boarding data can be manually obtained in the open data portal
213 | of TransMilenio
214 | [here](https://storage.googleapis.com/validaciones_tmsa/validaciones_mensuales.html?)
215 | 
216 | ```{r}
217 | url_tm <- "https://storage.googleapis.com/validaciones_tmsa/ValidacionTroncal/2024/consolidado_2024.zip"
218 | u_bn <- basename(url_tm)
219 | 
220 | 
221 | if (!file.exists(u_bn)) {
222 |   download.file(url = url_tm,
223 |                 destfile = u_bn,
224 |                 mode = "wb")
225 | }
226 | 
227 | 
228 | 
229 | 
230 | 
231 | 
232 | tm_brt_2024 <- read_csv(unz(u_bn, "troncal_2024.csv"))
233 | ```
234 | 
235 | TfL's crowding data is also a great source of ridership data. See
236 | [this](https://datamillnorth.org/download/e1dmk/f13f5d49-6128-4619-a3ff-e6e12f88a71f/Cycle%20Data%202019.csv).
237 | 
238 | ### code
239 | 
240 | ```{r}
241 | daily_tapins <- tm_brt_2024 |>
242 |   summarise(validaciones = sum(validaciones),
243 |             .by = c(Estacion_Parada, fecha)) |>
244 |   summarise(validaciones = mean(validaciones),
245 |             .by = Estacion_Parada) |>
246 |   mutate(numero_estacion = str_extract(Estacion_Parada, "\\(\\d*\\)") |>
247 |            str_remove_all("(\\(|\\))"))
248 | 
249 | tm_stations_sf |>
250 |   left_join(daily_tapins, by = "numero_estacion") |>
251 |   tm_shape() +
252 |   tm_dots("validaciones", size = "validaciones")
253 | ```
254 | 
255 | ## Network data from OSM
256 | 
257 | You may already be familiar with getting and using OSM data. This is an
258 | example of how to obtain the network that can be used by pedestrians.
259 | 
260 | ```{r}
261 | my_coordinates <- c(-76.78893552474851, 18.01206727612776)
262 | sf_point <- st_point(my_coordinates) |> st_sfc(crs = 4326)
263 | sf_buffer <- st_buffer(sf_point, dist = 15e3)
264 | 
265 | ```
266 | 
267 | ```{r}
268 | tm_basemap("OpenStreetMap") +
269 |   tm_shape(sf_buffer) +
270 |   tm_borders()
271 | ```
272 | 
273 | ```{r}
274 | #| results: hide
275 | my_network <- oe_get_network(sf_buffer, mode = "walking")
276 | ```
277 | 
278 | ```{r}
279 | tm_shape(my_network) +
280 |   tm_lines("highway")
281 | ```
282 | 
283 | ***Note:*** you can access a simplified network dataset from Ordnance
284 | Survey's [OpenRoads
285 | dataset](https://www.ordnancesurvey.co.uk/products/os-open-roads).
286 | 
287 | # Links to other resources
288 | 
289 | - See here for a detailed reproducible example of how to visualise
290 |   transport network datasets:
291 | 
292 | - See the [`sfnetworks`
293 |   package](https://luukvdmeer.github.io/sfnetworks/) for
294 |   information on how to work with spatial networks, including
295 |   estimating centrality measures and shortest paths (see the sketch below).
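As a taster of the `sfnetworks` workflow mentioned above, here is a minimal sketch computing edge betweenness centrality on `roxel`, the example road dataset bundled with the package (a sketch only, assuming the `sfnetworks` and `tidygraph` packages are installed):

```{r}
#| eval: false
library(sfnetworks)
library(tidygraph)
# build a routable network object from the example road dataset
net <- as_sfnetwork(roxel, directed = FALSE)
# compute edge betweenness centrality: how often each street segment
# lies on shortest paths through the network
net <- net |>
  activate("edges") |>
  mutate(betweenness = centrality_edge_betweenness())
```
--------------------------------------------------------------------------------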