├── episodes ├── fig │ ├── .gitkeep │ ├── pi.png │ ├── gather.png │ ├── compute.png │ ├── finalize.png │ ├── homedir.odg │ ├── scatter.png │ ├── filesystem.png │ ├── initialize.png │ ├── linux-cloud.jpg │ ├── maze-direct.png │ ├── maze-tortuous.png │ ├── node_anatomy.png │ ├── vlad-homedir.png │ ├── find-file-tree.odg │ ├── find-file-tree.png │ ├── home-directories.png │ ├── nano-screenshot.png │ ├── filesystem-challenge.odg │ ├── filesystem-challenge.png │ ├── maze-tortuous-mirror.png │ ├── redirects-and-pipes.png │ ├── hpc-mpi_Speedup_factor.png │ ├── laptop-mpi_Speedup_factor.png │ ├── laptop-openclipartorg-aoguerrero.png │ ├── servers-openclipartorg-ericlemerdy.png │ ├── laptop-openclipartorg-aoguerrero.svg │ ├── 200px-laptop-openclipartorg-aoguerrero.svg │ ├── responsibility-bandwidth.tex │ ├── Simple_Von_Neumann_Architecture.svg │ └── redirects-and-pipes.svg ├── files │ ├── snippets │ │ ├── EPCC_ARCHER2_slurm │ │ │ ├── modules │ │ │ │ ├── missing-python.snip │ │ │ │ ├── python-executable-dir.snip │ │ │ │ ├── python-ls-dir-command.snip │ │ │ │ ├── module-load-python.snip │ │ │ │ ├── python-ls-dir-output.snip │ │ │ │ ├── python-module-path.snip │ │ │ │ ├── module_list.snip │ │ │ │ ├── available-modules.Rmd │ │ │ │ ├── wrong-gcc-version.snip │ │ │ │ └── software-dependencies.snip │ │ │ ├── scheduler │ │ │ │ ├── del_job.snip │ │ │ │ ├── basic-job-script.Rmd │ │ │ │ ├── runtime-exceeded-job.Rmd │ │ │ │ ├── terminate-job-cancel.Rmd │ │ │ │ ├── ls-pgm-output.snip │ │ │ │ ├── squeue_pending.snip │ │ │ │ ├── squeue_running.snip │ │ │ │ ├── runtime-exceeded-output.Rmd │ │ │ │ ├── long_job.snip │ │ │ │ ├── job-with-name-status.Rmd │ │ │ │ ├── terminate-job-begin.Rmd │ │ │ │ ├── parallel-script.Rmd │ │ │ │ ├── terminate-multiple-jobs.Rmd │ │ │ │ ├── option-flags-list.Rmd │ │ │ │ ├── basic-job-status.Rmd │ │ │ │ ├── print-sched-variables.Rmd │ │ │ │ ├── parallel-challenge.Rmd │ │ │ │ ├── parallel-launch-desc.Rmd │ │ │ │ ├── parallel-challenge2.Rmd │ │ │ │ ├── 
filesystem_issues.snip │ │ │ │ └── using-nodes-interactively.Rmd │ │ │ ├── resources │ │ │ │ ├── job-detail.Rmd │ │ │ │ ├── pi-mpi-details.Rmd │ │ │ │ ├── view-output.snip │ │ │ │ ├── serial-submit.Rmd │ │ │ │ ├── sharpen-details.snip │ │ │ │ ├── runtime-exercise.Rmd │ │ │ │ └── perf-exercise.Rmd │ │ │ ├── cluster │ │ │ │ ├── specific-node-info.Rmd │ │ │ │ └── queue-info.snip │ │ │ ├── login_output.Rmd │ │ │ └── _config_options.yml │ │ ├── README.md │ │ └── rename-snippets.sh │ ├── hpc-intro-data.zip │ ├── hpc-intro-data.tar.gz │ └── pi-mpi.py ├── lesson_config.yaml ├── 17-resources.Rmd ├── 11-hpc-intro.Rmd ├── 14-modules.Rmd └── 18-responsibility.Rmd ├── .github └── workflows │ ├── sandpaper-version.txt │ ├── pr-close-signal.yaml │ ├── pr-post-remove-branch.yaml │ ├── pr-preflight.yaml │ ├── sandpaper-main.yaml │ ├── update-workflows.yaml │ ├── pr-receive.yaml │ ├── update-cache.yaml │ ├── pr-comment.yaml │ └── README.md ├── docker ├── access.sh ├── .Rprofile └── Dockerfile ├── site └── README.md ├── instructors └── instructor-notes.md ├── learners ├── reference.md └── setup.Rmd ├── docker-compose.yml ├── FIXME.Rproj ├── CODE_OF_CONDUCT.md ├── .gitignore ├── index.Rmd ├── config.yaml ├── links.md ├── profiles └── learner-profiles.md ├── README.md ├── LICENSE.md ├── Makefile └── CONTRIBUTING.md /episodes/fig/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/workflows/sandpaper-version.txt: -------------------------------------------------------------------------------- 1 | 0.11.12 2 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/missing-python.snip: -------------------------------------------------------------------------------- 1 | /usr/bin/python3 2 | -------------------------------------------------------------------------------- 
/docker/access.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/with-contenv bash 2 | 3 | echo "Access RSTUDIO via http://127.0.0.1:8787" 4 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/del_job.snip: -------------------------------------------------------------------------------- 1 | {{ site.host_prompt }} scancel 38759 2 | -------------------------------------------------------------------------------- /site/README.md: -------------------------------------------------------------------------------- 1 | This directory contains rendered lesson materials. Please do not edit files 2 | here. 3 | -------------------------------------------------------------------------------- /episodes/fig/pi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/pi.png -------------------------------------------------------------------------------- /episodes/fig/gather.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/gather.png -------------------------------------------------------------------------------- /episodes/fig/compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/compute.png -------------------------------------------------------------------------------- /episodes/fig/finalize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/finalize.png -------------------------------------------------------------------------------- /episodes/fig/homedir.odg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/homedir.odg -------------------------------------------------------------------------------- /episodes/fig/scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/scatter.png -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-executable-dir.snip: -------------------------------------------------------------------------------- 1 | /opt/cray/pe/python/3.8.5.0/bin/python3 2 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-script.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | Submitted batch job 36855 3 | ``` -------------------------------------------------------------------------------- /episodes/fig/filesystem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/filesystem.png -------------------------------------------------------------------------------- /episodes/fig/initialize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/initialize.png -------------------------------------------------------------------------------- /episodes/fig/linux-cloud.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/linux-cloud.jpg 
-------------------------------------------------------------------------------- /episodes/fig/maze-direct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/maze-direct.png -------------------------------------------------------------------------------- /episodes/fig/maze-tortuous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/maze-tortuous.png -------------------------------------------------------------------------------- /episodes/fig/node_anatomy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/node_anatomy.png -------------------------------------------------------------------------------- /episodes/fig/vlad-homedir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/vlad-homedir.png -------------------------------------------------------------------------------- /episodes/fig/find-file-tree.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/find-file-tree.odg -------------------------------------------------------------------------------- /episodes/fig/find-file-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/find-file-tree.png -------------------------------------------------------------------------------- /episodes/fig/home-directories.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/home-directories.png -------------------------------------------------------------------------------- /episodes/fig/nano-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/nano-screenshot.png -------------------------------------------------------------------------------- /episodes/files/hpc-intro-data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/files/hpc-intro-data.zip -------------------------------------------------------------------------------- /instructors/instructor-notes.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Instructor Notes' 3 | --- 4 | 5 | This is a placeholder file. Please add content here. 
6 | -------------------------------------------------------------------------------- /learners/reference.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Reference' 3 | --- 4 | 5 | ## Glossary 6 | 7 | HPC 8 | : High Performance Computing 9 | 10 | -------------------------------------------------------------------------------- /episodes/fig/filesystem-challenge.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/filesystem-challenge.odg -------------------------------------------------------------------------------- /episodes/fig/filesystem-challenge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/filesystem-challenge.png -------------------------------------------------------------------------------- /episodes/fig/maze-tortuous-mirror.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/maze-tortuous-mirror.png -------------------------------------------------------------------------------- /episodes/fig/redirects-and-pipes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/redirects-and-pipes.png -------------------------------------------------------------------------------- /episodes/files/hpc-intro-data.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/files/hpc-intro-data.tar.gz -------------------------------------------------------------------------------- 
/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-command.snip: -------------------------------------------------------------------------------- 1 | {{ site.host_prompt-work }} ls /opt/cray/pe/python/3.8.5.0/bin 2 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-job.Rmd: -------------------------------------------------------------------------------- 1 | ```bash 2 | `r config$remote$prompt_work` cat slurm-38193.out 3 | ``` -------------------------------------------------------------------------------- /episodes/fig/hpc-mpi_Speedup_factor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/hpc-mpi_Speedup_factor.png -------------------------------------------------------------------------------- /episodes/fig/laptop-mpi_Speedup_factor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/laptop-mpi_Speedup_factor.png -------------------------------------------------------------------------------- /episodes/fig/laptop-openclipartorg-aoguerrero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/laptop-openclipartorg-aoguerrero.png -------------------------------------------------------------------------------- /episodes/fig/servers-openclipartorg-ericlemerdy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EPCCed/2024-06-20-hpc-intro-shampton/main/episodes/fig/servers-openclipartorg-ericlemerdy.png -------------------------------------------------------------------------------- 
/episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module-load-python.snip: -------------------------------------------------------------------------------- 1 | {{ site.host_prompt-work }} module load cray-python 2 | {{ site.host_prompt-work }} which python3 3 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-cancel.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | JOBID USER ACCOUNT NAME ST REASON START_TIME TIME TIME_LEFT NODES CPUS 3 | ``` -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/ls-pgm-output.snip: -------------------------------------------------------------------------------- 1 | -rw-r--r-- 1 userid ta028 1762743 Jun 26 17:29 fuzzy.pgm 2 | -rw------- 1 userid ta028 1678630 Jun 26 17:33 sharpened.pgm 3 | -------------------------------------------------------------------------------- /docker/.Rprofile: -------------------------------------------------------------------------------- 1 | setHook("rstudio.sessionInit", function(newSession) { 2 | if (newSession && is.null(rstudioapi::getActiveProject())) 3 | rstudioapi::openProject("/home/rstudio/lesson/.") 4 | }, action = "append") 5 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/resources/job-detail.Rmd: -------------------------------------------------------------------------------- 1 | JOBID USER ACCOUNT NAME ST REASON START_TIME T... 2 | 36856 yourUsername yourAccount example-job.sh R None 2017-07-01T16:47:02 ... 
3 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_pending.snip: -------------------------------------------------------------------------------- 1 | JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) 2 | 119867 standard myjob userid PD 0:00 1 (Resources) 3 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/squeue_running.snip: -------------------------------------------------------------------------------- 1 | JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) 2 | 119867 standard example- userid R 0:00:06 1 nid001609 3 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/runtime-exceeded-output.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | This job is running on: 3 | nid001147 4 | slurmstepd: error: *** JOB 38193 ON cn01 CANCELLED AT 2017-07-02T16:35:48 DUE TO TIME LIMIT *** 5 | ``` -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/long_job.snip: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --partition=standard 4 | #SBATCH --qos=short 5 | #SBATCH --time=00:00:30 6 | 7 | echo 'This script is running on:' 8 | hostname 9 | sleep 120 10 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/job-with-name-status.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | JOBID USER ACCOUNT NAME ST REASON START_TIME TIME TIME_LEFT NODES CPUS 3 | 38191 yourUsername yourAccount new_name PD Priority N/A 0:00 1:00:00 1 1 4 | ``` 
-------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-job-begin.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | Submitted batch job 38759 3 | 4 | JOBID USER ACCOUNT NAME ST REASON START_TIME TIME TIME_LEFT NODES CPUS 5 | 38759 yourUsername yourAccount example-job.sh PD Priority N/A 0:00 1:00 1 1 6 | ``` -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-ls-dir-output.snip: -------------------------------------------------------------------------------- 1 | 2to3 cygdb cythonize easy_install-3.8 f2py3 idle3 nosetests pip pip3.8 pydoc3.8 py.test python3 python3.8-config 2 | 2to3-3.8 cython easy_install f2py f2py3.8 idle3.8 nosetests-3.8 pip3 pydoc3 pytest python python3.8 python3-config 3 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | services: 3 | server: 4 | # image: carpentries/python-novice-gapminder:latest 5 | image: swc/rstudio:4.3.1 6 | build: 7 | context: ./docker 8 | volumes: 9 | - ./:/home/rstudio/lesson 10 | ports: 11 | - "127.0.0.1:8787:8787" 12 | environment: 13 | - DISABLE_AUTH=true 14 | 15 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-script.Rmd: -------------------------------------------------------------------------------- 1 | ```bash 2 | #!/bin/bash 3 | 4 | #SBATCH --partition=`r config$sched$partition` 5 | #SBATCH --qos=`r config$sched$qos` 6 | #SBATCH --time=00:05:00 7 | 8 | #SBATCH --nodes=1 9 | #SBATCH --ntasks-per-node=16 10 | 11 | module load cray-python 12 | 13 | srun python pi-mpi.py 10000000 14 | ``` 
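The `pi-mpi.py` program launched by the batch scripts in these snippets is stored as a binary download above and its source is not reproduced in this listing. As context only, here is a serial Python sketch of the kind of Monte Carlo pi estimate such a program typically performs; the function name and structure are illustrative assumptions, not the actual file contents:

```python
# Illustrative sketch only -- NOT the contents of episodes/files/pi-mpi.py.
# Monte Carlo estimate of pi: sample random points in the unit square and
# count the fraction that fall inside the quarter circle of radius 1.
import random


def estimate_pi(n_samples, seed=42):
    rng = random.Random(seed)  # fixed seed for reproducible estimates
    inside = 0
    for _ in range(n_samples):
        x, y = rng.random(), rng.random()
        if x * x + y * y <= 1.0:
            inside += 1
    # area of quarter circle / area of square = pi/4
    return 4.0 * inside / n_samples


if __name__ == "__main__":
    print(estimate_pi(100_000))
```

In an MPI version, the sample count would be divided across ranks and the per-rank tallies combined with a gather or reduce, which is the initialize/scatter/compute/gather pattern illustrated by the figures in `episodes/fig/`.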
-------------------------------------------------------------------------------- /FIXME.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Website 20 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/terminate-multiple-jobs.Rmd: -------------------------------------------------------------------------------- 1 | ::: challenge 2 | ## Cancelling multiple jobs 3 | We can also cancel all of our jobs at once using the `-u` option. This will delete all jobs for a 4 | specific user (in this case us). Note that you can only delete your own jobs. 5 | Try submitting multiple jobs and then cancelling them all with `scancel -u yourUsername`. 6 | ::: -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/specific-node-info.Rmd: -------------------------------------------------------------------------------- 1 | ::: challenge 2 | ## Explore a Worker Node 3 | 4 | Finally, let's look at the resources available on the worker nodes where your jobs will actually 5 | run. 
Try running this command to see the name, CPUs and memory available on one of the worker nodes: 6 | 7 | ```bash 8 | `r config$remote$prompt` sinfo -n nid001053 -o "%n %c %m" 9 | ``` 10 | ::: 11 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/option-flags-list.Rmd: -------------------------------------------------------------------------------- 1 | * `--nodes=` - Number of nodes to use 2 | * `--ntasks-per-node=` - Number of parallel processes per node 3 | * `--cpus-per-task=` - Number of cores to assign to each parallel process 4 | * `--time=` - Maximum real-world time (walltime) 5 | your job will be allowed to run. The `` part can be omitted. 6 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/basic-job-status.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | JOBID USER ACCOUNT NAME ST REASON START_TIME T... 3 | 36856 yourUsername yourAccount example-job.sh R None 2017-07-01T16:47:02 ... 4 | ``` 5 | 6 | We can see all the details of our job, most importantly that it is in the `R` 7 | or `RUNNING` state. Sometimes our jobs might need to wait in a queue 8 | (`PENDING`) or have an error (`E`). 9 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/print-sched-variables.Rmd: -------------------------------------------------------------------------------- 1 | ::: challenge 2 | ## Job environment variables 3 | 4 | When Slurm runs a job, it sets a number of environment variables for the job. One of these will 5 | let us check our work from the last problem. The `SLURM_CPUS_PER_TASK` variable is set to the 6 | number of CPUs we requested with `-c`. Using the `SLURM_CPUS_PER_TASK` variable, modify your job 7 | so that it prints how many CPUs have been allocated. 
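One possible solution sketch for this challenge (the `standard` partition and `short` QOS names are placeholder assumptions copied from other snippets in this directory; note that Slurm only defines `SLURM_CPUS_PER_TASK` when `--cpus-per-task`/`-c` was requested):

```bash
#!/bin/bash
#SBATCH --partition=standard
#SBATCH --qos=short
#SBATCH --cpus-per-task=2
#SBATCH --time=00:00:30

# SLURM_CPUS_PER_TASK is set by Slurm from the --cpus-per-task (-c)
# request; fall back to a message if it is not defined.
echo "CPUs allocated per task: ${SLURM_CPUS_PER_TASK:-not set}"
```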
8 | 9 | ::: 10 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # FROM runiverse/base:latest 2 | FROM rocker/rstudio:4.3.1 3 | 4 | ENV MY_UNIVERSE='https://carpentries.r-universe.dev' 5 | 6 | RUN apt update 7 | RUN apt install -y libxml2-dev libfontconfig1-dev libxslt1-dev 8 | 9 | RUN R -e 'install.packages(c("sandpaper", "varnish", "pegboard", "tinkr", "rmarkdown"), dep = TRUE, repos = c("https://carpentries.r-universe.dev/", getOption("repos")))' 10 | 11 | COPY .Rprofile /home/rstudio/.Rprofile 12 | COPY access.sh /etc/cont-init.d/99_access 13 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/resources/pi-mpi-details.Rmd: -------------------------------------------------------------------------------- 1 | ::: prereq 2 | 3 | ## Required Files 4 | 5 | The program used in this example can be retrieved using wget or a browser and copied to the remote. 6 | 7 | **Using wget**: 8 | ```bash 9 | `r config$remote$prompt` wget `r config$url``r config$baseurl`/files/pi-mpi.py 10 | ``` 11 | 12 | **Using a web browser**: 13 | 14 | [`r config$url``r config$baseurl`/files/pi-mpi.py](`r config$url``r config$baseurl`/files/pi-mpi.py) 15 | 16 | ::: -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Contributor Code of Conduct" 3 | --- 4 | 5 | As contributors and maintainers of this project, 6 | we pledge to follow the [The Carpentries Code of Conduct][coc]. 7 | 8 | Instances of abusive, harassing, or otherwise unacceptable behavior 9 | may be reported by following our [reporting guidelines][coc-reporting]. 
10 | 11 | 12 | [coc-reporting]: https://docs.carpentries.org/topic_folders/policies/incident-reporting.html 13 | [coc]: https://docs.carpentries.org/topic_folders/policies/code-of-conduct.html 14 | -------------------------------------------------------------------------------- /.github/workflows/pr-close-signal.yaml: -------------------------------------------------------------------------------- 1 | name: "Bot: Send Close Pull Request Signal" 2 | 3 | on: 4 | pull_request: 5 | types: 6 | [closed] 7 | 8 | jobs: 9 | send-close-signal: 10 | name: "Send closing signal" 11 | runs-on: ubuntu-latest 12 | if: ${{ github.event.action == 'closed' }} 13 | steps: 14 | - name: "Create PRtifact" 15 | run: | 16 | mkdir -p ./pr 17 | printf ${{ github.event.number }} > ./pr/NUM 18 | - name: Upload Diff 19 | uses: actions/upload-artifact@v3 20 | with: 21 | name: pr 22 | path: ./pr 23 | 24 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge.Rmd: -------------------------------------------------------------------------------- 1 | ::: challenge 2 | ## Running parallel jobs 3 | Modify the pi-mpi-run script that you used above to use all 128 cores on 4 | one node. Check the output to confirm that it used the correct number 5 | of cores in parallel for the calculation. 
6 | 7 | ::: solution 8 | Here is a modified script: 9 | 10 | ```bash 11 | #!/bin/bash 12 | 13 | #SBATCH --partition=`r config$sched$partition` 14 | #SBATCH --qos=`r config$sched$qos` 15 | #SBATCH --time=00:00:30 16 | 17 | #SBATCH --nodes=1 18 | #SBATCH --ntasks-per-node=128 19 | 20 | module load cray-python 21 | srun python pi-mpi.py 10000000 22 | ``` 23 | ::: 24 | ::: -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/python-module-path.snip: -------------------------------------------------------------------------------- 1 | /opt/cray/pe/python/3.8.5.0/bin:/lus/cls01095/work/z19/z19/aturner/.local/bin:/lus/cls01095/work/y07/shared/bolt/0.7/bin:/work/y07/shared/utils/bin:/usr/local/maven/bin:/opt/cray/pe/perftools/20.10.0/bin:/opt/cray/pe/papi/6.0.0.4/bin:/opt/cray/libfabric/1.11.0.0.233/bin:/opt/cray/pe/craype/2.7.2/bin:/opt/cray/pe/cce/10.0.4/cce-clang/x86_64/bin:/opt/cray/pe/cce/10.0.4/binutils/x86_64/x86_64-pc-linux-gnu/bin:/opt/cray/pe/cce/10.0.4/binutils/cross/x86_64-aarch64/aarch64-linux-gnu/../bin:/opt/cray/pe/cce/10.0.4/utils/x86_64/bin:/usr/local/Modules/bin:/home/z19/z19/aturner/bin:/usr/local/bin:/usr/bin:/bin:/opt/cray/pe/bin:/usr/lib/mit/bin 2 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-launch-desc.Rmd: -------------------------------------------------------------------------------- 1 | ```bash 2 | srun python pi-mpi.py 10000000 3 | ``` 4 | 5 | And this corresponds to the four required items we described above: 6 | 7 | 1. Parallel launch program: in this case the parallel launch program is 8 | called `srun`; the additional argument controls which cores are used. 9 | 2. Number of parallel processes per node: in this case this is 16, 10 | and is specified by the `--ntasks-per-node=16` option. 11 | 3. 
Total number of parallel processes: in this case this is also 16, because 12 | we specified 1 node and 16 parallel processes per node. 13 | 4. Our program and arguments: in this case this is `python pi-mpi.py 10000000`. 14 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/resources/view-output.snip: -------------------------------------------------------------------------------- 1 | > ## Viewing the sharpened output image 2 | > To see the effect of the sharpening algorithm, you can view the images using the `display` 3 | > program from the ImageMagick suite. 4 | > ``` 5 | > display sharpened.pgm 6 | > ``` 7 | > Type `q` in the image window to close the program. To view the image you will need an X 8 | > window client installed and you will need to have logged into {{ site.host_name }} with the `ssh -Y` 9 | > option to export the display back to your local system. If you are using Windows, the 10 | > MobaXterm program provides a login shell with X capability. If you are using macOS, then 11 | > you will need to install XQuartz. If you are using Linux then X should just work! 12 | {: .callout} -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/parallel-challenge2.Rmd: -------------------------------------------------------------------------------- 1 | ::: challenge 2 | ## Configuring parallel jobs 3 | You will see in the job output that information is displayed about 4 | where each MPI process is running, in particular which node it is 5 | on. 6 | 7 | Modify the pi-mpi-run script so that it runs a total of 16 processes across 2 nodes, i.e. only 8 tasks on each of the two nodes. 8 | Check the output file to ensure that you understand the job 9 | distribution.
11 | 12 | ::: solution 13 | ```bash 14 | #!/bin/bash 15 | 16 | #SBATCH --partition=`r config$sched$partition` 17 | #SBATCH --qos=`r config$sched$qos` 18 | #SBATCH --time=00:00:30 19 | 20 | #SBATCH --nodes=2 21 | #SBATCH --ntasks-per-node=8 22 | 23 | module load cray-python 24 | srun python pi-mpi.py 10000000 25 | ``` 26 | ::: 27 | ::: -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/resources/serial-submit.Rmd: -------------------------------------------------------------------------------- 1 | Creating a file called `submit-pi-mpi.slurm`: 2 | 3 | ```bash 4 | #!/bin/bash 5 | #SBATCH --partition=`r config$sched$partition` 6 | #SBATCH --qos=`r config$sched$qos` 7 | 8 | #SBATCH --job-name=pi-mpi 9 | #SBATCH --nodes=1 10 | #SBATCH --tasks-per-node=1 11 | #SBATCH --time=00:15:00 12 | srun python pi-mpi.py 10000000 13 | ``` 14 | 15 | Run the application using a single process (i.e. in serial) with a blocking `srun` command: 16 | ```bash 17 | module load cray-python 18 | `r config$sched$prompt_work` srun --partition=`r config$sched$partition` --qos=`r config$sched$qos` python pi-mpi.py 10000000 19 | ``` 20 | 21 | Submit to the batch queue with: 22 | 23 | ```bash 24 | `r config$sched$prompt_work` `r config$sched$submit.name` submit-pi-mpi.slurm 25 | ``` -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/resources/sharpen-details.snip: -------------------------------------------------------------------------------- 1 | The `sharpen` program has been precompiled on {{ site.remote.name }}; you can add it to your `PATH` variable with the commands: 2 | 3 | ``` 4 | export PATH=/work/{{ site.sched.project }}/{{ site.sched.project }}/shared/bin:$PATH 5 | export FUZZY_INPUT=/work/{{ site.sched.project }}/{{ site.sched.project }}/shared/fuzzy.pgm 6 | ``` 7 | {: .language-bash} 8 | 9 | Once you have set the required environment
variables, you can access the program as `sharpen-mpi`. 10 | You will also need to get a copy of the input file for this application. 11 | To do this, copy it from the 12 | central install location to your directory with (note you must have set the 13 | environment variables as described above for this to work): 14 | 15 | ``` 16 | {{ site.host_prompt }} cp $FUZZY_INPUT . 17 | ``` 18 | {: .language-bash} 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # sandpaper files 2 | episodes/*html 3 | site/* 4 | !site/README.md 5 | 6 | # History files 7 | .Rhistory 8 | .Rapp.history 9 | 10 | # Session Data files 11 | .RData 12 | 13 | # User-specific files 14 | .Ruserdata 15 | 16 | # Example code in package build process 17 | *-Ex.R 18 | 19 | # Output files from R CMD build 20 | /*.tar.gz 21 | 22 | # Output files from R CMD check 23 | /*.Rcheck/ 24 | 25 | # RStudio files 26 | .Rproj.user/ 27 | 28 | # produced vignettes 29 | vignettes/*.html 30 | vignettes/*.pdf 31 | 32 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 33 | .httr-oauth 34 | 35 | # knitr and R markdown default cache directories 36 | *_cache/ 37 | /cache/ 38 | 39 | # Temporary files created by R markdown 40 | *.utf8.md 41 | *.knit.md 42 | 43 | # R Environment Variables 44 | .Renviron 45 | 46 | # pkgdown site 47 | docs/ 48 | 49 | # translation temp files 50 | po/*~ 51 | 52 | # renv 53 | renv/ 54 | 55 | -------------------------------------------------------------------------------- /.github/workflows/pr-post-remove-branch.yaml: -------------------------------------------------------------------------------- 1 | name: "Bot: Remove Temporary PR Branch" 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Bot: Send Close Pull Request Signal"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | delete: 11 | name: "Delete branch from Pull Request" 12 | runs-on: ubuntu-latest 13 |
if: > 14 | github.event.workflow_run.event == 'pull_request' && 15 | github.event.workflow_run.conclusion == 'success' 16 | permissions: 17 | contents: write 18 | steps: 19 | - name: 'Download artifact' 20 | uses: carpentries/actions/download-workflow-artifact@main 21 | with: 22 | run: ${{ github.event.workflow_run.id }} 23 | name: pr 24 | - name: "Get PR Number" 25 | id: get-pr 26 | run: | 27 | unzip pr.zip 28 | echo "NUM=$(<./NUM)" >> $GITHUB_OUTPUT 29 | - name: 'Remove branch' 30 | uses: carpentries/actions/remove-branch@main 31 | with: 32 | pr: ${{ steps.get-pr.outputs.NUM }} 33 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/filesystem_issues.snip: -------------------------------------------------------------------------------- 1 | > ## Filesystem on ARCHER2 2 | > At this point it is important to remember that ARCHER2 has two *separate* 3 | > filesystems: `/home` and `/work`. 4 | > 5 | > * `/home` is meant for small files such as source code, and is the 6 | > filesystem that you are on when you log in 7 | > * `/work` is a much larger and faster filesystem, meant for production 8 | > runs and storing large datasets 9 | > 10 | > The `/home` filesystem **is not mounted on the compute nodes** 11 | > meaning that programs run in the batch queues cannot read from or 12 | > write to files in your home directory. This has not been a problem 13 | > so far as none of our programs have done file input or 14 | > output. However, the parallel program we will run here reads and 15 | > writes large images. 
16 | > 17 | > * When you log in, you will be in your home directory {{ site.host_homedir }} 18 | > * Before you run real programs on ARCHER2, you **must change directory** to 19 | > {{ site.host_workdir }} 20 | {: .callout} 21 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/login_output.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | This node is running Cray's Linux Environment version 1.3.2 3 | 4 | ####################################################################################### 5 | 6 | @@@@@@@@@ 7 | @@@ @@@ _ ____ ____ _ _ _____ ____ ____ 8 | @@@ @@@@@ @@@ / \ | _ \ / ___| | | | | | ____| | _ \ |___ \ 9 | @@@ @@ @@ @@@ / _ \ | |_) | | | | |_| | | _| | |_) | __) | 10 | @@ @@ @@@ @@ @@ / ___ \ | _ < | |___ | _ | | |___ | _ < / __/ 11 | @@ @@ @@@ @@ @@ /_/ \_\ |_| \_\ \____| |_| |_| |_____| |_| \_\ |_____| 12 | @@@ @@ @@ @@@ 13 | @@@ @@@@@ @@@ https://www.archer2.ac.uk/support-access/ 14 | @@@ @@@ 15 | @@@@@@@@@ 16 | 17 | - U K R I - E P C C - H P E C r a y - 18 | 19 | Hostname: uan01 20 | Distribution: SLES 15.1 1 21 | CPUS: 256 22 | Memory: 257.4GB 23 | Configured: 2021-04-27 24 | 25 | ###################################################################################### 26 | ``` -------------------------------------------------------------------------------- /episodes/lesson_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | local: 3 | prompt: "[user@laptop ~]$" 4 | bash_shebang: "#!/bin/bash" 5 | 6 | snippets: EPCC_ARCHER2_slurm 7 | baseurl: "https://epcced.github.io/2024-06-20-hpc-intro-shampton" 8 | 9 | remote: 10 | name: "ARCHER2" 11 | host_id: "EPCC_ARCHER2" 12 | login: "login.archer2.ac.uk" 13 | host: "ln03" 14 | node: "nid001053" 15 | location: "EPCC, The University of Edinburgh" 16 | homedir: "/home/ta158/ta158/" 17 | user: "userid" 18 | prompt: "userid@ln03:~>" 
19 | prompt_work: "userid@uan01:/work/ta158/ta158/userid>" 20 | module_python3: "cray-python" 21 | bash_shebang: "#!/bin/bash" 22 | 23 | sched: 24 | name: "Slurm" 25 | partition: "standard" 26 | #reservation: "shortqos" 27 | qos: "short" 28 | budget: "short" 29 | submit: 30 | name: "sbatch" 31 | options: "--partition=standard --qos=short" 32 | queue: 33 | debug: "" 34 | testing: "" 35 | status: "squeue" 36 | flag: 37 | user: "-u userid" 38 | interactive: "" 39 | histdetail: "-l -j" 40 | name: "--job-name" 41 | time: "--time" 42 | queue: "--partition" 43 | nodes: "--nodes" 44 | tasks: "" 45 | del: "scancel" 46 | interactive: "srun" 47 | info: "sinfo" 48 | comment: "#SBATCH" 49 | hist: "sacct" 50 | project: "ta158" 51 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/module_list.snip: -------------------------------------------------------------------------------- 1 | Currently Loaded Modulefiles: 2 | 1) cpe-cray 3 | 2) cce/10.0.4(default) 4 | 3) craype/2.7.2(default) 5 | 4) craype-x86-rome 6 | 5) libfabric/1.11.0.0.233(default) 7 | 6) craype-network-ofi 8 | 7) cray-dsmml/0.1.2(default) 9 | 8) perftools-base/20.10.0(default) 10 | 9) xpmem/2.2.35-7.0.1.0_1.9__gd50fabf.shasta(default) 11 | 10) cray-mpich/8.0.16(default) 12 | 11) cray-libsci/20.10.1.2(default) 13 | 12) bolt/0.7 14 | 13) /work/y07/shared/archer2-modules/modulefiles-cse/epcc-setup-env 15 | 14) /usr/local/share/epcc-module/epcc-module-loader 16 | 15) /work/y07/shared/archer2-modules/modulefiles-cse/epcc-setup-env-profile 17 | -------------------------------------------------------------------------------- /.github/workflows/pr-preflight.yaml: -------------------------------------------------------------------------------- 1 | name: "Pull Request Preflight Check" 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | ["main"] 7 | types: 8 | ["opened", "synchronize", "reopened"] 9 | 10 | jobs: 11 | test-pr: 12 | name: "Test 
if pull request is valid" 13 | if: ${{ github.event.action != 'closed' }} 14 | runs-on: ubuntu-latest 15 | outputs: 16 | is_valid: ${{ steps.check-pr.outputs.VALID }} 17 | permissions: 18 | pull-requests: write 19 | steps: 20 | - name: "Get Invalid Hashes File" 21 | id: hash 22 | run: | 23 | echo "json<<EOF" >> $GITHUB_OUTPUT 24 | curl "https://files.carpentries.org/invalid-hashes.json" >> $GITHUB_OUTPUT 25 | echo "EOF" >> $GITHUB_OUTPUT 26 | - name: "Check PR" 27 | id: check-pr 28 | uses: carpentries/actions/check-valid-pr@main 29 | with: 30 | pr: ${{ github.event.number }} 31 | invalid: ${{ fromJSON(steps.hash.outputs.json)[github.repository] }} 32 | fail_on_error: true 33 | - name: "Comment result of validation" 34 | id: comment-diff 35 | if: ${{ always() }} 36 | uses: carpentries/actions/comment-diff@main 37 | with: 38 | pr: ${{ github.event.number }} 39 | body: ${{ steps.check-pr.outputs.MSG }} 40 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/cluster/queue-info.snip: -------------------------------------------------------------------------------- 1 | ```output 2 | PARTITION AVAIL TIMELIMIT NODES STATE NODELIST 3 | standard up 1-00:00:00 27 drain* nid[001029,001050,001149,001363,001366,001391,001552,001568,001620,001642,001669,001672-001675,001688,001690-001691,001747,001751,001783,001793,001812,001832-001835] 4 | standard up 1-00:00:00 5 down* nid[001024,001026,001064,001239,001898] 5 | standard up 1-00:00:00 8 drain nid[001002,001028,001030-001031,001360-001362,001745] 6 | standard up 1-00:00:00 945 alloc
nid[001000-001001,001003-001023,001025,001027,001032-001037,001040-001049,001051-001063,001065-001108,001110-001145,001147,001150-001238,001240-001264,001266-001271,001274-001334,001337-001359,001364-001365,001367-001390,001392-001551,001553-001567,001569-001619,001621-001637,001639-001641,001643-001668,001670-001671,001676,001679-001687,001692-001734,001736-001744,001746,001748-001750,001752-001782,001784-001792,001794-001811,001813-001824,001826-001831,001836-001890,001892-001897,001899-001918,001920,001923-001934,001936-001945,001947-001965,001967-001981,001984-001991,002006-002023] 7 | standard up 1-00:00:00 37 resv nid[001038-001039,001109,001146,001148,001265,001272-001273,001335-001336,001638,001677-001678,001735,001891,001919,001921-001922,001935,001946,001966,001982-001983,001992-002005] 8 | ``` -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/scheduler/using-nodes-interactively.Rmd: -------------------------------------------------------------------------------- 1 | `srun` runs a single command in the queue system and then exits. 2 | Let's demonstrate this by running the 3 | `hostname` command with `srun`. (We can cancel an `srun` job with `Ctrl-c`.) 4 | 5 | ```bash 6 | `r config$host_prompt_work` srun `r config$sched$submit$options` --time=00:01:00 hostname 7 | ``` 8 | ```output 9 | nid001976 10 | ``` 11 | 12 | `srun` accepts all of the same options as `sbatch`. However, instead of specifying these in a 13 | script, these options are specified on the command-line when starting a job. 14 | 15 | Typically, the resulting shell environment will be the same as that for 16 | `sbatch`. 17 | 18 | ### Interactive jobs 19 | 20 | Sometimes, you will need a lot of resource for interactive use. Perhaps it's our first time running 21 | an analysis or we are attempting to debug something that went wrong with a previous job. 
22 | Fortunately, Slurm makes it easy to start an interactive job with `srun`: 23 | 24 | ```bash 25 | `r config$host_prompt_work` srun `r config$sched$submit$options` --pty /bin/bash 26 | ``` 27 | 28 | You should be presented with a bash prompt. Note that the prompt may change 29 | to reflect your new location, in this case the compute node you are logged in to. 30 | You can also verify this with `hostname`. 31 | 32 | When you are done with the interactive job, type `exit` to quit your session. 33 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/available-modules.Rmd: -------------------------------------------------------------------------------- 1 | ```output 2 | ----------- /work/y07/shared/archer2-modules/modulefiles-cse-pyvenvs ----------- 3 | tensorflow/2.3.1-py38 torch/1.6.0-py38 4 | 5 | ----------- /work/y07/shared/archer2-modules/modulefiles-cse-pymods ------------ 6 | python-netCDF4/1.5.5.1 7 | 8 | ------------ /work/y07/shared/archer2-modules/modulefiles-cse-utils ------------ 9 | bolt/0.7 ncview/ncview-2.1.7-gcc-10.1.0 vmd/1.9.3-mpi-gcc10 10 | cmake/3.18.4 reframe/3.2 xios/2.5-gcc10 11 | ed/1.16-gcc10 tcl/8.4.20-gcc10 xthi/1.0 12 | epcc-job-env tcl/8.5.0-gcc10 xthi/1.0-gcc10 13 | epcc-reframe/0.1 tcl/8.6.0-gcc10 14 | genmaskcpu/1.0 tcl/8.6.10-gcc10(default) 15 | gnuplot/5.4.1-gcc-10.1.0 tk/8.5.6-gcc10 16 | lzip/1.20-gcc10 tk/8.6.10-gcc10(default) 17 | nco/4.9.6 visidata/2.1 18 | nco/4.9.6-gcc-10.1.0 vmd/1.9.3-gcc10(default) 19 | 20 | ------------ /work/y07/shared/archer2-modules/modulefiles-cse-libs ------------- 21 | adios/1.13.1 hypre/2.18.0 mumps/5.2.1 superlu-dist/6.1.1 22 | boost/1.72.0 libxml2/2.9.7-gcc-9.3.0 parmetis/4.0.3 superlu/5.2.1 23 | glm/0.9.9.6 matio/1.5.18 petsc/3.13.3 trilinos/12.18.1 24 | gmp/6.1.2-gcc10 metis/5.1.0 scotch/6.0.10 25 | ...
26 | ``` -------------------------------------------------------------------------------- /episodes/files/snippets/README.md: -------------------------------------------------------------------------------- 1 | # HPC Carpentry Snippets Library 2 | 3 | This directory contains snippets of code and output that are specific 4 | to a particular site. For example, when the lesson shows the status 5 | of the cluster and its nodes, it is preferable to show *your* cluster 6 | and *your* nodes. If you replace the contents of the relevant snippet, 7 | the website gets built with your cluster details, instead of generic 8 | values (or, more precisely, values taken from ComputeCanada). 9 | 10 | The snippets have been named so that the lessons use them in roughly 11 | alphabetical order, while still reflecting something of the contents 12 | of each file. So, if you're reading (or teaching) a lesson and notice 13 | something amiss about half-way through, look to the files about 14 | half-way through the directory. If it's the first or last snippet, 15 | you're in particularly good luck. 16 | 17 | This alphabetical ordering was not always the case. To reduce the 18 | headache of keeping forks, branches, and derivative works up-to-date, 19 | we have included a utility to rename snippets from the older scheme. 20 | To use it, run the following command, rebuild and test your site, and 21 | commit the changes. 22 | 23 | ```bash 24 | $ ./rename-snippets.sh 25 | ``` 26 | 27 | If the naming seems counter-intuitive, please feel free to make 28 | changes locally, and file an issue or submit a pull request to fix it 29 | upstream. None of this is set in stone, and improvements are always 30 | welcome.
31 | 32 | 33 | -------------------------------------------------------------------------------- /index.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | site: sandpaper::sandpaper_site 3 | --- 4 | 5 | ```{r load_config, include = FALSE} 6 | library(yaml) 7 | config <- yaml.load_file("lesson_config.yaml") 8 | ``` 9 | 10 | ::: prereq 11 | 12 | To begin this lesson you must have completed the following: 13 | 14 | * Bring a laptop with a Mac, Linux, or Windows operating 15 | system (not a tablet, Chromebook, etc.) that you can download and run 16 | MobaXterm on. Alternatively, you should have a few specific software 17 | packages installed (listed in the Setup section below). 18 | You are also required to abide by the ARCHER2 Training Code of Conduct. 19 | * Complete the software/system [setup](#shell-setup) including the 20 | connection to ARCHER2 via [SSH](#connect-to-archer2). 21 | 22 | ::: 23 | 24 | ## General Information 25 | 26 | This lesson is an introduction to using high-performance computing systems 27 | effectively. We obviously can't cover every case or give an exhaustive course 28 | on parallel programming in just one lesson of teaching time. Instead, this 29 | lesson is intended to give students a good introduction and overview of the 30 | tools available and how to use them effectively. 31 | 32 | By the end of this lesson, students will know how to: 33 | 34 | ::: checklist 35 | * Identify problems an HPC system can help solve. 36 | * Use the UNIX shell (also known as terminal or command line) to operate a 37 | computer, connect to an HPC system, and write simple shell scripts. 38 | * Submit and manage jobs on an HPC system using a scheduler, transfer files, 39 | and use software through environment modules.
40 | ::: 41 | -------------------------------------------------------------------------------- /.github/workflows/sandpaper-main.yaml: -------------------------------------------------------------------------------- 1 | name: "01 Build and Deploy Site" 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | schedule: 9 | - cron: '0 0 * * 2' 10 | workflow_dispatch: 11 | inputs: 12 | name: 13 | description: 'Who triggered this build?' 14 | required: true 15 | default: 'Maintainer (via GitHub)' 16 | reset: 17 | description: 'Reset cached markdown files' 18 | required: false 19 | default: false 20 | type: boolean 21 | jobs: 22 | full-build: 23 | name: "Build Full Site" 24 | runs-on: ubuntu-latest 25 | permissions: 26 | checks: write 27 | contents: write 28 | pages: write 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | RENV_PATHS_ROOT: ~/.local/share/renv/ 32 | steps: 33 | 34 | - name: "Checkout Lesson" 35 | uses: actions/checkout@v3 36 | 37 | - name: "Set up R" 38 | uses: r-lib/actions/setup-r@v2 39 | with: 40 | use-public-rspm: true 41 | install-r: false 42 | 43 | - name: "Set up Pandoc" 44 | uses: r-lib/actions/setup-pandoc@v2 45 | 46 | - name: "Setup Lesson Engine" 47 | uses: carpentries/actions/setup-sandpaper@main 48 | with: 49 | cache-version: ${{ secrets.CACHE_VERSION }} 50 | 51 | - name: "Setup Package Cache" 52 | uses: carpentries/actions/setup-lesson-deps@main 53 | with: 54 | cache-version: ${{ secrets.CACHE_VERSION }} 55 | 56 | - name: "Deploy Site" 57 | run: | 58 | reset <- "${{ github.event.inputs.reset }}" == "true" 59 | sandpaper::package_cache_trigger(TRUE) 60 | sandpaper:::ci_deploy(reset = reset) 61 | shell: Rscript {0} 62 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/_config_options.yml: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------ 2 | # 
EPCC ARCHER2 + Slurm 3 | #------------------------------------------------------------ 4 | 5 | # Cluster host and scheduler options: the defaults come from 6 | # Graham at Compute Canada, running Slurm. Other options can 7 | # be found in the library of snippets, 8 | # `_includes/snippets_library`. To use one, replace options 9 | # below with those in `_config_options.yml` from the 10 | # library. E.g., to customise for Cirrus at EPCC, running 11 | # PBS, we could replace the options below with those from 12 | # 13 | # _includes/snippets_library/EPCC_Cirrus_pbs/_config_options.yml 14 | # 15 | # If your cluster is not represented in the library, please 16 | # copy an existing folder, rename it, and customize for your 17 | # installation. Remember to keep the leading slash on the 18 | # `snippets` variable below! 19 | 20 | snippets: "/snippets_library/EPCC_ARCHER2_slurm" 21 | 22 | local: 23 | prompt: "[user@laptop ~]$" 24 | bash_shebang: "#!/bin/bash" 25 | 26 | remote: 27 | name: "ARCHER2" 28 | login: "login.archer2.ac.uk" 29 | host: "ln03" 30 | node: "nid001053" 31 | location: "EPCC, The University of Edinburgh" 32 | homedir: "/home/ta076/ta076/" 33 | user: "userid" 34 | prompt: "userid@ln03:~>" 35 | prompt-work: "userid@ln03:/work/ta076/ta076/userid>" 36 | module_python3: "cray-python" 37 | bash_shebang: "#!/bin/bash" 38 | 39 | sched: 40 | name: "Slurm" 41 | reservation: "shortqos" 42 | budget: "short" 43 | submit: 44 | name: "sbatch" 45 | options: "--partition=standard --qos=short" 46 | queue: 47 | debug: "" 48 | testing: "" 49 | status: "squeue" 50 | flag: 51 | user: "-u userid" 52 | interactive: "" 53 | histdetail: "-j" 54 | name: "--job-name" 55 | time: "--time" 56 | queue: "--partition" 57 | nodes: "--nodes" 58 | tasks: "" 59 | del: "scancel" 60 | interactive: "srun" 61 | info: "sinfo" 62 | comment: "#SBATCH" 63 | hist: "sacct" 64 | 65 | 66 | -------------------------------------------------------------------------------- /config.yaml:
-------------------------------------------------------------------------------- 1 | --- 2 | #------------------------------------------------------------ 3 | # Values for this lesson. 4 | #------------------------------------------------------------ 5 | 6 | # Which carpentry is this (swc, dc, lc, or cp)? 7 | # swc: Software Carpentry 8 | # dc: Data Carpentry 9 | # lc: Library Carpentry 10 | # cp: Carpentries (to use for instructor training for instance) 11 | # incubator: The Carpentries Incubator 12 | carpentry: "incubator" 13 | 14 | # Overall title for pages. 15 | title: "ARCHER2 Introduction to HPC Software Carpentry" 16 | 17 | # Date the lesson was created (YYYY-MM-DD, this is empty by default) 18 | created: 2024-06-20 19 | 20 | # Comma-separated list of keywords for the lesson 21 | keywords: "software, data, lesson, The Carpentries, HPC, EPCC" 22 | 23 | # Life cycle stage of the lesson 24 | # possible values: pre-alpha, alpha, beta, stable 25 | life_cycle: "beta" 26 | 27 | # License of the lesson 28 | license: "CC-BY 4.0" 29 | 30 | # Link to the source repository for this lesson 31 | source: "https://github.com/EPCCed/2024-06-20-hpc-intro-shampton" 32 | 33 | # Default branch of your lesson 34 | branch: "main" 35 | 36 | # Who to contact if there are any issues 37 | contact: "support@archer2.ac.uk" 38 | 39 | # Navigation ------------------------------------------------ 40 | # 41 | # Use the following menu items to specify the order of 42 | # individual pages in each dropdown section. Leave blank to 43 | # include all pages in the folder. 
44 | # 45 | # Example ------------- 46 | # 47 | # episodes: 48 | # - introduction.md 49 | # - first-steps.md 50 | # 51 | # learners: 52 | # - setup.md 53 | # 54 | # instructors: 55 | # - instructor-notes.md 56 | # 57 | # profiles: 58 | # - one-learner.md 59 | # - another-learner.md 60 | 61 | # Order of episodes in your lesson 62 | episodes: 63 | - 11-hpc-intro.Rmd 64 | - 12-cluster.Rmd 65 | - 13-scheduler.Rmd 66 | - 14-modules.Rmd 67 | - 15-transferring-files.Rmd 68 | - 17-resources.Rmd 69 | - 18-responsibility.Rmd 70 | 71 | # Information for Learners 72 | learners: 73 | - reference.md 74 | 75 | # Information for Instructors 76 | instructors: 77 | 78 | # Learner Profiles 79 | profiles: 80 | # Customisation --------------------------------------------- 81 | # 82 | # This space below is where custom yaml items (e.g. pinning 83 | # sandpaper and varnish versions) should live 84 | 85 | -------------------------------------------------------------------------------- /.github/workflows/update-workflows.yaml: -------------------------------------------------------------------------------- 1 | name: "02 Maintain: Update Workflow Files" 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | name: 7 | description: 'Who triggered this build (enter github username to tag yourself)?' 
8 | required: true 9 | default: 'weekly run' 10 | clean: 11 | description: 'Workflow files/file extensions to clean (no wildcards, enter "" for none)' 12 | required: false 13 | default: '.yaml' 14 | schedule: 15 | # Run every Tuesday 16 | - cron: '0 0 * * 2' 17 | 18 | jobs: 19 | check_token: 20 | name: "Check SANDPAPER_WORKFLOW token" 21 | runs-on: ubuntu-latest 22 | outputs: 23 | workflow: ${{ steps.validate.outputs.wf }} 24 | repo: ${{ steps.validate.outputs.repo }} 25 | steps: 26 | - name: "validate token" 27 | id: validate 28 | uses: carpentries/actions/check-valid-credentials@main 29 | with: 30 | token: ${{ secrets.SANDPAPER_WORKFLOW }} 31 | 32 | update_workflow: 33 | name: "Update Workflow" 34 | runs-on: ubuntu-latest 35 | needs: check_token 36 | if: ${{ needs.check_token.outputs.workflow == 'true' }} 37 | steps: 38 | - name: "Checkout Repository" 39 | uses: actions/checkout@v3 40 | 41 | - name: Update Workflows 42 | id: update 43 | uses: carpentries/actions/update-workflows@main 44 | with: 45 | clean: ${{ github.event.inputs.clean }} 46 | 47 | - name: Create Pull Request 48 | id: cpr 49 | if: "${{ steps.update.outputs.new }}" 50 | uses: carpentries/create-pull-request@main 51 | with: 52 | token: ${{ secrets.SANDPAPER_WORKFLOW }} 53 | delete-branch: true 54 | branch: "update/workflows" 55 | commit-message: "[actions] update sandpaper workflow to version ${{ steps.update.outputs.new }}" 56 | title: "Update Workflows to Version ${{ steps.update.outputs.new }}" 57 | body: | 58 | :robot: This is an automated build 59 | 60 | Update Workflows from sandpaper version ${{ steps.update.outputs.old }} -> ${{ steps.update.outputs.new }} 61 | 62 | - Auto-generated by [create-pull-request][1] on ${{ steps.update.outputs.date }} 63 | 64 | [1]: https://github.com/carpentries/create-pull-request/tree/main 65 | labels: "type: template and tools" 66 | draft: false 67 | -------------------------------------------------------------------------------- /links.md: 
-------------------------------------------------------------------------------- 1 | 7 | 8 | [pandoc]: https://pandoc.org/MANUAL.html 9 | [r-markdown]: https://rmarkdown.rstudio.com/ 10 | [rstudio]: https://www.rstudio.com/ 11 | [carpentries-workbench]: https://carpentries.github.io/sandpaper-docs/ 12 | 13 | [gitbash]: https://gitforwindows.org/ 14 | [wsl]: https://docs.microsoft.com/en-us/windows/wsl/install-win10 15 | [mobatek]: https://mobaxterm.mobatek.net/download-home-edition.html 16 | 17 | [cc-by-human]: https://creativecommons.org/licenses/by/4.0/ 18 | [cc-by-legal]: https://creativecommons.org/licenses/by/4.0/legalcode 19 | [ci]: https://communityin.org/ 20 | [coc-reporting]: https://docs.carpentries.org/topic_folders/policies/incident-reporting.html 21 | [coc]: https://docs.carpentries.org/topic_folders/policies/code-of-conduct.html 22 | [concept-maps]: https://carpentries.github.io/instructor-training/05-memory/ 23 | [contrib-covenant]: https://contributor-covenant.org/ 24 | [contributing]: {{ repo_url }}/blob/{{ source_branch }}/CONTRIBUTING.md 25 | [cran-checkpoint]: https://cran.r-project.org/package=checkpoint 26 | [cran-knitr]: https://cran.r-project.org/package=knitr 27 | [cran-stringr]: https://cran.r-project.org/package=stringr 28 | [dc-lessons]: https://www.datacarpentry.org/lessons/ 29 | [email]: mailto:team@carpentries.org 30 | [github-importer]: https://import.github.com/ 31 | [importer]: https://github.com/new/import 32 | [lc-lessons]: https://librarycarpentry.org/lessons/ 33 | [lesson-coc]: {{ relative_root_path }}{% link CODE_OF_CONDUCT.md %} 34 | [lesson-example]: https://carpentries.github.io/lesson-example/ 35 | [lesson-license]: {{ relative_root_path }}{% link LICENSE.md %} 36 | [lesson-mainpage]: {{ relative_root_path }}{% link index.md %} 37 | [mit-license]: https://opensource.org/licenses/mit-license.html 38 | [morea]: https://morea-framework.github.io/ 39 | [numfocus]: https://numfocus.org/ 40 | [osi]: https://opensource.org 41 |
[paper-now]: https://github.com/PeerJ/paper-now 42 | [python-gapminder]: https://swcarpentry.github.io/python-novice-gapminder/ 43 | [pyyaml]: https://pypi.org/project/PyYAML/ 44 | 45 | [styles]: https://github.com/carpentries/styles/ 46 | [swc-lessons]: https://software-carpentry.org/lessons/ 47 | [swc-releases]: https://github.com/swcarpentry/swc-releases 48 | [training]: https://carpentries.github.io/instructor-training/ 49 | [workshop-repo]: {{ site.workshop_repo }} 50 | [yaml]: https://yaml.org/ 51 | -------------------------------------------------------------------------------- /profiles/learner-profiles.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Learner Profiles" 3 | --- 4 | 5 | These profiles describe the learners that we anticipate 6 | for this lesson. These can be used if you are deciding if this material is 7 | right for you or your students. If you plan to contribute material to this 8 | lesson, these will help you understand the target audience so that we can have 9 | a collaboratively developed, but cohesive lesson. 10 | 11 | ## Tyra Crick 12 | 13 | Tyra is an environmental biologist who uses DNA signatures obtained from soils 14 | to study species diversity in the environment. She needs to compare DNA 15 | sequences to large databases. So far, she has been able to use web-based tools 16 | for her limited datasets. 17 | 18 | Recently, Tyra has started working with much larger datasets, and discovered 19 | that the online tool she uses has a limit of 50 entries on the online server. 20 | She has heard it should be possible to run the same tool through the command 21 | line, and managed to install it on her laptop. Now, however, it takes 22 | several days before each of the analyses is finished.
23 | 24 | The workshop will teach Tyra to move her data to and from the university's 25 | computer cluster, and submit jobs using pre-installed software on the cluster. 26 | Afterwards, Tyra will be able to analyze her own data using pre-installed 27 | command-line versions of the tool to spread the analysis over several 28 | dozen cores so it finishes in a few hours. 29 | 30 | ## Maria Newton 31 | 32 | As a new PhD student, Maria is given the task of selecting parameters for their 33 | simulation. They need to run a set of calculations on several thousand 34 | combinations of parameters. One calculation takes several minutes. They set up 35 | the problem on their laptop but quickly realise that it would take more than a 36 | month to complete the task. They are told to use the local HPC system but they are not 37 | sure how this would help them. 38 | 39 | ## Dana Fisher 40 | 41 | Dana wants to cross-validate a model for a statistics class project. This 42 | involves running the model 1000 times — but each run takes an hour. 43 | Running the model on a laptop will take over a month! 44 | 45 | ## Rina Watson 46 | 47 | Rina, a genomics researcher, has been using small datasets of sequence data, 48 | but soon will be receiving a new type of sequencing data that is 10 times as 49 | large. It's already challenging to open the datasets on a computer — 50 | analysing these larger datasets will probably crash it. 51 | 52 | ## Lucy Navier-Stokes 53 | 54 | Lucy is using a fluid dynamics package that has an option to run in parallel. 55 | So far, she has not used this option on a desktop. In going from 2D to 3D 56 | simulations, the simulation time has more than tripled. It might be useful to 57 | take advantage of that parallel feature to speed things up.
58 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/wrong-gcc-version.snip: -------------------------------------------------------------------------------- 1 | Let's take a closer look at the `gcc` module. GCC is an extremely widely used C/C++/Fortran 2 | compiler. Tons of software is dependent on the GCC version, and might not compile or run if the 3 | wrong version is loaded. In this case, there are two different versions: `gcc/4.8.5` and 4 | `gcc/5.4.0`. How do we load each copy and which copy is the default? 5 | 6 | In this case, `gcc/5.4.0` has a `(D)` next to it. This indicates that it is the default - if we type 7 | `module load gcc`, this is the copy that will be loaded. 8 | 9 | ``` 10 | {{ site.host_prompt }} module load gcc 11 | {{ site.host_prompt }} gcc --version 12 | ``` 13 | {: .language-bash} 14 | ``` 15 | Lmod is automatically replacing "intel/2016.4" with "gcc/5.4.0". 16 | 17 | 18 | Due to MODULEPATH changes, the following have been reloaded: 19 | 1) openmpi/2.1.1 20 | 21 | gcc (GCC) 5.4.0 22 | Copyright (C) 2015 Free Software Foundation, Inc. 23 | This is free software; see the source for copying conditions. There is NO 24 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 25 | ``` 26 | {: .output} 27 | 28 | Note that three things happened: the default copy of GCC was loaded (version 5.4.0), the Intel 29 | compilers (which conflict with GCC) were unloaded, and software that is dependent on compiler 30 | (OpenMPI) was reloaded. The `module` system turned what might be a super-complex operation into a 31 | single command. 32 | 33 | So how do we load the non-default copy of a software package? In this case, the only change we need 34 | to make is be more specific about the module we are loading. There are two GCC modules: `gcc/5.4.0` 35 | and `gcc/4.8.5`. 
To load a non-default module, the only change we need to make to our `module load` 36 | command is to add the version number after the `/`. 37 | 38 | ``` 39 | {{ site.host_prompt }} module load gcc/4.8.5 40 | {{ site.host_prompt }} gcc --version 41 | ``` 42 | {: .language-bash} 43 | ``` 44 | Inactive Modules: 45 | 1) openmpi 46 | 47 | The following have been reloaded with a version change: 48 | 1) gcc/5.4.0 => gcc/4.8.5 49 | 50 | gcc (GCC) 4.8.5 51 | Copyright (C) 2015 Free Software Foundation, Inc. 52 | This is free software; see the source for copying conditions. There is NO 53 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 54 | ``` 55 | {: .output} 56 | 57 | We have now successfully switched from GCC 5.4.0 to GCC 4.8.5. It is also important to note that 58 | there was no compatible OpenMPI module available for GCC 4.8.5. Because of this, the `module` 59 | program has "inactivated" the module. All this means for us is that if we re-load GCC 5.4.0, 60 | `module` will remember that OpenMPI used to be loaded and load that module as well. 61 | 62 | ``` 63 | {{ site.host_prompt }} module load gcc/5.4.0 64 | ``` 65 | {: .language-bash} 66 | ``` 67 | Activating Modules: 68 | 1) openmpi/2.1.1 69 | 70 | The following have been reloaded with a version change: 71 | 1) gcc/4.8.5 => gcc/5.4.0 72 | ``` 73 | {: .output} 74 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/modules/software-dependencies.snip: -------------------------------------------------------------------------------- 1 | To demonstrate, let's use `module list`. `module list` shows all loaded software modules.
2 | 3 | ``` 4 | {{ site.host_prompt }} module list 5 | ``` 6 | {: .language-bash} 7 | ``` 8 | Currently Loaded Modules: 9 | 1) nixpkgs/.16.09 (H,S) 3) gcccore/.5.4.0 (H) 5) intel/2016.4 (t) 7) StdEnv/2016.4 (S) 10 | 2) icc/.2016.4.258 (H) 4) ifort/.2016.4.258 (H) 6) openmpi/2.1.1 (m) 8) python/3.5.2 (t) 11 | 12 | Where: 13 | S: Module is Sticky, requires --force to unload or purge 14 | m: MPI implementations / Implémentations MPI 15 | t: Tools for development / Outils de développement 16 | H: Hidden Module 17 | ``` 18 | {: .output} 19 | 20 | ``` 21 | {{ site.host_prompt }} module load beast 22 | {{ site.host_prompt }} module list 23 | ``` 24 | {: .language-bash} 25 | ``` 26 | Currently Loaded Modules: 27 | 1) nixpkgs/.16.09 (H,S) 5) intel/2016.4 (t) 9) java/1.8.0_121 (t) 28 | 2) icc/.2016.4.258 (H) 6) openmpi/2.1.1 (m) 10) beagle-lib/2.1.2 (bio) 29 | 3) gcccore/.5.4.0 (H) 7) StdEnv/2016.4 (S) 11) beast/2.4.0 (chem) 30 | 4) ifort/.2016.4.258 (H) 8) python/3.5.2 (t) 31 | 32 | Where: 33 | S: Module is Sticky, requires --force to unload or purge 34 | bio: Bioinformatic libraries/apps / Logiciels de bioinformatique 35 | m: MPI implementations / Implémentations MPI 36 | t: Tools for development / Outils de développement 37 | chem: Chemistry libraries/apps / Logiciels de chimie 38 | H: Hidden Module 39 | ``` 40 | {: .output} 41 | 42 | So in this case, loading the `beast` module (a bioinformatics software package), also loaded 43 | `java/1.8.0_121` and `beagle-lib/2.1.2` as well. Let's try unloading the `beast` package. 
44 | 45 | ``` 46 | {{ site.host_prompt }} module unload beast 47 | {{ site.host_prompt }} module list 48 | ``` 49 | {: .language-bash} 50 | ``` 51 | Currently Loaded Modules: 52 | 1) nixpkgs/.16.09 (H,S) 3) gcccore/.5.4.0 (H) 5) intel/2016.4 (t) 7) StdEnv/2016.4 (S) 53 | 2) icc/.2016.4.258 (H) 4) ifort/.2016.4.258 (H) 6) openmpi/2.1.1 (m) 8) python/3.5.2 (t) 54 | 55 | Where: 56 | S: Module is Sticky, requires --force to unload or purge 57 | m: MPI implementations / Implémentations MPI 58 | t: Tools for development / Outils de développement 59 | H: Hidden Module 60 | ``` 61 | {: .output} 62 | 63 | So using `module unload` "un-loads" a module along with its dependencies. 64 | If we wanted to unload everything at once, we could run `module purge` (unloads everything). 65 | 66 | ``` 67 | {{ site.host_prompt }} module purge 68 | ``` 69 | {: .language-bash} 70 | ``` 71 | The following modules were not unloaded: 72 | (Use "module --force purge" to unload all): 73 | 74 | 1) StdEnv/2016.4 3) icc/.2016.4.258 5) ifort/.2016.4.258 7) imkl/11.3.4.258 75 | 2) nixpkgs/.16.09 4) gcccore/.5.4.0 6) intel/2016.4 8) openmpi/2.1.1 76 | ``` 77 | {: .output} 78 | 79 | Note that `module purge` is informative. It lets us know that all but a default set of packages 80 | have been unloaded (and how to actually unload these if we truly so desired). 81 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/resources/runtime-exercise.Rmd: -------------------------------------------------------------------------------- 1 | ::: challenge 2 | ## Benchmarking the parallel performance 3 | Modify your job script to run on multiple cores and evaluate the performance of `pi-mpi.py` 4 | on a variety of different core counts and use multiple runs to complete a table like the one 5 | below. 
6 | If you examine the log file, you will see that it contains two timings: the total time taken by the 7 | entire program and the time taken solely by the calculation. The calculation of Pi from the Monte Carlo counts 8 | is not parallelised, so this is a serial overhead, performed by a single processor. 9 | The calculation part is, in theory, perfectly parallel (each processor operates on an independent set of unique random numbers 10 | ), so this should get faster on more cores. The *Calculation core seconds* value is the 11 | *calculation time* multiplied by the number of cores. 12 | 13 | | Cores | Overall run time (s) | Calculation time (s) | Calculation core seconds | 14 | |------------|----------------------|----------------------|--------------------------| 15 | | 1 (serial) | | | | 16 | | 2 | | | | 17 | | 4 | | | | 18 | | 8 | | | | 19 | | 16 | | | | 20 | | 32 | | | | 21 | | 64 | | | | 22 | | 128 | | | | 23 | | 256 | | | | 24 | 25 | Look at your results – do they make sense? Given the structure of the code, you would 26 | expect the performance of the calculation to increase 27 | linearly with the number of cores: this would give a roughly constant figure for the Calculation core 28 | seconds. Is this what you observe?
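The last column is simple arithmetic, but it is the quantity that reveals the scaling behaviour: under perfect scaling it stays constant. A minimal Python sketch of the bookkeeping (the timing values here are made up purely for illustration; substitute your own measurements):

```python
def core_seconds(calc_time, cores):
    """Calculation core seconds: calculation time multiplied by core count."""
    return calc_time * cores

# Made-up example timings (cores -> calculation time in seconds).
example = {1: 4.00, 2: 2.05, 4: 1.08}
for cores, t in example.items():
    print(f"{cores:>3} cores: {core_seconds(t, cores):.2f} core seconds")
```

A roughly flat set of core-second values indicates near-perfect parallel scaling; values that grow with the core count indicate parallel overhead.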
29 | 30 | ::: solution 31 | 32 | The table below shows example timings for runs on `r config$remote$name` 33 | 34 | | Cores | Overall run time (s) | Calculation time (s) | Calculation core seconds | 35 | |-----------:|---------------------:|---------------------:|-------------------------------:| 36 | | 1 | 3.931 | 3.854 | 3.854 | 37 | | 2 | 2.002 | 1.930 | 3.859 | 38 | | 4 | 1.048 | 0.972 | 3.888 | 39 | | 8 | 0.572 | 0.495 | 3.958 | 40 | | 16 | 0.613 | 0.536 | 8.574 | 41 | | 32 | 0.360 | 0.278 | 8.880 | 42 | | 64 | 0.249 | 0.163 | 10.400 | 43 | | 128 | 0.170 | 0.083 | 10.624 | 44 | | 256 | 0.187 | 0.135 | 34.560 | 45 | 46 | ::: 47 | ::: -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Carpentries Workbench Template Markdown Lesson 2 | 3 | This lesson is a template lesson that uses [The Carpentries Workbench][workbench]. 4 | 5 | ## Create a new repository from this template 6 | 7 | To use this template to start a new lesson repository, 8 | make sure you're logged into Github 9 | visit https://github.com/carpentries/workbench-template-md/generate 10 | and follow the instructions. 11 | Checking the 'Include all branches' option will save some time waiting for the first website build 12 | when your new repository is initialised. 13 | 14 | If you have any questions, contact [@zkamvar](https://github.com/zkamvar) 15 | 16 | ## Configure a new lesson 17 | 18 | Follow the steps below to 19 | complete the initial configuration of a new lesson repository built from this template: 20 | 21 | 1. **Make sure GitHub Pages is activated:** 22 | navigate to _Settings_, 23 | select _Pages_ from the left sidebar, 24 | and make sure that `gh-pages` is selected as the branch to build from. 25 | If no `gh-pages` branch is available, check _Actions_ to see if the first 26 | website build workflows are still running. 
27 | The branch should become available when those have completed. 28 | 1. **Adjust the `config.yaml` file:** 29 | this file contains global parameters for your lesson site. 30 | Individual fields within the file are documented with comments (beginning with `#`). 31 | At minimum, you should adjust all the fields marked 'FIXME': 32 | - `title` 33 | - `created` 34 | - `keywords` 35 | - `life_cycle` (the default, _pre-alpha_, is appropriate for brand-new lessons) 36 | - `contact` 37 | 1. **Annotate the repository** with site URL and topic tags: 38 | navigate back to the repository landing page and 39 | click on the gear wheel/cog icon (similar to ⚙️) 40 | at the top-right of the _About_ box. 41 | Check the "Use your GitHub Pages website" option, 42 | and [add some keywords and other annotations to describe your lesson](https://cdh.carpentries.org/the-carpentries-incubator.html#topic-tags) 43 | in the _Topics_ field. 44 | At minimum, these should include: 45 | - `lesson` 46 | - the life cycle of the lesson (e.g. `pre-alpha`) 47 | - the human language the lesson is written in (e.g. `deutsch`) 48 | 1. **Adjust the 49 | `CODE_OF_CONDUCT.md`, `CONTRIBUTING.md`, and `LICENSE.md` files** 50 | as appropriate for your project. 51 | - `CODE_OF_CONDUCT.md`: 52 | if you are using this template for a project outside The Carpentries, 53 | you should adjust this file to describe 54 | who should be contacted with Code of Conduct reports, 55 | and how those reports will be handled. 56 | - `CONTRIBUTING.md`: 57 | depending on the current state and maturity of your project, 58 | the contents of the template Contributing Guide may not be appropriate. 59 | You should adjust the file to help guide contributors on how best 60 | to get involved and make an impact on your lesson. 61 | - `LICENSE.md`: 62 | in line with the terms of the CC-BY license, 63 | you should ensure that the copyright information 64 | provided in the license file is accurate for your project. 65 | 1.
**Update this README with 66 | [relevant information about your lesson](https://carpentries.github.io/lesson-development-training/26-external.html#readme)** 67 | and delete this section. 68 | 69 | [workbench]: https://carpentries.github.io/sandpaper-docs/ 70 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Licenses" 3 | --- 4 | 5 | ## Instructional Material 6 | 7 | All Carpentries (Software Carpentry, Data Carpentry, and Library Carpentry) 8 | instructional material is made available under the [Creative Commons 9 | Attribution license][cc-by-human]. The following is a human-readable summary of 10 | (and not a substitute for) the [full legal text of the CC BY 4.0 11 | license][cc-by-legal]. 12 | 13 | You are free: 14 | 15 | - to **Share**---copy and redistribute the material in any medium or format 16 | - to **Adapt**---remix, transform, and build upon the material 17 | 18 | for any purpose, even commercially. 19 | 20 | The licensor cannot revoke these freedoms as long as you follow the license 21 | terms. 22 | 23 | Under the following terms: 24 | 25 | - **Attribution**---You must give appropriate credit (mentioning that your work 26 | is derived from work that is Copyright (c) The Carpentries and, where 27 | practical, linking to ), provide a [link to the 28 | license][cc-by-human], and indicate if changes were made. You may do so in 29 | any reasonable manner, but not in any way that suggests the licensor endorses 30 | you or your use. 31 | 32 | - **No additional restrictions**---You may not apply legal terms or 33 | technological measures that legally restrict others from doing anything the 34 | license permits. 
With the understanding that: 35 | 36 | Notices: 37 | 38 | * You do not have to comply with the license for elements of the material in 39 | the public domain or where your use is permitted by an applicable exception 40 | or limitation. 41 | * No warranties are given. The license may not give you all of the permissions 42 | necessary for your intended use. For example, other rights such as publicity, 43 | privacy, or moral rights may limit how you use the material. 44 | 45 | ## Software 46 | 47 | Except where otherwise noted, the example programs and other software provided 48 | by The Carpentries are made available under the [OSI][osi]-approved [MIT 49 | license][mit-license]. 50 | 51 | Permission is hereby granted, free of charge, to any person obtaining a copy of 52 | this software and associated documentation files (the "Software"), to deal in 53 | the Software without restriction, including without limitation the rights to 54 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 55 | of the Software, and to permit persons to whom the Software is furnished to do 56 | so, subject to the following conditions: 57 | 58 | The above copyright notice and this permission notice shall be included in all 59 | copies or substantial portions of the Software. 60 | 61 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 62 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 63 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 64 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 65 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 66 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 67 | SOFTWARE. 
68 | 69 | ## Trademark 70 | 71 | "The Carpentries", "Software Carpentry", "Data Carpentry", and "Library 72 | Carpentry" and their respective logos are registered trademarks of [Community 73 | Initiatives][ci]. 74 | 75 | [cc-by-human]: https://creativecommons.org/licenses/by/4.0/ 76 | [cc-by-legal]: https://creativecommons.org/licenses/by/4.0/legalcode 77 | [mit-license]: https://opensource.org/licenses/mit-license.html 78 | [ci]: https://communityin.org/ 79 | [osi]: https://opensource.org 80 | -------------------------------------------------------------------------------- /episodes/fig/laptop-openclipartorg-aoguerrero.svg: -------------------------------------------------------------------------------- 1 | (SVG markup not reproduced in this text rendering: a vector laptop image from openclipart.org by aoguerrero) -------------------------------------------------------------------------------- /episodes/files/snippets/rename-snippets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script renames snippets from the argument directory in an effort 4 | # to make the names more meaningful, thereby simplifying the process of 5 | # porting to a new site. 6 | 7 | if [[ $# != 1 ]] || [[ $1 == "-h" ]] || [[ $1 == "--help" ]]; then 8 | echo "Please supply a folder name containing snippets for your site. This" 9 | echo "script will rename those snippets based on the script activity. E.g.," 10 | echo " $0 ComputeCanada_Graham_slurm" 11 | exit 1 12 | fi 13 | 14 | if [[ $(which git) == "" ]]; then 15 | echo "Error: This script requires git. Please install it and try again."
16 | exit 1 17 | fi 18 | 19 | PREFIX=$1 20 | 21 | function rename_snip { 22 | mv $1 $2 23 | } 24 | 25 | ## Episode 12: Working on a remote HPC system 26 | 27 | #$ {{ site.sched.info }} 28 | rename_snip ${PREFIX}/12/info.snip ${PREFIX}/12/queue-info.snip 29 | #$ sinfo -n {{ site.remote.node }} -o "%n %c %m" 30 | rename_snip ${PREFIX}/12/explore.snip ${PREFIX}/12/specific-node-info.snip 31 | 32 | ## Episode 13: Scheduling jobs 33 | 34 | # {{ site.sched.submit.name }} {{ site.sched.submit.options }} example-job.sh 35 | rename_snip ${PREFIX}/13/submit_output.snip ${PREFIX}/13/basic-job-script.snip 36 | #$ {{ site.sched.status }} {{ site.sched.flag.user }} 37 | rename_snip ${PREFIX}/13/statu_output.snip ${PREFIX}/13/basic-job-status.snip 38 | #$ {{ site.sched.status }} {{ site.sched.flag.user }} 39 | rename_snip ${PREFIX}/13/statu_name_output.snip ${PREFIX}/13/job-with-name-status.snip 40 | ## The following are several key resource requests: 41 | rename_snip ${PREFIX}/13/stat_options.snip ${PREFIX}/13/option-flags-list.snip 42 | ## Print SLURM_CPUS_PER_TASK, PBS_O_WORKDIR, or similar 43 | rename_snip ${PREFIX}/13/env_challenge.snip ${PREFIX}/13/print-sched-variables.snip 44 | #$ {{ site.sched.submit.name }} {{ site.sched.submit.options }} example-job.sh 45 | rename_snip ${PREFIX}/13/long_job_cat.snip ${PREFIX}/13/runtime-exceeded-job.snip 46 | #$ {{ site.sched.status }} {{ site.sched.flag.user }} 47 | rename_snip ${PREFIX}/13/long_job_err.snip ${PREFIX}/13/runtime-exceeded-output.snip 48 | #$ {{ site.sched.submit.name }} {{ site.sched.submit.options }} example-job.sh 49 | #$ {{ site.sched.status }} {{ site.sched.flag.user }} 50 | rename_snip ${PREFIX}/13/del_job_output1.snip ${PREFIX}/13/terminate-job-begin.snip 51 | #$ {{site.sched.del }} 38759 52 | rename_snip ${PREFIX}/13/del_job_output2.snip ${PREFIX}/13/terminate-job-cancel.snip 53 | #$ {{site.sched.del }} {{ site.sched.flag.user }} 54 | rename_snip ${PREFIX}/13/del_multiple_challenge.snip 
${PREFIX}/13/terminate-multiple-jobs.snip 55 | ## use the compute node resources interactively 56 | rename_snip ${PREFIX}/13/interactive_example.snip ${PREFIX}/13/using-nodes-interactively.snip 57 | 58 | ## Episode 14: Accessing software 59 | 60 | #$ module avail 61 | rename_snip ${PREFIX}/14/module_avail.snip ${PREFIX}/14/available-modules.snip 62 | #$ which python 63 | rename_snip ${PREFIX}/14/which_missing.snip ${PREFIX}/14/missing-python.snip 64 | #$ module load python[3] 65 | rename_snip ${PREFIX}/14/load_python.snip ${PREFIX}/14/module-load-python.snip 66 | #$ which python 67 | rename_snip ${PREFIX}/14/which_python.snip ${PREFIX}/14/python-executable-dir.snip 68 | #$ echo $PATH 69 | rename_snip ${PREFIX}/14/path.snip ${PREFIX}/14/python-module-path.snip 70 | #$ ls $(dirname $(which python)) 71 | rename_snip ${PREFIX}/14/ls_dir.snip ${PREFIX}/14/python-ls-dir-command.snip 72 | rename_snip ${PREFIX}/14/ls_dir_output.snip ${PREFIX}/14/python-ls-dir-output.snip 73 | ## Loading & unloading software and dependencies 74 | rename_snip ${PREFIX}/14/depend_demo.snip ${PREFIX}/14/software-dependencies.snip 75 | ## gcc example 76 | rename_snip ${PREFIX}/14/gcc_example.snip ${PREFIX}/14/wrong-gcc-version.snip 77 | 78 | ## Episode 15: Transferring files 79 | 80 | ## Episode 16: Using resources effectively 81 | 82 | #$ {{ site.sched.hist }} 83 | rename_snip ${PREFIX}/16/stat_output.snip ${PREFIX}/16/account-history.snip 84 | #$ top 85 | rename_snip ${PREFIX}/16/top_output.snip ${PREFIX}/16/monitor-processes-top.snip 86 | #$ free -h 87 | rename_snip ${PREFIX}/16/free_output.snip ${PREFIX}/16/system-memory-free.snip 88 | 89 | ## Episode 17: Using shared resources responsibly 90 | -------------------------------------------------------------------------------- /.github/workflows/pr-receive.yaml: -------------------------------------------------------------------------------- 1 | name: "Receive Pull Request" 2 | 3 | on: 4 | pull_request: 5 | types: 6 | [opened, synchronize, 
reopened] 7 | 8 | concurrency: 9 | group: ${{ github.ref }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | test-pr: 14 | name: "Record PR number" 15 | if: ${{ github.event.action != 'closed' }} 16 | runs-on: ubuntu-latest 17 | outputs: 18 | is_valid: ${{ steps.check-pr.outputs.VALID }} 19 | steps: 20 | - name: "Record PR number" 21 | id: record 22 | if: ${{ always() }} 23 | run: | 24 | echo ${{ github.event.number }} > ${{ github.workspace }}/NR # 2022-03-02: artifact name fixed to be NR 25 | - name: "Upload PR number" 26 | id: upload 27 | if: ${{ always() }} 28 | uses: actions/upload-artifact@v3 29 | with: 30 | name: pr 31 | path: ${{ github.workspace }}/NR 32 | - name: "Get Invalid Hashes File" 33 | id: hash 34 | run: | 35 | echo "json<> $GITHUB_OUTPUT 38 | - name: "echo output" 39 | run: | 40 | echo "${{ steps.hash.outputs.json }}" 41 | - name: "Check PR" 42 | id: check-pr 43 | uses: carpentries/actions/check-valid-pr@main 44 | with: 45 | pr: ${{ github.event.number }} 46 | invalid: ${{ fromJSON(steps.hash.outputs.json)[github.repository] }} 47 | 48 | build-md-source: 49 | name: "Build markdown source files if valid" 50 | needs: test-pr 51 | runs-on: ubuntu-latest 52 | if: ${{ needs.test-pr.outputs.is_valid == 'true' }} 53 | env: 54 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 55 | RENV_PATHS_ROOT: ~/.local/share/renv/ 56 | CHIVE: ${{ github.workspace }}/site/chive 57 | PR: ${{ github.workspace }}/site/pr 58 | MD: ${{ github.workspace }}/site/built 59 | steps: 60 | - name: "Check Out Main Branch" 61 | uses: actions/checkout@v3 62 | 63 | - name: "Check Out Staging Branch" 64 | uses: actions/checkout@v3 65 | with: 66 | ref: md-outputs 67 | path: ${{ env.MD }} 68 | 69 | - name: "Set up R" 70 | uses: r-lib/actions/setup-r@v2 71 | with: 72 | use-public-rspm: true 73 | install-r: false 74 | 75 | - name: "Set up Pandoc" 76 | uses: r-lib/actions/setup-pandoc@v2 77 | 78 | - name: "Setup Lesson Engine" 79 | uses: carpentries/actions/setup-sandpaper@main 80 | with: 81 | 
cache-version: ${{ secrets.CACHE_VERSION }} 82 | 83 | - name: "Setup Package Cache" 84 | uses: carpentries/actions/setup-lesson-deps@main 85 | with: 86 | cache-version: ${{ secrets.CACHE_VERSION }} 87 | 88 | - name: "Validate and Build Markdown" 89 | id: build-site 90 | run: | 91 | sandpaper::package_cache_trigger(TRUE) 92 | sandpaper::validate_lesson(path = '${{ github.workspace }}') 93 | sandpaper:::build_markdown(path = '${{ github.workspace }}', quiet = FALSE) 94 | shell: Rscript {0} 95 | 96 | - name: "Generate Artifacts" 97 | id: generate-artifacts 98 | run: | 99 | sandpaper:::ci_bundle_pr_artifacts( 100 | repo = '${{ github.repository }}', 101 | pr_number = '${{ github.event.number }}', 102 | path_md = '${{ env.MD }}', 103 | path_pr = '${{ env.PR }}', 104 | path_archive = '${{ env.CHIVE }}', 105 | branch = 'md-outputs' 106 | ) 107 | shell: Rscript {0} 108 | 109 | - name: "Upload PR" 110 | uses: actions/upload-artifact@v3 111 | with: 112 | name: pr 113 | path: ${{ env.PR }} 114 | 115 | - name: "Upload Diff" 116 | uses: actions/upload-artifact@v3 117 | with: 118 | name: diff 119 | path: ${{ env.CHIVE }} 120 | retention-days: 1 121 | 122 | - name: "Upload Build" 123 | uses: actions/upload-artifact@v3 124 | with: 125 | name: built 126 | path: ${{ env.MD }} 127 | retention-days: 1 128 | 129 | - name: "Teardown" 130 | run: sandpaper::reset_site() 131 | shell: Rscript {0} 132 | -------------------------------------------------------------------------------- /.github/workflows/update-cache.yaml: -------------------------------------------------------------------------------- 1 | name: "03 Maintain: Update Package Cache" 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | name: 7 | description: 'Who triggered this build (enter github username to tag yourself)?' 
8 | required: true 9 | default: 'monthly run' 10 | schedule: 11 | # Run every tuesday 12 | - cron: '0 0 * * 2' 13 | 14 | jobs: 15 | preflight: 16 | name: "Preflight Check" 17 | runs-on: ubuntu-latest 18 | outputs: 19 | ok: ${{ steps.check.outputs.ok }} 20 | steps: 21 | - id: check 22 | run: | 23 | if [[ ${{ github.event_name }} == 'workflow_dispatch' ]]; then 24 | echo "ok=true" >> $GITHUB_OUTPUT 25 | echo "Running on request" 26 | # using single brackets here to avoid 08 being interpreted as octal 27 | # https://github.com/carpentries/sandpaper/issues/250 28 | elif [ `date +%d` -le 7 ]; then 29 | # If the Tuesday lands in the first week of the month, run it 30 | echo "ok=true" >> $GITHUB_OUTPUT 31 | echo "Running on schedule" 32 | else 33 | echo "ok=false" >> $GITHUB_OUTPUT 34 | echo "Not Running Today" 35 | fi 36 | 37 | check_renv: 38 | name: "Check if We Need {renv}" 39 | runs-on: ubuntu-latest 40 | needs: preflight 41 | if: ${{ needs.preflight.outputs.ok == 'true'}} 42 | outputs: 43 | needed: ${{ steps.renv.outputs.exists }} 44 | steps: 45 | - name: "Checkout Lesson" 46 | uses: actions/checkout@v3 47 | - id: renv 48 | run: | 49 | if [[ -d renv ]]; then 50 | echo "exists=true" >> $GITHUB_OUTPUT 51 | fi 52 | 53 | check_token: 54 | name: "Check SANDPAPER_WORKFLOW token" 55 | runs-on: ubuntu-latest 56 | needs: check_renv 57 | if: ${{ needs.check_renv.outputs.needed == 'true' }} 58 | outputs: 59 | workflow: ${{ steps.validate.outputs.wf }} 60 | repo: ${{ steps.validate.outputs.repo }} 61 | steps: 62 | - name: "validate token" 63 | id: validate 64 | uses: carpentries/actions/check-valid-credentials@main 65 | with: 66 | token: ${{ secrets.SANDPAPER_WORKFLOW }} 67 | 68 | update_cache: 69 | name: "Update Package Cache" 70 | needs: check_token 71 | if: ${{ needs.check_token.outputs.repo== 'true' }} 72 | runs-on: ubuntu-latest 73 | env: 74 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 75 | RENV_PATHS_ROOT: ~/.local/share/renv/ 76 | steps: 77 | 78 | - name: "Checkout Lesson" 
79 | uses: actions/checkout@v3 80 | 81 | - name: "Set up R" 82 | uses: r-lib/actions/setup-r@v2 83 | with: 84 | use-public-rspm: true 85 | install-r: false 86 | 87 | - name: "Update {renv} deps and determine if a PR is needed" 88 | id: update 89 | uses: carpentries/actions/update-lockfile@main 90 | with: 91 | cache-version: ${{ secrets.CACHE_VERSION }} 92 | 93 | - name: Create Pull Request 94 | id: cpr 95 | if: ${{ steps.update.outputs.n > 0 }} 96 | uses: carpentries/create-pull-request@main 97 | with: 98 | token: ${{ secrets.SANDPAPER_WORKFLOW }} 99 | delete-branch: true 100 | branch: "update/packages" 101 | commit-message: "[actions] update ${{ steps.update.outputs.n }} packages" 102 | title: "Update ${{ steps.update.outputs.n }} packages" 103 | body: | 104 | :robot: This is an automated build 105 | 106 | This will update ${{ steps.update.outputs.n }} packages in your lesson with the following versions: 107 | 108 | ``` 109 | ${{ steps.update.outputs.report }} 110 | ``` 111 | 112 | :stopwatch: In a few minutes, a comment will appear that will show you how the output has changed based on these updates. 113 | 114 | If you want to inspect these changes locally, you can use the following code to check out a new branch: 115 | 116 | ```bash 117 | git fetch origin update/packages 118 | git checkout update/packages 119 | ``` 120 | 121 | - Auto-generated by [create-pull-request][1] on ${{ steps.update.outputs.date }} 122 | 123 | [1]: https://github.com/carpentries/create-pull-request/tree/main 124 | labels: "type: package cache" 125 | draft: false 126 | -------------------------------------------------------------------------------- /episodes/files/snippets/EPCC_ARCHER2_slurm/resources/perf-exercise.Rmd: -------------------------------------------------------------------------------- 1 | ::: challenge 2 | ## Computing the speedup and parallel efficiency 3 | Use your *Overall run times* from above to fill in a table like the one below. 
4 | 5 | | Cores | Overall run time (s) | Actual speedup | Ideal speedup | Parallel efficiency | 6 | |------------|----------------------|-----------------|----------------|---------------------| 7 | | 1 (serial) | $t_{c1}$ | - | 1 | 1 | 8 | | 2 | $t_{c2}$ | $s_2 = t_{c1}/t_{c2}$ | $i_2 = 2$ | $s_2 / i_2$ | 9 | | 4 | $t_{c4}$ | $s_4 = t_{c1}/t_{c4}$ | $i_4 = 4$ | $s_4 / i_4$ | 10 | | 8 | | | | | 11 | | 16 | | | | | 12 | | 32 | | | | | 13 | | 64 | | | | | 14 | | 128 | | | | | 15 | | 256 | | | | | 16 | 17 | Given your results, try to answer the following questions: 18 | 19 | 1. What is the core count where you get the **most** efficient use of resources, irrespective 20 | of run time? 21 | 2. What is the core count where you get the fastest solution, irrespective of efficiency? 22 | 3. What do you think a good core count choice would be for this application that balances 23 | time to solution and efficiency? Why did you choose this option? 24 | 25 | ::: solution 26 | 27 | The table below gives example results for `r config$remote$name` based on the example 28 | runtimes given in the solution above. 29 | 30 | | Cores | Overall run time (s) | Actual speedup | Ideal speedup | Parallel efficiency | 31 | |-----------:|---------------------:|---------------:|--------------:|--------------------:| 32 | | 1 | 3.931 | 1.000 | 1.000 | 1.000 | 33 | | 2 | 2.002 | 1.963 | 2.000 | 0.982 | 34 | | 4 | 1.048 | 3.751 | 4.000 | 0.938 | 35 | | 8 | 0.572 | 6.872 | 8.000 | 0.859 | 36 | | 16 | 0.613 | 6.408 | 16.000 | 0.401 | 37 | | 32 | 0.360 | 10.928 | 32.000 | 0.342 | 38 | | 64 | 0.249 | 15.767 | 64.000 | 0.246 | 39 | | 128 | 0.170 | 23.122 | 128.000 | 0.181 | 40 | | 256 | 0.187 | 21.077 | 256.000 | 0.082 | 41 | 42 | ### What is the core count where you get the **most** efficient use of resources? 43 | Just using a single core is the cheapest (and always will be unless your speedup is better 44 | than perfect – “super-linear” speedup). 
However, it may not be possible to run on small 45 | numbers of cores depending on how much memory you need or other technical constraints. 46 | **Note:** on most high-end systems, nodes are not shared between users. This means you are 47 | charged for all the CPU-cores on a node regardless of whether you actually use them. Typically 48 | we would be running on many hundreds of CPU-cores not a few tens, so the real question in 49 | practice is: what is the optimal number of nodes to use? 50 | ### What is the core count where you get the fastest solution, irrespective of efficiency? 51 | 256 cores gives the fastest time to solution. 52 | The fastest time to solution does not often make the most efficient use of resources so 53 | to use this option, you may end up wasting your resources. Sometimes, when there is 54 | time pressure to run the calculations, this may be a valid approach to running 55 | applications. 56 | ### What do you think a good core count choice would be for this application to use? 57 | 58 | 8 cores is probably a good number of cores to use with a parallel efficiency of 86%. 59 | Usually, the best choice is one that delivers good parallel efficiency with an acceptable 60 | time to solution. Note that *acceptable time to solution* differs depending on circumstances 61 | so this is something that the individual researcher will have to assess. Good parallel 62 | efficiency is often considered to be 70% or greater though many researchers will be happy 63 | to run in a regime with parallel efficiency greater than 60%. As noted above, running with 64 | worse parallel efficiency may also be useful if the time to solution is an overriding factor. 65 | 66 | ::: 67 | ::: -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ## ======================================== 2 | ## Commands for both workshop and lesson websites. 
3 | 4 | # Settings 5 | MAKEFILES=Makefile $(wildcard *.mk) 6 | R=/usr/bin/R -e 7 | RS=/usr/bin/R 8 | DST=site 9 | 10 | # Find Docker 11 | DOCKER := $(shell which docker 2>/dev/null) 12 | 13 | # Default target 14 | .DEFAULT_GOAL := commands 15 | 16 | ## I. Commands for both workshop and lesson websites 17 | ## ================================================= 18 | 19 | .PHONY: site docker-serve repo-check clean clean-rmd 20 | 21 | ## * serve : render website and run a local server 22 | serve : lesson-md index.md 23 | ${R} 'sandpaper::serve()' 24 | 25 | ## * site : build website but do not run a server 26 | site : lesson-md index.md 27 | ${R} 'sandpaper::build_lesson()' 28 | 29 | ## * docker-serve : use Docker to serve the site 30 | docker-serve : 31 | ifeq (, $(DOCKER)) 32 | $(error Your system does not appear to have Docker installed) 33 | else 34 | @$(DOCKER) build --pull -t carpentries/workbench:latest docker 35 | @$(DOCKER) run --rm -it \ 36 | -v $${PWD}:/lesson \ 37 | -p 4321:4321 \ 38 | carpentries/workbench:latest \ 39 | ${R} 'sandpaper::serve(host="0.0.0.0")' 40 | endif 41 | 42 | ## * docker-build : use Docker to build the workbench image and run a session check 43 | docker-build : 44 | ifeq (, $(DOCKER)) 45 | $(error Your system does not appear to have Docker installed) 46 | else 47 | @$(DOCKER) build --pull -t carpentries/workbench:latest docker 48 | @$(DOCKER) run --rm -it \ 49 | -v $${PWD}:/lesson \ 50 | -p 4321:4321 \ 51 | carpentries/workbench:latest \ 52 | /usr/bin/Rscript --no-save /lesson/docker/sessioncheck.R 53 | 54 | 55 | endif 56 | 57 | ## * clean : clean up junk files 58 | clean : 59 | @rm -rf ${DST} 60 | 61 | ## 62 | ## II. Commands specific to workshop websites 63 | ## ================================================= 64 | 65 | ## 66 | ## III.
Commands specific to lesson websites 67 | ## ================================================= 68 | 69 | .PHONY : lesson-check lesson-md lesson-files lesson-fixme install-rmd-deps 70 | 71 | # RMarkdown files 72 | RMD_SRC = $(wildcard _episodes_rmd/*.Rmd) 73 | RMD_DST = $(patsubst _episodes_rmd/%.Rmd,_episodes/%.md,$(RMD_SRC)) 74 | 75 | # Lesson source files in the order they appear in the navigation menu. 76 | MARKDOWN_SRC = \ 77 | index.md \ 78 | CODE_OF_CONDUCT.md \ 79 | setup.md \ 80 | $(sort $(wildcard _episodes/*.md)) \ 81 | reference.md \ 82 | $(sort $(wildcard _extras/*.md)) \ 83 | LICENSE.md 84 | 85 | # Generated lesson files in the order they appear in the navigation menu. 86 | HTML_DST = \ 87 | ${DST}/index.html \ 88 | ${DST}/conduct/index.html \ 89 | ${DST}/setup/index.html \ 90 | $(patsubst _episodes/%.md,${DST}/%/index.html,$(sort $(wildcard _episodes/*.md))) \ 91 | ${DST}/reference.html \ 92 | $(patsubst _extras/%.md,${DST}/%/index.html,$(sort $(wildcard _extras/*.md))) \ 93 | ${DST}/license/index.html 94 | 95 | ## * install-rmd-deps : Install R packages dependencies to build the RMarkdown lesson 96 | install-rmd-deps: 97 | @${SHELL} bin/install_r_deps.sh 98 | 99 | ## * lesson-md : convert Rmarkdown files to markdown 100 | lesson-md : ${RMD_DST} 101 | 102 | _episodes/%.md: _episodes_rmd/%.Rmd install-rmd-deps 103 | @mkdir -p _episodes 104 | @$(SHELL) bin/knit_lessons.sh $< $@ 105 | 106 | ## * lesson-check : validate lesson Markdown 107 | lesson-check : python lesson-fixme 108 | @${PYTHON} bin/lesson_check.py -s . -p ${PARSER} -r _includes/links.md 109 | 110 | ## * lesson-check-all : validate lesson Markdown, checking line lengths and trailing whitespace 111 | lesson-check-all : python 112 | @${PYTHON} bin/lesson_check.py -s . 
-p ${PARSER} -r _includes/links.md -l -w --permissive 113 | 114 | ## * unittest : run unit tests on checking tools 115 | unittest : python 116 | @${PYTHON} bin/test_lesson_check.py 117 | 118 | ## * lesson-files : show expected names of generated files for debugging 119 | lesson-files : 120 | @echo 'RMD_SRC:' ${RMD_SRC} 121 | @echo 'RMD_DST:' ${RMD_DST} 122 | @echo 'MARKDOWN_SRC:' ${MARKDOWN_SRC} 123 | @echo 'HTML_DST:' ${HTML_DST} 124 | 125 | ## * lesson-fixme : show FIXME markers embedded in source files 126 | lesson-fixme : 127 | @grep --fixed-strings --word-regexp --line-number --no-messages FIXME ${MARKDOWN_SRC} || true 128 | 129 | ## 130 | ## IV. Auxiliary (plumbing) commands 131 | ## ================================================= 132 | 133 | .PHONY : commands python 134 | 135 | ## * commands : show all commands. 136 | commands : 137 | @sed -n -e '/^##/s|^##[[:space:]]*||p' $(MAKEFILE_LIST) 138 | 139 | python : 140 | ifeq (, $(PYTHON)) 141 | $(error $(PYTHON_NOTE)) 142 | else 143 | @: 144 | endif 145 | 146 | index.md : 147 | ifeq (, $(wildcard index.md)) 148 | $(error index.md not found) 149 | else 150 | @: 151 | endif 152 | -------------------------------------------------------------------------------- /episodes/fig/200px-laptop-openclipartorg-aoguerrero.svg: -------------------------------------------------------------------------------- 1 | (SVG markup stripped during extraction: vector clip-art of a laptop from openclipart.org, scaled to 200px; only the `image/svg+xml` media-type string survived.) -------------------------------------------------------------------------------- /episodes/files/pi-mpi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Parallel example code for estimating the value of π. 4 | 5 | We can estimate the value of π by a stochastic algorithm.
Consider a 6 | circle of radius 1, inside a square that bounds it, with vertices at 7 | (1,1), (1,-1), (-1,-1), and (-1,1). The area of the circle is just π, 8 | whereas the area of the square is 4. So, the fraction of the area of the 9 | square which is covered by the circle is π/4. 10 | 11 | A point selected at random uniformly from the square thus has a 12 | probability π/4 of being within the circle. 13 | 14 | We can estimate π by examining a large number of randomly-selected 15 | points from the square, and seeing what fraction of them lie within the 16 | circle. If this fraction is f, then our estimate for π is π ≈ 4f. 17 | 18 | Thanks to symmetry, we can compute points in one quadrant, rather 19 | than within the entire unit square, and arrive at identical results. 20 | 21 | This task lends itself naturally to parallelization -- the task of 22 | selecting a sample point and deciding whether or not it's inside the 23 | circle is independent of all the other samples, so they can be done 24 | simultaneously. We only need to aggregate the data at the end to compute 25 | our fraction f and our estimate for π. 26 | """ 27 | 28 | import numpy as np 29 | import sys 30 | import datetime 31 | from mpi4py import MPI 32 | 33 | wtime1 = MPI.Wtime() 34 | 35 | def inside_circle(total_count): 36 | """Single-processor task for a group of samples. 37 | 38 | Generates uniform random x and y arrays of size total_count, on the 39 | interval [0,1), and returns the number of the resulting (x,y) pairs 40 | which lie inside the unit circle. 41 | """ 42 | 43 | host_name = MPI.Get_processor_name() 44 | print("Rank {} generating {:n} samples on host {}.".format( 45 | rank, total_count, host_name)) 46 | x = np.float64(np.random.uniform(size=total_count)) 47 | y = np.float64(np.random.uniform(size=total_count)) 48 | 49 | radii = np.sqrt(x*x + y*y) 50 | 51 | count = len(radii[np.where(radii<=1.0)]) 52 | 53 | return count 54 | 55 | if __name__ == '__main__': 56 | """Main executable. 
57 | 58 | This function runs the 'inside_circle' function with a defined number 59 | of samples. The results are then used to estimate π. 60 | 61 | An estimate of the required memory, elapsed calculation time, and 62 | accuracy of calculating π are also computed. 63 | """ 64 | 65 | # Declare an MPI Communicator for the parallel processes to talk through 66 | comm = MPI.COMM_WORLD 67 | 68 | # Read the number of parallel processes tied into the comm channel 69 | cpus = comm.Get_size() 70 | 71 | # Find out the index ("rank") of *this* process 72 | rank = comm.Get_rank() 73 | 74 | if len(sys.argv) > 1: 75 | n_samples = int(sys.argv[1]) 76 | else: 77 | n_samples = 8738128 # trust me, this number is not random :-) 78 | 79 | if rank == 0: 80 | # Time how long it takes to estimate π. 81 | start_time = datetime.datetime.now() 82 | print("Generating {:n} samples.".format(n_samples)) 83 | # Rank zero builds two arrays with one entry for each rank: 84 | # one for the number of samples they should run, and 85 | # one to store the count info each rank returns. 86 | partitions = [ int(n_samples / cpus) ] * cpus 87 | counts = [ int(0) ] * cpus 88 | else: 89 | partitions = None 90 | counts = None 91 | 92 | # All ranks participate in the "scatter" operation, which assigns 93 | # the local scalar values to their appropriate array components. 94 | # partition_item is the number of samples this rank should generate, 95 | # and count_item is the place to put the number of counts we see. 96 | partition_item = comm.scatter(partitions, root=0) 97 | count_item = comm.scatter(counts, root=0) 98 | 99 | # Each rank locally populates its count_item variable. 100 | count_item = inside_circle(partition_item) 101 | 102 | # All ranks participate in the "gather" operation, which collects 103 | # each rank's count_item into the "counts" list on rank zero. 104 | counts = comm.gather(count_item, root=0) 105 | 106 | if rank == 0: 107 | # Only rank zero received the gathered counts, so only it computes the result.
108 | my_pi = 4.0 * sum(counts) / n_samples 109 | elapsed_time = (datetime.datetime.now() - start_time).total_seconds() 110 | 111 | # Memory required is dominated by the size of x, y, and radii from 112 | # inside_circle(), calculated in MiB 113 | size_of_float = np.dtype(np.float64).itemsize 114 | memory_required = 3 * n_samples * size_of_float / (1024**2) 115 | 116 | # accuracy is calculated as a percent difference from a known estimate 117 | # of π. 118 | pi_specific = np.pi 119 | accuracy = 100*(1-my_pi/pi_specific) 120 | 121 | # Print the values of Pi 122 | print('Numpy Pi: ', pi_specific) 123 | print('My Estimate of Pi: ', my_pi) 124 | 125 | # Uncomment either summary format for verbose or terse output 126 | summary = "{:d} core(s), {:d} samples, {:f} MiB memory, {:f} seconds, {:f}% error" 127 | # summary = "{:d},{:d},{:f},{:f},{:f}" 128 | print(summary.format(cpus, n_samples, memory_required, elapsed_time, 129 | accuracy)) 130 | 131 | print(f"Total run time={MPI.Wtime()-wtime1}s") 132 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | [The Carpentries][cp-site] ([Software Carpentry][swc-site], [Data 4 | Carpentry][dc-site], and [Library Carpentry][lc-site]) are open source 5 | projects, and we welcome contributions of all kinds: new lessons, fixes to 6 | existing material, bug reports, and reviews of proposed changes are all 7 | welcome. 8 | 9 | ### Contributor Agreement 10 | 11 | By contributing, you agree that we may redistribute your work under [our 12 | license](LICENSE.md). In exchange, we will address your issues and/or assess 13 | your change proposal as promptly as we can, and help you become a member of our 14 | community. Everyone involved in [The Carpentries][cp-site] agrees to abide by 15 | our [code of conduct](CODE_OF_CONDUCT.md). 
16 | 17 | ### How to Contribute 18 | 19 | The easiest way to get started is to file an issue to tell us about a spelling 20 | mistake, some awkward wording, or a factual error. This is a good way to 21 | introduce yourself and to meet some of our community members. 22 | 23 | 1. If you do not have a [GitHub][github] account, you can [send us comments by 24 | email][contact]. However, we will be able to respond more quickly if you use 25 | one of the other methods described below. 26 | 27 | 2. If you have a [GitHub][github] account, or are willing to [create 28 | one][github-join], but do not know how to use Git, you can report problems 29 | or suggest improvements by [creating an issue][repo-issues]. This allows us 30 | to assign the item to someone and to respond to it in a threaded discussion. 31 | 32 | 3. If you are comfortable with Git, and would like to add or change material, 33 | you can submit a pull request (PR). Instructions for doing this are 34 | [included below](#using-github). For inspiration about changes that need to 35 | be made, check out the [list of open issues][issues] across the Carpentries. 36 | 37 | Note: if you want to build the website locally, please refer to [The Workbench 38 | documentation][template-doc]. 39 | 40 | ### Where to Contribute 41 | 42 | 1. If you wish to change this lesson, add issues and pull requests here. 43 | 2. If you wish to change the template used for workshop websites, please refer 44 | to [The Workbench documentation][template-doc]. 45 | 46 | 47 | ### What to Contribute 48 | 49 | There are many ways to contribute, from writing new exercises and improving 50 | existing ones to updating or filling in the documentation and submitting [bug 51 | reports][issues] about things that do not work, are not clear, or are missing. 
52 | If you are looking for ideas, please see [the list of issues for this 53 | repository][repo-issues], or the issues for [Data Carpentry][dc-issues], 54 | [Library Carpentry][lc-issues], and [Software Carpentry][swc-issues] projects. 55 | 56 | Comments on issues and reviews of pull requests are just as welcome: we are 57 | smarter together than we are on our own. **Reviews from novices and newcomers 58 | are particularly valuable**: it's easy for people who have been using these 59 | lessons for a while to forget how impenetrable some of this material can be, so 60 | fresh eyes are always welcome. 61 | 62 | ### What *Not* to Contribute 63 | 64 | Our lessons already contain more material than we can cover in a typical 65 | workshop, so we are usually *not* looking for more concepts or tools to add to 66 | them. As a rule, if you want to introduce a new idea, you must (a) estimate how 67 | long it will take to teach and (b) explain what you would take out to make room 68 | for it. The first encourages contributors to be honest about requirements; the 69 | second, to think hard about priorities. 70 | 71 | We are also not looking for exercises or other material that only run on one 72 | platform. Our workshops typically contain a mixture of Windows, macOS, and 73 | Linux users; in order to be usable, our lessons must run equally well on all 74 | three. 75 | 76 | ### Using GitHub 77 | 78 | If you choose to contribute via GitHub, you may want to look at [How to 79 | Contribute to an Open Source Project on GitHub][how-contribute]. In brief, we 80 | use [GitHub flow][github-flow] to manage changes: 81 | 82 | 1. Create a new branch in your desktop copy of this repository for each 83 | significant change. 84 | 2. Commit the change in that branch. 85 | 3. Push that branch to your fork of this repository on GitHub. 86 | 4. Submit a pull request from that branch to the [upstream repository][repo]. 87 | 5. 
If you receive feedback, make changes on your desktop and push to your 88 | branch on GitHub: the pull request will update automatically. 89 | 90 | NB: The published copy of the lesson is usually in the `main` branch. 91 | 92 | Each lesson has a team of maintainers who review issues and pull requests or 93 | encourage others to do so. The maintainers are community volunteers, and have 94 | final say over what gets merged into the lesson. 95 | 96 | ### Other Resources 97 | 98 | The Carpentries is a global organisation with volunteers and learners all over 99 | the world. We share values of inclusivity and a passion for sharing knowledge, 100 | teaching and learning. There are several ways to connect with The Carpentries 101 | community, including via social 102 | media, Slack, newsletters, and email lists. You can also [reach us by 103 | email][contact]. 104 | 105 | [repo]: https://example.com/FIXME 106 | [repo-issues]: https://example.com/FIXME/issues 107 | [contact]: mailto:team@carpentries.org 108 | [cp-site]: https://carpentries.org/ 109 | [dc-issues]: https://github.com/issues?q=user%3Adatacarpentry 110 | [dc-lessons]: https://datacarpentry.org/lessons/ 111 | [dc-site]: https://datacarpentry.org/ 112 | [discuss-list]: https://lists.software-carpentry.org/listinfo/discuss 113 | [github]: https://github.com 114 | [github-flow]: https://guides.github.com/introduction/flow/ 115 | [github-join]: https://github.com/join 116 | [how-contribute]: https://egghead.io/courses/how-to-contribute-to-an-open-source-project-on-github 117 | [issues]: https://carpentries.org/help-wanted-issues/ 118 | [lc-issues]: https://github.com/issues?q=user%3ALibraryCarpentry 119 | [swc-issues]: https://github.com/issues?q=user%3Aswcarpentry 120 | [swc-lessons]: https://software-carpentry.org/lessons/ 121 | [swc-site]: https://software-carpentry.org/ 122 | [lc-site]: https://librarycarpentry.org/ 123 | [template-doc]: https://carpentries.github.io/workbench/ 124 |
-------------------------------------------------------------------------------- /.github/workflows/pr-comment.yaml: -------------------------------------------------------------------------------- 1 | name: "Bot: Comment on the Pull Request" 2 | 3 | # read-write repo token 4 | # access to secrets 5 | on: 6 | workflow_run: 7 | workflows: ["Receive Pull Request"] 8 | types: 9 | - completed 10 | 11 | concurrency: 12 | group: pr-${{ github.event.workflow_run.pull_requests[0].number }} 13 | cancel-in-progress: true 14 | 15 | 16 | jobs: 17 | # Pull requests are valid if: 18 | # - they match the sha of the workflow run head commit 19 | # - they are open 20 | # - no .github files were committed 21 | test-pr: 22 | name: "Test if pull request is valid" 23 | runs-on: ubuntu-latest 24 | if: > 25 | github.event.workflow_run.event == 'pull_request' && 26 | github.event.workflow_run.conclusion == 'success' 27 | outputs: 28 | is_valid: ${{ steps.check-pr.outputs.VALID }} 29 | payload: ${{ steps.check-pr.outputs.payload }} 30 | number: ${{ steps.get-pr.outputs.NUM }} 31 | msg: ${{ steps.check-pr.outputs.MSG }} 32 | steps: 33 | - name: 'Download PR artifact' 34 | id: dl 35 | uses: carpentries/actions/download-workflow-artifact@main 36 | with: 37 | run: ${{ github.event.workflow_run.id }} 38 | name: 'pr' 39 | 40 | - name: "Get PR Number" 41 | if: ${{ steps.dl.outputs.success == 'true' }} 42 | id: get-pr 43 | run: | 44 | unzip pr.zip 45 | echo "NUM=$(<./NR)" >> $GITHUB_OUTPUT 46 | 47 | - name: "Fail if PR number was not present" 48 | id: bad-pr 49 | if: ${{ steps.dl.outputs.success != 'true' }} 50 | run: | 51 | echo '::error::A pull request number was not recorded. The pull request that triggered this workflow is likely malicious.' 
52 | exit 1 53 | - name: "Get Invalid Hashes File" 54 | id: hash 55 | run: | 56 | echo "json<> $GITHUB_OUTPUT 59 | - name: "Check PR" 60 | id: check-pr 61 | if: ${{ steps.dl.outputs.success == 'true' }} 62 | uses: carpentries/actions/check-valid-pr@main 63 | with: 64 | pr: ${{ steps.get-pr.outputs.NUM }} 65 | sha: ${{ github.event.workflow_run.head_sha }} 66 | headroom: 3 # if it's within the last three commits, we can keep going, because it's likely rapid-fire 67 | invalid: ${{ fromJSON(steps.hash.outputs.json)[github.repository] }} 68 | fail_on_error: true 69 | 70 | # Create an orphan branch on this repository with two commits 71 | # - the current HEAD of the md-outputs branch 72 | # - the output from running the current HEAD of the pull request through 73 | # the md generator 74 | create-branch: 75 | name: "Create Git Branch" 76 | needs: test-pr 77 | runs-on: ubuntu-latest 78 | if: ${{ needs.test-pr.outputs.is_valid == 'true' }} 79 | env: 80 | NR: ${{ needs.test-pr.outputs.number }} 81 | permissions: 82 | contents: write 83 | steps: 84 | - name: 'Checkout md outputs' 85 | uses: actions/checkout@v3 86 | with: 87 | ref: md-outputs 88 | path: built 89 | fetch-depth: 1 90 | 91 | - name: 'Download built markdown' 92 | id: dl 93 | uses: carpentries/actions/download-workflow-artifact@main 94 | with: 95 | run: ${{ github.event.workflow_run.id }} 96 | name: 'built' 97 | 98 | - if: ${{ steps.dl.outputs.success == 'true' }} 99 | run: unzip built.zip 100 | 101 | - name: "Create orphan and push" 102 | if: ${{ steps.dl.outputs.success == 'true' }} 103 | run: | 104 | cd built/ 105 | git config --local user.email "actions@github.com" 106 | git config --local user.name "GitHub Actions" 107 | CURR_HEAD=$(git rev-parse HEAD) 108 | git checkout --orphan md-outputs-PR-${NR} 109 | git add -A 110 | git commit -m "source commit: ${CURR_HEAD}" 111 | ls -A | grep -v '^.git$' | xargs -I _ rm -r '_' 112 | cd .. 
113 | unzip -o -d built built.zip 114 | cd built 115 | git add -A 116 | git commit --allow-empty -m "differences for PR #${NR}" 117 | git push -u --force --set-upstream origin md-outputs-PR-${NR} 118 | 119 | # Comment on the Pull Request with a link to the branch and the diff 120 | comment-pr: 121 | name: "Comment on Pull Request" 122 | needs: [test-pr, create-branch] 123 | runs-on: ubuntu-latest 124 | if: ${{ needs.test-pr.outputs.is_valid == 'true' }} 125 | env: 126 | NR: ${{ needs.test-pr.outputs.number }} 127 | permissions: 128 | pull-requests: write 129 | steps: 130 | - name: 'Download comment artifact' 131 | id: dl 132 | uses: carpentries/actions/download-workflow-artifact@main 133 | with: 134 | run: ${{ github.event.workflow_run.id }} 135 | name: 'diff' 136 | 137 | - if: ${{ steps.dl.outputs.success == 'true' }} 138 | run: unzip ${{ github.workspace }}/diff.zip 139 | 140 | - name: "Comment on PR" 141 | id: comment-diff 142 | if: ${{ steps.dl.outputs.success == 'true' }} 143 | uses: carpentries/actions/comment-diff@main 144 | with: 145 | pr: ${{ env.NR }} 146 | path: ${{ github.workspace }}/diff.md 147 | 148 | # Comment if the PR is open and matches the SHA, but the workflow files have 149 | # changed 150 | comment-changed-workflow: 151 | name: "Comment if workflow files have changed" 152 | needs: test-pr 153 | runs-on: ubuntu-latest 154 | if: ${{ always() && needs.test-pr.outputs.is_valid == 'false' }} 155 | env: 156 | NR: ${{ github.event.workflow_run.pull_requests[0].number }} 157 | body: ${{ needs.test-pr.outputs.msg }} 158 | permissions: 159 | pull-requests: write 160 | steps: 161 | - name: 'Check for spoofing' 162 | id: dl 163 | uses: carpentries/actions/download-workflow-artifact@main 164 | with: 165 | run: ${{ github.event.workflow_run.id }} 166 | name: 'built' 167 | 168 | - name: 'Alert if spoofed' 169 | id: spoof 170 | if: ${{ steps.dl.outputs.success == 'true' }} 171 | run: | 172 | echo 'body<<EOF' >> $GITHUB_ENV 173 | echo '' >> $GITHUB_ENV 174 | echo
'## :x: DANGER :x:' >> $GITHUB_ENV 175 | echo 'This pull request has modified workflows that created output. Close this now.' >> $GITHUB_ENV 176 | echo '' >> $GITHUB_ENV 177 | echo 'EOF' >> $GITHUB_ENV 178 | 179 | - name: "Comment on PR" 180 | id: comment-diff 181 | uses: carpentries/actions/comment-diff@main 182 | with: 183 | pr: ${{ env.NR }} 184 | body: ${{ env.body }} 185 | 186 | -------------------------------------------------------------------------------- /episodes/fig/responsibility-bandwidth.tex: -------------------------------------------------------------------------------- 1 | \documentclass{standalone} 2 | \usepackage{amsmath, graphicx, siunitx} 3 | \usepackage{tikz} 4 | \usetikzlibrary{arrows, calc, backgrounds, positioning} 5 | 6 | \newcommand\dataline[5]{ 7 | \draw [line width=#1 mm, color=gray] (#3) to [#5] (#4); 8 | \draw [line width=#2 mm, color=blue] (#3) to [#5] (#4); 9 | } 10 | 11 | \tikzstyle{background} = [fill=green!5!white,draw=green!75!black,very thick] 12 | \tikzstyle{titlebox} = [fill=green!75!black,text=black,rounded corners] 13 | 14 | \tikzstyle{disk} = [rectangle,fill=black!20,align=left,rotate=90,inner sep=0] 15 | \tikzstyle{controller} = [rectangle,fill=red!20,align=left,inner sep=0] 16 | \tikzstyle{network} = [rectangle,fill=blue!20,align=left,inner sep=0] 17 | \tikzstyle{switch} = [rectangle,fill=green!20,align=left,inner sep=0] 18 | 19 | % Network latency is 0.004 ms 20 | % Disk latency is 4 ms 21 | 22 | % If the sep between NIC and switch is 2cm, then using a log scale, the 23 | % disks are (2 cm) * log(4 / 0.004) = 13.8 cm below the RAID controller 24 | 25 | \begin{document} 26 | \begin{tikzpicture} 27 | \node[controller] (raid) {RAID controller:\\ 63 Gb/s interface,\\ forwards 40 Gb/s}; 28 | \node[disk,anchor=north] (disk 1) at ($(raid.south)+(-1in, -3cm)$) {\begin{minipage}{2.7cm} 29 | SSD 1:\\ 30 | 12 Gb/s interface,\\ 31 | sends 4 Gb/s,\\ 32 | \SI{4}{\milli\second} latency 33 | \end{minipage} 34 | \begin{minipage}{1cm} 35 
| \includegraphics[height=\textwidth,angle=90,origin=c]{maze-tortuous} 36 | \end{minipage}}; 37 | \node[disk] (disk 2) at ($(disk 1) + (2cm, 0)$) {\begin{minipage}{2.7cm} 38 | SSD 2:\\ 39 | 12 Gb/s interface,\\ 40 | sends 4 Gb/s,\\ 41 | \SI{4}{\milli\second} latency 42 | \end{minipage} 43 | \begin{minipage}{1cm} 44 | \includegraphics[height=\textwidth,angle=270,origin=c]{maze-tortuous} 45 | \end{minipage}}; 46 | \node (dots) at ($(disk 2) + (1.5cm, 0)$) {$\cdots$}; 47 | \node[disk] (disk N) at ($(dots) + (+1.5cm, 0)$) {\begin{minipage}{2.7cm} 48 | SSD $N$:\\ 49 | 12 Gb/s interface,\\ 50 | sends 4 Gb/s,\\ 51 | \SI{4}{\milli\second} latency 52 | \end{minipage} 53 | \begin{minipage}{1cm} 54 | \includegraphics[height=\textwidth,angle=90,origin=c]{maze-tortuous-mirror} 55 | \end{minipage}}; 56 | \node[network] (nic) [right=of raid] {\begin{minipage}{2.7cm} 57 | network card:\\ 58 | 56 Gb/s interface,\\ 59 | forwards 40 Gb/s 60 | \end{minipage} 61 | \begin{minipage}{1cm} 62 | \includegraphics[height=\textwidth,angle=90,origin=c]{maze-direct} 63 | \end{minipage}}; 64 | \node[switch,anchor=west] (switch) [right=of nic] {network switch:\\ 65 | 56 Gb/s interfaces,\\ 66 | forwards 40 Gb/s,\\ 67 | \SI{0.004}{\milli\second} latency}; 68 | \node[network,anchor=south] (node 1 nic) [above=of switch] {node 1 network card:\\ 69 | 56 Gb/s interface,\\ 70 | receives 20 Gb/s}; 71 | \node[network,anchor=north] (node 2 nic) [below=of switch] {node 2 network card:\\ 72 | 56 Gb/s interface,\\ 73 | receives 20 Gb/s}; 74 | 75 | \node[titlebox] (server) at ($(raid.north)+(2cm, 0.5cm)$) {\textbf{Server}}; 76 | 77 | \dataline{1.2}{0.4}{disk 1.east}{raid}{out=90,in=260}; 78 | \dataline{1.2}{0.4}{disk 2.east}{raid}{out=90,in=270} 79 | \dataline{1.2}{0.4}{disk N.east}{raid}{out=90,in=280} 80 | \dataline{5.6}{4.0}{raid}{nic}{} 81 | \dataline{5.6}{4.0}{nic}{switch}{} 82 | \dataline{4.0}{2.0}{switch}{node 1 nic}{} 83 | \dataline{4.0}{2.0}{switch}{node 2 nic}{} 84 | 85 | \begin{pgfonlayer}{background} 
86 | \draw [background] ($(nic.north east)+(0, 0.35cm)$) rectangle ($(disk 1.north west)$); 87 | \end{pgfonlayer} 88 | \end{tikzpicture} 89 | \end{document} 90 | -------------------------------------------------------------------------------- /episodes/17-resources.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using resources effectively" 3 | teaching: 10 4 | exercises: 30 5 | --- 6 | 7 | ```{r load_config, include=FALSE} 8 | library(yaml) 9 | config <- yaml.load_file("lesson_config.yaml") 10 | snippets <- paste('./files/snippets/', config$snippets, sep='') 11 | ``` 12 | 13 | ::: questions 14 | - How do we monitor our jobs? 15 | - How can I get my jobs scheduled more easily? 16 | ::: 17 | 18 | ::: objectives 19 | - Understand how to look up job statistics and profile code. 20 | - Understand job size implications. 21 | ::: 22 | 23 | We've touched on all the skills you need to interact with an HPC cluster: 24 | logging in over SSH, loading software modules, submitting parallel jobs, and 25 | finding the output. Let's learn about estimating resource usage and why it 26 | might matter. To do this we need to understand the basics of *benchmarking*. 27 | Benchmarking is essentially performing simple experiments to help understand 28 | how the performance of our work varies as we change the properties of the 29 | jobs on the cluster - including input parameters, job options, and resources used. 30 | 31 | ::: discussion 32 | ## Our example 33 | In the rest of this episode, we will use an example parallel application that calculates 34 | an estimate of the value of Pi. Although this is a toy problem, it exhibits all the properties of a full 35 | parallel application that we are interested in for this course. 36 | 37 | The main resource we will consider here is the use of compute core time, as this is the 38 | resource you are usually charged for on HPC systems.
However, other resources - such 39 | as memory use - may also have a bearing on how you choose resources and constrain your 40 | choice. 41 | 42 | For those who have come across HPC benchmarking before, you may be aware that people 43 | often make a distinction between *strong scaling* and *weak scaling*: 44 | 45 | - Strong scaling is where the problem size (i.e. the *application*) stays the same 46 | size and we try to use more cores to solve the problem faster. 47 | - Weak scaling is where the problem size increases at the same rate as we increase 48 | the core count, so we are using more cores to solve a larger problem. 49 | 50 | Both of these approaches are equally valid uses of HPC. This example looks at strong scaling. 51 | 52 | ::: 53 | 54 | Before we work on benchmarking, it is useful to define some terms for the example we will 55 | be using: 56 | 57 | - **Program**: the computer program we are executing (`pi-mpi.py` in the examples below) 58 | - **Application**: the combination of the computer program with particular input parameters 59 | 60 | ## Accessing the software and input 61 | 62 | ```{r, child=paste(snippets, '/resources/pi-mpi-details.Rmd', sep=''), eval=TRUE} 63 | ``` 64 | 65 | ## Baseline: running in serial 66 | 67 | Before starting to benchmark an application to understand what resources are best to use, 68 | you need a *baseline* performance result. In more formal benchmarking, your baseline 69 | is usually the minimum number of cores or nodes you can run on. However, for understanding 70 | how best to use resources, as we are doing here, your baseline could be the performance on 71 | any number of cores or nodes from which you can measure the change in performance. 72 | 73 | Our `pi-mpi.py` application is small enough that we can run a serial (i.e.
using a single core) 74 | job for our baseline performance, so that is where we will start. 75 | 76 | ::: challenge 77 | ## Run a single core job 78 | Write a job submission script that runs the `pi-mpi.py` application on a single core. You 79 | will need to take an initial guess as to the walltime to request to give the job time 80 | to complete. Submit the job and check the contents of the STDOUT file to see if the 81 | application worked or not. 82 | 83 | ::: solution 84 | 85 | ```{r, child=paste(snippets, '/resources/serial-submit.Rmd', sep=''), eval=TRUE} 86 | ``` 87 | 88 | Output in the job log should look something like: 89 | 90 | ```output 91 | Generating 10000000 samples. 92 | Rank 0 generating 10000000 samples on host nid001246. 93 | Numpy Pi: 3.141592653589793 94 | My Estimate of Pi: 3.1416708 95 | 1 core(s), 10000000 samples, 228.881836 MiB memory, 0.423903 seconds, -0.002487% error 96 | ``` 97 | ::: 98 | ::: 99 | 100 | Once your job has run, you should look in the output to identify the performance. Most 101 | HPC programs should print out timing or performance information (usually somewhere near 102 | the bottom of the summary output) and `pi-mpi.py` is no exception. You should see two 103 | lines in the output that look something like: 104 | 105 | ```bash 106 | 256 core(s), 100000000 samples, 2288.818359 MiB memory, 0.135041 seconds, -0.004774% error 107 | Total run time=0.18654435999997077s 108 | ``` 109 | 110 | You can also get an estimate of the overall run time from the final job statistics. If 111 | we look at how long the finished job ran for, this will provide a quick way to see 112 | roughly what the runtime was.
This can be useful if you want to know quickly if a 113 | job was faster or not than a previous job (as you do not have to find the output file 114 | to look up the performance), but the number is not as accurate as the performance recorded 115 | by the application itself and also includes static overheads from running the job 116 | (such as loading modules and startup time) that can skew the timings. To do this, 117 | use ``r config$sched$hist` `r config$sched$flag$histdetail`` with the job ID, e.g.: 118 | 119 | ```bash 120 | `r config$remote$prompt_work` `r config$sched$hist` `r config$sched$flag$histdetail` 12345 121 | ``` 122 | ```output 123 | ```{r, child=paste(snippets, '/resources/job-detail.Rmd', sep=''), eval=TRUE} 124 | ``` 125 | ``` 126 | 127 | ## Running in parallel and benchmarking performance 128 | 129 | We have now managed to run the `pi-mpi.py` application using a single core and have a baseline 130 | performance we can use to judge how well we are using resources on the system. 131 | 132 | Note that we also now have a good estimate of how long the application takes to run, so we can 133 | provide a better setting for the walltime for future jobs we submit. Let's now look at how 134 | the runtime varies with core count. 135 | 136 | ```{r, child=paste(snippets, '/resources/runtime-exercise.Rmd', sep=''), eval=TRUE} 137 | ``` 138 | 139 | ## Understanding the performance 140 | 141 | Now that we have some data showing the performance of our application, we need to try and draw some 142 | useful conclusions as to what the most efficient set of resources is to use for our jobs. To 143 | do this we introduce three metrics: 144 | 145 | - **Actual speedup** The ratio of the baseline runtime (or runtime on the lowest core count) 146 | to the runtime at the specified core count, i.e. baseline runtime divided by runtime 147 | at the specified core count. 148 | - **Ideal speedup** The expected speedup if the application showed perfect scaling, i.e.
if 149 | you double the number of cores, the application should run twice as fast. 150 | - **Parallel efficiency** The fraction of *ideal speedup* actually obtained for a given 151 | core count. This gives an indication of how well you are exploiting the additional resources 152 | you are using. 153 | 154 | We will now use our performance results to compute these metrics for the `pi-mpi.py` application 155 | and use the metrics to evaluate the performance and make some decisions about the most 156 | effective use of resources. 157 | 158 | ```{r, child=paste(snippets, '/resources/perf-exercise.Rmd', sep=''), eval=TRUE} 159 | ``` 160 | 161 | ## Tips 162 | 163 | Here are a few tips to help you use resources effectively and efficiently on HPC systems: 164 | 165 | - Know what your priority is: do you want the results as fast as possible or are you happy 166 | to wait longer but get more research done with the resources you have been allocated? 167 | - Use your real research application to benchmark but try to shorten the run so you can turn 168 | around your benchmarking runs in a short timescale. Ideally, it should run for 10-30 minutes; 169 | short enough to run quickly but long enough so the performance is not dominated by static startup 170 | overheads (though this is application dependent). Ways to do this include, for example: 171 | using a smaller number of time steps, restricting the number of SCF cycles, restricting the 172 | number of optimisation steps. 173 | - Use basic benchmarking to help define the best resource use for your application. One 174 | useful strategy: take the core count you are using as the baseline, halve the number of 175 | cores/nodes and rerun, then double the number of cores/nodes from your baseline and 176 | rerun. Use the three data points to assess your efficiency and the impact of different 177 | core/node counts. 178 | 179 | ::: keypoints 180 | - "The smaller your job, the faster it will schedule."
181 | ::: 182 | -------------------------------------------------------------------------------- /episodes/11-hpc-intro.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Why use an HPC System?" 3 | teaching: 15 4 | exercises: 5 5 | --- 6 | 7 | ```{r load_config, include=FALSE} 8 | library(yaml) 9 | config <- yaml.load_file("lesson_config.yaml") 10 | ``` 11 | 12 | ::: questions 13 | - Why would I be interested in High Performance Computing (HPC)? 14 | - What can I expect to learn from this course? 15 | ::: 16 | 17 | ::: objectives 18 | - Be able to describe what an HPC system is 19 | - Identify how an HPC system could benefit you. 20 | ::: 21 | 22 | ## HPC research examples 23 | 24 | Frequently, research problems that use computing can outgrow the capabilities 25 | of the desktop or laptop computer where they started: 26 | 27 | * A statistics student wants to cross-validate a model. This involves running 28 | the model 1000 times — but each run takes an hour. Running the model on 29 | a laptop will take over a month! In this research problem, final results are 30 | calculated after all 1000 models have run, but typically only one model is 31 | run at a time (in **serial**) on the laptop. Since each of the 1000 runs is 32 | independent of all others, and given enough computers, it's theoretically 33 | possible to run them all at once (in **parallel**). 34 | * A genomics researcher has been using small datasets of sequence data, but 35 | soon will be receiving a new type of sequencing data that is 10 times as 36 | large. It's already challenging to open the datasets on a computer — 37 | analyzing these larger datasets will probably crash it. In this research 38 | problem, the calculations required might be impossible to parallelize, but a 39 | computer with **more memory** would be required to analyze the much larger 40 | future data set. 
41 | * An engineer is using a fluid dynamics package that has an option to run in 42 | parallel. So far, this option was not utilized on a desktop. In going from 2D 43 | to 3D simulations, the simulation time has more than tripled. It might be 44 | useful to take advantage of that option or feature. In this research problem, 45 | the calculations in each region of the simulation are largely independent of 46 | calculations in other regions of the simulation. It's possible to run each 47 | region's calculations simultaneously (in **parallel**), communicate selected 48 | results to adjacent regions as needed, and repeat the calculations to 49 | converge on a final set of results. In moving from a 2D to a 3D model, **both 50 | the amount of data and the amount of calculations increases greatly**, and 51 | it's theoretically possible to distribute the calculations across multiple 52 | computers communicating over a shared network. 53 | 54 | In all these cases, access to more (and larger) computers is needed. Those 55 | computers should be usable at the same time, **solving many researchers' 56 | problems in parallel**. 57 | 58 | ::: discussion 59 | 60 | ## Break the Ice 61 | Talk to your neighbour, office mate or [rubber duck](https://rubberduckdebugging.com/) about your research. 62 | 63 | * How does computing help you do your research? 64 | * How could more computing help you do more or better research? 65 | ::: 66 | 67 | ## A Standard Laptop for Standard Tasks 68 | 69 | Today, people coding or analysing data typically work with laptops. 
70 | 71 | ![A standard laptop](fig/200px-laptop-openclipartorg-aoguerrero.svg){alt="A standard laptop"} 72 | 73 | 74 | Let's dissect what resources programs running on a laptop require: 75 | 76 | * the keyboard and/or touchpad is used to tell the computer what to do 77 | (**Input**) 78 | * the internal computing resources **Central Processing Unit** and **Memory** 79 | perform calculations 80 | * the display depicts progress and results (**Output**) 81 | 82 | Schematically, this can be reduced to the following: 83 | 84 | ![Schematic of how a computer works](fig/Simple_Von_Neumann_Architecture.svg){ 85 | alt="Schematic of how a computer works"} 86 | 87 | ## When Tasks Take Too Long 88 | 89 | When the task to solve becomes heavy on computations, the operations are 90 | typically outsourced from the local laptop or desktop to elsewhere. Take, for 91 | example, the task of finding directions for your next vacation. The 92 | capabilities of your laptop are typically not enough to calculate that route 93 | spontaneously: [finding the shortest path](https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm) through a network runs on 94 | the order of *v* log *v* time, where *v* (vertices) represents the number of 95 | intersections in your map. Instead of doing this yourself, you use a website, 96 | which in turn runs on a server that is almost certainly not in the same room 97 | as you are. 98 | 99 | ```{asis} 100 |
101 | A rack half full with servers 102 | 103 |
104 | ``` 105 | 106 | Note here that a server is mostly a noisy computer mounted into a rack cabinet 107 | which in turn resides in a data center. The internet has made it possible for 108 | these data centers to be located far away from your laptop. What people call 109 | **the cloud** is mostly a web service where you can rent such servers by 110 | providing your credit card details and requesting remote resources that satisfy 111 | your requirements. This is often handled through an online, browser-based 112 | interface listing the various machines available and their capacities in terms 113 | of processing power, memory, and storage. 114 | 115 | The server itself has no direct display or input methods attached to it. But 116 | most importantly, it has much more storage, memory and compute capacity than 117 | your laptop will ever have. In any case, you need a local device (laptop, 118 | workstation, mobile phone or tablet) to interact with this remote machine, 119 | which people typically call 'a server'. 120 | 121 | ## When One Server Is Not Enough 122 | 123 | If the computational task or analysis to complete is daunting for a single 124 | server, larger agglomerations of servers are used. These go by the name of 125 | "clusters" or "supercomputers". 126 | 127 | ```{asis} 128 |
129 | A rack with servers 130 | 131 |
132 | ``` 133 | 134 | The methodology of providing the input data, configuring the program options, 135 | and retrieving the results is quite different from using a plain laptop. 136 | Moreover, a graphical interface is often set aside in favor of the 137 | command line. This imposes a double paradigm shift for prospective users asked 138 | to 139 | 140 | 1. work with the command line interface (CLI), rather than a graphical user 141 | interface (GUI) 142 | 1. work with a distributed set of computers (called nodes) rather than the 143 | machine attached to their keyboard & mouse 144 | 145 | ::: challenge 146 | 147 | ## I've Never Used a Server, Have I? 148 | Take a minute and think about which of your daily interactions with a 149 | computer may require a remote server or even cluster to provide you with 150 | results. 151 | 152 | ::: solution 153 | 154 | ## Some Ideas 155 | 156 | * Checking email: your computer (possibly in your pocket) contacts a remote 157 | machine, authenticates, and downloads a list of new messages; it also 158 | uploads changes to message status, such as whether you read, marked as 159 | junk, or deleted the message. Since yours is not the only account, the 160 | mail server is probably one of many in a data center. 161 | * Searching for a phrase online involves comparing your search term against 162 | a massive database of all known sites, looking for matches. This "query" 163 | operation can be straightforward, but building that database is a 164 | [monumental task](https://en.wikipedia.org/wiki/MapReduce)! Servers are 165 | involved at every step. 166 | * Searching for directions on a mapping website involves connecting your 167 | (A) starting and (B) end points by [traversing a graph]( 168 | https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm) in search of 169 | the "shortest" path by distance, time, expense, or another metric.
170 | Converting a map into the right form is relatively simple, but 171 | calculating all the possible routes between A and B is expensive. 172 | 173 | Checking email could be serial: your machine connects to one server and 174 | exchanges data. Searching by querying the database for your search term (or 175 | endpoints) could also be serial, in that one machine receives your query 176 | and returns the result. However, assembling and storing the full database 177 | is far beyond the capability of any one machine. Therefore, these functions 178 | are served in parallel by a large, ["hyperscale"](https://en.wikipedia.org/wiki/Hyperscale_computing) 179 | collection of servers working together. 180 | ::: 181 | ::: 182 | 183 | ::: keypoints 184 | - "High Performance Computing (HPC) typically involves connecting to very large 185 | computing systems elsewhere in the world." 186 | - "These other systems can be used to do work that would either be impossible 187 | or much slower on smaller systems." 188 | - "The standard method of interacting with such systems is via a command line 189 | interface called Bash." 190 | ::: 191 | -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | # Carpentries Workflows 2 | 3 | This directory contains workflows to be used for Lessons using the {sandpaper} 4 | lesson infrastructure. Two of these workflows require R (`sandpaper-main.yaml` 5 | and `pr-receive.yaml`) and the rest are bots to handle pull request management. 6 | 7 | These workflows will likely change as {sandpaper} evolves, so it is important to 8 | keep them up-to-date.
To do this in your lesson you can do the following in your 9 | R console: 10 | 11 | ```r 12 | # Install/Update sandpaper 13 | options(repos = c(carpentries = "https://carpentries.r-universe.dev/", 14 | CRAN = "https://cloud.r-project.org")) 15 | install.packages("sandpaper") 16 | 17 | # update the workflows in your lesson 18 | library("sandpaper") 19 | update_github_workflows() 20 | ``` 21 | 22 | Inside this folder, you will find a file called `sandpaper-version.txt`, which 23 | will contain a version number for sandpaper. This will be used in the future to 24 | alert you if a workflow update is needed. 25 | 26 | What follows are the descriptions of the workflow files: 27 | 28 | ## Deployment 29 | 30 | ### 01 Build and Deploy (sandpaper-main.yaml) 31 | 32 | This is the main driver that will only act on the main branch of the repository. 33 | This workflow does the following: 34 | 35 | 1. checks out the lesson 36 | 2. provisions the following resources 37 | - R 38 | - pandoc 39 | - lesson infrastructure (stored in a cache) 40 | - lesson dependencies if needed (stored in a cache) 41 | 3. builds the lesson via `sandpaper:::ci_deploy()` 42 | 43 | #### Caching 44 | 45 | This workflow has two caches; one cache is for the lesson infrastructure and 46 | the other is for the lesson dependencies if the lesson contains rendered 47 | content. These caches are invalidated by new versions of the infrastructure and 48 | the `renv.lock` file, respectively. If there is a problem with the cache, 49 | manual invalidation is necessary. You will need maintainer access to the repository; 50 | you can either go to the actions tab and [click on the caches button to find 51 | and invalidate the failing cache](https://github.blog/changelog/2022-10-20-manage-caches-in-your-actions-workflows-from-web-interface/) 52 | or set the `CACHE_VERSION` secret to the current date (which will 53 | invalidate all of the caches).
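If you prefer the command line, the `CACHE_VERSION` secret can also be updated with the GitHub CLI. This is a sketch, assuming `gh` is installed and authenticated with access to your lesson repository:

```bash
# Build a date-based version string, e.g. 2024-05-01
STAMP="$(date +%Y-%m-%d)"

# Update the CACHE_VERSION secret, which invalidates all workflow caches.
# Requires the GitHub CLI; prints a hint instead if `gh` is not available.
if command -v gh >/dev/null 2>&1; then
  gh secret set CACHE_VERSION --body "$STAMP"
else
  echo "gh not found; set CACHE_VERSION to $STAMP in the repository settings instead"
fi
```

Run this from a clone of the lesson repository; the next workflow run will then rebuild its caches from scratch.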
54 | 55 | ## Updates 56 | 57 | ### Setup Information 58 | 59 | These workflows run on a schedule and at the maintainer's request. Because they 60 | create pull requests that update workflows/require the downstream actions to run, 61 | they need a special repository/organization secret token called 62 | `SANDPAPER_WORKFLOW` and it must have the `public_repo` and `workflow` scope. 63 | 64 | This can be an individual user token, OR it can be a trusted bot account. If you 65 | have a repository in one of the official Carpentries accounts, then you do not 66 | need to worry about this token being present because the Carpentries Core Team 67 | will take care of supplying this token. 68 | 69 | If you want to use your personal account: you can go to 70 | 71 | to create a token. Once you have created your token, you should copy it to your 72 | clipboard and then go to your repository's settings > secrets > actions and 73 | create or edit the `SANDPAPER_WORKFLOW` secret, pasting in the generated token. 74 | 75 | If you do not specify your token correctly, the runs will not fail and they will 76 | give you instructions to provide the token for your repository. 77 | 78 | ### 02 Maintain: Update Workflow Files (update-workflows.yaml) 79 | 80 | The {sandpaper} repository was designed to do as much as possible to separate 81 | the tools from the content. For local builds, this is absolutely true, but 82 | there is a minor issue when it comes to workflow files: they must live inside 83 | the repository. 84 | 85 | This workflow ensures that the workflow files are up-to-date. The way it works is 86 | to download the update-workflows.sh script from GitHub and run it. The script 87 | will do the following: 88 | 89 | 1. check the recorded version of sandpaper against the current version on GitHub 90 | 2.
update the files if there is a difference in versions 91 | 92 | After the files are updated, if there are any changes, they are pushed to a 93 | branch called `update/workflows` and a pull request is created. Maintainers are 94 | encouraged to review the changes and accept the pull request if the outputs 95 | are okay. 96 | 97 | This update is run ~~weekly or~~ on demand. 98 | 99 | ### 03 Maintain: Update Package Cache (update-cache.yaml) 100 | 101 | For lessons that have generated content, we use {renv} to ensure that the output 102 | is stable. This is controlled by a single lockfile which documents the packages 103 | needed for the lesson and the version numbers. This workflow is skipped in 104 | lessons that do not have generated content. 105 | 106 | Because the lessons need to remain current with the package ecosystem, it's a 107 | good idea to make sure these packages can be updated periodically. The 108 | update cache workflow will do this by checking for updates, applying them in a 109 | branch called `updates/packages` and creating a pull request with _only the 110 | lockfile changed_. 111 | 112 | From here, the markdown documents will be rebuilt and you can inspect what has 113 | changed based on how the packages have updated. 114 | 115 | ## Pull Request and Review Management 116 | 117 | Because our lessons execute code, pull requests are a security risk for any 118 | lesson and thus have security measures associated with them.
**Do not merge any 119 | pull requests that do not pass checks and do not have bots commented on them.** 120 | 121 | This series of workflows all go together and are described in the following 122 | diagram and the below sections: 123 | 124 | ![Graph representation of a pull request](https://carpentries.github.io/sandpaper/articles/img/pr-flow.dot.svg) 125 | 126 | ### Pre Flight Pull Request Validation (pr-preflight.yaml) 127 | 128 | This workflow runs every time a pull request is created and its purpose is to 129 | validate that the pull request is okay to run. This means the following things: 130 | 131 | 1. The pull request does not contain modified workflow files 132 | 2. If the pull request contains modified workflow files, it does not contain 133 | modified content files (such as a situation where @carpentries-bot will 134 | make an automated pull request) 135 | 3. The pull request does not contain an invalid commit hash (e.g. from a fork 136 | that was made before a lesson was transitioned from styles to use the 137 | workbench). 138 | 139 | Once the checks are finished, a comment is issued to the pull request, which 140 | will allow maintainers to determine if it is safe to run the 141 | "Receive Pull Request" workflow from new contributors. 142 | 143 | ### Receive Pull Request (pr-receive.yaml) 144 | 145 | **Note of caution:** This workflow runs arbitrary code by anyone who creates a 146 | pull request. GitHub has safeguarded the token used in this workflow to have no 147 | privileges in the repository, but we have taken precautions to protect against 148 | spoofing. 149 | 150 | This workflow is triggered with every push to a pull request. If this workflow 151 | is already running and a new push is sent to the pull request, the workflow 152 | running from the previous push will be cancelled and a new workflow run will be 153 | started. 154 | 155 | The first step of this workflow is to check if it is valid (e.g.
that no 156 | workflow files have been modified). If there are workflow files that have been 157 | modified, a comment is made that indicates that the workflow is not run. If 158 | both a workflow file and lesson content are modified, an error will occur. 159 | 160 | The second step (if valid) is to build the generated content from the pull 161 | request. This builds the content and uploads three artifacts: 162 | 163 | 1. The pull request number (pr) 164 | 2. A summary of changes after the rendering process (diff) 165 | 3. The rendered files (build) 166 | 167 | Because this workflow builds generated content, it follows the same general 168 | process as the `sandpaper-main` workflow with the same caching mechanisms. 169 | 170 | The artifacts produced are used by the next workflow. 171 | 172 | ### Comment on Pull Request (pr-comment.yaml) 173 | 174 | This workflow is triggered if the `pr-receive.yaml` workflow is successful. 175 | The steps in this workflow are: 176 | 177 | 1. Test if the workflow is valid and comment the validity of the workflow to the 178 | pull request. 179 | 2. If it is valid: create an orphan branch with two commits: the current state 180 | of the repository and the proposed changes. 181 | 3. If it is valid: update the pull request comment with the summary of changes 182 | 183 | Importantly: if the pull request is invalid, the branch is not created so any 184 | malicious code is not published. 185 | 186 | From here, the maintainer can request changes from the author and eventually 187 | either merge or reject the PR. When this happens, if the PR was valid, the 188 | preview branch needs to be deleted. 189 | 190 | ### Send Close PR Signal (pr-close-signal.yaml) 191 | 192 | Triggered any time a pull request is closed. This emits an artifact that is the 193 | pull request number for the next action. 194 | 195 | ### Remove Pull Request Branch (pr-post-remove-branch.yaml) 196 | 197 | Triggered by `pr-close-signal.yaml`.
This removes the temporary branch associated with 198 | the pull request (if it was created). 199 | -------------------------------------------------------------------------------- /episodes/14-modules.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Accessing software via Modules" 3 | teaching: 30 4 | exercises: 15 5 | --- 6 | 7 | ```{r load_config, include=FALSE} 8 | library(yaml) 9 | config <- yaml.load_file("lesson_config.yaml") 10 | snippets <- paste('./files/snippets/', config$snippets, sep='') 11 | ``` 12 | 13 | ::: questions 14 | - How do we load and unload software packages? 15 | ::: 16 | 17 | ::: objectives 18 | - Understand how to load and use a software package. 19 | ::: 20 | 21 | On a high-performance computing system, it is seldom the case that the software 22 | we want to use is available when we log in. It is installed, but we will need 23 | to "load" it before it can run. 24 | 25 | Before we start using individual software packages, however, we should 26 | understand the reasoning behind this approach. The three biggest factors are: 27 | 28 | - software incompatibilities 29 | - versioning 30 | - dependencies 31 | 32 | Software incompatibility is a major headache for programmers. Sometimes the 33 | presence (or absence) of a software package will break others that depend on 34 | it. Two of the most famous examples are Python 2 and 3 and C compiler versions. 35 | Python 3 famously provides a `python` command that conflicts with that provided 36 | by Python 2. Software compiled against a newer version of the C libraries and 37 | then used when they are not present will result in a nasty `'GLIBCXX_3.4.20' not found` error, for instance. 38 | 39 | Software versioning is another common issue. A team might depend on a certain 40 | package version for their research project - if the software version was to 41 | change (for instance, if a package was updated), it might affect their results. 
42 | Having access to multiple software versions allows a set of researchers to 43 | prevent software versioning issues from affecting their results. 44 | 45 | Dependencies are where a particular software package (or even a particular 46 | version) depends on having access to another software package (or even a 47 | particular version of another software package). For example, the VASP 48 | materials science software may depend on having a particular version of the 49 | FFTW (Fastest Fourier Transform in the West) software library available for it 50 | to work. 51 | 52 | ## Environment Modules 53 | 54 | Environment modules are the solution to these problems. A *module* is a 55 | self-contained description of a software package --- it contains the 56 | settings required to run a software package and, usually, encodes required 57 | dependencies on other software packages. 58 | 59 | There are a number of different environment module implementations commonly 60 | used on HPC systems: the two most common are *TCL modules* and *Lmod*. Both of 61 | these use similar syntax and the concepts are the same so learning to use one 62 | will allow you to use whichever is installed on the system you are using. In 63 | both implementations the `module` command is used to interact with environment 64 | modules. An additional subcommand is usually added to the command to specify 65 | what you want to do. For a list of subcommands you can use `module -h` or 66 | `module help`. As for all commands, you can access the full help on the *man* 67 | pages with `man module`. 68 | 69 | On login you may start out with a default set of modules loaded or you may 70 | start out with an empty environment; this depends on the setup of the system 71 | you are using.
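Before looking at the individual subcommands, it can help to see what a module does conceptually: loading one mostly amounts to adjusting environment variables in your current shell. As a rough sketch (the package name `mytool` and its paths are purely illustrative, not taken from any real system), `module load mytool` behaves much like:

```bash
# Roughly what a module load does behind the scenes:
# prepend the package's directories to the relevant search paths
export PATH="/opt/apps/mytool/1.2/bin:$PATH"
export LD_LIBRARY_PATH="/opt/apps/mytool/1.2/lib:${LD_LIBRARY_PATH:-}"
export MYTOOL_HOME="/opt/apps/mytool/1.2"

# The shell now finds this package's executables first
echo "$PATH" | cut -d: -f1    # -> /opt/apps/mytool/1.2/bin
```

Unloading a module reverses these changes, which is why the `module` command, rather than hand-written `export` statements, is the reliable way to manage your software environment.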
72 | 73 | ### Listing Available Modules 74 | 75 | To see available software modules, use `module avail`: 76 | 77 | ```bash 78 | `r config$remote$prompt_work` module avail 79 | ``` 80 | ```{r, child=paste(snippets, '/modules/available-modules.Rmd', sep=''), eval=TRUE} 81 | ``` 82 | 83 | ### Listing Currently Loaded Modules 84 | 85 | You can use the `module list` command to see which modules you currently have 86 | loaded in your environment. If you have no modules loaded, you will see a 87 | message telling you so: 88 | 89 | ```bash 90 | `r config$remote$prompt_work` module list 91 | ``` 92 | ```output 93 | Currently Loaded Modulefiles: 94 | 1) cpe-cray 8) perftools-base/20.10.0(default) 95 | 2) cce/10.0.4(default) 9) xpmem/2.2.35-7.0.1.0_1.9__gd50fabf.shasta(default) 96 | 3) craype/2.7.2(default) 10) cray-mpich/8.0.16(default) 97 | 4) craype-x86-rome 11) cray-libsci/20.10.1.2(default) 98 | 5) libfabric/1.11.0.0.233(default) 12) bolt/0.7 99 | 6) craype-network-ofi 13) /work/y07/shared/archer2-modules/modulefiles-cse/epcc-setup-env 100 | 7) cray-dsmml/0.1.2(default) 14) /usr/local/share/epcc-module/epcc-module-loader 101 | ``` 102 | 103 | ## Loading and Unloading Software 104 | 105 | To load a software module, use `module load`. Let's say we would like 106 | to use the NetCDF utility `ncdump`. 107 | 108 | On login, `ncdump` is not available. We can test this by using the `which` 109 | command. `which` looks for programs the same way that Bash does, 110 | so we can use it to tell us where a particular piece of software is stored.
111 | 112 | ```bash 113 | `r config$host_prompt` which ncdump 114 | ``` 115 | ```output 116 | which: no ncdump in (/usr/local/maven/bin:/lus/cls01095/work/y07/shared/bolt/0.7/bin:/work/y07/shared/utils/bin:/opt/cray/pe/perftools/20.10.0/bin:/opt/cray/pe/papi/6.0.0.4/bin:/opt/cray/libfabric/1.11.0.0.233/bin:/opt/cray/pe/craype/2.7.2/bin:/opt/cray/pe/cce/10.0.4/cce-clang/x86_64/bin:/opt/cray/pe/cce/10.0.4/binutils/x86_64/x86_64-pc-linux-gnu/bin:/opt/cray/pe/cce/10.0.4/binutils/cross/x86_64-aarch64/aarch64-linux-gnu/../bin:/opt/cray/pe/cce/10.0.4/utils/x86_64/bin:/usr/local/Modules/bin:/usr/local/bin:/usr/bin:/bin:/opt/cray/pe/bin:/usr/lib/mit/bin) 117 | ``` 118 | 119 | We can make the `ncdump` command available by using `module load`: 120 | 121 | ```bash 122 | `r config$host_work` module load cray-hdf5 123 | `r config$host_work` module load cray-netcdf 124 | `r config$host_work` which ncdump 125 | ``` 126 | ```output 127 | /opt/cray/pe/netcdf/4.7.4.2/bin/ncdump 128 | ``` 129 | 130 | So, what just happened? 131 | 132 | To understand the output, first we need to understand the nature of the 133 | `$PATH` environment variable. `$PATH` is a special environment variable 134 | that controls where a UNIX system looks for software. Specifically, 135 | `$PATH` is a list of directories (separated by `:`) that the OS searches 136 | through for a command before giving up and telling us it can't find it. 137 | As with all environment variables we can print it out using `echo`.
138 | 139 | ```bash 140 | `r config$host_work` echo $PATH 141 | ``` 142 | ```output 143 | /opt/cray/pe/netcdf/4.7.4.2/bin:/opt/cray/pe/python/3.8.5.0/bin:/lus/cls01095/work/z19/z19/aturner/.local/bin:/lus/cls01095/work/y07/shared/bolt/0.7/bin:/work/y07/shared/utils/bin:/usr/local/maven/bin:/opt/cray/pe/perftools/20.10.0/bin:/opt/cray/pe/papi/6.0.0.4/bin:/opt/cray/libfabric/1.11.0.0.233/bin:/opt/cray/pe/craype/2.7.2/bin:/opt/cray/pe/cce/10.0.4/cce-clang/x86_64/bin:/opt/cray/pe/cce/10.0.4/binutils/x86_64/x86_64-pc-linux-gnu/bin:/opt/cray/pe/cce/10.0.4/binutils/cross/x86_64-aarch64/aarch64-linux-gnu/../bin:/opt/cray/pe/cce/10.0.4/utils/x86_64/bin:/usr/local/Modules/bin:/home/z19/z19/aturner/bin:/usr/local/bin:/usr/bin:/bin:/opt/cray/pe/bin:/usr/lib/mit/bin 144 | ``` 145 | 146 | You'll notice a similarity to the output of the `which` command. In this case, 147 | there's only one difference: the different directory at the beginning. When we 148 | ran the `module load` command, it added a directory to the beginning of our 149 | `$PATH`. Let's examine what's there: 150 | 151 | ```bash 152 | `r config$host_work` ls /opt/cray/pe/netcdf/4.7.4.2/bin 153 | ``` 154 | ```output 155 | nc-config nccopy ncdump ncgen ncgen3 ncxx4-config nf-config 156 | ``` 157 | 158 | In summary, `module load` will add software to your `$PATH`. 159 | `module load` may also load additional modules with software dependencies. 160 | 161 | To unload a module, use `module unload` with the relevant module name. 162 | 163 | ::: challenge 164 | ## Unload! 165 | Confirm you can unload the `cray-netcdf` module and check what happens to the `PATH` environment variable. 166 | ::: 167 | 168 | ## Software versioning 169 | 170 | So far, we've learned how to load and unload software packages. This is very useful. However, we 171 | have not yet addressed the issue of software versioning. At some point or other, you will run into 172 | issues where only one particular version of some software will be suitable. 
Perhaps a key bugfix 173 | only happened in a certain version, or version X broke compatibility with a file format you use. In 174 | either of these example cases, it helps to be very specific about what software is loaded. 175 | 176 | Let's examine the output of `module avail` more closely. 177 | 178 | ```bash 179 | `r config$host_work` module avail cray-netcdf 180 | ``` 181 | ```output 182 | --------------------------- /opt/cray/pe/modulefiles --------------------------- 183 | cray-netcdf-hdf5parallel/4.7.4.0 cray-netcdf/4.7.4.0 184 | cray-netcdf-hdf5parallel/4.7.4.2(default) cray-netcdf/4.7.4.2(default) 185 | ``` 186 | 187 | Note that we have two different versions of `cray-netcdf` (and also two 188 | versions of something else, `cray-netcdf-hdf5parallel`, which match our 189 | search). 190 | 191 | ::: challenge 192 | ## Using `module swap` 193 | Load module `cray-netcdf` as before. Note that if we do not specify 194 | a particular version, we load a default version. 195 | If we wish to change versions, we can use 196 | `module swap <old module> <new module>`. Try this to obtain 197 | `cray-netcdf/4.7.4.0`. Check what has happened to the location of 198 | the `ncdump` utility. 199 | ::: 200 | 201 | ::: challenge 202 | ## Using Software Modules in Scripts 203 | Create a job that is able to run `ncdump --version`. Running a job 204 | is just like logging on to the system 205 | (you should not assume a module loaded on the login node is loaded on a 206 | compute node).
207 | 208 | ::: solution 209 | 210 | ```bash 211 | `r config$remote$prompt_work` nano ncdump-module.sh 212 | `r config$remote$prompt_work` cat ncdump-module.sh 213 | ``` 214 | ```output 215 | `r config$remote$bash_shebang` 216 | `r config$sched$comment` --partition=standard 217 | `r config$sched$comment` --qos=`r config$sched$qos` 218 | module load epcc-job-env 219 | module load cray-netcdf 220 | ncdump --version 221 | ``` 222 | 223 | ```bash 224 | `r config$remote$prompt_work` `r config$sched$submit.name` ncdump-module.sh 225 | ``` 226 | ::: 227 | ::: 228 | 229 | ::: keypoints 230 | - "Load software with `module load softwareName`." 231 | - "Unload software with `module unload softwareName`." 232 | - "The module system handles software versioning and package conflicts for you 233 | automatically." 234 | ::: 235 | -------------------------------------------------------------------------------- /learners/setup.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Setup 3 | --- 4 | 5 | ```{r load_config, include=FALSE} 6 | library(yaml) 7 | config <- yaml.load_file("lesson_config.yaml") 8 | ``` 9 | 10 | # Setup 11 | 12 | There are several pieces of software you will wish to install before the 13 | workshop. Though installation help will be provided at the workshop, we 14 | recommend that these tools are installed (or at least downloaded) beforehand. 15 | 16 | ::: prereq 17 | 18 | ## Bash and SSH 19 | 20 | This lesson requires a terminal application (`bash`, `zsh`, or others) with 21 | the ability to securely connect to a remote machine (`ssh`). 22 | 23 | ::: 24 | 25 | ## Where to type commands: How to open a new shell 26 | 27 | The shell is a program that enables us to send commands to the computer and 28 | receive output. It is also referred to as the terminal or command line. 29 | 30 | Some computers include a default Unix Shell program.
The steps below describe 31 | some methods for identifying and opening a Unix Shell program if you already 32 | have one installed. There are also options for identifying and downloading a 33 | Unix Shell program, a Linux/UNIX emulator, or a program to access a Unix Shell 34 | on a server. 35 | 36 | ## Shell Setup 37 | 38 | ::::::::::::::::::::::::::::::::::::::: discussion 39 | 40 | ### Details 41 | 42 | Instructions for each operating system are given in the dropdown 43 | blocks below. Expand the one that matches your system to find out how to 44 | identify and open a Unix Shell program (and, where necessary, how to 45 | install one). 46 | 47 | ::::::::::::::::::::::::::::::::::::::::::::::::::: 48 | 49 | :::::::::::::::: solution 50 | 51 | ### Windows 52 | 53 | Computers with Windows operating systems do not automatically have a Unix Shell 54 | program installed. In this lesson, we encourage you to use an emulator included 55 | in MobaXterm for Windows, which gives you access to both Bash shell commands and SSH. 56 | 57 | Once installed, you can open a terminal by running the program MobaXterm from 58 | the Windows start menu. 59 | 60 | #### Reference 61 | 62 | * [MobaXterm](https://mobaxterm.mobatek.net/download-home-edition.html) — *Recommended* 63 | * [Git for Windows](https://gitforwindows.org/) — Alternative option used by other Carpentries lessons 64 | * [Windows Subsystem for Linux]( 65 | https://docs.microsoft.com/en-us/windows/wsl/install-win10) 66 | — advanced option for Windows 10 67 | 68 | ::: discussion 69 | ## Alternatives to MobaXterm and Git for Windows 70 | Other solutions are available for running Bash commands on Windows. There is 71 | now a Bash shell command-line tool available for Windows 10. Additionally, 72 | you can run Bash commands on a remote computer or server that already has a 73 | Unix Shell, from your Windows machine.
This can usually be done through a 74 | Secure Shell (SSH) client. One such client available for free for Windows 75 | computers is PuTTY. See the reference below for information on installing and 76 | using PuTTY, using the Windows 10 command-line tool, or installing and using 77 | a Unix/Linux emulator. 78 | 79 | For advanced users, you may choose one of the following alternatives: 80 | 81 | * Install the [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install-win10) 82 | * Use the Windows [PowerShell](https://docs.microsoft.com/en-us/powershell/scripting/learn/remoting/ssh-remoting-in-powershell-core?view=powershell-7) 83 | * Read up on [Using a Unix/Linux emulator (Cygwin) or Secure Shell (SSH) client (PuTTY)](https://faculty.smu.edu/reynolds/unixtut/windows.html) 84 | > ## Warning 85 | > 86 | > Commands in the Windows Subsystem for Linux (WSL), PowerShell, or Cygwin 87 | > may differ slightly from those shown in the lesson or presented in the 88 | > workshop. Please ask if you encounter such a mismatch — you're 89 | > probably not alone. 90 | 91 | ::: 92 | 93 | ::::::::::::::::::::::::: 94 | 95 | :::::::::::::::: solution 96 | 97 | ### macOS 98 | 99 | On macOS, the default Unix Shell is accessible by running the Terminal program 100 | from the `/Applications/Utilities` folder in Finder. 101 | 102 | To open Terminal, try one or both of the following: 103 | 104 | * In Finder, select the Go menu, then select Utilities. Locate Terminal in the 105 | Utilities folder and open it. 106 | * Use the Mac ‘Spotlight’ computer search function. Search for: `Terminal` and 107 | press Return.
108 | 109 | #### Reference 110 | 111 | [How to Use Terminal on a Mac]( 112 | https://www.macworld.co.uk/feature/mac-software/how-use-terminal-on-mac-3608274/) 113 | 114 | ::::::::::::::::::::::::: 115 | 116 | 117 | :::::::::::::::: solution 118 | 119 | ### Linux 120 | 121 | On most versions of Linux, the default Unix Shell is accessible by running the 122 | [(Gnome) Terminal](https://help.gnome.org/users/gnome-terminal/stable/) or 123 | [(KDE) Konsole](https://konsole.kde.org/) or 124 | [xterm](https://en.wikipedia.org/wiki/Xterm), which can be found via the 125 | applications menu or the search bar. 126 | 127 | #### Special cases 128 | 129 | If none of the options above address your circumstances, try an online search 130 | for: `Unix shell [your operating system]`. 131 | 132 | ::::::::::::::::::::::::: 133 | 134 | This setup takes you through getting ready for the course by ensuring you have an SSH client installed 135 | and registering for an ARCHER2 account (you can also use an existing ARCHER2 account if you are already 136 | a user). 137 | 138 | 139 | ## Set up an SSH client 140 | 141 | ::::::::::::::::::::::::: discussion 142 | 143 | ### Details 144 | 145 | All attendees should have an SSH client installed. 146 | SSH is a tool that allows us to connect to and use a remote computer as our own. 147 | Please follow the directions below to install an SSH client for your system if you do not 148 | already have one. 149 | 150 | ::::::::::::::::::::::::: 151 | 152 | ::: solution 153 | 154 | ### Windows 155 | 156 | Modern versions of Windows have SSH available in PowerShell. You can test if it is available by typing `ssh --help` in PowerShell. If it is 157 | installed, you should see some useful output. If it is not installed, you will get an error. If SSH is not available in PowerShell, then 158 | you should install MobaXterm as described below. 159 | 160 | An alternative is to install MobaXterm from [https://mobaxterm.mobatek.net](https://mobaxterm.mobatek.net).
You will want to get the Home edition (Installer edition). However, if PowerShell works, you do not need this. 161 | 162 | ::: 163 | 164 | ::: solution 165 | 166 | ### macOS 167 | 168 | macOS comes with SSH pre-installed, so you should not need to install anything. Use your "Terminal" app. 169 | 170 | ::: 171 | 172 | ::: solution 173 | ### Linux 174 | 175 | Linux users do not need to install anything; you should be set! Use your terminal application. 176 | 177 | ::: 178 | 179 | ## Connect to ARCHER2 180 | 181 | ::::::::::::::::: discussion 182 | 183 | ### Details 184 | 185 | **Note:** If you already have an ARCHER2 account you can use that for this course. Please 186 | see the notes at the end of this page rather than requesting a new account. Of course, if 187 | you prefer to sign up for another ARCHER2 account specifically for this course, you are 188 | welcome to do this instead! 189 | 190 | Please sign up for your account on our HPC machine, ARCHER2, which will be available to 191 | you for the duration of the course and for a few days afterwards, to allow you to 192 | complete the practical exercises and put some of what you have learned into practice. 193 | 194 | ### Sign up for a SAFE account 195 | 196 | To sign up, you must first register for an account on SAFE (our service administration 197 | web application): 198 | 199 | If you are already registered on SAFE you do not need to re-register. Please proceed to the next step. 200 | 201 | 1. Go to the [SAFE New User Signup Form](https://safe.epcc.ed.ac.uk/signup.jsp) 202 | 2. Fill in your personal details. You can come back later and change them if you wish. Note: you should register using your institutional or company email address - email domains such as gmail.com, outlook.com, etc. are not allowed to be used for access to ARCHER2 203 | 3. Click “Submit” 204 | 4. You are now registered. A single-use login link will be emailed to the email address you provided.
You can use this link to log in and set your password. 205 | 206 | ### Sign up for an account on ARCHER2 through SAFE 207 | 208 | 1. [Login to SAFE](https://safe.epcc.ed.ac.uk) 209 | 2. Go to the Menu "Login accounts" and select "Request login account" 210 | 3. Choose the ``r config$sched$project`` project in the “Choose Project for Machine Account” box and click "Next" 211 | 4. On the next page, the ARCHER2 system should be selected. Click "Next" 212 | 5. Enter the username you would prefer to use on ARCHER2. Every username must be unique, so if your chosen name is taken, you will need to choose another 213 | 214 | Now you have to wait for the course organiser to accept your request to register. When this has happened, your account will be created on ARCHER2. 215 | Once this has been done, you should be sent an email. _If you have not received an email but believe that your account should have been activated, check your account status in SAFE, which will also show when the account has been activated._ You can then pick up your one-shot initial password for ARCHER2 from your SAFE account. 216 | 217 | ### Generate an SSH key pair and upload it to SAFE 218 | 219 | In addition to your password, you will need an SSH key pair to access ARCHER2. There is useful guidance on how 220 | to generate SSH key pairs in [the ARCHER2 documentation](https://docs.archer2.ac.uk/user-guide/connecting/#ssh-key-pairs). 221 | 222 | Once you have generated your key pair, you need to add the public part to your ARCHER2 account in SAFE: 223 | 224 | 1. [Login to SAFE](https://safe.epcc.ed.ac.uk) 225 | 2. Go to the Menu “Login accounts” and select the ARCHER2 account you want to add the SSH key to 226 | 3. On the subsequent Login account details page click the “Add Credential” button 227 | 4. Select “SSH public key” as the Credential Type and click “Next” 228 | 5.
Either copy and paste the public part of your SSH key into the “SSH Public key” box or use the button to select the public key file on your computer. 229 | 6. Click “Add” to associate the public SSH key part with your account 230 | 231 | The public SSH key part will now be added to your login account on the ARCHER2 system. 232 | 233 | ### Configure TOTP passwords 234 | 235 | ARCHER2 now uses time-based one-time passwords (TOTP) for multi-factor authentication (MFA). One-time passwords are a 236 | common security measure used by banking, cloud services and apps that create a changing, time-limited code to 237 | verify your identity beyond a password and username. 238 | 239 | To set up your MFA TOTP you will need an authenticator application on your phone or laptop. Follow the 240 | [steps](https://epcced.github.io/safe-docs/safe-for-users/#how-to-turn-on-mfa-on-your-machine-account) 241 | in the SAFE documentation, ensuring you create the code for your ``r config$sched$project`` project 242 | account. 243 | 244 | You will only be prompted at login for your TOTP code once a day. 245 | 246 | ### Log into ARCHER2 247 | 248 | You should now be able to log into ARCHER2 by following the [login instructions in the ARCHER2 documentation](https://docs.archer2.ac.uk/user-guide/connecting/#ssh-clients). 249 | 250 | ### Using an existing ARCHER2 account 251 | 252 | If you wish to use an existing ARCHER2 account for the course, that is perfectly 253 | fine. The only difference from the specific course account is that you will not 254 | have access to the course account code. Instead, you can use your existing 255 | account code (the cost of the jobs is negligible as they are very short and 256 | small).
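Once your key is registered, day-to-day logins can be made more convenient with an entry in your local SSH configuration file. The fragment below is an illustrative sketch only: the `Host` alias, username, and key filename are placeholders to adjust for your own setup (the login address is the one given in the ARCHER2 connecting documentation).

```
# ~/.ssh/config -- hypothetical entry; adjust User and IdentityFile
Host archer2
    HostName login.archer2.ac.uk
    User your_username
    IdentityFile ~/.ssh/id_rsa_archer2
```

With this in place, `ssh archer2` connects without spelling out the full address and options each time.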
257 | 258 | :::::::::::::::::::::: 259 | -------------------------------------------------------------------------------- /episodes/fig/Simple_Von_Neumann_Architecture.svg: -------------------------------------------------------------------------------- [SVG figure: a simple von Neumann architecture; a Central Processing Unit connected to Memory, with Input (Keyboard, Mouse, Disk, Network, ...) and Output (Display, Disk, Network, ...); drawing markup omitted] -------------------------------------------------------------------------------- /episodes/fig/redirects-and-pipes.svg: -------------------------------------------------------------------------------- [SVG figure: redirects and pipes; `wc -l *.pdb` with output shown in the shell, redirected to the file `lengths` with `>`, and piped through `sort -n | head -1`; drawing markup omitted] -------------------------------------------------------------------------------- /episodes/18-responsibility.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using shared resources responsibly" 3 | teaching: 15 4 | exercises: 5
5 | --- 6 | 7 | ```{r load_config, include=FALSE} 8 | library(yaml) 9 | config <- yaml.load_file("lesson_config.yaml") 10 | snippets <- paste('./files/snippets/', config$snippets, sep='') 11 | ``` 12 | 13 | ::: questions 14 | - How can I be a responsible user? 15 | - How can I protect my data? 16 | - How can I best get large amounts of data off an HPC system? 17 | ::: 18 | 19 | ::: objectives 20 | - Learn how to be a considerate shared system citizen. 21 | - Understand how to protect your critical data. 22 | - Appreciate the challenges with transferring large amounts of data off HPC 23 | systems. 24 | - Understand how to convert many files to a single archive file using `tar`. 25 | ::: 26 | 27 | One of the major differences between using remote HPC resources and your own 28 | system (e.g. your laptop) is that remote resources are shared. How many users 29 | the resource is shared between at any one time varies from system to system, but 30 | it is unlikely you will ever be the only user logged into or using such a 31 | system. 32 | 33 | The widespread usage of scheduling systems where users submit jobs on HPC 34 | resources is a natural outcome of the shared nature of these resources. There 35 | are other things you, as an upstanding member of the community, need to 36 | consider. 37 | 38 | ## Be Kind to the Login Nodes 39 | 40 | The login node is often busy managing all of the logged-in users, creating and 41 | editing files and compiling software. If the machine runs out of memory or 42 | processing capacity, it will become very slow and unusable for everyone. While 43 | the machine is meant to be used, be sure to do so responsibly --- in ways 44 | that will not adversely impact other users' experience. 45 | 46 | Login nodes are always the right place to launch jobs. Cluster policies vary, 47 | but they may also be used for proving out workflows, and in some cases, may 48 | host advanced cluster-specific debugging or development tools.
The cluster may 49 | have modules that need to be loaded, possibly in a certain order, and paths or 50 | library versions that differ from your laptop, and doing an interactive test 51 | run on the head node is a quick and reliable way to discover and fix these 52 | issues. 53 | 54 | ::: callout 55 | ## Login Nodes Are a Shared Resource 56 | 57 | Remember, the login node is shared with all other users and your actions 58 | could cause issues for other people. Think carefully about the potential 59 | implications of issuing commands that may use large amounts of resource. 60 | 61 | Unsure? Ask your friendly systems administrator ("sysadmin") or service 62 | desk if the thing you're contemplating is suitable for the login node, 63 | or if there's another mechanism to get it done safely. 64 | 65 | You can contact the ARCHER2 Service Desk at [support@archer2.ac.uk](mailto:support@archer2.ac.uk). 66 | 67 | ::: 68 | 69 | You can always use the commands `top` and `ps ux` to list the processes that 70 | are running on the login node along with the amount of CPU and memory they are 71 | using. If this check reveals that the login node is somewhat idle, you can 72 | safely use it for your non-routine processing task. If something goes wrong 73 | --- the process takes too long, or doesn't respond --- you can use the 74 | `kill` command along with the *PID* to terminate the process. 75 | 76 | ::: challenge 77 | ## Login Node Etiquette 78 | Which of these commands would be a routine task to run on the login node? 79 | 80 | 1. `python physics_sim.py` 81 | 2. `make` 82 | 3. `create_directories.sh` 83 | 4. `molecular_dynamics_2` 84 | 5. `tar -xzf R-3.3.0.tar.gz` 85 | 86 | ::: solution 87 | 88 | Building software, creating directories, and unpacking software are common and acceptable tasks for the login node: options #2 (`make`), #3 89 | (`create_directories.sh`), and #5 (`tar`) are probably OK.
Note that script names do not 90 | always reflect their contents: before launching #3, please 91 | `less create_directories.sh` and make sure it's not a Trojan horse. 92 | Running resource-intensive applications is frowned upon. Unless you are 93 | sure it will not affect other users, do not run jobs like #1 (`python`) 94 | or #4 (custom MD code). If you're unsure, ask your friendly sysadmin for 95 | advice. 96 | 97 | ::: 98 | ::: 99 | 100 | If you experience performance issues with a login node, you should report it to 101 | the system staff (usually via the helpdesk) for them to investigate. 102 | 103 | ## Test Before Scaling 104 | 105 | Remember that you are generally charged for usage on shared systems. A simple 106 | mistake in a job script can end up costing a large amount of resource budget. 107 | Imagine a job script with a mistake that makes it sit doing nothing for 24 108 | hours on 1000 cores or one where you have requested 2000 cores by mistake and 109 | only use 100 of them! This problem can be compounded when people write scripts 110 | that automate job submission (for example, when running the same calculation or 111 | analysis over lots of different parameters or files). When this happens it 112 | hurts both you (as you waste lots of charged resource) and other users (who are 113 | blocked from accessing the idle compute nodes). 114 | 115 | On very busy resources, you may wait many days in a queue for your job to fail 116 | within 10 seconds of starting due to a trivial typo in the job script. This is 117 | extremely frustrating! Most systems provide dedicated resources for testing 118 | that have short wait times to help you avoid this issue. 119 | 120 | ::: callout 121 | ## Test Job Submission Scripts That Use Large Amounts of Resources 122 | 123 | Before submitting a large run of jobs, submit one as a test first to make 124 | sure everything works as expected.
125 | 126 | Before submitting a very large or very long job, submit a short, truncated test 127 | to ensure that the job starts as expected. 128 | 129 | ::: 130 | 131 | ## Have a Backup Plan 132 | 133 | Although many HPC systems keep backups, these do not always cover all the file 134 | systems available and may only be for disaster recovery purposes (*i.e.* for 135 | restoring the whole file system if lost rather than an individual file or 136 | directory you have deleted by mistake). Protecting critical data from 137 | corruption or deletion is primarily your responsibility: keep your own backup 138 | copies. 139 | 140 | Version control systems (such as Git) often have free, cloud-based offerings 141 | (e.g., GitHub and GitLab) that are generally used for storing source code. Even 142 | if you are not writing your own programs, these can be very useful for storing 143 | job scripts, analysis scripts and small input files. 144 | 145 | For larger amounts of data, you should make sure you have a robust system in 146 | place for taking copies of critical data off the HPC system wherever possible 147 | to backed-up storage. Tools such as `rsync` can be very useful for this. 148 | 149 | Your access to the shared HPC system will generally be time-limited, so you 150 | should ensure you have a plan for transferring your data off the system before 151 | your access finishes. The time required to transfer large amounts of data 152 | should not be underestimated and you should ensure you have planned for this 153 | early enough (ideally, before you even start using the system for your 154 | research). 155 | 156 | In all these cases, the service desk of the system you are using should be able to 157 | provide useful guidance on your options for data transfer for the volumes of 158 | data you will be using.
159 | 160 | ::: callout 161 | ## Your Data Is Your Responsibility 162 | Make sure you understand what the backup policy is on the file systems on the 163 | system you are using and what implications this has for your work if you lose 164 | your data on the system. Plan your backups of critical data and how you will 165 | transfer data off the system throughout the project. 166 | 167 | On ARCHER2, the home file systems are backed up so you can restore data you 168 | deleted by mistake. A copy of the data on the home file system is also kept off 169 | site for disaster recovery purposes. The work file systems are not backed up 170 | in any way. 171 | 172 | ::: 173 | 174 | ## Transferring Data 175 | 176 | As mentioned above, many users run into the challenge of transferring large 177 | amounts of data off HPC systems at some point (transferring data off a system 178 | is more common than transferring data onto it, but the advice below applies in either 179 | case). Data transfer speed may be limited by many different factors so the best 180 | data transfer mechanism to use depends on the type of data being transferred 181 | and where the data is going. 182 | 183 | The components between your data's source and destination have 184 | varying levels of performance, and in particular, may have 185 | different capabilities with respect to **bandwidth** and **latency**. 186 | 187 | **Bandwidth** is generally the raw amount of data per unit time a 188 | device is capable of transmitting or receiving. It's a common 189 | and generally well-understood metric. 190 | 191 | **Latency** is a bit more subtle. For data transfers, it may be thought 192 | of as the amount of time it takes to get data out of storage and into 193 | a transmittable form. Latency issues are the reason it's advisable 194 | to execute data transfers by moving a small number of large 195 | files, rather than the converse.
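A back-of-the-envelope estimate helps set expectations before starting a large transfer. The figures below are assumptions for illustration: a 50 GB dataset and a sustained throughput of 100 MB/s.

```shell
# Estimated transfer time = dataset size / sustained throughput.
size_mb=$(( 50 * 1024 ))   # 50 GB expressed in MB (assumed dataset size)
rate_mb_per_s=100          # assumed sustained throughput
echo "$(( size_mb / rate_mb_per_s )) seconds"   # prints "512 seconds"
```

That is roughly eight and a half minutes in the best case; latency, metadata overhead, and shared links usually make real transfers slower than the raw bandwidth suggests.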
196 | 197 | Some of the key components and their associated issues are: 198 | 199 | - **Disk speed**: File systems on HPC systems are often highly parallel, 200 | consisting of a very large number of high performance disk drives. This 201 | allows them to support a very high data bandwidth. Unless the remote system 202 | has a similar parallel file system you may find your transfer speed limited 203 | by disk performance at that end. 204 | - **Meta-data performance**: *Meta-data operations* such as opening and closing 205 | files or listing the owner or size of a file are much less parallel than 206 | read/write operations. If your data consists of a very large number of small 207 | files you may find your transfer speed is limited by meta-data operations. 208 | Meta-data operations performed by other users of the system can also interact 209 | strongly with those you perform so reducing the number of such operations you 210 | use (by combining multiple files into a single file) may reduce variability 211 | in your transfer rates and increase transfer speeds. 212 | - **Network speed**: Data transfer performance can be limited by network speed. 213 | More importantly it is limited by the slowest section of the network between 214 | source and destination. If you are transferring to your laptop/workstation, 215 | this is likely to be its connection (either via LAN or WiFi). 216 | - **Firewall speed**: Most modern networks are protected by some form of 217 | firewall that filters out malicious traffic. This filtering has some overhead 218 | and can result in a reduction in data transfer performance. The needs of a 219 | general purpose network that hosts email/web-servers and desktop machines are 220 | quite different from a research network that needs to support high volume 221 | data transfers. 
If you are trying to transfer data to or from a host on a 222 | general-purpose network, you may find the firewall for that network will limit 223 | the transfer rate you can achieve. 224 | 225 | As mentioned above, if you have related data that consists of a large number of 226 | small files, it is strongly recommended to pack the files into a larger 227 | *archive* file for long-term storage and transfer. A single large file makes 228 | more efficient use of the file system and is easier to move, copy and transfer 229 | because significantly fewer metadata operations are required. Archive files can 230 | be created using tools like `tar` and `zip`. We have already met `tar` when we 231 | talked about data transfer earlier. 232 | 233 | ::: challenge 234 | 235 | ## Consider the Best Way to Transfer Data 236 | If you are transferring large amounts of data you will need to think about 237 | what may affect your transfer performance. It is always useful to run some 238 | tests that you can use to extrapolate how long it will take to transfer your 239 | data. 240 | Say you have a "data" folder containing 10,000 or so files, a healthy mix of 241 | small and large ASCII and binary data. Which of the following would be the 242 | best way to transfer them to `r config$remote$name`? 243 | 244 | 1. Using `scp`? 245 | 246 | ```bash 247 | `r config$local$prompt` scp -r data `r config$remote$user`@`r config$remote$login`:~/ 248 | ``` 249 | 250 | 251 | 2. Using `rsync`? 252 | 253 | ```bash 254 | `r config$local$prompt` rsync -ra data `r config$remote$user`@`r config$remote$login`:~/ 255 | ``` 256 | 257 | 3. Using `rsync` with compression? 258 | ```bash 259 | `r config$local$prompt` rsync -raz data `r config$remote$user`@`r config$remote$login`:~/ 260 | ``` 261 | 262 | 4. Creating a `tar` archive first for `rsync`?
263 | ```bash 264 | `r config$local$prompt` tar -cvf data.tar data 265 | `r config$local$prompt` rsync -raz data.tar `r config$remote$user`@`r config$remote$login`:~/ 266 | ``` 267 | 268 | 5. Creating a compressed `tar` archive for `rsync`? 269 | ```bash 270 | `r config$local$prompt` tar -cvzf data.tar.gz data 271 | `r config$local$prompt` rsync -ra data.tar.gz `r config$remote$user`@`r config$remote$login`:~/ 272 | ``` 273 | 274 | ::: solution 275 | 276 | Let's go through each option: 277 | 278 | 1. `scp` will recursively copy the directory. This works, but without compression. 279 | 2. `rsync -ra` works like `scp -r`, but preserves file information like modification times and permissions. This is marginally better. 280 | 3. `rsync -raz` adds compression, which will save some bandwidth. If you have a strong CPU at both ends of the line, and you're on a slow network, this is a good choice. 281 | 4. This command first uses `tar` to merge everything into a single file, then `rsync -z` to transfer it with compression. With this large *number* of files, metadata overhead can hamper your transfer, so this is a good idea. 282 | 5. This command uses `tar -z` to compress the archive, then `rsync` (without `-z`, since the archive is already compressed) to transfer it. This may perform similarly to #4, but in most cases (for large datasets), it's the best combination of high throughput and low latency (making the most of your time and network connection). 283 | 284 | ::: 285 | ::: 286 | 287 | ::: keypoints 288 | - "Be careful how you use the login node." 289 | - "Your data on the system is your responsibility." 290 | - "Plan and test large data transfers." 291 | - "It is often best to convert many files to a single archive file before 292 | transferring." 293 | - "Again, don't run stuff on the login node." 294 | ::: 295 | --------------------------------------------------------------------------------