├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .github ├── dependabot.yml └── workflows │ ├── check_links.yml │ ├── check_redirects.yml │ ├── check_vale.yml │ └── pages-deployment.yml ├── .gitignore ├── .gitmodules ├── .ignore ├── .lycheeignore ├── .pre-commit-config.yaml ├── .python-version ├── .vale.ini ├── .vscode └── settings.json ├── DEPLOY.md ├── INSTALL.md ├── LICENSE ├── README.md ├── docs ├── _headers ├── _redirects ├── access-policies.md ├── actions-intro.md ├── actions-pipelines.md ├── actions-reusable.md ├── actions-scripts.md ├── adding-a-paper.md ├── case-control-studies.md ├── code-reviews.md ├── codelist-creation.md ├── codelist-intro.md ├── codelist-project.md ├── codelist-updating.md ├── contributing.md ├── copiloting-service.md ├── css │ ├── extra.css │ ├── lite-yt-embed.css │ └── youtube.css ├── data-access-policy.md ├── data-sources │ ├── apc.md │ ├── cpns.md │ ├── ecds.md │ ├── emis.md │ ├── hcd.md │ ├── icnarc.md │ ├── index.md │ ├── intro.md │ ├── isaric.md │ ├── onsdeaths.md │ ├── sgsscovid.md │ ├── systmone.md │ ├── therapeutics.md │ └── ukrr.md ├── datapast │ ├── index.md │ └── the_clinical_datapast_function.pdf ├── documents │ └── OpenSAFELY_Output_Review_Form_ADD_WORKSPACE_NAME_ADD_DATE.docx ├── federation.md ├── five-safes.md ├── getting-started │ ├── explanation │ │ ├── index.md │ │ ├── options-for-running-opensafely │ │ │ └── index.md │ │ ├── understanding-github-codespaces │ │ │ └── index.md │ │ └── understanding-the-software-used-to-run-opensafely │ │ │ └── index.md │ ├── how-to │ │ ├── add-github-codespaces-to-your-project │ │ │ ├── download_raw_file.png │ │ │ └── index.md │ │ ├── create-a-code-repository-for-your-project │ │ │ └── index.md │ │ ├── index.md │ │ ├── troubleshoot-common-codespaces-issues │ │ │ ├── codespace-additional-permissions.png │ │ │ ├── directory-listing.png │ │ │ ├── index.md │ │ │ ├── r-session-error.png │ │ │ ├── vscode-popup.png │ │ │ └── vscode-ports.png │ │ ├── 
update-github-codespaces-in-your-project │ │ │ └── index.md │ │ ├── use-git-effectively │ │ │ └── index.md │ │ ├── use-github-codespaces-in-your-project │ │ │ └── index.md │ │ └── use-released-outputs-in-github-codespaces │ │ │ └── index.md │ ├── index.md │ └── tutorial │ │ ├── add-a-scripted-action-to-the-pipeline │ │ └── index.md │ │ ├── check-the-automated-tests-pass │ │ └── index.md │ │ ├── create-a-github-account │ │ └── index.md │ │ ├── create-a-github-codespace │ │ └── index.md │ │ ├── create-a-github-repository │ │ └── index.md │ │ ├── delete-the-github-codespace │ │ └── index.md │ │ ├── generate-a-first-dataset │ │ └── index.md │ │ ├── index.md │ │ ├── introduction │ │ └── index.md │ │ ├── publish-the-changes-to-github │ │ └── index.md │ │ ├── run-the-project-pipeline │ │ └── index.md │ │ ├── see-the-next-steps │ │ └── index.md │ │ └── update-the-dataset-definition │ │ └── index.md ├── git-workflow.md ├── how-to-get-help.md ├── images │ ├── NON-COVID-GP-data-OpenSAFELY-platform-architecture-and-dataflows-V5-for-DPIA-DPN.drawio.svg │ ├── OpenSAFELY-security-levels-2023-DPN.svg │ ├── adding-codelist-id-tag.png │ ├── adding-codelist-id-version.png │ ├── c4-container.svg │ ├── c4-system-context.svg │ ├── code-review-main.png │ ├── codelists-jobs-warning.png │ ├── codespaces-create.png │ ├── codespaces-options.png │ ├── codespaces-setup-screen.png │ ├── codespaces-template.png │ ├── create_new_workspace.png │ ├── example-dashboard-chart.png │ ├── excel-export-csv.png │ ├── getting-started-codespaces-button.png │ ├── getting-started-codespaces-commit-message.png │ ├── getting-started-codespaces-push-to-github.png │ ├── getting-started-codespaces-repository-additional-permissions.png │ ├── getting-started-codespaces-setting-up.png │ ├── getting-started-codespaces-stage-changes.png │ ├── getting-started-codespaces-start.png │ ├── getting-started-create-repository-owner-name.png │ ├── getting-started-create-repository-public-private.png │ ├── 
getting-started-github-actions-tab.png │ ├── getting-started-github-actions-workflow-success.png │ ├── getting-started-github-desktop-commit-all.png │ ├── getting-started-github-desktop-commit-message.png │ ├── getting-started-github-desktop-push-to-github.png │ ├── good-pr-pic.png │ ├── job_request_8676_timings.png │ ├── job_request_8680_all_noncoviddeath.png │ ├── line-profiler-output-dummy-data.png │ ├── macos-docker-privileges-escalation-warning.png │ ├── macos-docker-privileges-escalation.png │ ├── macos-docker-skip-intro.png │ ├── macos-menu-bar.png │ ├── pr-desc.png │ ├── project-edit-button.png │ ├── project-edit-page.png │ ├── releases.png │ ├── run_jobs.png │ ├── t1oos.png │ ├── token.png │ ├── use-this-template-button.png │ ├── use-this-template.png │ ├── view_project.png │ ├── win-anaconda-prompt.png │ └── win-docker-starting.png ├── img │ ├── favicon.svg │ └── icon.svg ├── index.md ├── install-docker.md ├── install-linux.md ├── install-macos.md ├── install-python.md ├── install-visual-studio-code.md ├── jobs-site.md ├── js │ ├── extra.js │ └── lite-yt-embed.js ├── legacy │ ├── requesting-release-offline-process.md │ ├── study-def-codelists.md │ ├── study-def-dates.md │ ├── study-def-expectations.md │ ├── study-def-flowcharts.md │ ├── study-def-measures.md │ ├── study-def-tricks.md │ ├── study-def-variables.md │ └── study-def.md ├── level-4-server.md ├── memory-efficient-working.md ├── open-data-manifesto.md ├── open-methods.md ├── opensafely-cli.md ├── outputs │ ├── index.md │ ├── output-checking.md │ ├── releasing-overview.md │ ├── requesting-file-release.md │ ├── sdc.md │ ├── viewing-released-files.md │ └── viewing-with-airlock.md ├── paper_template.txt ├── plan-s.md ├── project-changes.md ├── project-completion.md ├── protocol.md ├── reports │ ├── create-a-draft.md │ ├── images │ │ ├── job-server-direct-output-file-link-published.jpg │ │ ├── job-server-direct-output-file-link-release.jpg │ │ ├── job-server-published-outputs.jpg │ │ ├── 
job-server-workspace-latest-outputs.jpg │ │ └── reports-admin-add-report.jpg │ ├── intro.md │ ├── publish-a-report.md │ └── review-process.md ├── repositories.md ├── requesting-libraries.md ├── security-levels.md ├── system-integration.md ├── technical-architecture.md ├── type-one-opt-outs.md ├── updating-the-docs.md └── workflow.md ├── hooks ├── __init__.py ├── ehrql_branch.py ├── ehrql_css.py └── parent_snippets.py ├── includes ├── cohort-extractor-deprecated.md ├── glossary.md ├── imd-warning-header.md ├── isaric-warning-header.md └── vmp-ids-warning.md ├── justfile ├── main.py ├── mkdocs.yml ├── overrides ├── main.html └── partials │ └── integrations │ └── analytics │ └── plausible.html ├── pyproject.toml ├── requirements.dev.in ├── requirements.dev.txt ├── requirements.prod.in ├── requirements.prod.txt ├── requirements.txt ├── scripts └── wordcount.py ├── styles └── OpenSAFELY │ ├── Branding.yml │ ├── HereLinks.yml │ └── InternalLinks.yml └── templates └── python └── material └── parameters.html /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/devcontainers/base:ubuntu-24.04 2 | 3 | RUN echo "deb http://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu noble main" > /etc/apt/sources.list.d/deadsnakes-ppa.list && \ 4 | /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc 5 | 6 | RUN apt-get update && \ 7 | apt-get install -y \ 8 | just \ 9 | python3.11 \ 10 | python3.11-venv 11 | 12 | # Workaround for Python babel package resulting in 13 | # ValueError: ZoneInfo keys may not be absolute paths, got: /UTC 14 | ENV TZ="Europe/London" 15 | 16 | USER vscode 17 | 18 | # Clone cohort-extractor submodule via HTTPS. 19 | # GitHub does not allow SSH access without adding a private key. 
20 | RUN git config --global url.https://github.com/.insteadOf git@github.com: 21 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Documentation development", 3 | "dockerFile": "Dockerfile", 4 | "postAttachCommand": "just run", 5 | "customizations": { 6 | "vscode": { 7 | "settings": { 8 | "extensions.ignoreRecommendations": true, 9 | "files.autoSave": "afterDelay", 10 | "files.autoSaveDelay": 3000, 11 | "git.autofetch": true 12 | } 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | open-pull-requests-limit: 5 8 | 9 | - package-ecosystem: "github-actions" 10 | directory: "/" 11 | schedule: 12 | interval: "monthly" 13 | 14 | - package-ecosystem: "gitsubmodule" 15 | directory: "/" 16 | schedule: 17 | interval: "daily" 18 | -------------------------------------------------------------------------------- /.github/workflows/check_links.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'Check links' 3 | 4 | # yamllint disable-line rule:truthy 5 | on: 6 | workflow_dispatch: 7 | schedule: 8 | - cron: '37 11 * * 2' 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | 15 | links: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout repo 19 | uses: actions/checkout@v4 20 | with: 21 | submodules: true 22 | 23 | - name: Install Python and just 24 | uses: opensafely-core/setup-action@v1 25 | with: 26 | install-just: true 27 | python-version: '3.11' 28 | 29 | - name: Build site 30 | env: 31 | AccessToken: ${{ secrets.GITHUB_TOKEN }} 32 | MKDOCS_SITE_URL: 
https://docs.opensafely.org 33 | EHRQL_BRANCH: main 34 | run: just build 35 | 36 | - name: Move imported documentation to correct relative path 37 | run: mv imported_docs/* docs/ 38 | 39 | # Use --max-concurrency to prevent 429 errors with OpenSAFELY Jobs. 40 | # This workaround may not be needed in future if lychee has better rate limiting. 41 | # https://github.com/lycheeverse/lychee/issues/36 42 | - name: Check links 43 | uses: lycheeverse/lychee-action@82202e5e9c2f4ef1a55a3d02563e1cb6041e5332 # v2.4.1 44 | with: 45 | args: "--exclude-all-private --include-verbatim --max-concurrency 24 --require-https --verbose --no-progress --offline 46 | --accept '100..=103,200..=299,429,500..=511' 47 | --timeout 60 './docs/**/*.md' './docs/**/*.html' './imported_docs/**/*.md' './imported_docs/**/*.html' --exclude-path './docs/ehrql/includes/generated_docs'" 48 | -------------------------------------------------------------------------------- /.github/workflows/check_redirects.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'Check Cloudflare redirects' 3 | 4 | # yamllint disable-line rule:truthy 5 | on: 6 | workflow_dispatch: 7 | schedule: 8 | - cron: '25 10 * * 2' 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | redirects: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout repo 18 | uses: actions/checkout@v4 19 | 20 | - name: Check Cloudflare redirects 21 | run: | 22 | awk '{print "https://docs.opensafely.org"$1}' docs/_redirects | xargs -n1 curl -o /dev/null --silent --head --write-out '%{url_effective} %{http_code} \n' > redirect_check_results 23 | cat redirect_check_results 24 | diff <(awk '{print $NF}' redirect_check_results) <(awk '{print $NF}' docs/_redirects) 25 | -------------------------------------------------------------------------------- /.github/workflows/check_vale.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 'Check docs 
with Vale' 3 | 4 | # yamllint disable-line rule:truthy 5 | on: 6 | workflow_dispatch: 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | summarise-vale-docs-warnings: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout repo 16 | uses: actions/checkout@v4 17 | with: 18 | submodules: true 19 | 20 | - name: Install just 21 | uses: opensafely-core/setup-action@v1 22 | with: 23 | install-just: true 24 | 25 | - name: Install Vale 26 | run: | 27 | curl -L $(curl "https://api.github.com/repos/errata-ai/vale/releases/latest" | jq -r '.assets[].browser_download_url | select(contains("Linux_64-bit"))') > /tmp/vale.tar.gz 28 | mkdir -p "$HOME/.local/bin" 29 | tar -xvzf "/tmp/vale.tar.gz" -C "$HOME/.local/bin" 30 | 31 | - name: Run Vale on docs 32 | run: NO_COLOR=1 just lint-docs >> "$GITHUB_STEP_SUMMARY" 33 | -------------------------------------------------------------------------------- /.github/workflows/pages-deployment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | on: 3 | push: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | deploy: 8 | 9 | permissions: 10 | contents: read 11 | deployments: write 12 | 13 | runs-on: ubuntu-latest 14 | 15 | name: Deploy to Cloudflare Pages 16 | steps: 17 | - name: Checkout repo 18 | uses: actions/checkout@v4 19 | with: 20 | submodules: true 21 | 22 | - name: Install Python and just 23 | uses: opensafely-core/setup-action@v1 24 | with: 25 | install-just: true 26 | python-version: '3.11' 27 | 28 | - name: Build site 29 | env: 30 | AccessToken: ${{ secrets.GITHUB_TOKEN }} 31 | MKDOCS_SITE_URL: https://docs.opensafely.org 32 | MKDOCS_MULTIREPO_CLEANUP: true 33 | EHRQL_BRANCH: main 34 | run: just build 35 | 36 | - name: Add a version file 37 | run: echo ${{ github.sha }} > site/version.html 38 | 39 | - name: Publish 40 | if: ${{ github.actor != 'dependabot[bot]' }} 41 | uses: cloudflare/pages-action@f0a1cd58cd66095dee69bfa18fa5efd1dde93bca # v1.5.0 42 | with: 43 | accountId: ${{ 
secrets.CLOUDFLARE_ACCOUNT_ID }} 44 | apiToken: ${{ secrets.CLOUDFLARE_DIRECT_UPLOAD_API_TOKEN }} 45 | directory: "site" 46 | gitHubToken: ${{ secrets.GITHUB_TOKEN }} 47 | projectName: "opensafely-docs" 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python virtual environment 2 | .direnv 3 | .venv 4 | databuilder/.venv 5 | venv 6 | 7 | # Python 8 | __pycache__/ 9 | 10 | # IDE related files 11 | .idea/ 12 | .DS_Store 13 | 14 | # MkDocs output 15 | site/ 16 | 17 | # OpenSAFELY CLI metadata output for tutorial dataset definitions 18 | databuilder/ehrql-tutorial-examples/metadata/ 19 | 20 | # mkdocs-multirepo-plugin 21 | temp_dir/ 22 | imported_docs/ 23 | public_docs.json 24 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/cohort-extractor"] 2 | path = src/cohort-extractor 3 | url = git@github.com:opensafely-core/cohort-extractor.git 4 | -------------------------------------------------------------------------------- /.ignore: -------------------------------------------------------------------------------- 1 | src 2 | -------------------------------------------------------------------------------- /.lycheeignore: -------------------------------------------------------------------------------- 1 | # Preconnect URL for fonts: 404 when the domain itself is accessed. 2 | https://fonts.gstatic.com 3 | 4 | # Not accessible without being logged into a Google account with access. 5 | https?://docs\.google\.com/document/d/1tQveWA7NWaHSx0ETR9FcXYxbtNa1NHiqwygVl6XanKw(?:/.*)?$ 6 | https?://docs\.google\.com/document/d/1uWRiFps6tDA2SpxSxf0C2G9mOVWMQ6TQ(?:/.*)?$ 7 | 8 | # Not accessible without being logged in to GitHub. 
9 | https?://github\.com/ebmdatalab/opensafely-output-review(?:/.*)?$ 10 | https?://github\.com/opensafely/covid-vaccine-effectiveness-research(?:/.*)?$ 11 | https?://github\.com/opensafely/rapid-reports(?:/.*)?$ 12 | https?://github\.com/opensafely/server-instructions(?:/.*)?$ 13 | https?://github\.com/opensafely/documentation/network/updates 14 | https?://github\.com/opensafely/research-template/generate 15 | 16 | # Block either the GitHub Actions runner or the lychee client. 17 | https?://developers\.cloudflare\.com(?:/.*)?$ 18 | https?://www\.encepp\.eu(?:/.*)?$ 19 | https?://www\.tandfonline\.com(?:/.*)?$ 20 | https?://www\.icnarc\.org(?:/.*)?$ 21 | https?://doi\.org(?:/.*)?$ 22 | https?://digital.nhs.uk(?:/.*$)?$ 23 | https?://journalprivacyconfidentiality\.org(?:/.*$)?$ 24 | 25 | # Twitter used to work but is being very flaky. 26 | # This exclusion should be reviewed in future to see if the issue persists. 27 | https?://twitter\.com(?:/.*$)?$ 28 | 29 | # An example OpenSAFELY project that is not intended to exist. 30 | https?://github\.com/opensafely/example-research-repository(?:/.*)?$ 31 | 32 | # The following ignore line ignores this link in the source: ../data-sources 33 | # 34 | # MkDocs actually uses an index.md file to generate this, 35 | # but lychee cannot find it from the link and reports it broken. 36 | # 37 | # What lychee tries to resolve in a GitHub Actions runner is: 38 | # file:///home/runner/work/documentation/documentation/data-sources 39 | # which doesn't exist. 40 | # 41 | # The partial checkout path is included: 42 | # * because there's no other easy way to try and generalise, 43 | # in case someone tries to run this outside of GitHub Actions 44 | # * to try and avoid any false matches, 45 | # in case a similar URL path is used elsewhere in the documentation 46 | # (though this is unlikely) 47 | # This line depends on the repository checkout being in `documentation/`. 
48 | file:///.*/documentation/data-sources/?$ 49 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.11 3 | 4 | repos: 5 | - repo: local 6 | hooks: 7 | - id: check 8 | name: check 9 | entry: just check 10 | language: system 11 | types: [python] 12 | require_serial: true 13 | pass_filenames: false 14 | 15 | - repo: https://github.com/pre-commit/pre-commit-hooks 16 | rev: v4.6.0 17 | hooks: 18 | - id: trailing-whitespace 19 | - id: end-of-file-fixer 20 | - id: debug-statements 21 | - id: check-ast 22 | - id: check-json 23 | - id: check-toml 24 | - id: check-yaml 25 | exclude: mkdocs.yml 26 | - id: check-yaml 27 | name: check-yaml-mkdocs 28 | # --unsafe is a workaround for the use of !! in mkdocs.yml. 29 | args: [--unsafe] 30 | files: mkdocs.yml 31 | - id: detect-private-key 32 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.11 2 | -------------------------------------------------------------------------------- /.vale.ini: -------------------------------------------------------------------------------- 1 | StylesPath = styles 2 | 3 | MinAlertLevel = suggestion 4 | 5 | [*.md] 6 | BasedOnStyles = OpenSAFELY 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.wordWrap": "on" 3 | } -------------------------------------------------------------------------------- /DEPLOY.md: -------------------------------------------------------------------------------- 1 | # Deploying 2 | 3 | [Production](https://docs.opensafely.org) is served by Cloudflare Pages. 
4 | 5 | Pull requests will generate a preview deployment of your changes. 6 | 7 | ## Redirects 8 | 9 | These should be recorded in the [`docs/_redirects`](docs/_redirects) file. 10 | 11 | Redirects are handled automatically by this `_redirects` file using [Cloudflare Pages' Redirect config](https://developers.cloudflare.com/pages/platform/redirects). 12 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | This project is built using [mkdocs](https://www.mkdocs.org/). 4 | 5 | It uses the [material theme](https://squidfunk.github.io/mkdocs-material/), and 6 | [this page](https://squidfunk.github.io/mkdocs-material/reference/abbreviations/) 7 | in their docs includes lots of very nice customisations for formatting documentation. 8 | 9 | ## Running locally 10 | 11 | Use [`just run`](https://github.com/casey/just) to run the MkDocs server. 12 | 13 | This should install everything required. 14 | 15 | There is also a dev container setup that allows running the site in Codespaces 16 | or locally with VSCode and Docker. 17 | 18 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/opensafely/documentation) 19 | 20 | ### Inclusion of ehrQL documentation 21 | 22 | The ehrQL documentation is imported from the 23 | [ehrQL](https://github.com/opensafely-core/ehrql) repo using the 24 | [mkdocs-multirepo-plugin](https://github.com/jdoiro3/mkdocs-multirepo-plugin) and built alongside the docs in this repo. By default it uses the main branch. This can 25 | be configured with the `EHRQL_BRANCH` environment variable: 26 | 27 | ``` 28 | EHRQL_BRANCH=my-branch just run 29 | ``` 30 | 31 | ## Updating Cohort Extractor 32 | 33 | [cohort-extractor](https://github.com/opensafely-core/cohort-extractor) is a documentation dependency. 
34 | We use the cohort-extractor docstrings to generate some content here. 35 | 36 | cohort-extractor is currently a Git submodule instead, for installation simplicity (see #832). 37 | **We currently do not install cohort-extractor into a virtualenv because we are only using the docstrings** 38 | 39 | ### Updating cohort-extractor via Dependabot 40 | 41 | You can update the cohort-extractor submodule via Dependabot. 42 | 43 | Dependabot runs daily and will create a new pull request to update 44 | cohort-extractor if a newer version is available. 45 | 46 | If you don't want to wait, you can also trigger a Dependabot check 47 | manually via the ["Dependency 48 | graph"](https://github.com/opensafely/documentation/network/updates) 49 | section of this repository. 50 | 51 | ### Updating cohort-extractor manually 52 | 53 | Alternatively, you can pull in the latest version of the cohortextractor 54 | docstrings, for local development or to update the requirements entirely 55 | by hand: `just update-cohort-extractor` 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | OpenSAFELY Documentation 2 | Copyright (C) University of Oxford 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see <https://www.gnu.org/licenses/>.
16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenSAFELY Documentation 2 | 3 | This is the public documentation for using the [OpenSAFELY platform](https://www.opensafely.org/). 4 | 5 | It provides information on how to get set up with and use the platform. 6 | 7 | ## Running the site 8 | 9 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/opensafely/documentation) 10 | 11 | When you see "Your application running on port 8910 is available", 12 | you can click "Open in Browser" to see a preview, 13 | and edit the content files in `docs/` to change the content. 14 | It may take a few seconds for changes you make to appear. 15 | 16 | See INSTALL.md for more details, 17 | and for information on running locally instead of in Codespaces. 18 | 19 | ## Automated checks 20 | 21 | We have some automated checks that help us maintain the documentation. 22 | 23 | These are not enforced for new contributions, 24 | but can be run by GitHub Actions. 25 | 26 | ### Content style checks 27 | 28 | We have a small number of style rules written for the [Vale style checker](https://github.com/errata-ai/vale). 29 | 30 | The purpose of these checks is to help keep our documentation more consistent, 31 | even when multiple authors are working on it. 32 | 33 | If these style checks prove useful, 34 | we could expand on these into a more fully featured style guide. 35 | 36 | The rules are stored in the [`styles` directory](styles/). 37 | The rules are written in YAML. 38 | See [Vale's documentation](https://vale.sh/docs/) for more information on Vale rules. 39 | 40 | These checks are not scheduled 41 | and are run [manually via GitHub Actions](https://github.com/opensafely/documentation/actions/workflows/check_vale.yml). 
42 | 43 | ### URL validity checks 44 | 45 | To help keep the content up-to-date, 46 | there is a scheduled weekly link check 47 | run with the [lychee link checker](https://github.com/lycheeverse/lychee/). 48 | -------------------------------------------------------------------------------- /docs/_headers: -------------------------------------------------------------------------------- 1 | /* 2 | Permissions-Policy: browsing-topics=() 3 | -------------------------------------------------------------------------------- /docs/access-policies.md: -------------------------------------------------------------------------------- 1 | The following policies control access to the OpenSAFELY platform. 2 | 3 | * [Policies for Researchers](https://www.opensafely.org/policies-for-researchers/) 4 | * [Data Access Policy](data-access-policy.md) 5 | * [The Clinical DATAPAST Function](datapast/index.md) 6 | -------------------------------------------------------------------------------- /docs/actions-intro.md: -------------------------------------------------------------------------------- 1 | Analytic code can be divided up into logical units. You might have a script which prepares and cleans data, and another which outputs a summary descriptive table. 2 | 3 | In OpenSAFELY, each logical unit is called an _action_. Actions can be scripts, Jupyter notebook generators, or specialised functions provided by the framework. 4 | 5 | An OpenSAFELY project must refer to its actions in a [_pipeline_](actions-pipelines.md). This is a file called `project.yaml` which defines all the actions in a project, how they should be run, and how their outputs should be saved. 6 | 7 | * Every pipeline will start with an [_ehrQL_](ehrql/index.md) action, to generate an analysis-ready dataset of real or dummy data. 8 | * You can create custom [scripted actions](actions-scripts.md) in a number of other coding languages and choose from (or create your own) [reusable actions](actions-reusable.md). 
9 | 10 | Dividing your analysis into actions and describing them in a pipeline has a few purposes: 11 | 12 | * It aids readability and bug-finding 13 | * It allows common code to be reused without copy-and-paste 14 | * The OpenSAFELY [pipeline](actions-pipelines.md) system ensures your actions run efficiently and quickly 15 | * It allows reviewers to see your intent and check your code for privacy and security accordingly 16 | -------------------------------------------------------------------------------- /docs/actions-reusable.md: -------------------------------------------------------------------------------- 1 | Like [scripted actions](actions-scripts.md), reusable actions are logical units of analytic code. 2 | However, whereas a scripted action is written to solve a problem for one study and must be copied-and-pasted to solve a similar problem for another study, a reusable action is written to solve a problem for several studies *without copying-and-pasting between them*. 3 | This makes reusable actions ideal for tasks that must be completed by several studies, such as joining datasets or producing deciles charts. 4 | 5 | ## Running reusable actions 6 | 7 | You can browse existing reusable actions at <https://actions.opensafely.org>. 8 | Although each is different, they have a common API. 9 | Consider the following extract from a study's *project.yaml*: 10 | 11 | ```yaml 12 | actions: 13 | generate_dataset: 14 | run: ehrql:v1 generate-dataset analysis/dataset_definition.py --output output/dataset.csv.gz 15 | outputs: 16 | highly_sensitive: 17 | dataset: output/dataset.csv.gz 18 | 19 | run_a_reusable_action: 20 | # We will run version `v1.0.0` of the reusable action called `a_reusable_action`. 21 | # The reusable action accepts an argument; in this case, a path to a file. 22 | run: a_reusable_action:v1.0.0 output/dataset.csv.gz 23 | # The reusable action accepts a configuration option; 24 | # in this case, an output format.
25 | config: 26 | output-format: PNG 27 | needs: [generate_dataset] 28 | outputs: 29 | moderately_sensitive: 30 | output: output/output_from_a_reusable_action.png 31 | ``` 32 | 33 | In the above extract, the `run` and `config` properties of `run_a_reusable_action` are the common API. 34 | The `run` property, which is required, describes how the reusable action is run. 35 | It comprises the name and the version of the reusable action, and, optionally, one or more arguments. 36 | The `config` property, which is optional, describes configuration options. 37 | 38 | ## Developing reusable actions 39 | 40 | !!! note 41 | If you're thinking about developing a reusable action, then start by creating a new study within the [`opensafely`](https://github.com/opensafely) organisation that encapsulates the problem. 42 | As a minimum, the study should [extract](ehrql/index.md) and operate on a dataset: 43 | indeed, the code that operates on the dataset *is* the reusable action. 44 | 45 | At this point, you should [open an issue](https://github.com/opensafely-actions/.github/issues). 46 | Below, we describe how to convert the study into a reusable action. 47 | 48 | A reusable action is a public repo within the [`opensafely-actions`](https://github.com/opensafely-actions) organisation. 49 | It has a `main` branch, which is the release branch; 50 | versions are marked with [tags](https://git-scm.com/book/en/v2/Git-Basics-Tagging). 51 | In the above extract from a study's *project.yaml*, for example, we will run version `v1.0.0` of the reusable action called `a_reusable_action`. 52 | 53 | The repo has the following minimal structure: 54 | 55 | ```sh 56 | . 57 | ├── README.md 58 | └── action.yaml 59 | ``` 60 | 61 | *README.md* contains information about the reusable action, which is displayed by <https://actions.opensafely.org>. 62 | 63 | *action.yaml* contains a run command, which is composed of a runtime image (either `python`, `r`, or `stata-mp`) and an entry point. 64 | The entry point is a path to a script.
65 | For example: 66 | 67 | ```yaml 68 | run: python:v2 python action/cli.py 69 | ``` 70 | 71 | Where *action/cli.py* is: 72 | 73 | ```python 74 | def main(): 75 | print("A reusable action") 76 | 77 | if __name__ == "__main__": 78 | main() 79 | ``` 80 | 81 | When developing a reusable action, just as when developing a scripted action, the action's dependencies are made available by the runtime; they are not made available by the action. 82 | 83 | * The Python runtime is provided by [`python-docker`](https://github.com/opensafely-core/python-docker). 84 | Its dependencies are in [*requirements.txt*](https://github.com/opensafely-core/python-docker/blob/main/v2/packages.md). 85 | In practice, this means that a Python action's *requirements.txt* is ignored. 86 | * The R runtime is provided by [`r-docker`](https://github.com/opensafely-core/r-docker). 87 | For the v1 r image its dependencies are in [*v1/packages.md*](https://github.com/opensafely-core/r-docker/blob/main/v1/packages.md). 88 | For the v2 r image its dependencies are in [*v2/packages.md*](https://github.com/opensafely-core/r-docker/blob/main/v2/packages.md). 89 | * The Stata runtime is provided by [`stata-docker`](https://github.com/opensafely-core/stata-docker). 90 | Its dependencies are in [*libraries*](https://github.com/opensafely-core/stata-docker/tree/main/libraries). 91 | -------------------------------------------------------------------------------- /docs/adding-a-paper.md: -------------------------------------------------------------------------------- 1 | # Adding an OpenSAFELY paper to OpenSAFELY.org 2 | 3 | We are very keen to compile all pre-printed and published OpenSAFELY studies on [the OpenSAFELY website](https://www.opensafely.org/research). 4 | 5 | We have provided a template file to help you provide this information to us in the most helpful format. The template can be previewed in full below. 
6 | 7 | Please [download the template](paper_template.txt), fill it in (being careful not to change the format), and send it to your co-pilot. 8 | 9 | 10 | 11 | ```yaml 12 | --8<-- "docs/paper_template.txt" 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/codelist-intro.md: -------------------------------------------------------------------------------- 1 | ## What are Codelists? 2 | 3 | Codes are alphanumerical codes that are attached to a clinical or event 4 | description. There are a few different code systems such as SNOMED CT and CTV3, 5 | which means one clinical diagnosis can have different codes, depending on the 6 | system used. Most code systems arrange their codes in a hierarchical structure, 7 | meaning a code can be a parent of another code or codes, and a child of another 8 | code. This hierarchy allows all possible events within the clinical environment 9 | to be organised in a relatively systematic way. 10 | 11 | Each code refers to a particular event or clinical term such as "Type 1 Diabetes 12 | Mellitus". Even within a single coding system, there are multiple codes for each 13 | disease or symptoms with very precise terms. Clinicians use these codes 14 | precisely, though not always consistently, in their everyday work. This means 15 | that to find all the patients with Type 1 diabetes, you may have to search for 16 | 30 plus codes in the clinical record. 17 | 18 | For more information, see [our blog post on constructing codelists](https://www.bennett.ox.ac.uk/blog/2023/09/what-are-codelists-and-how-are-they-constructed/). 19 | 20 | ## OpenCodelists 21 | We built a system for building, reviewing and maintaining codelists at 22 | [OpenCodelists](https://www.opencodelists.org/). 23 | We've made an introductory video to help explain OpenCodelists in more detail. Codelists 24 | that are hosted on this website can be used directly in the Dataset Definition. 
This means 25 | there is no need to download or alter these codelists in the dataset definition, and 26 | they can be reused. 27 | 28 |
29 | 30 |
31 | 32 | ## Applying the principle of open working to making codelists 33 | 34 | !!! note "Our recommended codelist workflow is still in flux" 35 | We are developing several new features on OpenSAFELY Codelists to help audit and quality-assess codelists. In the meantime, here is what we suggest as best practice for recording your decision-making: 36 | 37 | 38 | - Make an issue in the [codelist repo](https://github.com/opensafely/codelist-development) for the Codelist to be discussed 39 | - Discuss all decisions along the way in the issue, for example, why you decided to exclude 40 | "historical asthma" from an asthma codelist 41 | - Who signed off the codelist should be easy to find and transparent. It should be in the GitHub issue (preferably 42 | by the person signing off so they can be contacted via GitHub) and on the published website. 43 | - Write a good description for the website on what it does and does not include and summarise 44 | any key decisions 45 | - All study repos will at some point become public (if they are not already), so bear in mind the 46 | discussion and conversations will be available for examination 47 | - These discussions should be linked to from the website - i.e. link the issue to the 48 | final codelist where it appears in OpenSAFELY Codelists 49 | -------------------------------------------------------------------------------- /docs/codelist-project.md: -------------------------------------------------------------------------------- 1 | ## Adding a codelist 2 | 3 | Your example research template doesn't include any codelists but the folder structure and text files that are needed to include codelists already exist. 4 | Take a look at the `codelists/codelists.txt` file in the repo; this file is currently empty but any codelists that you add to your project will appear here. 
5 | 6 | You can add a codelist from [OpenCodelists](https://www.opencodelists.org) to your project by [manually editing](#manually-editing-codeliststxt) the codelists.txt file or by using the `opensafely codelists add` command. 7 | 8 | For example, running the following command in your terminal: 9 | 10 | ```bash 11 | opensafely codelists add https://www.opencodelists.org/codelist/opensafely/covid-identification/2020-06-03/ 12 | ``` 13 | 14 | will add the [OpenSAFELY COVID Identification](https://www.opencodelists.org/codelist/opensafely/covid-identification) codelist to `codelists.txt` and also download and add `opensafely-covid-identification.csv` to your project. 15 | 16 | ### Manually editing codelists.txt 17 | The naming convention of the line that you need to add to the `codelists/codelists.txt` file follows this structure: a `<codelist ID>` is followed by `/` and a `<version ID>`. 18 | Note that the version ID is a sequence of 8 characters. Some codelists may also have a version tag in the form of a date (YYYY-MM-DD) or a version number (e.g., v1.2) that can be 19 | used in place of the version ID. 20 | 21 | ```bash 22 | <codelist ID>/<version ID> 23 | ``` 24 | 25 | To find this information on the page for each of the codelists on [OpenCodelists](https://www.opencodelists.org), see the orange boxes in the screenshot below. 26 | 27 | ![Finding the codelist ID and version ID on OpenCodelists.](images/adding-codelist-id-version.png) 28 | 29 | You need to add each line into a new line of the `codelists.txt` file. 30 | The next time you run the command `opensafely codelists update` in your terminal, the codelists you specified earlier will be added to the `codelists/` subfolder in your project automatically so you don't need to add these files manually to your project. 
31 | 32 | For example, a `codelists.txt` file of a project may consist of four different lines: 33 | 34 | ```bash 35 | opensafely/aplastic-anaemia/58ac196d 36 | opensafely/asplenia/3ce9e642 37 | opensafely/current-asthma/2020-05-06 38 | primis-covid19-vacc-uptake/bmi_stage/v1.2 39 | ``` 40 | 41 | After running the command `opensafely codelists update` the following four .csv files will be added to your project: 42 | 43 | ```bash 44 | opensafely-aplastic-anaemia.csv 45 | opensafely-asplenia.csv 46 | opensafely-current-asthma.csv 47 | primis-covid19-vacc-uptake-bmi_stage.csv 48 | ``` 49 | 50 | A codelist may be owned by an individual user, rather than an organisation. In this case, the 51 | entry in `codelists.txt` follows this structure: `user/<username>/<codelist name>/<version ID>`. 52 | 53 | If necessary, during initial development you can even import codelists this way before they are published (provided they have been put "under review", not in "draft" state), but ensure they are finalised and updated in your study before running in the real data. 54 | 55 | ## Adding/updating a codelist CSV file 56 | Once you have listed the codelists you need from OpenCodelists in the `codelists.txt` file, you can download the specified files into the `codelists/` folder using the `opensafely` program by running 57 | 58 | ```bash 59 | opensafely codelists update 60 | ``` 61 | 62 | This command should be re-run every time a codelist is added or removed from the `codelists.txt` file. Running this command will automatically generate a file called `codelists.json`, which contains a dictionary of codelist files, the URLs they have been downloaded from, download dates, and SHA hashes of their contents. The file should not be manually edited; however, you will need to add and commit the change and push to GitHub. If you don't, or a newer version is available than that committed, the automated tests will fail with an error message. 
Beware that in Windows, if one or more of these codelist files is open then this command won't be able to run; close them first. 63 | 64 | If necessary, you can also import CSVs not via OpenCodelists - just manually copy the CSV files into `codelists/`. However, we would recommend uploading these to OpenCodelists to import them as above. Note, if you are _also_ using some codelists from OpenCodelists, any manually imported codelists should be stored in a `local_codelists` folder so that they are not overwritten in the next step, as manual changes to CSV files will be clobbered the next time the command is run. 65 | 66 | See more on using Codelists in your study definition in [Working with codelists](legacy/study-def-codelists.md). 67 | -------------------------------------------------------------------------------- /docs/codelist-updating.md: -------------------------------------------------------------------------------- 1 | Once your codelists are [imported into your study](codelist-project.md), they are ready to be 2 | used for running jobs on the [jobs site](jobs-site.md). 3 | 4 | You may encounter a warning message when you try to run jobs, that looks something like this: 5 | 6 | ![Out of date codelists warning on Jobs site.](images/codelists-jobs-warning.png) 7 | 8 | 9 | To fix this, you will need to follow the steps to [add a codelist into your study](codelist-project.md#addingupdating-a-codelist-csv-file) again. 10 | 11 | Note that this warning is only relevant if the jobs you are running require access to the 12 | backend database. Analysis jobs that use data that has already been extracted in a previous 13 | run do not need to update codelists in order to run successfully. 14 | 15 | 16 | !!! 
Info 17 | 18 | [The Dictionary of Medicines and Devices (dm+d)](https://www.bennett.ox.ac.uk/blog/2019/08/what-is-the-dm-d-the-nhs-dictionary-of-medicines-and-devices/)[^1] 19 | is a dictionary of descriptions and (SNOMED-CT) codes which represent medicines and devices in use across the NHS. The codes representing specific medicines can change, and [require special treatment](#addressing-changing-dmd-codes), described below. As a result, 20 | dm+d codelists now download with standardised column headings: "code" (the dm+d code), and "term" (the description) in the CSV files. For backwards compatibility, they also 21 | include a column with the original code column heading (typically "dmd_id") [^2]. 22 | 23 | 24 | ### Addressing changing dm+d codes 25 | 26 | The dm+d coding system is a particular concern with regards to keeping codelists up to date. 27 | dm+d is updated and released on a weekly basis. Codes for Virtual Medicinal Products (VMPs)[^1] 28 | can change, which means that after a new release of dm+d, a VMP with a changed code will no longer match patients that it did previously. 29 | 30 | In order to address this, OpenCodelists maintains a mapping of changed VMP codes. When you run 31 | `opensafely codelists update` to download codelist CSV files into your study repo, dm+d 32 | codelist CSV files will include the codes explicitly specified in the codelist *and* any 33 | previous or subsequent changes to those codes. 34 | 35 | If a new release of dm+d introduces new VMP mappings that affect codes in your codelists, you 36 | may be prompted (by the opensafely command line tool, automated tests in GitHub, or the jobs site) to re-run `opensafely codelists update`, commit the changes and push them to GitHub 37 | before you can run jobs. 38 | 39 | ## "out-of-date" codelists 40 | 41 | Codelists for any coding system may go "out-of-date". 
All coding systems change (with the exception of CTv3, which is no longer updated), and new releases are published which may add new codes or retire codes. 42 | 43 | A codelist version on OpenCodelists is associated with a specific release of a coding system, 44 | and, once under review or published, it cannot change. This means that, for the most part, any 45 | codelist that has been specified in `codelists.txt` with a `version-id` and downloaded into 46 | a study repo will not need to be updated again. 47 | 48 | !!! warning 49 | 50 | This does not mean that the codelist is up-to-date with the most recent release of a coding 51 | system. It only means that the version downloaded in the study has not changed on 52 | OpenCodelists. 53 | 54 | You may need to create new versions of codelists in order to update them to a more recent 55 | coding system release. To do this, go to an existing Codelist page and click on Create new 56 | version. 57 | 58 | [^1]: For further information, refer to our [blog post describing the dm+d coding system]( 59 | https://www.bennett.ox.ac.uk/blog/2019/08/what-is-the-dm-d-the-nhs-dictionary-of-medicines-and-devices/). 60 | [^2]: dm+d codelists are often created by converting a PseudoBNF codelist, which results in 61 | specific column headings (including "dmd_code" for the code column). For more details, see our 62 | [blog post on the relationship between BNF, dm+d and SNOMED-CT](https://www.bennett.ox.ac.uk/blog/2022/11/difference-between-bnf-dm-d-and-snomed-ct-codes/) 63 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | OpenSAFELY is an open source project; all our code is available for inspection and reuse, and we can accept contributions in the following ways: 4 | 5 | ## Documentation 6 | 7 | Typos and other small improvements are always very welcome. 
Raise an issue, or even better a Pull Request, in the [documentation repo](https://github.com/opensafely/documentation/). We will incorporate these contributions after we've checked them for consistency with our house style. 8 | 9 | We welcome proposals for longer topic-based guides. Suggest them as an [issue in the documentation repo](https://github.com/opensafely/documentation/issues), and we'll work with you to define an overall structure and format, before you write and submit the draft. 10 | 11 | ## Reusable actions 12 | These are [units of software that solve a problem for several studies](./actions-reusable.md) without the need to copy-and-paste between them. They can be shared between researchers, even between groups that use different programming languages, and are one of the best ways you can make contributions that benefit the community. If you've written a reusable action you'd like to contribute to the actions library, please get in touch at [team@opensafely.org](mailto:team@opensafely.org). 13 | 14 | ## Peer support 15 | 16 | We encourage researchers to post questions in the [Q&A Forum](https://github.com/opensafely/documentation/discussions). We would love more people to chip in and attempt to answer questions! 17 | 18 | ## Core code 19 | We would like to accept _ad hoc_ contributions to the [core code](https://github.com/opensafely-core/), but this is relatively hard for us to do. New features and bugfixes need a lot of work from our side to integrate and maintain, because a lot of the design and prioritisation decisions are made with lots of different parts of the framework in mind. Most code changes have an associated administrative and legal burden: we need to secure contributor agreements from individuals or their employers. 
We plan to seek funding to help resource this kind of work, as we believe it can really strengthen and build our community, but for the time being, we suggest [filing bug reports or feature requests](https://github.com/opensafely-core) rather than contributing code. 20 | 21 | An exception is where external contributors are in a position to dedicate significant resource (e.g. more than one full-time person equivalent) for a prolonged period of time. In these cases, we will be happy to consider allocating resource from our side to help with the planning and integration work; just get in touch with your proposed features at [team@opensafely.org](mailto:team@opensafely.org). 22 | -------------------------------------------------------------------------------- /docs/css/extra.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --colorPrimary: #002147; 3 | --colorLightPrimary: #122f53; 4 | 5 | --colorDarkBlue: #001c3d; 6 | --colorWashedOutBlue: #193658; 7 | --colorVeryLightBlue: #f0f5f8; 8 | 9 | --colorLink: #2f72a8; 10 | --colorLinkHover: #44687d; 11 | --colorLinkHeader: #70a9d6; 12 | 13 | --colorWarning: #bc9a23; 14 | 15 | --colorOffBlack: #333333; 16 | --colorLightGrey: #e0ded9; 17 | --colorVLightGrey: #f9f8f5; 18 | --colorWhite: #ffffff; 19 | } 20 | 21 | [data-md-color-scheme="opensafely"] { 22 | --md-accent-fg-color: var(--colorWashedOutBlue); 23 | --md-admonition-bg-color: var(--colorVeryLightBlue); 24 | --md-code-bg-color: var(--colorVLightGrey); 25 | --md-default-fg-color--light: #77706b; 26 | --md-footer-bg-color--dark: var(--colorDarkBlue); 27 | --md-footer-bg-color: var(--colorPrimary); 28 | --md-primary-fg-color--dark: var(--md-primary-fg-color); 29 | --md-primary-fg-color--light: var(--md-primary-fg-color); 30 | --md-primary-fg-color: var(--colorPrimary); 31 | --md-code-hl-generic-color: #77706b; 32 | --md-code-hl-variable-color: #756e69; 33 | --md-default-bg-color: var(--colorVeryLightBlue); 34 | } 35 | 36 | 
/* Base typography for the whole site */ body { 37 | color: var(--colorOffBlack); 38 | font-family: "Public Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", 39 | Roboto, Oxygen, Ubuntu, Cantarell, "Open Sans", "Helvetica Neue", sans-serif; 40 | letter-spacing: -0.02em; 41 | -webkit-font-smoothing: unset; 42 | -moz-osx-font-smoothing: unset; 43 | } 44 | 45 | .md-header[data-md-state="shadow"] { 46 | box-shadow: none; 47 | } 48 | 49 | .md-header-nav__topic { 50 | font-weight: 600; 51 | } 52 | 53 | .md-nav__link:focus, 54 | .md-nav__link:hover { 55 | text-decoration: underline; 56 | } 57 | 58 | .md-nav__item .md-nav__link--active { 59 | text-decoration: underline; 60 | } 61 | /* NOTE(review): --colorBodyCopy is not declared in this stylesheet's :root block; confirm it is defined elsewhere, otherwise this color declaration falls back to the inherited value */ 62 | .md-typeset h1, 63 | .md-typeset h2, 64 | .md-typeset h3, 65 | .md-typeset h4, 66 | .md-typeset h5, 67 | .md-typeset h6 { 68 | color: var(--colorBodyCopy); 69 | font-weight: 700; 70 | } 71 | 72 | .md-typeset a { 73 | color: var(--colorLink); 74 | font-weight: 600; 75 | text-decoration: underline; 76 | text-underline-offset: 2px; 77 | text-decoration-color: var(--colorLinkHeader); 78 | } 79 | 80 | .md-typeset a:hover, 81 | .md-typeset a:focus { 82 | color: var(--colorLinkHover); 83 | text-decoration-color: var(--colorLinkHover); 84 | outline-offset: 2px; 85 | outline-color: var(--colorLinkHeader); 86 | } 87 | 88 | .md-typeset .headerlink { 89 | text-decoration: none; 90 | } 91 | 92 | .md-content { 93 | margin-bottom: 2rem; 94 | } 95 | 96 | .md-main { 97 | background-color: var(--colorVeryLightBlue); 98 | } 99 | /* Second .md-content rule (the first sets only margin-bottom): white rounded card with a subtle shadow */ 100 | .md-content { 101 | box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); 102 | background: var(--colorWhite); 103 | border-radius: 8px; 104 | } 105 | /* NOTE(review): --colorCopyrightGrey is also not declared in this stylesheet's :root block; confirm it is defined elsewhere */ 106 | .md-footer-copyright { 107 | color: var(--colorCopyrightGrey); 108 | } 109 | 110 | .md-footer-copyright__highlight { 111 | color: var(--colorCopyrightGrey); 112 | margin: 1em 0; 113 | } 114 | 115 | .md-header-nav__title { 116 | padding-left: 0.5rem; 117 | } 118 | 119 | .md-header__button.md-logo { 120 | margin: 0 0.25rem 0 0; 121 | 
padding: 0.25rem; 122 | } 123 | 124 | .md-header__button.md-logo img, 125 | .md-header__button.md-logo svg { 126 | height: 1.75rem; 127 | } 128 | 129 | [dir=ltr] .md-header__title { 130 | margin-left: 0; 131 | } 132 | 133 | /* Details element */ 134 | .md-typeset .admonition { 135 | background-color: var(--colorVLightGrey); 136 | box-shadow: none; 137 | border-color: var(--colorLightGrey) !important; 138 | font-size: 0.95em; 139 | } 140 | 141 | .md-typeset .note .admonition-title { 142 | background-color: var(--colorLightPrimary); 143 | color: var(--colorWhite); 144 | } 145 | 146 | .md-typeset .note .admonition-title a { 147 | color: var(--colorWhite); 148 | } 149 | 150 | .md-typeset .note > .admonition-title::before { 151 | background-color: var(--colorWhite); 152 | } 153 | 154 | .md-typeset .warning > .admonition-title::before { 155 | background-color: var(--colorWarning); 156 | } 157 | 158 | .md-nav--secondary { 159 | background-color: transparent; 160 | } 161 | 162 | .md-nav--secondary .md-nav { 163 | background-color: transparent; 164 | } 165 | 166 | .md-header-nav__source .md-source__facts { 167 | display: none; 168 | } 169 | 170 | .highlight .gp, .highlight .go { /* Generic.Prompt, Generic.Output */ 171 | user-select: none; 172 | } 173 | 174 | .md-sidebar { 175 | padding: 0; 176 | } 177 | /* Footer navigation links; the inner rules use CSS nesting */ 178 | .footer-nav-buttons { 179 | border-top: 1px solid var(--colorLightGrey); 180 | 181 | .md-footer__link { 182 | color: var(--colorLink); 183 | font-weight: 600; 184 | text-decoration-color: var(--colorLinkHeader); 185 | text-underline-offset: 2px; 186 | 187 | .md-footer__title { 188 | font-size: 0.8rem; 189 | } 190 | 191 | .md-footer__direction { 192 | opacity: 0.9; 193 | } 194 | 195 | .md-ellipsis { 196 | text-decoration: underline; 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /docs/css/lite-yt-embed.css: -------------------------------------------------------------------------------- 1 | lite-youtube { 
2 | background-color: #000; 3 | position: relative; 4 | display: block; 5 | contain: content; 6 | background-position: center center; 7 | background-size: cover; 8 | cursor: pointer; 9 | max-width: 720px; 10 | } 11 | 12 | /* gradient */ 13 | lite-youtube::before { 14 | content: ''; 15 | display: block; 16 | position: absolute; 17 | top: 0; 18 | background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAADGCAYAAAAT+OqFAAAAdklEQVQoz42QQQ7AIAgEF/T/D+kbq/RWAlnQyyazA4aoAB4FsBSA/bFjuF1EOL7VbrIrBuusmrt4ZZORfb6ehbWdnRHEIiITaEUKa5EJqUakRSaEYBJSCY2dEstQY7AuxahwXFrvZmWl2rh4JZ07z9dLtesfNj5q0FU3A5ObbwAAAABJRU5ErkJggg==); 19 | background-position: top; 20 | background-repeat: repeat-x; 21 | height: 60px; 22 | padding-bottom: 50px; 23 | width: 100%; 24 | transition: all 0.2s cubic-bezier(0, 0, 0.2, 1); 25 | } 26 | 27 | /* responsive iframe with a 16:9 aspect ratio 28 | thanks https://css-tricks.com/responsive-iframes/ 29 | */ 30 | lite-youtube::after { 31 | content: ""; 32 | display: block; 33 | padding-bottom: calc(100% / (16 / 9)); 34 | } 35 | lite-youtube > iframe { 36 | width: 100%; 37 | height: 100%; 38 | position: absolute; 39 | top: 0; 40 | left: 0; 41 | border: 0; 42 | } 43 | 44 | /* play button */ 45 | lite-youtube > .lty-playbtn { 46 | display: block; 47 | /* Make the button element cover the whole area for a large hover/click target… */ 48 | width: 100%; 49 | height: 100%; 50 | /* …but visually it's still the same size */ 51 | background: no-repeat center/68px 48px; 52 | /* YT's actual play button svg */ 53 | /* NOTE(review): the data URI below has no SVG payload after 'utf8,' in this copy; confirm the markup is present in the real file */ background-image: url('data:image/svg+xml;utf8,'); 54 | position: absolute; 55 | cursor: pointer; 56 | z-index: 1; 57 | filter: grayscale(100%); 58 | transition: filter .1s cubic-bezier(0, 0, 0.2, 1); 59 | border: 0; 60 | } 61 | 62 | lite-youtube:hover > .lty-playbtn, 63 | lite-youtube .lty-playbtn:focus { 64 | filter: none; 65 | } 66 | 67 | /* Post-click styles */ 68 | lite-youtube.lyt-activated { 69 | cursor: unset; 70 | } 71 | 
lite-youtube.lyt-activated::before, 72 | lite-youtube.lyt-activated > .lty-playbtn { 73 | opacity: 0; 74 | pointer-events: none; 75 | } 76 | 77 | .lyt-visually-hidden { 78 | clip: rect(0 0 0 0); 79 | clip-path: inset(50%); 80 | height: 1px; 81 | overflow: hidden; 82 | position: absolute; 83 | white-space: nowrap; 84 | width: 1px; 85 | } 86 | -------------------------------------------------------------------------------- /docs/css/youtube.css: -------------------------------------------------------------------------------- 1 | .video-wrapper { 2 | position: relative; 3 | display: block; 4 | height: 0; 5 | padding: 0; 6 | overflow: hidden; 7 | padding-bottom: 56.25%; 8 | } 9 | .video-wrapper > iframe { 10 | position: absolute; 11 | top: 0; 12 | bottom: 0; 13 | left: 0; 14 | width: 100%; 15 | height: 100%; 16 | border: 0; 17 | } 18 | 19 | .youtube-embed { 20 | display: block; 21 | padding: 1rem 0; 22 | 23 | lite-youtube { 24 | margin: 0 auto; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /docs/data-sources/cpns.md: -------------------------------------------------------------------------------- 1 | The Covid-19 Patient Notification System (CPNS) is the route by which NHS England are informed of positive COVID-19 in-hospital deaths. 2 | Initially this was lab-confirmed covid deaths only, but also includes suspected cases from 28 April 2020 onwards. 3 | 4 | ## Metadata 5 | 6 | * **Data provider** NHS England. 7 | * **Participation / Coverage** In-hospital covid-related deaths in England. 8 | * **Update frequency in OpenSAFELY** Weekly. 9 | * **Delay between event occurring and event appearing in OpenSAFELY** Within 2 weeks. 10 | * **Available from** 24/03/2020 onwards (all test-positive deaths); 28/04/2020 onwards (test-positive and covid documented as underlying cause). 11 | * **Collected information** Admission, test, result, and death dates; hospital and department of death; basic demographics. 
12 | 13 | ## More information 14 | * [Notebook showing breakdown of ethnicity codes](https://github.com/opensafely/rapid-reports/blob/master/notebooks/ethnicity-codes.ipynb) - (private, pending review & publication) 15 | * [Press release from Arden&GEM](https://www.ardengemcsu.nhs.uk/showcase/news-events/news-events/supporting-providers-to-record-covid-19-patient-notifications/) 16 | * [Letter from NHSE to Trust chief execs regarding changes to data collection](https://www.england.nhs.uk/coronavirus/wp-content/uploads/sites/52/2020/04/C0389-update-to-cpns-reporting-letter-27-april-2020.pdf) 17 | * [Technical summary on data series on deaths in people with COVID-19](https://www.gov.uk/government/publications/phe-data-series-on-deaths-in-people-with-covid-19-technical-summary) 18 | -------------------------------------------------------------------------------- /docs/data-sources/ecds.md: -------------------------------------------------------------------------------- 1 | The Emergency Care Data Set (ECDS) is the national data set for urgent and emergency care. 2 | ECDS is part of Hospital Episode Statistics (HES) and is provided to OpenSAFELY via NHS Digital's Secondary Uses Service (SUS). 3 | 4 | ## Metadata 5 | 6 | * **Data provider** NHS England. 7 | * **Participation / Coverage** Emergency Department attendances in NHS Trusts in England. 8 | * **Update frequency in OpenSAFELY** Approximately monthly. 9 | * **Delay between event occurring and event appearing in OpenSAFELY** Approximately 1-2 months. 10 | * **Collected information** Attendance, admission and discharge dates; locations; presenting complaints; diagnoses. 11 | 12 | Diagnoses and discharge destinations are coded using SNOMED CT. 
13 | 14 | ## More information 15 | * [Notebook showing breakdown of ethnicity codes](https://github.com/opensafely/rapid-reports/blob/master/notebooks/ethnicity-codes.ipynb) - (private, pending review & publication) 16 | * [NHS Digital ECDS site](https://digital.nhs.uk/data-and-information/data-collections-and-data-sets/data-sets/emergency-care-data-set-ecds) 17 | * [NHS Digital SUS site](https://digital.nhs.uk/services/secondary-uses-service-sus/secondary-uses-services-sus-guidance) 18 | * [NHS Digital HES site](https://digital.nhs.uk/data-and-information/data-tools-and-services/data-services/hospital-episode-statistics) 19 | * [NHS Digital ECDS data dictionary](https://www.datadictionary.nhs.uk/data_sets/cds_v6-2/cds_v6-2-3_type_011_-_emergency_care_cds.html) 20 | * [NHS Digital ECDS Information Standard](https://digital.nhs.uk/data-and-information/information-standards/information-standards-and-data-collections-including-extractions/publications-and-notifications/standards-and-collections/dcb0092-2062-commissioning-data-sets-emergency-care-data-set ) 21 | * [NHS Digital ECDS Technical Output Specification](https://digital.nhs.uk/data-and-information/data-collections-and-data-sets/data-sets/emergency-care-data-set-ecds/ecds-latest-update) 22 | * [ECDS Data Quality Dashboards](https://digital.nhs.uk/data-and-information/data-tools-and-services/data-services/emergency-care-data-set-ecds-data-quality ) 23 | -------------------------------------------------------------------------------- /docs/data-sources/emis.md: -------------------------------------------------------------------------------- 1 | !!! warning 2 | Research access to the backend provided by EMIS is temporarily unavailable, 3 | pending funding arrangements between NHS England and EMIS. 4 | When funding has been secured, 5 | we will publish a timeline for gradually reopening access. 
6 | 7 | ## OpenSAFELY-EMIS database builds 8 | 9 | When EMIS receive data from external data sources (e.g. ONS), records are appended to the 10 | relevant tables, not updated. The relevant tables have an `upload_date` column, and to get the latest data, we have to query for the most recent `upload_date`. 11 | -------------------------------------------------------------------------------- /docs/data-sources/hcd.md: -------------------------------------------------------------------------------- 1 | The High Cost Drugs (HCD) data is a dataset containing specialist medicines prescribed by hospitals to patients for the management of long-term conditions. 2 | 3 | ## Metadata 4 | 5 | * **Data provider** North East Commissioning Support Unit (NECS) with approvals from NHS Digital. 6 | * **Participation / Coverage** Hospital prescribing of [high cost drugs](https://www.england.nhs.uk/publication/nhs-england-drugs-list/) across all 135 CCGs nationally. 7 | * **Update frequency in OpenSAFELY** Data covering April 2018 to March 2020 is currently available in OpenSAFELY, with no updates planned (see discussion section of [paper](https://wellcomeopenresearch.org/articles/6-360) for our proposal). 8 | * **Delay between event occurring and event appearing in OpenSAFELY** NA - currently a one-off extract. 9 | * **Collected information** Patient characteristics; clinical indications; medicine prescribed. 10 | 11 | 12 | ## Basic Background 13 | 14 | Hospitals in England supply medicines to patients either directly or through “homecare” providers who deliver medicines to a patient's home. The majority of medicines are funded through overall hospital contracts, included in tariffs; however, for certain HCDs, hospitals are required to provide a submission for each patient to the relevant commissioner, either NHSE or one of 135 local CCGs, in order to receive payment. 15 | 16 | The majority of submissions relate to a prescription of a HCD, although some submissions relate to associated services (i.e. 
home care delivery charges). There is a [national list](https://www.england.nhs.uk/publication/nhs-england-drugs-list/) of the medicines that are funded by NHSE and locally agreed lists for each CCG. These patient-level submissions are processed by intermediate organisations, Commissioning Support Units (CSUs), to support financial payments and associated activities like summary reporting. This dataset contains data collated from all 135 CCGs to create a single comprehensive dataset. 17 | 18 | The HCD dataset is a patient-level dataset and includes variables on patient characteristics, clinical indications and medicine prescribed. Details of the national specification for submissions to the dataset are published in the [NHS data dictionary](https://www.datadictionary.nhs.uk/data_sets/supporting_data_sets/drugs_patient_level_contract_monitoring_data_set.html?hl=drugs%2Cpatient%2Clevel%2Ccontract%2Cmonitoring%2Cdata%2Cset). 19 | 20 | A [paper describing](https://wellcomeopenresearch.org/articles/6-360) the HCD dataset was published in December 2021. 21 | 22 | 23 | ## More information 24 | 25 | * [A comprehensive high cost drugs dataset from the NHS in England - An OpenSAFELY-TPP Short Data Report](https://wellcomeopenresearch.org/articles/6-360) 26 | * National Data Dictionary: [Drugs Patient Level Contract Monitoring Data Set](https://www.datadictionary.nhs.uk/data_sets/supporting_data_sets/drugs_patient_level_contract_monitoring_data_set.html?hl=drugs%2Cpatient%2Clevel%2Ccontract%2Cmonitoring%2Cdata%2Cset) 27 | 28 | 29 | --8<-- "includes/glossary.md" 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/data-sources/icnarc.md: -------------------------------------------------------------------------------- 1 | The ICNARC-CMP dataset contains information on covid-related intensive care admissions in England. 2 | 3 | !!! 
warning 4 | ICNARC data can only be used in collaboration with ICNARC researchers who must be involved in working on the study and writing it up. 5 | Please contact your co-pilot, or if you have any questions. 6 | 7 | !!! warning 8 | Data from ICNARC were last imported on 21-Jan-2021, with no further imports currently planned. Alternative data on ICU admission can be gleaned from SUS (i.e. returning=days_in_critical_care). 9 | 10 | From ICNARC's website: 11 | 12 | > The Case Mix Programme (CMP) is an audit of patient outcomes from adult, general critical care units (intensive care and combined intensive care/high dependency units) covering England, Wales and Northern Ireland. 13 | > The CMP is listed in the Department of Health's 'Quality Accounts' as a recognised national audit by the National Advisory Group on Clinical Audit & Enquiries (NAGCAE) for 'Acute' care. 14 | 15 | Currently only Covid-19 positive patients are provided by ICNARC. 16 | 17 | ## Metadata 18 | 19 | * **Data provider** ICNARC 20 | * **Participation / Coverage** Adult ICU/HDUs admissions in England, Wales, NI. 21 | Specialist units (eg neuro / cardiac) also participate. covid-19 admissions only 22 | * **Provenance** ICUs and HDUs 23 | * **Update frequency in OpenSAFELY** Approximately weekly. 24 | * **Delay between event occurring and event appearing in OpenSAFELY** Approximately 1-2 weeks. 25 | * **Collected information** Admission, discharge and transfer dates; reason for admission; clinical support/interventions; clinical findings 26 | 27 | ## More Information 28 | 29 | [CMP resources]( https://www.icnarc.org/Our-Audit/Audits/Cmp/Resources) — this is comprehensive, and includes links to the data dictionary, CRF, data flows, and other useful resources. 
30 | 31 | ## Datasource-specific glossary 32 | 33 | * **Advanced respiratory support** unclear, possibly non-intubated ventilation 34 | * **Basic respiratory support** unclear, possibly intubated ventilation 35 | * **Admission** these include _any_ admission to ICU even if the patient did not require critical care, for example due to bed pressures elsewhere in the hospital. 36 | It may be useful to filter by severity, e.g., ventilated patients only. 37 | Similarly, potentially critically ill patients cared-for by ICU staff but who are admitted to a different unit will not be included in the CMP. 38 | Each admission is a row, so patients transferred to other units will appear in the dataset multiple times, even if it’s part of the same spell. 39 | -------------------------------------------------------------------------------- /docs/data-sources/index.md: -------------------------------------------------------------------------------- 1 | # Data Sources 2 | 3 | {{ build_toc(navigation, page) }} 4 | -------------------------------------------------------------------------------- /docs/data-sources/intro.md: -------------------------------------------------------------------------------- 1 | This section provides contextual information on the core primary care EHR systems inside which OpenSAFELY is built (currently TPP and EMIS), as well as all external datasets imported to the secure EHR environment. 2 | To learn about querying the data, refer to the [documentation on ehrQL](../ehrql/index.md). 3 | 4 | View information on [available datasets](index.md). 5 | 6 | 7 | ## What is primary care data? 8 | The core patient-level data used within OpenSAFELY is based on electronic GP records that are collected and securely stored to facilitate patient management by healthcare providers. They capture symptoms, test results, diagnoses, prescriptions, onward referrals, demographic and social characteristics, and so on. 
Essentially, everything about a patient that is electronically recorded or accessed by GPs. 9 | 10 | GP records, or primary care records, can also be used for conducting health research, which is what OpenSAFELY was built for. We've made a video to help explain primary care data in more detail; essential viewing if you're new to this domain. 11 | 12 |
13 | 14 |
15 | -------------------------------------------------------------------------------- /docs/data-sources/isaric.md: -------------------------------------------------------------------------------- 1 | The ISARIC dataset contains data from the International Severe Acute Respiratory and Emerging Infection Consortium. 2 | 3 | ---8<-- 'includes/isaric-warning-header.md' 4 | 5 | !!! warning 6 | This section is a work in progress. 7 | -------------------------------------------------------------------------------- /docs/data-sources/onsdeaths.md: -------------------------------------------------------------------------------- 1 | Date and cause of death based on information recorded when deaths are certified and registered in England and Wales. 2 | 3 | ## Metadata 4 | 5 | * **Data provider** Office for National Statistics. 6 | * **Participation / Coverage** Deaths occurring in England and Wales, including non-residents. 7 | * **Provenance** General Register Office. 8 | * **Update frequency in OpenSAFELY** Approximately weekly. 9 | * **Available from** deaths recorded from February 2019 onwards. 10 | * **Delay between event occurring and event appearing in OpenSAFELY** Approximately 1-2 weeks. 11 | * **Collected information** Date of death, causes of death, age, sex, place of death. 12 | 13 | Causes of death are coded using ICD-10.
14 | 15 | ## More information 16 | 17 | * [Information collected at death registration](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/methodologies/userguidetomortalitystatisticsjuly2017#information-collected-at-death-registration) 18 | * [User guide to mortality statistics](https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/methodologies/userguidetomortalitystatisticsjuly2017) 19 | * [Metadata](https://www.ons.gov.uk/aboutus/transparencyandgovernance/freedomofinformationfoi/howdeathregistrationsarerecordedandstoredbyons) 20 | 21 | ## Glossary 22 | * **Underlying cause of death (icd10u)** A cause code that identifies the medical condition judged to be the underlying cause of death according to the rules of the _10th Revision of the International Classification of Diseases_. This field appears once for each cause coded routine and inquest death (deaths at age 28 days and over). This code is generally known as 'the cause of death' and is used in single cause tabulations and analyses. The death is assigned the ICD10U code on the basis of ICD-10 codes allocated to the death by the MICAR program. A coder may also manually assign these codes to a death record. 23 | * **Primary / secondary cause of death** These are not concepts used in ONS cause of death data. There is the underlying cause, and then a list of up to 15 medical conditions (`ICD10001` to `ICD10015`, given as ICD-10 codes) mentioned on the death certificate. These codes are not ordered meaningfully. 
24 | -------------------------------------------------------------------------------- /docs/data-sources/sgsscovid.md: -------------------------------------------------------------------------------- 1 | > The Second Generation Surveillance System (SGSS) is an application that stores and manages data on laboratory isolates and notifications, and is the preferred method for capturing routine laboratory surveillance data on infectious diseases and antimicrobial resistance from laboratories across England. -- [UKHSA Laboratory Reporting Guidelines, page 5](https://assets.publishing.service.gov.uk/media/66e2e0ba0d913026165c3d77/UKHSA_Laboratory_reporting_guidelines_May_2023.pdf#page=5) 2 | 3 | 4 | SGSS data currently available in OpenSAFELY is for SARS-CoV-2 test results from the UK's Pillar 1 and Pillar 2 tests. 5 | Some SARS-CoV-2 testing info also flows directly into the primary care record from SGSS (see below). 6 | 7 | 8 | ## Metadata 9 | 10 | 11 | * **Data provider** Public Health England 12 | * **Participation / Coverage** Unclear, and varies over time 13 | * **Provenance** PHE and NHS hospital testing labs 14 | * **Update frequency in OpenSAFELY** Approximately monthly 15 | * **Delay between event occurring and event appearing in OpenSAFELY** Approximately 1-2 weeks. 16 | * **Collected information** Earliest specimen date, lab report date, age, sex, county, test result, source ("pillar 2" or "other") 17 | 18 | 19 | ## Overview 20 | SGSS contains information on patients receiving a swab test for SARS-CoV-2, from Pillar 1 (NHS and PHE labs) and Pillar 2 (commercial partners). 21 | 22 | It includes "earliest specimen date" (when the sample was taken); "lab report date" (when the result was uploaded to the SGSS system); pillar 2 or "other"; result (pos/neg); some demographics. 23 | 24 | There are two tables, one for positive tests and one for negative tests.
25 | 26 | Multiple tests for the same person are treated as a single 'infection episode', no matter how far apart, and so the system will only return the earliest test (sample date and report date), and all subsequent tests are dropped. 27 | 'Infection episodes' are split by test results, so SGSS will in theory return data for both the earliest positive test, and the earliest negative test. 28 | However, negative testing data appears to be incomplete. 29 | Other viruses/organisms may have a finite episode length, so that any tests occurring within say the first 14 days of the first test are dropped, but for coronavirus the episode length is indefinite, though this may change in future. 30 | 31 | **Negative test data is unreliable — DO NOT USE** 32 | 33 | SARS-CoV-2 test results from various sources are also coded [within SystmOne](systmone.md). 34 | 35 | 36 | ## More information 37 | 38 | * [Regarding lab-confirmed cases](https://coronavirus.data.gov.uk/about#total-and-daily-uk-cases) 39 | * [National COVID-19 surveillance reports](https://www.gov.uk/government/publications/national-covid-19-surveillance-reports) 40 | * [Testing methodology](https://www.gov.uk/government/publications/coronavirus-covid-19-testing-data-methodology/covid-19-testing-data-methodology-note) 41 | * [Testing strategy](https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/878121/coronavirus-covid-19-testing-strategy.pdf#page=8) 42 | 43 | 44 | --8<-- "includes/glossary.md" 45 | -------------------------------------------------------------------------------- /docs/data-sources/therapeutics.md: -------------------------------------------------------------------------------- 1 | The Therapeutics dataset contains information on COVID treatments used in inpatient and outpatient settings. 
2 | 3 | ## Metadata 4 | 5 | * **Data provider** NHS England 6 | * **Participation / Coverage** Inpatients and outpatients treated with antivirals/nMABs for COVID-19 in England 7 | * **Provenance** Data sourced largely from the BlueTeq system (forms completed by clinicians) 8 | * **Update frequency in OpenSAFELY** Approximately weekly 9 | * **Delay between event occurring and event appearing in OpenSAFELY** Approximately 2-9 days 10 | * **Collected information** Treatment start date; therapeutic intervention; COVID indication, current status, risk group, region 11 | 12 | 13 | ## Overview 14 | Antivirals and neutralising monoclonal antibodies (nMABs) for COVID-19 can be administered in an inpatient setting or, for outpatients, in COVID Medicine Delivery Units (CMDUs) specifically set up for this purpose. For patients considered for these treatments, clinicians submit completed forms to NHS England. Each row represents one completed form for one course of treatment. Data received by OpenSAFELY currently covers patients who were approved for treatment. The patient may or may not have actually received the treatment or completed the course (but we assume that they usually do). They may have another form completed for another treatment, either because it was decided to give them a different treatment, or for some other reason. They may in theory also have another form completed some months later for another instance of infection. 15 | 16 | Treatment dates may be in the past or future at the point when the form is submitted. 17 | 18 | 19 | ## More Information 20 | 21 | * [Treatment guidelines](https://www.nice.org.uk/guidance/ta878) 22 | 23 | ## Datasource-specific glossary 24 | 25 | * **date**: From `TreatmentStartDate`. This is entered by the clinician completing the form and may be in the past, future or the same date on which the form is submitted. 26 | * **therapeutic**: Name of treatment(s) given.
When used as a `returning` option, this is a comma-separated string from `Intervention` (removing " and "). 27 | * **region**: The region is one of seven NHS regions, derived from the NHS Trust/site from which the form was submitted. 28 | * **risk group**: This is present for outpatients only. Derived from tick boxes on the form for Molnupiravir, Sotrovimab and Casirivimab combined into one comma-separated string. May return duplicates to be resolved e.g. "cancer,cancer". Can be missing (if clinician selected "Other" and supplied risk group as free text). Risk groups are only returned if they match [a defined set of available values](https://github.com/opensafely-core/cohort-extractor/blob/main/cohortextractor/therapeutics_utils.py#L23). 29 | * **with_these_therapeutics**: As `therapeutic` above. Filters on `Intervention`. Case insensitive. Currently available options: 'remdesivir', 'tocilizumab', 'sarilumab', 'casirivimab and imdevimab', 'molnupiravir', 'sotrovimab', 'paxlovid'. 30 | * **with_these_statuses**: Filters on `CurrentStatus`. Should be one or more of 'Approved', 'Treatment Complete', 'Treatment Not Started', 'Treatment Stopped'. 31 | Case insensitive. 32 | * **with_these_indications**: Filters on `COVID_indication`. Should be one or more of 'non-hospitalised', 'hospital_onset', 'hospitalised_with'. 33 | -------------------------------------------------------------------------------- /docs/data-sources/ukrr.md: -------------------------------------------------------------------------------- 1 | The UK Renal Registry (UKRR) contains data on patients under secondary renal care (advanced chronic kidney disease stages 4 and 5, dialysis, and kidney transplantation). The purpose of the UKRR is to audit secondary kidney 2 | care delivery in the UK especially dialysis and kidney transplant. Data is collected from many different renal IT systems. 3 | 4 | !!! warning 5 | UKRR data can only be used subject to additional approvals.
6 | Please contact your co-pilot, or if you have any questions. 7 | 8 | There are 5 different datasets available to OpenSAFELY. These are: 9 | 10 | * **2019_prevalence** *a prevalence cohort of patients alive and on RRT in December 2019* 11 | * **2020_prevalence** *a prevalence cohort of patients alive and on RRT in December 2020* 12 | * **2021_prevalence** *a prevalence cohort of patients alive and on RRT in December 2021* 13 | * **2020_incidence** *an incidence cohort of patients who started RRT in 2020* 14 | * **2020_ckd** *a snapshot prevalence cohort of patients with Stage 4 or 5 CKD who were reported to the UKRR to be under renal care in December 2020* 15 | -------------------------------------------------------------------------------- /docs/datapast/the_clinical_datapast_function.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/datapast/the_clinical_datapast_function.pdf -------------------------------------------------------------------------------- /docs/documents/OpenSAFELY_Output_Review_Form_ADD_WORKSPACE_NAME_ADD_DATE.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/documents/OpenSAFELY_Output_Review_Form_ADD_WORKSPACE_NAME_ADD_DATE.docx -------------------------------------------------------------------------------- /docs/five-safes.md: -------------------------------------------------------------------------------- 1 | OpenSAFELY follows the [Five Safes](https://ukdataservice.ac.uk/help/secure-lab/what-is-the-five-safes-framework/) framework for data access to allow safe and efficient use of data. This framework models data access into 5 independent dimensions: 2 | 3 | * **Safe projects** - does the project make good use of the data?
4 | * **Safe people** - are the researchers using the data appropriately trained and aware of their role in data protection? 5 | * **Safe data** - what is the potential for individuals to be identified in the data? 6 | * **Safe settings** - are there technical controls on access to the data? 7 | * **Safe outputs** - is there any residual risk in outputs being released from the environment? 8 | 9 | You can read more about how we use the Five Safes Framework to design the OpenSAFELY platform in our blog post [The ‘Five Safes’ Framework and applying it to OpenSAFELY](https://www.bennett.ox.ac.uk/blog/2023/03/the-five-safes-framework-and-applying-it-to-opensafely/). 10 | -------------------------------------------------------------------------------- /docs/getting-started/explanation/index.md: -------------------------------------------------------------------------------- 1 | This section contains background information about running OpenSAFELY. 2 | 3 | * [Options for running OpenSAFELY](options-for-running-opensafely/index.md) 4 | * [Understanding GitHub Codespaces](understanding-github-codespaces/index.md) 5 | * [Understanding the software used to run OpenSAFELY](understanding-the-software-used-to-run-opensafely/index.md) 6 | -------------------------------------------------------------------------------- /docs/getting-started/explanation/options-for-running-opensafely/index.md: -------------------------------------------------------------------------------- 1 | In this guide, we've documented two different ways to work with OpenSAFELY: 2 | 3 | 1. *Use an online environment where the needed software is already 4 | installed for you* (our recommended option). 5 | 6 | Services such as [GitHub Codespaces](https://github.com/features/codespaces) provide online 7 | computing environments. GitHub Codespaces has a free plan with a generous 8 | monthly usage limit for working with code in public or 9 | private repositories. 
We've adapted our demonstration research 10 | study and this guide such that you can follow along with GitHub Codespaces, 11 | should you choose to do so. 12 | 13 | You might prefer an online environment if one or more of the 14 | following apply: 15 | 16 | * you are investigating what OpenSAFELY offers and want to start 17 | using it immediately; 18 | * you do not have administrative access to install software on your 19 | work computer; 20 | * you are not sure about whether your computer supports Docker, 21 | which OpenSAFELY uses; 22 | * you want to try OpenSAFELY via a device other than a desktop or 23 | laptop computer, e.g. a tablet. 24 | 25 | To use GitHub Codespaces, the only requirements are: 26 | 27 | * you have a current version of a modern web browser (e.g. Chrome, 28 | Edge, Firefox, Safari); 29 | * your internet connection allows access to GitHub Codespaces 30 | 31 | If your internet connection is not managed by you directly — 32 | perhaps you are in the office or connecting via your employer's 33 | virtual private network (VPN) — it may be that [corporate firewalls 34 | prevent access to 35 | GitHub Codespaces](https://docs.github.com/en/codespaces/troubleshooting/troubleshooting-your-connection-to-github-codespaces#browser-cannot-connect); 36 | please contact the IT staff who manage your internet connection for help. 37 | 38 | 2. *Install the required software to your own computer*. 39 | 40 | You might prefer a local installation if one or more of the following 41 | apply: 42 | 43 | * you already have the software required (Docker, Python and Git or 44 | GitHub Desktop) installed; 45 | * or you don't already have the required software installed, but 46 | are comfortable installing and configuring these tools yourself; 47 | * you want to have more control on the tools you use to develop 48 | studies for OpenSAFELY. 
49 | 50 | Refer to the [how-to section of this documentation](../../../getting-started/how-to/index.md#running-opensafely-on-your-computer) 51 | that lists the installation guides. 52 | 53 | The [current local installation guide](../../../getting-started/how-to/index.md) is aimed at 54 | Windows users. Mac users should be able to follow along as well, with 55 | a few hopefully-obvious alterations; see also the [macOS Install 56 | Guide](../../../install-macos.md)! We aim to integrate macOS instructions 57 | into this guide in future. 58 | -------------------------------------------------------------------------------- /docs/getting-started/explanation/understanding-github-codespaces/index.md: -------------------------------------------------------------------------------- 1 | ## What is GitHub Codespaces? 2 | 3 | [Codespaces](https://github.com/features/codespaces) is a coding environment 4 | hosted online by GitHub. 5 | Codespaces can be accessed via your web browser 6 | without any additional installation. 7 | 8 | A codespace provides: 9 | 10 | * a "virtual machine" — a computer running as software inside *another* computer 11 | — that is hosted by GitHub 12 | * a Visual Studio Code environment 13 | for editing your project and running commands 14 | 15 | When you open a codespace in your browser, 16 | you get access to the Visual Studio Code environment. 17 | Through that interface, 18 | you can run commands inside the codespace's virtual machine. 19 | This is just as if it were a real desktop or laptop that you were working on, 20 | except this virtual machine has been configured for OpenSAFELY use. 21 | 22 | The OpenSAFELY research template contains a GitHub Codespaces configuration 23 | to allow you to run OpenSAFELY 24 | **without any installation required on your own computer**. 25 | 26 | This removes the need to have anything other than a web browser installed 27 | to work on OpenSAFELY projects.
28 | 29 | GitHub provide users with a free monthly quota of Codespaces use. 30 | 31 | These cloud-hosted virtual machines have no persistent storage, 32 | which is to say any data on them will be lost when the machine is deleted 33 | if not saved elsewhere. 34 | Codespaces are primarily designed for writing code 35 | (such as an OpenSAFELY study) 36 | and are tightly integrated with GitHub, 37 | making it easy to commit and push your work to GitHub avoiding any data loss. 38 | 39 | ## Understanding GitHub Codespaces billing 40 | 41 | GitHub gives all users a free and decent-sized monthly quota for Codespaces. 42 | This is accessible without a paid account. 43 | See [GitHub's pricing details](https://docs.github.com/en/billing/managing-billing-for-github-codespaces/about-billing-for-github-codespaces). 44 | 45 | !!! note 46 | You will not get billed for using Codespaces, 47 | unless you both: 48 | 49 | * set a Codespaces spending limit 50 | * and add a payment method 51 | 52 | Without billing configured, 53 | you will not get invoiced or charged. 54 | 55 | If you run out of free quota, 56 | GitHub can only block you from using Codespaces until the next monthly cycle starts. 57 | 58 | !!! note 59 | Approved OpenSAFELY users working on projects in the GitHub `opensafely` organization 60 | can request to use Codespaces for those projects. 61 | -------------------------------------------------------------------------------- /docs/getting-started/explanation/understanding-the-software-used-to-run-opensafely/index.md: -------------------------------------------------------------------------------- 1 | ## Why OpenSAFELY requires several pieces of installed software to run 2 | 3 | Some of the software needed is so you can execute code on your computer 4 | in **exactly the same way** it is run in the secure environment: even a slight 5 | mismatch in the versions of the software could cause bugs and delays. 
6 | 7 | OpenSAFELY is also designed to encourage analysts to adopt best-practice 8 | software development processes, like using `git` for version control. 9 | 10 | If you're new to these concepts, there may be quite a lot to learn, and you'll need 11 | to use further software to work with them. The investment will be worthwhile: 12 | you'll find your software quality and efficiency will benefit hugely. 13 | 14 | !!! note 15 | If you use the OpenSAFELY GitHub Codespaces environment, 16 | all of the required software is already installed for you. 17 | 18 | ## The software used when working with OpenSAFELY 19 | 20 | The software needed to work with OpenSAFELY is designed to run cross-platform, 21 | on Windows, macOS and Linux. 22 | 23 | * OpenSAFELY code is written and edited in a text editor or interactive development environment (IDE). 24 | Examples include: 25 | * Visual Studio Code 26 | * R Studio 27 | * or some other editor of your choice 28 | * Git is version control software. 29 | Git allows you to work on software, 30 | record its history in *repositories*, 31 | and collaborate on that software with others. 32 | To run against real patient data, 33 | code using the OpenSAFELY platform is published to GitHub. 34 | GitHub is an online platform for hosting Git repositories. 35 | * The OpenSAFELY command-line interface (OpenSAFELY CLI) is a program 36 | that is used to run OpenSAFELY projects, 37 | whether on your own computer or on the platform hosting real patient data. 38 | * Docker is software that the OpenSAFELY CLI uses 39 | to run your data extraction and analysis scripts in a reproducible way. 40 | * Python is the programming language that the OpenSAFELY CLI is written in, 41 | and is required to run the OpenSAFELY CLI. 42 | 43 | !!! 
note 44 | While most of the required software listed above is open source and free of charge to use, 45 | Docker Desktop may require you to obtain a paid license 46 | if you work in a commercial 47 | or government organisation. 48 | 49 | Consult the [Docker Desktop license agreement](https://docs.docker.com/subscription/desktop-license/). 50 | -------------------------------------------------------------------------------- /docs/getting-started/how-to/add-github-codespaces-to-your-project/download_raw_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/getting-started/how-to/add-github-codespaces-to-your-project/download_raw_file.png -------------------------------------------------------------------------------- /docs/getting-started/how-to/create-a-code-repository-for-your-project/index.md: -------------------------------------------------------------------------------- 1 | !!! info 2 | A "code repository" is where code gets published on GitHub. 3 | This is not to be confused with a codespace. 4 | 5 | A codespace refers specifically to a virtual machine environment, 6 | that contains a *copy* of your repository code to work on. 7 | 8 | You only need to create a code repository once for a particular project. 9 | 10 | Create a new research code repository based on the research template. 11 | 12 | 1. In your web browser, 13 | go to the [research code template repository](https://github.com/opensafely/research-template). 14 | 1. Click the "Use this template" button 15 | to begin the process of creating a new research code repository for you to work on. 16 | The screenshot below shows this. 
17 | 18 | ![A screenshot showing the "Use this template" button for the research code template repository.](../../../images/codespaces-template.png) 19 | 20 | There are several options given 21 | when creating a new repository from a template. 22 | 23 | Here is a quick explanation of the options: 24 | 25 | * If you are creating a repository for an OpenSAFELY project, 26 | you should choose `opensafely` as the repository owner. 27 | * If you are creating a repository for testing OpenSAFELY out, 28 | you should choose *your own GitHub user account* as the repository owner. 29 | * You can enter any name and description that you choose for your repository. 30 | * You can choose whether the repository visibility is public or private. 31 | * You do not need to select "Include all branches". 32 | * You can ignore the mention of "GitHub Apps from GitHub Marketplace" 33 | in GitHub's instructions for creating a new repository from a template. 34 | 35 | If you are unsure of what to do, 36 | refer to GitHub's step-by-step instructions for [creating a new repository from a template](https://docs.github.com/en/repositories/creating-and-managing-repositories/creating-a-repository-from-a-template#creating-a-repository-from-a-template). 37 | 38 | !!! warning 39 | Creating a repository owned by your GitHub user account 40 | will enable you to: 41 | 42 | * work on your OpenSAFELY research code in Codespaces 43 | * check that your research code works with the OpenSAFELY platform 44 | 45 | **It will not allow you to run code on OpenSAFELY's platform.** 46 | 47 | For that, you would have to request that your repository is transferred to the `opensafely` organization. 48 | 49 | Approved OpenSAFELY users are able to create a repository within the `opensafely` organization directly. 
50 | -------------------------------------------------------------------------------- /docs/getting-started/how-to/index.md: -------------------------------------------------------------------------------- 1 | The how-to guides provide practical steps for setting up and using OpenSAFELY. 2 | 3 | ## Setting up GitHub Codespaces 4 | 5 | * [How to add GitHub Codespaces to your project](add-github-codespaces-to-your-project/index.md) 6 | * [How to update GitHub Codespaces in your project](update-github-codespaces-in-your-project/index.md) 7 | 8 | ## Using GitHub Codespaces 9 | 10 | * [How to use GitHub Codespaces in your project](use-github-codespaces-in-your-project/index.md) 11 | * [How to use released outputs in GitHub Codespaces](use-released-outputs-in-github-codespaces/index.md) 12 | * [How to troubleshoot common issues with GitHub Codespaces](troubleshoot-common-codespaces-issues/index.md) 13 | 14 | ## Using Git, GitHub, and the OpenSAFELY command-line interface 15 | 16 | * [How to use Git effectively](use-git-effectively/index.md) 17 | * [How to use the OpenSAFELY command-line interface](../../opensafely-cli.md) 18 | * [How to create a code repository for your 19 | project](create-a-code-repository-for-your-project/index.md) 20 | 21 | ## Running OpenSAFELY on your computer 22 | 23 | All you need to get started with OpenSAFELY is an up to date web browser and an internet connection. 24 | At some point, however, you may need to run OpenSAFELY on your computer. 25 | This is sometimes referred to as running OpenSAFELY *locally* 26 | and can be worthwhile when you don't have a reliable internet connection. 
27 | 28 | * How to install Python on 29 | [Windows](../../install-python.md#windows), 30 | [macOS](../../install-python.md#macos), and 31 | [Linux](../../install-linux.md#installing-python) 32 | * How to install Docker on 33 | [Windows](../../install-docker.md#windows), 34 | [macOS](../../install-macos.md#docker-for-mac), and 35 | [Linux](../../install-linux.md#installing-docker) 36 | * How to install the OpenSAFELY command-line interface on 37 | [Windows](../../opensafely-cli.md#installing-opensafely), 38 | [macOS](../../install-macos.md#opensafely-cli), and 39 | [Linux](../../install-linux.md#installing-the-opensafely-cli) 40 | -------------------------------------------------------------------------------- /docs/getting-started/how-to/troubleshoot-common-codespaces-issues/codespace-additional-permissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/getting-started/how-to/troubleshoot-common-codespaces-issues/codespace-additional-permissions.png -------------------------------------------------------------------------------- /docs/getting-started/how-to/troubleshoot-common-codespaces-issues/directory-listing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/getting-started/how-to/troubleshoot-common-codespaces-issues/directory-listing.png -------------------------------------------------------------------------------- /docs/getting-started/how-to/troubleshoot-common-codespaces-issues/r-session-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/getting-started/how-to/troubleshoot-common-codespaces-issues/r-session-error.png 
-------------------------------------------------------------------------------- /docs/getting-started/how-to/troubleshoot-common-codespaces-issues/vscode-popup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/getting-started/how-to/troubleshoot-common-codespaces-issues/vscode-popup.png -------------------------------------------------------------------------------- /docs/getting-started/how-to/troubleshoot-common-codespaces-issues/vscode-ports.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/getting-started/how-to/troubleshoot-common-codespaces-issues/vscode-ports.png -------------------------------------------------------------------------------- /docs/getting-started/how-to/update-github-codespaces-in-your-project/index.md: -------------------------------------------------------------------------------- 1 | This page describes how to update the GitHub Codespaces in your project, 2 | if your project already has GitHub Codespaces added. 3 | 4 | You may wish to update the GitHub Codespaces configuration, 5 | if you want to use the latest configuration as provided by the research template 6 | in your existing project. 7 | 8 | ## Updating the GitHub Codespaces configuration in your project 9 | 10 | * Delete the files inside `.devcontainer` in your project. 11 | * Now follow the instructions as for [adding GitHub Codespaces to an existing 12 | project](../add-github-codespaces-to-your-project/index.md). 
13 | -------------------------------------------------------------------------------- /docs/getting-started/how-to/use-git-effectively/index.md: -------------------------------------------------------------------------------- 1 | This page links to resources that describe how to use Git effectively. 2 | 3 | ## Using Git with `git` 4 | 5 | The `git` command-line tool has a large number of commands. 6 | However, only a small number are important and commonly used. 7 | These are summarised in GitHub's "[Git Cheat Sheet](https://education.github.com/git-cheat-sheet-education.pdf)". 8 | 9 | [Julia Evans](https://jvns.ca/) has produced a free "[Git Cheat Sheet](https://wizardzines.com/git-cheat-sheet.pdf)" 10 | and a paid-for "[How Git Works](https://wizardzines.com/zines/git/)" zine. 11 | Both are light-hearted and approachable. 12 | 13 | ## Using Git with VS Code 14 | 15 | VS Code's Source Control view provides a visual interface to important and commonly used commands. 16 | For more information, see the "[Introduction to Git in VS Code](https://code.visualstudio.com/docs/sourcecontrol/intro-to-git)" page in VS Code's docs. 17 | 18 | ## Diving deeper 19 | 20 | The [Turing Way](https://book.the-turing-way.org/) project has a section on its website entitled "[Version Control](https://book.the-turing-way.org/reproducible-research/vcs)". 21 | 22 | The University of Bristol's Electronic Health Records Group have a section on their website entitled "[Introduction to using Git and GitHub](https://grpehr.github.io/training/01-index.html)". 23 | 24 | Git's "[Documentation](https://git-scm.com/doc)" page links to several videos and books, 25 | including the "[Pro Git](https://git-scm.com/book/en/v2)" book by Scott Chacon and Ben Straub. 26 | 27 | [Dangit, Git!?!](https://dangitgit.com/) describes several Git pitfalls, with advice about how to escape from them. 28 | There's also [a version with swears](https://ohshitgit.com/). 
29 | -------------------------------------------------------------------------------- /docs/getting-started/how-to/use-released-outputs-in-github-codespaces/index.md: -------------------------------------------------------------------------------- 1 | Whilst it's possible to execute a project pipeline entirely inside a secure environment, 2 | in practice it's often more convenient to execute the final stage outside. 3 | This is because the final stage frequently involves carefully crafting figures and tables for publication, 4 | making many small adjustments that would otherwise entail multiple round-trips to the OpenSAFELY Jobs site. 5 | 6 | Executing the final stage of a project pipeline outside a secure environment is only possible when the outputs from the previous stage have been released to the OpenSAFELY Jobs site. 7 | [Released outputs](../../../outputs/index.md) have been subject to statistical disclosure control and have been reviewed by two trained OpenSAFELY output checkers. 8 | 9 | To upload released outputs to a Codespace, using VS Code: 10 | 11 | * Download the released outputs from the OpenSAFELY Jobs site to your computer. 12 | * Right-click on the `output` folder in the Explorer, which is in the Primary Side Bar. 13 | The "[User Interface](https://code.visualstudio.com/docs/getstarted/userinterface)" page in VS Code's documentation locates the Explorer and the Primary Side Bar. 14 | * Click "Upload...". 15 | * Select the released outputs to upload. 16 | 17 | To download figures and tables from a Codespace, using VS Code: 18 | 19 | * Locate the figures and tables in the `output` folder in the Explorer, which is in the Primary Side Bar. 20 | The "[User Interface](https://code.visualstudio.com/docs/getstarted/userinterface)" page in VS Code's documentation locates the Explorer and the Primary Side Bar. 21 | * Right-click on each figure or table. 22 | * Click "Download...". 
23 | 24 | Uploading released outputs to a Codespace, 25 | and downloading figures and tables from a Codespace, 26 | are examples of **sharing results**. 27 | The "[Datasets Used](https://www.opensafely.org/policies-for-researchers/#datasets-used)" section 28 | of the "Policies for Researchers" page on the OpenSAFELY website contains more information about sharing results. 29 | In short, results 30 | — such as released outputs, figures, and tables — 31 | should be shared **in confidence**, 32 | and **only** with key members of the wider research team. 33 | 34 | !!! warning 35 | Never commit released outputs, figures, or tables to your project's repository. 36 | Doing so would contravene the "Policies for Researchers" page on the OpenSAFELY website. 37 | 38 | Only a Codespace's creator can access the Codespace, 39 | unless they enable [Live Share](https://code.visualstudio.com/learn/collaboration/live-share) in VS Code or 40 | unless they enable a service, such as JupyterLab, in the Codespace. 41 | Enabling Live Share or enabling a service are also examples of **sharing results**. 42 | Again, the "[Datasets Used](https://www.opensafely.org/policies-for-researchers/#datasets-used)" section 43 | of the "Policies for Researchers" page on the OpenSAFELY website contains more information about sharing results. 44 | -------------------------------------------------------------------------------- /docs/getting-started/index.md: -------------------------------------------------------------------------------- 1 | This section contains information that will help you get started with OpenSAFELY. 2 | 3 | First, complete the [tutorial](tutorial/index.md). 4 | In the tutorial, you will create a study that uses dummy patient data. 5 | Because the study uses dummy patient data, anyone can complete the tutorial. 6 | All you need is an up to date web browser and an internet connection. 7 | 8 | When you've completed the tutorial, 9 | browse the [how-to guides](how-to/index.md). 
10 | These provide practical steps for setting up and using OpenSAFELY. 11 | Finally, the [explanation](explanation/index.md) section contains background information about running OpenSAFELY. 12 | 13 | !!! info "GitHub Codespaces" 14 | Many of the pages in this section mention GitHub Codespaces. 15 | For more information about what GitHub Codespaces are, 16 | see our blog post on "[Research-ready computers in the cloud](https://www.bennett.ox.ac.uk/blog/2024/07/research-ready-computers-in-the-cloud/)". 17 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/add-a-scripted-action-to-the-pipeline/index.md: -------------------------------------------------------------------------------- 1 | **Every** study starts with a *dataset definition* like the one you just edited. 2 | When executed, a dataset definition generates a compressed CSV (`.csv.gz`) of patient data. 3 | 4 | A real analysis will have several further steps after this. Each step is defined 5 | in a separate file, and can be written in [any of the programming languages supported in 6 | OpenSAFELY](../../../actions-scripts.md). 7 | 8 | ## Create a new action 9 | 10 | In this tutorial, we're going to draw a 11 | histogram of ages, using either four lines of Python or just a few more lines of R. 12 | 13 | === "Python" 14 | 15 | 1. Right-click on the `analysis` folder in the editor's Explorer and select 16 | "New file". Type "report.py" as the filename and press ++enter++. 17 | 2. Add the following to `report.py`: 18 | ```python 19 | import pandas as pd 20 | 21 | data = pd.read_csv("output/dataset.csv.gz") 22 | 23 | fig = data.age.plot.hist().get_figure() 24 | fig.savefig("output/report.png") 25 | ``` 26 | 27 | === "R" 28 | 29 | 1. Right-click on the `analysis` folder in the editor's Explorer and select 30 | "New file". Type "report.R" as the filename and press ++enter++. 31 | 2. Add the following to `report.R`:
32 | ```R 33 | library('tidyverse') 34 | 35 | df_input <- read_csv( 36 | here::here("output", "dataset.csv.gz"), 37 | col_types = cols(patient_id = col_integer(),age = col_double()) 38 | ) 39 | 40 | plot_age <- ggplot(data=df_input, aes(df_input$age)) + geom_histogram() 41 | 42 | ggsave( 43 | plot= plot_age, 44 | filename="report.png", path=here::here("output"), 45 | ) 46 | ``` 47 | 48 | 49 | This code reads the CSV of patient data, and saves a histogram of ages to a new file. 50 | 51 | ## Add the action to the pipeline 52 | 53 |
    54 |
  1. 55 | Open project.yaml in the editor. This file will be near the end of the 56 | list of files and folders. This file describes how each step in your analysis should 57 | be run. It already defines a single generate_dataset action 58 | which defines the output that we've generated so far. This file is in a format 59 | called YAML: the way it's indented matters, so take care to copy and paste the 60 | following exactly. 61 |
  2. 62 |
  3. 63 | Add a generate_report action to the file, so the entire file looks like this: 64 |
  4. 65 |
66 | 67 | === "Python" 68 | 69 | ```yaml linenums="1" hl_lines="10 11 12 13 14 15" 70 | version: "4.0" 71 | 72 | actions: 73 | generate_dataset: 74 | run: ehrql:v1 generate-dataset analysis/dataset_definition.py --output output/dataset.csv.gz 75 | outputs: 76 | highly_sensitive: 77 | dataset: output/dataset.csv.gz 78 | 79 | generate_report: 80 | run: python:v2 python analysis/report.py 81 | needs: [generate_dataset] 82 | outputs: 83 | moderately_sensitive: 84 | chart: output/report.png 85 | ``` 86 | 87 | === "R" 88 | 89 | ```yaml linenums="1" hl_lines="10 11 12 13 14 15" 90 | version: "4.0" 91 | 92 | actions: 93 | generate_dataset: 94 | run: ehrql:v1 generate-dataset analysis/dataset_definition.py --output output/dataset.csv.gz 95 | outputs: 96 | highly_sensitive: 97 | dataset: output/dataset.csv.gz 98 | 99 | generate_report: 100 | run: r:v2 analysis/report.R 101 | needs: [generate_dataset] 102 | outputs: 103 | moderately_sensitive: 104 | chart: output/report.png 105 | ``` 106 | 107 | - **Line 10** tells the system we want to create a new action called `generate_report`. 108 | - **Line 11** says how to run the script (using the `python` or `R` runner). 109 | - **Line 12** tells the system that this action depends on the outputs of the 110 | `generate_dataset` being present. 111 | - **Lines 13-15** describe the files that the action creates. Line 14 says that the 112 | items indented below it are *moderately* sensitive, which means they may be released 113 | to the public after a careful review (and possible redaction). Line 15 says that 114 | there's one output file, which will be found at `output/report.png`. 115 | 116 | In the Visual Studio Code Terminal, type: 117 | 118 | ``` 119 | opensafely run generate_report 120 | ``` 121 | and press ++enter++. This should end by telling you a file containing the histogram has been created. 122 | Open the `output` folder — you can do this via Visual Studio Code's Explorer — and check that it contains `report.png`. 
123 | 124 | Double click on `report.png` to display the image, 125 | or right-click on `report.png` and select Download to download the image. 126 | 127 | !!! warning 128 | Changes will not persist outside of the GitHub codespace 129 | unless you *commit* and *push* them to GitHub, as described in the 130 | next section. 131 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/check-the-automated-tests-pass/index.md: -------------------------------------------------------------------------------- 1 | Your repository is automatically configured with tests to verify the project is runnable. 2 | These tests run each time you push. 3 | 4 | Now that you have published the changes from your codespace to your GitHub repository, 5 | we can see if these tests pass. 6 | 7 | Visit your repository on GitHub's site. Click on the **Actions** tab 8 | ![The GitHub Actions tab in a repository.](../../../images/getting-started-github-actions-tab.png) 9 | 10 | You'll see a *Workflow* running with the *commit message* of your last 11 | commit. The workflow verifies that all of the actions in your project pipeline can run. 12 | 13 | If the action icon is green (as shown below), it's succeeded. If it's yellow, it's still running. If it's red, it has failed. You want it to be green! 14 | ![The GitHub Actions tab showing a successful workflow.](../../../images/getting-started-github-actions-workflow-success.png) 15 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/create-a-github-account/index.md: -------------------------------------------------------------------------------- 1 | To use OpenSAFELY, you must have a GitHub account. GitHub is a widely-used 2 | website for storing and collaborating on software, using the version control 3 | software `git`. GitHub is where your open, reproducible research will be 4 | published. 
5 | 6 | This tutorial also uses GitHub Codespaces to work with OpenSAFELY. 7 | 8 | ## Creating a GitHub account 9 | 10 | !!! note 11 | If you already have a GitHub account, 12 | you can use that account. 13 | 14 | If you do not already have a GitHub account, you should create one 15 | first. 16 | 17 | Visit [GitHub](https://github.com) and click the "Sign up" button. 18 | You will have to provide an email address and password. GitHub will 19 | also send you a confirmation email containing a link that you need 20 | to visit to confirm your account. 21 | 22 | ## Securing your GitHub account 23 | 24 | We recommend that you [secure your GitHub account with two-factor 25 | authentication](https://docs.github.com/en/github/authenticating-to-github/securing-your-account-with-two-factor-authentication-2fa). 26 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/create-a-github-codespace/index.md: -------------------------------------------------------------------------------- 1 | For the repository you just created, 2 | there is a Code button on GitHub. 3 | 4 | To open your repository with GitHub Codespaces: 5 | 6 | 1. Click the Code button. 7 | 2. Click the Codespaces tab. 8 | 3. Click the "Create codespace on main" button. 9 | The screenshot below shows this. 10 | 11 | ![A screenshot showing the "Create codespace on main" button.](../../../images/getting-started-codespaces-button.png) 12 | 13 | You may see the following screen requesting additional permissions for your codespace: 14 | 15 | ![A screenshot showing "This codespace is requesting additional permissions", with a green "Authorize and continue" button at the bottom right.](../../../images/getting-started-codespaces-repository-additional-permissions.png) 16 | 17 | If so, click "Authorize and continue".
18 | 19 | You should then see a "Setting up your codespace" screen: 20 | 21 | ![A screenshot showing "Setting up your codespace".](../../../images/getting-started-codespaces-setting-up.png) 22 | 23 | A GitHub codespace containing the Visual Studio Code editor with a 24 | command-line interface "terminal" should then appear. **This may take 25 | a little bit longer the first time a codespace is started, perhaps 26 | a minute or two.** 27 | 28 | ![GitHub's codespace showing the editor, terminal and 29 | Explorer.](../../../images/getting-started-codespaces-start.png) 30 | 31 | The terminal at the bottom-right of the GitHub codespace runs 32 | commands on a computer (virtual machine) provided by GitHub. 33 | 34 | The large, upper-right area holds the **main editor**, which is where you will 35 | view and edit files that you are working on. The left **"side bar"** 36 | holds the Explorer when you first start the codespace. There are 37 | other useful menus in this area that can be accessed with the icons 38 | to the far left side. Finally, the button at the top-left with three 39 | horizontal lines (`≡`) is the **menu button**, which allows you to 40 | access many more options. 41 | 42 | If you find yourself using GitHub regularly for working on research, 43 | we have more information on [working with GitHub codespaces](../../how-to/use-github-codespaces-in-your-project/index.md). 44 | 45 | ## Running `opensafely` 46 | 47 | The `opensafely` software should already be installed if you start 48 | a GitHub codespace for the OpenSAFELY research template. 49 | 50 | You can confirm this by typing `opensafely` in the terminal at the 51 | prompt `$` and pressing ++enter++. You should see an output that looks 52 | something like: 53 | 54 | ```shell-session 55 | $ opensafely 56 | usage: opensafely [-h] [--version] COMMAND ... 
57 | 58 | optional arguments: 59 | -h, --help show this help message and exit 60 | --version show program's version number and exit 61 | 62 | available commands: 63 | 64 | COMMAND 65 | help Show this help message and exit 66 | run Run project.yaml actions locally 67 | codelists 68 | Commands for interacting with https://codelists.opensafely.org/ 69 | pull Command for updating the docker images used to run OpenSAFELY studies locally 70 | upgrade Upgrade the opensafely cli tool. 71 | ``` 72 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/create-a-github-repository/index.md: -------------------------------------------------------------------------------- 1 | Here, you'll copy our OpenSAFELY research template to your own GitHub 2 | account and use it to develop your own study: 3 | 4 | 1. Click on the link below to create a new repository based on our template. 5 | You may need to log in to GitHub if you are not already logged in: 6 |
https://github.com/opensafely/research-template/generate. 7 | 1. Leave the "Include all branches" option unchecked. 8 | 1. Use the **Owner** drop-down menu, and select your GitHub account as the owner. 9 | 1. Enter `opensafely-getting-started` as the name for your repository. 10 | 1. Enter "The OpenSAFELY getting started tutorial" as the description of your repository. 11 | 1. Choose "Public" as the [repository visibility](../../../repositories.md#repository-visibility). 12 | ![Entering a description and choosing to make a repository public or private, when creating a repository from the research template.](../../../images/getting-started-create-repository-public-private.png) 13 | 1. Click **Create repository from template**. 14 | 1. The new GitHub repository will take a moment to initialise, as it is running 15 | some setup in the background. Wait about 1 minute, then reload the page, and you 16 | should see that the README displayed now reflects the name you gave to the new 17 | repository. 18 | 19 | If you see `${GITHUB_REPOSITORY_NAME}` in your README, the repo is not yet initialised; wait a few seconds longer and reload. 20 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/delete-the-github-codespace/index.md: -------------------------------------------------------------------------------- 1 | ## Tidy up 2 | 3 | If you close a codespace in your browser, it still continues running. So, once you've finished using your codespace, you can delete it. 4 | 5 | There's more information about how to delete your codespace, and why you should delete any codespaces you no longer need, in [GitHub's documentation on deleting a codespace](https://docs.github.com/en/codespaces/developing-in-a-codespace/deleting-a-codespace). 6 | 7 | It's important to note that saving files in your codespace only ensures that changes persist in that codespace's storage.
So, when you delete your codespace, any changes will be lost unless they are pushed to the remote repository that GitHub hosts. 8 | 9 | !!! warning 10 | By default, codespaces are automatically deleted after a period of inactivity and any changes not pushed to the remote GitHub repository will be lost. 11 | For the `opensafely` organization, 12 | this period is 30 days. 13 | 14 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/generate-a-first-dataset/index.md: -------------------------------------------------------------------------------- 1 | Now you're ready to run your first study. 2 | 3 | In the terminal, type the following: 4 | 5 | ```shell-session 6 | opensafely exec ehrql:v1 generate-dataset analysis/dataset_definition.py 7 | ``` 8 | 9 | pressing ++enter++ once you've typed the command. 10 | 11 | This command makes use of files that already exist in the repository to generate a dummy dataset. 12 | 13 | The first time you run this command, it may take a few seconds to download the 14 | required software. Eventually, you should see output that contains lines like the following: 15 | 16 | ```shell-session 17 | … 18 | [info ] Compiling dataset definition from analysis/dataset_definition.py 19 | [info ] Building dataset and writing results 20 | … 21 | ``` 22 | 23 | followed by the dataset displayed in the terminal in CSV format. 24 | 25 | This should look something like the following, 26 | although the data you see will differ, 27 | because it is randomly generated: 28 | 29 | ``` 30 | patient_id,sex 31 | 1,unknown 32 | 2,male 33 | 3,unknown 34 | 4,unknown 35 | 5,female 36 | 6,unknown 37 | 7,intersex 38 | 8,unknown 39 | 9,intersex 40 | 10,male 41 | ``` 42 | 43 | Notice the columns: `patient_id` and `sex`. 44 | 45 | In the next part of the tutorial, 46 | we will see how to add another data column. 
47 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/index.md: -------------------------------------------------------------------------------- 1 | The tutorial provides practical steps for 2 | creating an OpenSAFELY study. 3 | 4 | * [Introduction](introduction/index.md) 5 | * [Create a GitHub account](create-a-github-account/index.md) 6 | * [Create a GitHub repository](create-a-github-repository/index.md) 7 | * [Create a GitHub codespace](create-a-github-codespace/index.md) 8 | * [Generate a first dataset](generate-a-first-dataset/index.md) 9 | * [Update the dataset definition](update-the-dataset-definition/index.md) 10 | * [Run the project pipeline](run-the-project-pipeline/index.md) 11 | * [Add a scripted action to the pipeline](add-a-scripted-action-to-the-pipeline/index.md) 12 | * [Publish the changes to GitHub](publish-the-changes-to-github/index.md) 13 | * [Check the automated tests pass](check-the-automated-tests-pass/index.md) 14 | * [Delete the GitHub codespace](delete-the-github-codespace/index.md) 15 | * [See the next steps](see-the-next-steps/index.md) 16 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/introduction/index.md: -------------------------------------------------------------------------------- 1 | OpenSAFELY is designed to allow you to do your analytic work on your own 2 | computer, without ever having to access the real, sensitive, patient-level data. 3 | 4 | Because the tutorial study uses dummy patient data, 5 | anyone can complete the tutorial. 6 | 7 | We ask all potential collaborators to complete this tutorial 8 | before applying to run their project against real data. 9 | 10 | ## Learning outcome 11 | 12 | After completing this tutorial, 13 | you will have seen the steps needed to 14 | write an OpenSAFELY-compliant study 15 | that runs with "dummy" (randomly-generated) patient data. 
16 | 17 | You will: 18 | 19 | 1. Create a GitHub account 20 | 2. Create a new GitHub repository 21 | 3. Create a study that queries "dummy" patient data to generate a dataset 22 | 4. Run the study in a development environment 23 | 5. Update the study to add extra data columns to the dataset 24 | 6. Add an analysis step to the study 25 | 7. Run the study in a testing environment 26 | 27 | Please ask any questions in our [Q&A forum](https://github.com/opensafely/documentation/discussions)! 28 | 29 | ## Requirements 30 | 31 | The requirements for this tutorial are minimal. 32 | 33 | You will need a computer with: 34 | 35 | * an up-to-date web browser 36 | * an internet connection 37 | 38 | The tutorial will be completed in the GitHub Codespaces environment. 39 | Free GitHub accounts are given a monthly quota of Codespaces. 40 | If you do not have a GitHub account, 41 | we will create one in this tutorial. 42 | 43 | --- 44 | 45 | * Next: [Create a GitHub account](../create-a-github-account/index.md) 46 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/publish-the-changes-to-github/index.md: -------------------------------------------------------------------------------- 1 | So far, 2 | the changes you have made only exist in the codespace in which you are working. 3 | 4 | In this section, you will first add the study changes that you've made 5 | to a new *commit* in your local repository — a commit represents a stored 6 | version of your work — and then send that commit to the remote repository that GitHub hosts by *pushing* 7 | the new commit.
8 | 9 | Pushing changes to your remote GitHub repository: 10 | 11 | * ensures the changes you have made persist even when the codespace is deleted 12 | * enables others with permission to access your repository to see those changes 13 | 14 | ## Add your changes to the local repository 15 | 16 | If you know how to use Git with the command-line, you can do 17 | so in the Visual Studio Code Terminal. Alternatively, you can use Visual 18 | Studio Code's Source Control feature, as demonstrated below. 19 | 20 | Back in the GitHub codespace, open the Source Control panel by 21 | selecting the icon that has round dots connected by lines on the 22 | left-hand side. It should be below the magnifying glass icon. 23 | 24 | ![Opening Source Control in 25 | GitHub.](../../../images/getting-started-codespaces-stage-changes.png) 26 | 27 | When files in the repository are edited and then saved, Source 28 | Control should list those changes. Note that Visual Studio Code in 29 | the codespace has Auto Save enabled by default. If you left-click on a file 30 | in Source Control, you'll see how your copy of the file has changed 31 | from the previous repository state. If you hover over a file in 32 | Source Control under "Changes", you can propose to add the changes 33 | to the repository by clicking the `+` icon next to the filename. 34 | These "staged" changes then appear in the "Staged Changes" section. 35 | 36 | Staged changes are changes that you are proposing to include in the next *commit* of 37 | this study repository. These could be modifications of existing 38 | files or entirely new files that you include. 39 | 40 | It is also possible to "Discard Changes" if you accidentally stage a 41 | file that you do not want to include. You can do this by hovering 42 | over a file listed in the "Staged Changes" section and clicking the 43 | `-` icon next to the filename. 44 | 45 | When you've finished staging all your changes, you are now ready to 46 | make the new commit: 47 | 48 | 1. 
Type "Generate age histograms" where it says "Message" above the Commit button. 49 | This message summarises what your staged changes do. 50 | 1. Click the Commit button or press ++ctrl+enter++ 51 | to *commit* the staged changes, 52 | adding them to the repository as stored in the codespace. 53 | 54 | ![Committing changes in GitHub.](../../../images/getting-started-codespaces-commit-message.png) 55 | 56 | ## Push the changes to the remote GitHub repository 57 | 58 | The changes have been stored as a new commit in the codespace's 59 | *local* copy of the repository. We now need to *push* the 60 | commit to the *remote* GitHub repository, to make the changes show up there. 61 | 62 | Click the "Sync Changes" button to push your commits. Alternatively, 63 | click the ellipsis (`⋯`) icon next to "Source Control" and then select 64 | "Push". This should submit your changes to the GitHub repository that 65 | you created earlier. 66 | 67 | ![Pushing changes to GitHub.](../../../images/getting-started-codespaces-push-to-github.png) 68 | 69 | You will see a prompt: 'This action will pull and push commits from 70 | and to "origin/main".' — click OK. 71 | 72 | ## Seeing the changes in the remote GitHub repository 73 | 74 | The repository was created at: 75 | `https://github.com/<your-username>/opensafely-getting-started` 76 | 77 | If you now visit the repository on the main GitHub site, 78 | you should see the updated state of the repository 79 | with the changes you made in the codespace. 80 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/run-the-project-pipeline/index.md: -------------------------------------------------------------------------------- 1 | In this section, 2 | we will look at the OpenSAFELY project pipeline.
3 | 4 | So far, 5 | we have run the single dataset definition step, or *scripted action*, 6 | using the command line with the command: 7 | 8 | ```sh 9 | opensafely exec ehrql:v1 generate-dataset analysis/dataset_definition.py 10 | ``` 11 | 12 | A complete OpenSAFELY study may include multiple actions. 13 | For example, the first action might extract a dataset, 14 | and a subsequent action might generate a table or chart from that data. 15 | 16 | The `project.yaml` file in the study repository 17 | defines the actions for an OpenSAFELY project pipeline. 18 | 19 | ## The `project.yaml` file 20 | 21 | In the Visual Studio Code file Explorer, 22 | open the `project.yaml` file by clicking on it. This file will be near the end of the list of files and folders. 23 | 24 | You should see a tab with the following content: 25 | 26 | ```yaml linenums="1" hl_lines="5" 27 | version: "4.0" 28 | 29 | actions: 30 | generate_dataset: 31 | run: ehrql:v1 generate-dataset analysis/dataset_definition.py --output output/dataset.csv.gz 32 | outputs: 33 | highly_sensitive: 34 | dataset: output/dataset.csv.gz 35 | ``` 36 | 37 | There is a single action defined, called `generate_dataset`, 38 | in this project pipeline. 39 | 40 | The highlighted line is the command that the action runs, 41 | and is very similar to the command we previously ran. 42 | 43 | The difference is that `generate_dataset` defines an output 44 | stored in the `output` folder. 45 | 46 | ## Running the action in the pipeline 47 | 48 |
    49 |
  1. 50 | In the Visual Studio Code file Explorer, 51 | confirm that the output folder only contains a .gitkeep file. 52 |
  2. 53 | 54 |
  3. 55 | In the Visual Studio Code Terminal, 56 | type: 57 | 58 | ```sh 59 | opensafely run generate_dataset 60 | ``` 61 | 62 | and press ++enter++ on your keyboard to run the pipeline action. 63 | 64 | You should see output that ends something like the following: 65 | 66 | ``` 67 | <...several lines of output...> 68 | generate_dataset: Extracting output file: output/dataset.csv.gz 69 | generate_dataset: Finished recording results 70 | generate_dataset: Completed successfully 71 | generate_dataset: Cleaning up container and volume 72 | 73 | => generate_dataset 74 | Completed successfully 75 | 76 | log file: metadata/generate_dataset.log 77 | outputs: 78 | output/dataset.csv.gz - highly_sensitive 79 | ``` 80 | 81 | The final line tells you a file of (randomly-generated) patient data has been created at 82 | output/dataset.csv.gz, and that it should be considered highly sensitive 83 | data. What you see here is exactly the same process that would happen on a real, secure 84 | server. 85 |
  4. 86 | 87 |
  5. 88 | When the command completes, recheck the output folder 89 | and see that it contains a dataset.csv.gz file. 90 |
  6. 91 |
92 | 93 | ### Viewing the dataset output 94 | 95 | This `.csv.gz` file is a compressed CSV file that contains a small amount of *dummy data* (patient ID and sex) 96 | based on the dataset definition at `analysis/dataset_definition.py`. 97 | 98 | To view it, first run: 99 | ``` 100 | opensafely unzip output 101 | ``` 102 | then open that file (by left-clicking the filename in Visual Studio Code's Explorer, or 103 | with software like Excel). You'll see that it contains rows for ten 104 | randomly-generated dummy patients. 105 | 106 | ## The difference between `opensafely exec` and `opensafely run` 107 | 108 | Both `opensafely exec` and `opensafely run` can run actions. 109 | 110 | The difference between them is that: 111 | 112 | * `opensafely exec` runs actions *outside* of the project pipeline 113 | and is useful for quick feedback during interactive development 114 | * `opensafely run` runs actions *inside* the project pipeline - 115 | that is, just as they would be in the secure OpenSAFELY environment 116 | containing real patient data 117 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/see-the-next-steps/index.md: -------------------------------------------------------------------------------- 1 | Congratulations! You've covered all the basics that you need to develop a study 2 | on your own computer, verify that it can run against real data, and publish it 3 | to GitHub. 4 | 5 | To proceed from here, 6 | there is an application process to follow, 7 | and more about OpenSAFELY that you will need to learn. 8 | 9 | ## Applying for OpenSAFELY access 10 | 11 | To write a real study and run it against actual patient data, you will first need to get permission for your project from the data controllers for the NHS England OpenSAFELY COVID-19 service. 12 | 13 | * [Read about our onboarding programme](https://www.opensafely.org/onboarding-new-users/).
14 | 15 | ## Further learning 16 | 17 | To learn more about OpenSAFELY, 18 | we recommend the sections of our documentation about: 19 | 20 | * [our electronic health record query language, ehrQL](../../../ehrql/index.md) 21 | * [scripted actions](../../../actions-scripts.md) 22 | * [using Git](../../how-to/use-git-effectively/index.md) 23 | * [using GitHub Codespaces in your project](../../how-to/use-github-codespaces-in-your-project/index.md) 24 | 25 | We also recommend: 26 | 27 | * [OpenSAFELY Jobs](https://jobs.opensafely.org) 28 | * [the Visual Studio Code introduction to Git](https://code.visualstudio.com/docs/sourcecontrol/intro-to-git) 29 | -------------------------------------------------------------------------------- /docs/getting-started/tutorial/update-the-dataset-definition/index.md: -------------------------------------------------------------------------------- 1 | You've successfully generated a dataset from the code in your study, but at the moment it only contains one data column. 2 | 3 | Now we'll add some code to create an extra column. 4 | 5 | ## Add an `age` column 6 | 7 | 1. The "Explorer" on the left hand side lists the files and folders in 8 | your research repository. Find and click on the `dataset_definition.py` 9 | file inside the `analysis` folder. This file contains a dataset definition, 10 | specifying the population that you'd like to study (dataset rows) 11 | and what you need to know about them (dataset columns). 12 | It is written in [ehrQL](../../../ehrql/index.md). 13 | 1. 
Add some text so that the file looks like this (new text highlighted): 14 | ```python linenums="1" hl_lines="15" 15 | from ehrql import create_dataset 16 | from ehrql.tables.tpp import patients, practice_registrations 17 | 18 | dataset = create_dataset() 19 | 20 | index_date = "2020-03-31" 21 | 22 | has_registration = practice_registrations.for_patient_on( 23 | index_date 24 | ).exists_for_patient() 25 | 26 | dataset.define_population(has_registration) 27 | 28 | dataset.sex = patients.sex 29 | dataset.age = patients.age_on(index_date) 30 | ``` 31 | Lines 8-12 mean "*I'm interested in all patients who were registered at a practice 32 | on the index date*"; line 14 "*Give me a column of data corresponding 33 | to the sex of each patient*"; and line 15 "*Give me a column of data corresponding 34 | to the age of each patient on the given date*". 35 | 1. If you type the following into your terminal: 36 | 37 | ```shell-session 38 | opensafely exec ehrql:v1 generate-dataset analysis/dataset_definition.py 39 | ``` 40 | 41 | and press ++enter++, you will see a new randomly generated dataset which now contains the additional `age` column. 42 | -------------------------------------------------------------------------------- /docs/git-workflow.md: -------------------------------------------------------------------------------- 1 | 2 | `git` is a command-line tool for recording, sharing and collaborating on code. 3 | 4 | [GitHub](https://github.com) is a Microsoft-owned company (and website) that makes it easy to use `git`, and adds extra collaboration and security tools on top. They also make GitHub Desktop, a convenient graphical user interface for git. 5 | 6 | GitHub provide good [guides and documentation about best practices](https://guides.github.com/). 
The [hello-world walkthrough](https://guides.github.com/activities/hello-world/) is essential reading for new users, and the [GitHub flow slideshow](https://guides.github.com/introduction/flow/) summarises some best practice. If you prefer learning via video, there is also a [GitHub YouTube Channel](https://www.youtube.com/channel/UCP7RrmoueENv9TZts3HXXtw). 7 | 8 | ## GitHub workflow 9 | 10 | The general workflow for making changes to your code is as follows: 11 | 12 | * Create a clone — a copy — of the existing code repository to work on 13 | * Create a new branch. A branch is a way for you to record and publish your own changes without breaking things for other people who are using the same code. It is also a good way of collecting changes ("commits") into a meaningful unit that can be reviewed by others. 14 | * Edit/add/delete files in the repo on that branch, committing regularly with informative commit messages. 15 | * Push the changes to GitHub, so that others can view the branch. 16 | * Continue to commit and push changes on that branch until you believe it's ready to be merged back into the main codebase that everyone uses. 17 | * Submit a pull request (PR), requesting that the branch be reviewed by somebody else. A PR is simply a way of viewing, commenting on, and approving code to be merged, "pulled" into the main codebase. 18 | * Watch out for the automated tests passing or failing! 19 | 20 | For a more generic overview, see [GitHub's own guidance](https://guides.github.com/introduction/flow/). 21 | 22 | ## Cloning the study code to your computer with GitHub Desktop 23 | 24 | Please follow [GitHub's instructions](https://docs.github.com/en/desktop/contributing-and-collaborating-using-github-desktop/adding-and-cloning-repositories/cloning-a-repository-from-github-to-github-desktop) 25 | for cloning study code with GitHub Desktop. 26 | 27 | ## Pushing changes to GitHub with GitHub Desktop 28 | 29 | 1. Open GitHub Desktop to view your repository.
When you make changes to files in 30 | your text editor and save them locally, you also see the changes in 31 | GitHub Desktop. To add all changes in all files to a single "*commit*", tick the 32 | checkbox at the top of the list.
33 | ![Committing files with GitHub Desktop.](images/getting-started-github-desktop-commit-all.png) 34 | 1. At the bottom of the list of changes, in the **Summary** field, type a short, 35 | meaningful description of the changes (this is called the *commit message*). 36 | Optionally, you can add more information about the change in the 37 | **Description** field. Press the blue button to make the commit.
38 | ![Writing a commit message with GitHub Desktop.](images/getting-started-github-desktop-commit-message.png) 39 | 1. Click **Push origin** to push your local changes to the remote repository on 40 | GitHub ![Pushing changes to GitHub with GitHub Desktop.](images/getting-started-github-desktop-push-to-github.png) 41 | -------------------------------------------------------------------------------- /docs/images/adding-codelist-id-tag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/adding-codelist-id-tag.png -------------------------------------------------------------------------------- /docs/images/adding-codelist-id-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/adding-codelist-id-version.png -------------------------------------------------------------------------------- /docs/images/code-review-main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/code-review-main.png -------------------------------------------------------------------------------- /docs/images/codelists-jobs-warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/codelists-jobs-warning.png -------------------------------------------------------------------------------- /docs/images/codespaces-create.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/codespaces-create.png -------------------------------------------------------------------------------- /docs/images/codespaces-options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/codespaces-options.png -------------------------------------------------------------------------------- /docs/images/codespaces-setup-screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/codespaces-setup-screen.png -------------------------------------------------------------------------------- /docs/images/codespaces-template.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/codespaces-template.png -------------------------------------------------------------------------------- /docs/images/create_new_workspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/create_new_workspace.png -------------------------------------------------------------------------------- /docs/images/example-dashboard-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/example-dashboard-chart.png -------------------------------------------------------------------------------- /docs/images/excel-export-csv.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/excel-export-csv.png -------------------------------------------------------------------------------- /docs/images/getting-started-codespaces-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-codespaces-button.png -------------------------------------------------------------------------------- /docs/images/getting-started-codespaces-commit-message.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-codespaces-commit-message.png -------------------------------------------------------------------------------- /docs/images/getting-started-codespaces-push-to-github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-codespaces-push-to-github.png -------------------------------------------------------------------------------- /docs/images/getting-started-codespaces-repository-additional-permissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-codespaces-repository-additional-permissions.png -------------------------------------------------------------------------------- /docs/images/getting-started-codespaces-setting-up.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-codespaces-setting-up.png -------------------------------------------------------------------------------- /docs/images/getting-started-codespaces-stage-changes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-codespaces-stage-changes.png -------------------------------------------------------------------------------- /docs/images/getting-started-codespaces-start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-codespaces-start.png -------------------------------------------------------------------------------- /docs/images/getting-started-create-repository-owner-name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-create-repository-owner-name.png -------------------------------------------------------------------------------- /docs/images/getting-started-create-repository-public-private.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-create-repository-public-private.png -------------------------------------------------------------------------------- /docs/images/getting-started-github-actions-tab.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-github-actions-tab.png -------------------------------------------------------------------------------- /docs/images/getting-started-github-actions-workflow-success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-github-actions-workflow-success.png -------------------------------------------------------------------------------- /docs/images/getting-started-github-desktop-commit-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-github-desktop-commit-all.png -------------------------------------------------------------------------------- /docs/images/getting-started-github-desktop-commit-message.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-github-desktop-commit-message.png -------------------------------------------------------------------------------- /docs/images/getting-started-github-desktop-push-to-github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/getting-started-github-desktop-push-to-github.png -------------------------------------------------------------------------------- /docs/images/good-pr-pic.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/good-pr-pic.png -------------------------------------------------------------------------------- /docs/images/job_request_8676_timings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/job_request_8676_timings.png -------------------------------------------------------------------------------- /docs/images/job_request_8680_all_noncoviddeath.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/job_request_8680_all_noncoviddeath.png -------------------------------------------------------------------------------- /docs/images/line-profiler-output-dummy-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/line-profiler-output-dummy-data.png -------------------------------------------------------------------------------- /docs/images/macos-docker-privileges-escalation-warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/macos-docker-privileges-escalation-warning.png -------------------------------------------------------------------------------- /docs/images/macos-docker-privileges-escalation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/macos-docker-privileges-escalation.png 
-------------------------------------------------------------------------------- /docs/images/macos-docker-skip-intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/macos-docker-skip-intro.png -------------------------------------------------------------------------------- /docs/images/macos-menu-bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/macos-menu-bar.png -------------------------------------------------------------------------------- /docs/images/pr-desc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/pr-desc.png -------------------------------------------------------------------------------- /docs/images/project-edit-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/project-edit-button.png -------------------------------------------------------------------------------- /docs/images/project-edit-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/project-edit-page.png -------------------------------------------------------------------------------- /docs/images/releases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/releases.png 
-------------------------------------------------------------------------------- /docs/images/run_jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/run_jobs.png -------------------------------------------------------------------------------- /docs/images/t1oos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/t1oos.png -------------------------------------------------------------------------------- /docs/images/token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/token.png -------------------------------------------------------------------------------- /docs/images/use-this-template-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/use-this-template-button.png -------------------------------------------------------------------------------- /docs/images/use-this-template.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/use-this-template.png -------------------------------------------------------------------------------- /docs/images/view_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/view_project.png 
-------------------------------------------------------------------------------- /docs/images/win-anaconda-prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/win-anaconda-prompt.png -------------------------------------------------------------------------------- /docs/images/win-docker-starting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/images/win-docker-starting.png -------------------------------------------------------------------------------- /docs/img/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/img/icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/install-linux.md: -------------------------------------------------------------------------------- 1 | # Linux Install Guide 2 | 3 | This is a series of guidelines for installing the OpenSAFELY CLI on Linux 4 | as opposed to concrete steps. 5 | 6 | This is partly because: 7 | 8 | * various Linux distributions have different ways of installing software 9 | * it is assumed that you are comfortable enough with your Linux installation to make appropriate installation decisions yourself 10 | 11 | ## Installing Python 12 | 13 | Your system package manager may already have a recent version of Python 3 installed. 14 | That version may be sufficient for installing OpenSAFELY. 15 | 16 | You can check via PyPI what versions of Python are currently supported.
17 | Check the "Requires" information on the [package page](https://pypi.org/project/opensafely/). 18 | 19 | Alternatively, you can use [pyenv](https://github.com/pyenv/pyenv) to install and manage additional Python versions. 20 | 21 | ## Installing Docker 22 | 23 | There are multiple ways you can install Docker: 24 | 25 | * your distribution's package manager may have a version of Docker Engine available, 26 | though this may be slightly older than the current version that Docker offer themselves 27 | * Docker themselves provide [up-to-date package repositories](https://docs.docker.com/engine/install/) for installing Docker Engine 28 | * Docker offer [Docker Desktop for Linux](https://docs.docker.com/desktop/linux/install/). 29 | 30 | !!! warning 31 | 32 | Docker Desktop does have license restrictions for some commercial use. 33 | Check Docker's [license agreement](https://docs.docker.com/subscription/#docker-desktop-license-agreement). 34 | 35 | ## Installing the OpenSAFELY CLI 36 | 37 | It is useful to use [pipx](https://github.com/pypa/pipx) to install Python applications. 38 | 39 | pipx installs Python software into a Python virtual environment. 40 | pipx allows you to isolate Python package installations for different software, 41 | and still easily run that software. 42 | 43 | Install the [OpenSAFELY CLI](opensafely-cli.md) with pipx: 44 | 45 | ``` 46 | pipx install opensafely 47 | ``` 48 | 49 | !!! note 50 | 51 | If you installed a newer version of Python than available on your system via pyenv, 52 | you may want to install the OpenSAFELY CLI using that specific Python version. 53 | 54 | You may do this via a command of the form: 55 | 56 | ``` 57 | pipx install opensafely --python ~/.pyenv/shims/python3.10 58 | ``` 59 | 60 | The actual Python version might vary. 61 | 62 | 63 | Test the installation of OpenSAFELY CLI. 
64 | This should print out the usage and available sub commands: 65 | 66 | ``` shell-session 67 | $ opensafely --help 68 | usage: opensafely [-h] [--version] COMMAND ... 69 | 70 | optional arguments: 71 | -h, --help show this help message and exit 72 | --version show program's version number and exit 73 | 74 | available commands: 75 | 76 | COMMAND 77 | help Show this help message and exit 78 | run Run project.yaml actions locally 79 | codelists 80 | Commands for interacting with https://www.opencodelists.org/ 81 | ``` 82 | 83 | You're done! 84 | 85 | Now you can navigate to a research repo on your local machine, 86 | and [use `opensafely` via the command line](opensafely-cli.md#using-opensafely-at-the-command-line). 87 | -------------------------------------------------------------------------------- /docs/install-macos.md: -------------------------------------------------------------------------------- 1 | # macOS Install Guide 2 | 3 | !!! note "This guide was created using macOS 11.1" 4 | It is expected that this guide should work from 10.15 upwards but has only been tested with 11.1 5 | 6 | Use our [guide to install Python](install-python.md) first. 7 | 8 | ## OpenSAFELY CLI 9 | Then install the [OpenSAFELY CLI](opensafely-cli.md) with pip: 10 | 11 | ```bash 12 | pip install opensafely 13 | ``` 14 | 15 | And test the installation: 16 | 17 | ```bash 18 | opensafely --help 19 | ``` 20 | 21 | If it is functioning, it should print out the usage and available sub commands: 22 | 23 | ``` 24 | usage: opensafely [-h] [--version] COMMAND ... 
25 | 26 | optional arguments: 27 | -h, --help show this help message and exit 28 | --version show program's version number and exit 29 | 30 | available commands: 31 | 32 | COMMAND 33 | help Show this help message and exit 34 | run Run project.yaml actions locally 35 | codelists 36 | Commands for interacting with https://www.opencodelists.org/ 37 | ``` 38 | 39 | ## Docker for Mac 40 | Set up Docker by opening the app you installed earlier: 41 | 42 | ```bash 43 | open /Applications/Docker.app 44 | ``` 45 | 46 | You'll be warned about the system dialogue which is about to pop up, choose "OK". 47 | 48 | ![macOS prompting for Docker's privileged access.](./images/macos-docker-privileges-escalation-warning.png) 49 | 50 | 51 | Enter your password and click "Install Helper". 52 | 53 | ![macOS prompting that Docker is installing a new helper tool.](./images/macos-docker-privileges-escalation.png) 54 | 55 | 56 | Now that the Docker application is open you can click "Skip tutorial" and close the window. 57 | The Docker service will continue to run in the background and can be accessed from the Docker icon in your menu bar. 58 | 59 | ![macOS Docker's getting started screen.](./images/macos-docker-skip-intro.png) 60 | 61 | 62 | You're done! 63 | 64 | Now you can navigate to a research repo, on your local machine, and [use `opensafely` via the command line](opensafely-cli.md#using-opensafely-at-the-command-line). 65 | 66 | ## Older style pipx installations 67 | 68 | Earlier versions of this document used pipx to install `opensafely`. If you are looking to update such a pipx-style installation and are having problems with `opensafely upgrade`, you could also try `pipx reinstall opensafely --python ~/.pyenv/shims/python3.10`. 69 | -------------------------------------------------------------------------------- /docs/install-python.md: -------------------------------------------------------------------------------- 1 | !!! 
warning 2 | **Please read even if you already have Python installed** 3 | 4 | For security, consistency, and readability, OpenSAFELY provides an API built in [**Python**](https://www.python.org/) for using the platform. 5 | This API includes script-based functions for specifying the patients and variables that make up a study dataset (using [ehrQL](ehrql/index.md)), 6 | and command line functions for importing codelists, generating dummy data, and testing that the study definition can be run successfully on the server. 7 | **Python version 3.7 or higher** must be installed on your machine to perform these tasks. 8 | 9 | Many functions are provided in a Python module called `opensafely` which will also need to be installed — see the [`opensafely` CLI section](opensafely-cli.md) for more details. 10 | 11 | ## Windows 12 | For Windows users, we recommend that you install [Anaconda (Individual Edition)](https://www.anaconda.com/products/individual), a popular Python distribution that includes a recent version of Python, many useful Python packages, and an environment manager. 13 | This will help avoid some fiddly annoyances when dealing with multiple versions/installations of Python. 14 | 15 | To install: 16 | 17 | 1. [Download and run the Anaconda Python 18 | installer](https://docs.anaconda.com/anaconda/install/windows/). 19 | Accept the default/recommended settings unless you understand the consequences of changing them. 20 | This should have added Python and Anaconda Prompt to your machine (as well as a few other things). 21 | 1. When you're done, to verify your installation, open Anaconda Prompt by 22 | clicking Start, search, or selecting Anaconda Prompt (or Anaconda Powershell) 23 | from the menu. ![Finding Anaconda Prompt on 24 | Windows](images/win-anaconda-prompt.png) 25 | To verify that you can run Python with Anaconda Prompt, open it and run `python --version`. 26 | 27 | You should use the _Anaconda Prompt_ whenever you want to use the `opensafely` package.
28 | Go to the [`opensafely` CLI section](opensafely-cli.md) for instructions on how to install this module. 29 | 30 | ## macOS 31 | 32 | !!! note "This guide was created using macOS 11.1" 33 | It is expected that this guide should work from 10.15 upwards but has only been tested with 11.1 34 | 35 | Open Terminal.app by clicking the magnifying glass icon in the top right of your screen. 36 | Type `terminal` and hit ++enter++. 37 | 38 | ### Homebrew 39 | Install [Homebrew](https://brew.sh/), this should install the Xcode Command Line Tools for you as well. 40 | 41 | !!! note "This command might take a while to run depending on the speed of your internet connection." 42 | 43 | ```bash 44 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" 45 | ``` 46 | 47 | Once homebrew is installed use it to install [pyenv](https://github.com/pyenv/pyenv): 48 | 49 | ```bash 50 | brew install pyenv 51 | ``` 52 | 53 | Next, install [Docker for Mac](https://docs.docker.com/docker-for-mac/install/), [GitHub Desktop](https://desktop.github.com/), and [Visual Studio Code](https://code.visualstudio.com/): 54 | 55 | ```bash 56 | brew install --cask docker github visual-studio-code 57 | ``` 58 | 59 | ### pyenv 60 | Configure your shell to use pyenv: 61 | 62 | !!! note 63 | If you are using a shell other than ZSH you will need to edit and source 64 | the appropriate config file. pyenv has documentation for getting set up 65 | on [various shells](https://github.com/pyenv/pyenv#set-up-your-shell-environment-for-pyenv). 66 | 67 | ```bash 68 | echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.zshrc 69 | echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.zshrc 70 | echo 'eval "$(pyenv init -)"' >> ~/.zshrc 71 | ``` 72 | 73 | ### Python 74 | Use pyenv to install Python: 75 | 76 | !!! note "This command might take a while to run depending on the speed of your computer." 
77 | 78 | ```bash 79 | pyenv install 3.10:latest 80 | ``` 81 | 82 | Look for the line `Installing Python-3.10.n...` (where `n` is a number). 83 | This is the full version it has installed for you, eg `3.10.1`. 84 | 85 | Then enable this version (eg `3.10.1`) in pyenv: 86 | 87 | ```bash 88 | pyenv global system 3.10.1 89 | ``` 90 | -------------------------------------------------------------------------------- /docs/install-visual-studio-code.md: -------------------------------------------------------------------------------- 1 | # Visual Studio Code as a code editor 2 | 3 | When writing code in the OpenSAFELY framework, a code editor will help. 4 | GitHub users already use Visual Studio Code by default in a GitHub codespace. 5 | 6 | Visual Studio Code is also available free of charge for Windows, macOS 7 | and Linux. If you are working on your own computer and **not** in 8 | GitHub, and are already comfortable using another code editor, then that 9 | editor will be suitable. 10 | 11 | There are several [short videos and written 12 | guides](https://code.visualstudio.com/docs#first-steps) elsewhere on how 13 | to use more of Visual Studio Code's features. 14 | 15 | ## Installing Visual Studio Code 16 | 17 | Download and install [Visual Studio Code](https://code.visualstudio.com/download). 
18 | 19 | ## Opening a research repository in Visual Studio Code 20 | 21 | Start Visual Studio Code and use the menu to open a clone of your research repository 22 | (**File > Open Folder...**) 23 | -------------------------------------------------------------------------------- /docs/js/extra.js: -------------------------------------------------------------------------------- 1 | function getTextWithoutPromptAndOutput(targetSelector) { 2 | const targetElement = document.querySelector(targetSelector); 3 | 4 | // exclude "Generic Prompt" ("gp") and "Generic Output" ("go") spans from the copied text 5 | const excludedClasses = ["gp", "go"]; 6 | 7 | const clipboardText = []; 8 | [...targetElement.childNodes].map((node) => { 9 | // If the node does not carry any of the excluded classes, 10 | // add its text content to the clipboard text array 11 | if ( 12 | !excludedClasses.some((className) => node?.classList?.contains(className)) 13 | ) { 14 | return clipboardText.push(node.textContent); 15 | } 16 | 17 | return null; 18 | }); 19 | 20 | return clipboardText.join("").trim(); 21 | } 22 | 23 | function patchCopyCodeButtons() { 24 | // select all "copy" buttons whose target selector ends with "code" (i.e. points at a code element) 25 | [ 26 | ...document.querySelectorAll( 27 | `button.md-clipboard[data-clipboard-target$="code"]`, 28 | ), 29 | ].map((btn) => 30 | btn.setAttribute( 31 | "data-clipboard-text", 32 | getTextWithoutPromptAndOutput(btn.dataset.clipboardTarget), 33 | ), 34 | ); 35 | } 36 | 37 | document.addEventListener("DOMContentLoaded", () => { 38 | patchCopyCodeButtons(); 39 | }); 40 | 41 | /** 42 | * Clone the existing footer navigation into the main content section (and hide the original) to make the buttons more visible. 43 | * Material for MkDocs doesn’t seem to have any options to do this natively.
44 | * @returns {void} 45 | */ 46 | function moveNavButtons() { 47 | /** @type {HTMLDivElement} */ 48 | const contentArea = document.querySelector(`[data-md-component="content"]`); 49 | /** @type {HTMLElement} */ 50 | const footerNav = document.querySelector(`[aria-label="Footer"]`); 51 | /** @type {HTMLElement} */ 52 | const footClone = footerNav.cloneNode(true); 53 | footClone.classList.add("footer-nav-buttons"); 54 | contentArea.appendChild(footClone); 55 | footerNav.setAttribute("hidden", "true"); 56 | } 57 | 58 | /** @type {string[]} Path prefixes of the pages whose footer navigation is also shown inside the content area. */ 59 | const buttonPaths = [ 60 | "/ehrql/tutorial/", 61 | "/getting-started/tutorial/", 62 | "/outputs/output-checking/", 63 | "/outputs/releasing-overview/", 64 | "/outputs/requesting-file-release/", 65 | "/outputs/sdc/", 66 | "/outputs/viewing-released-files/", 67 | ]; 68 | /** @type {string} Path of the page currently being viewed, matched against buttonPaths below. */ 69 | const docPath = window.location.pathname; 70 | 71 | for (const path of buttonPaths) { 72 | if (docPath.startsWith(path)) { 73 | moveNavButtons(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /docs/legacy/requesting-release-offline-process.md: -------------------------------------------------------------------------------- 1 | !!! note 2 | This page describes the process used for requesting release of file prior to 3 | Airlock. New release requests should be made using [Airlock](../using-opensafely/viewing-and-releasing-outputs/viewing-and-releasing-with-airlock/index.md) wherever possible. 4 | 5 | 6 | ### Create a folder for outputs 7 | 8 | First, create one folder in your workspace called `release` (if you have previously made a release, we suggest appending the date to the new folder name to distinguish it) and copy from your `output` folder to this `release` folder the data files that require review. The number of study outputs requested for review must be kept to a minimum and include only the results you absolutely need to export from the secure server.
9 | 10 | ### Complete a output review request form 11 | 12 | When you are ready to request a release of your aggregated results please [complete this form](../documents/OpenSAFELY_Output_Review_Form_ADD_WORKSPACE_NAME_ADD_DATE.docx), renaming the form to replace the placeholders with your workspace name and the date. 13 | 14 | #### Context requirements 15 | 16 | For each output wishing to be released you will need to provide a clear contextual description including: 17 | 18 | 1. The file path for each output 19 | 2. Variable descriptions 20 | 3. A description and count of the underlying sample of the population for each output. 21 | 4. Population size and degrees of freedom for all regression outputs. 22 | 5. Relationship to other data/tables which through combination may introduce secondary disclosive risks. 23 | 24 | Each section in the review request form should normally describe a single file, but where necessary for similar files, these can be grouped together and wildcards can be used for the file path (e.g. `release/hospitalisation_rate_by_*.csv`). **If you use a wildcard, please indicate how many files this captures**. 25 | 26 | ### Checklist 27 | 28 | Please run through [the checklist](../outputs/requesting-file-release.md#checklist) before making a review request. In addition, check: 29 | 30 | 1. Are all of the outputs in a [separate release folder](#create-a-folder-for-outputs)? 31 | 1. Are all of the outputs clearly described? 32 | * Is the filename sensible and is the filepath provided in the request form correct? 33 | * Have you provided all of the context needed to review each output in isolation in the request form? 34 | * Have you described the disclosure controls you have applied to each output? 35 | 36 | ### Submitting the form 37 | 38 | Once you have completed this form, please send it to ****. 
The requested outputs will undergo independent review by two OpenSAFELY output checkers who will check that the outputs are within the scope of your original project proposal and that they do not present any disclosure risks. **Please allow up to 5 working days for feedback on your request**. 39 | 40 | 41 | ### Responding to requests 42 | 43 | Once reviewed, the completed review request will be emailed back to you. We aim to provide a response to review requests within **5 working days**. If all outputs are approved, they will then be released. If one or more outputs are approved subject to change, you will need to address the disclosure issues and submit a new review form detailing the changes you have made. 44 | -------------------------------------------------------------------------------- /docs/legacy/study-def-codelists.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | boost: 0.001 4 | --- 5 | ---8<-- 'includes/cohort-extractor-deprecated.md' 6 | 7 | A *Codelist* is a collection of clinical codes that classifies patients as having certain conditions or demographic properties. For example, in a clinical system, an asthma diagnosis may be indicated by [any of more than 100 codes](https://www.opencodelists.org/codelist/primis-covid19-vacc-uptake/ast/v1/#full-list). 8 | 9 | Codelists must be stored as data within your study repository, from where they can be used in your study definition. 10 | 11 | To help you create, edit, and manage codelists, OpenSAFELY provides a web-based tool called [OpenCodelists](https://www.opencodelists.org). For more information about how to create and edit codelists on the website, see the [codelists documentation](../codelist-intro.md). 12 | 13 | ## Pulling codelists into your study definition 14 | 15 | Many functions for defining variables take *codelists* as arguments.
16 | 17 | Codelists live as CSV files in the `codelists/` directory (see instructions in the [Adding codelists to a project](../codelist-project.md) page). 18 | 19 | Codelists are loaded into variables as follows: 20 | 21 | ```py 22 | chronic_cardiac_disease_codes = codelist_from_csv( 23 | "codelists/opensafely-chronic-cardiac-disease.csv", system="ctv3", column="CTV3ID" 24 | ) 25 | ``` 26 | 27 | You should put code that creates codelist variables before your `StudyDefinition()`, so it can refer to them, and users know where to look. 28 | 29 | You can do this in `analysis/study_definition.py`, but we recommend that you put all your codelist definitions into a file called `codelists.py` and import it at the top of your file: 30 | 31 | ```py 32 | from codelists import * 33 | ``` 34 | 35 | This keeps it cleaner and easier to read. 36 | 37 | ## Combining codelists 38 | 39 | Codelists can be combined where appropriate. 40 | This has the advantage of keeping codelists separate for some studies but easily combining them for others. 41 | 42 | 43 | Codelists can be combined using the `combine_codelists` function from `cohortextractor`, for example: 44 | 45 | ```py 46 | from cohortextractor import combine_codelists 47 | 48 | all_cardiac_disease_codes = combine_codelists( 49 | chronic_cardiac_disease_codes, 50 | acute_cardiac_disease_codes 51 | ) 52 | ``` 53 | 54 | ## Using a single code 55 | 56 | In some cases you may only want a variable to use one or two codes, e.g. you want to look at use of a single code within a codelist. You can create a codelist object directly as follows: 57 | 58 | ```py 59 | weight_codes = codelist( 60 | ["27113001", "162763007"], system="snomed" 61 | ) 62 | ``` 63 | 64 | Whilst you can pass this a list of codes of any length, we recommend building or using existing codelists on OpenCodelists for ease of discoverability and reproducibility.
65 | -------------------------------------------------------------------------------- /docs/legacy/study-def-flowcharts.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | boost: 0.001 4 | --- 5 | ---8<-- 'includes/cohort-extractor-deprecated.md' 6 | 7 | ## Flowcharts (temporary workaround) 8 | 9 | Many studies will require a flowchart to show inclusion/exclusion of patients in the study. Eventually the numbers of patients excluded/included will be summarised automatically following cohort extract, but for now, a slightly manual approach is required: 10 | 11 | - Make a copy of the study definition (called `study_definition_flow_chart.py`). The `population=patients.satisfying()` function should be replaced with `population=patients.all()`. Then all variables except for those that appeared in the population definition logic should be removed (this will mean that it runs much faster than the main study definition). An example of such a study definition can be seen in [this repository on NSAIDS use and COVID-related outcomes](https://github.com/opensafely/nsaids-covid-research/commit/e5ad58c72926d7c73ba131099486409f6876883d). 12 | - Then write a script that reads the `input_flow_chart.csv` and then sequentially drops each of the variables and counts the remaining population, in whatever order you'd like to report them; [Stata example](https://github.com/opensafely/nsaids-covid-research/blob/23069312944ea1fc6d79ec4d9b45eea25df96ab0/analysis/flowchart_numbers.do). 13 | -------------------------------------------------------------------------------- /docs/level-4-server.md: -------------------------------------------------------------------------------- 1 | All outputs from OpenSAFELY pipelines are subject to [tiered levels of scrutiny](security-levels.md), to provide assurance that identifiable data is not leaked accidentally, or maliciously. 
2 | 3 | The final tier is review of so-called "Level 4" outputs, where the OpenSAFELY framework stores outputs labelled as `moderately_sensitive` in the `project.yaml` file. 4 | 5 | If you have Level 4 access you can use [Airlock](outputs/viewing-with-airlock.md) to review your aggregated results and request files to be released. 6 | 7 | 8 | !!! note 9 | 10 | **Mac users** 11 | 12 | If intending to use a Mac for Level 4 access, please check your 13 | hardware is suitable first. 14 | 15 | Level 4 access requires a working Windows installation. Mac users 16 | with older *Intel* hardware have had success in accessing Level 4 17 | when running Windows in a virtual machine. 18 | 19 | However, Mac users with newer Macs that have *Apple* processors — 20 | for example, the M1 processor — can only run Windows in a virtual 21 | machine via the Windows on ARM release: **this configuration is 22 | currently incompatible with the client necessary for Level 4 23 | access.** 24 | 25 | Macs with Apple processors are still suitable for writing, testing 26 | and submitting OpenSAFELY code to be run on the secure server. 27 | **This issue only affects access to Level 4 server.** 28 | 29 | See [this support 30 | discussion](https://github.com/opensafely/documentation/discussions/251#discussioncomment-1767887) 31 | for a description of the problem. 32 | -------------------------------------------------------------------------------- /docs/open-data-manifesto.md: -------------------------------------------------------------------------------- 1 | 2 | ## Better Data for the NHS 3 | 4 | Operational research is key to understanding what works in the NHS. However the methods and code used to carry out this research are rarely seen in public. Standards are variable. One-off point-and-click analytics are common. This living document aims to start a discussion about best practice for research and analysis using NHS data. Send us your feedback! 
[bennett@phc.ox.ac.uk](mailto:bennett@phc.ox.ac.uk) 5 | 6 | ### Publish everything 7 | * **Share** your methods and code, so other teams can review it and learn. 8 | * Permit other people to **re-use** your code and content. 9 | * Publish **imperfect** code fearlessly. 10 | * Work in public from the **start** where possible. 11 | 12 | 13 | ### Make collaboration easy 14 | * Use Jupyter notebooks or R Markdown to **tell a story** for generalists _and_ analysts. 15 | * Do the hard work to make it **easy** for others to run the code. 16 | * Use code **comments** to inform a technical audience. 17 | * **Document** key decisions alongside your code. 18 | * Maintain a single **repository** for each project, with a _README_. 19 | * Develop a **supportive** culture. 20 | 21 | ### Automate tasks with code where possible 22 | * Use **scripts**, not point-and-click tools. 23 | * Make **live** dashboards rather than occasional reports. 24 | * Turn recurring work into **libraries** and share them. 25 | 26 | ### Use coders' best practice 27 | * Do regular code **reviews**. 28 | * Use **version control**. 29 | * Write **tests** for your code. 30 | * Help develop and follow local **conventions** around coding. 31 | -------------------------------------------------------------------------------- /docs/open-methods.md: -------------------------------------------------------------------------------- 1 | 2 | We are using modern open working methods to carry out important analyses whilst preserving patient privacy and keeping all patient data secure. 3 | 4 | ## What do we mean by Open Working Methods? 5 | 6 | We believe that researchers should be openly sharing all analytic code and development insights in order to accelerate development of analyses and tools by other groups with other datasets. 7 | 8 | Our tools are built using Python, SQL and Docker. Analyses can be carried out in Python, R or Stata.
9 | All our code including analytic code is shared on GitHub and is open access for efficiency, reuse and collaboration. 10 | We encourage external review and reuse of our code. 11 | -------------------------------------------------------------------------------- /docs/outputs/index.md: -------------------------------------------------------------------------------- 1 | ## Viewing research outputs 2 | 3 | - [Viewing outputs with Airlock](viewing-with-airlock.md) 4 | 5 | 6 | ## Releasing research outputs 7 | 8 | - [Overview](releasing-overview.md) 9 | - [Applying statistical disclosure control](sdc.md) 10 | - [Requesting release of research outputs](requesting-file-release.md) 11 | - [Review process for release requests](output-checking.md) 12 | - [Viewing released outputs](viewing-released-files.md) 13 | 14 | ## Using Airlock 15 | - [Using Airlock to view and release outputs](../using-opensafely/viewing-and-releasing-outputs/viewing-and-releasing-with-airlock/index.md) 16 | -------------------------------------------------------------------------------- /docs/outputs/output-checking.md: -------------------------------------------------------------------------------- 1 | Before any files are released from the secure server, they are checked independently by two trained OpenSAFELY output checkers. Each checked output is marked as one of the following categories: 2 | 3 | * **Approve** — output meets disclosure requirements and is safe to be released 4 | * **Request changes** — output is an acceptable type for release, but has outstanding disclosure issues that must be addressed before release 5 | * **Reject** — output is not an acceptable type for release. An example is the release of practice level data which does not meet the [permitted study results policy](https://www.opensafely.org/policies-for-researchers/#permitted-study-results-policy) 6 | 7 | ### Responding to requests 8 | Requests submitted via Airlock will also be reviewed by output checkers on Airlock.
If 9 | the output checkers require changes or have questions about the requested files, they 10 | will return the release request to you. You will receive an email notification when this happens. 11 | 12 | For further information on how to submit and respond to returned requests, please see the 13 | documentation on [releasing with Airlock](../using-opensafely/viewing-and-releasing-outputs/viewing-and-releasing-with-airlock/index.md). 14 | 15 | ### Most common problems with output review requests 16 | 17 | Below are the most common problems encountered by output checkers when reviewing output review requests. **Avoiding these issues makes it more likely your files can be released first time round**, saving reviewer time and allowing quicker file release for you and other researchers. 18 | 19 | 1. **There are unrounded counts in the outputs**. All counts should be [rounded](sdc.md#rounding-counts). This includes rounding counts prior to them being used to calculate further statistics, such as percentages or odds ratios. Commonly raw counts are rounded, but downstream statistics are calculated using the raw counts rather than the rounded counts. Unrounded counts account for **~30%** of rejections. 20 | 2. **Insufficient context is provided for the outputs**. **~25%** of rejected outputs are due to insufficient context. Make sure you have provided all of the context needed to review each output in isolation in the request form. Common errors include: 21 | * Using unclear column/variable names or poorly describing the presented data. Refer to the [context requirements](requesting-file-release.md#context-and-controls). 22 | * Not clearly indicating the relationship between different outputs. 23 | * Where an output has previously been requested, not indicating how the output differs from the previously reviewed version. 24 | 3. **There are unredacted counts in the outputs**.
Prior to rounding counts, [any counts <=7 should be redacted](sdc.md#redacting-counts-less-than-or-equal-to-7). The redaction approach should be clearly described when making a review request. It is not uncommon for the stated redaction approach to be improperly implemented in the outputs. Inappropriate redaction of low counts accounts for **~20%** of rejected outputs. 25 | 4. **Underlying data is not provided**. To ensure the low number threshold is met, reviewers require to see the underlying data for each output. This includes the data used to generate figures and to calculate summary statistics such as mean or median. **~10%** of rejected outputs are due to underlying data not being provided. 26 | 5. **Unsupported file types being requested**. Files requested for release should be one of the [allowed file types](requesting-file-release.md#allowed-file-types). If you are requesting the release of HTML files, please make sure you have followed the [guidance for HTML files](requesting-file-release.md#allowed-file-types). **~10%** of rejected outputs are due to unsupported file types being requested. (Note: Airlock will automatically restrict output files in a request to only allowed file types.) 27 | 28 | To help avoid these issues, please make sure you have read the [checklist](requesting-file-release.md#checklist) before submitting your review request. 29 | -------------------------------------------------------------------------------- /docs/outputs/releasing-overview.md: -------------------------------------------------------------------------------- 1 | OpenSAFELY follows the [Five Safes](../five-safes.md) framework for data access to allow safe and efficient use of data. 
2 | 3 | The Five Safes are: 4 | 5 | - Safe projects 6 | - Safe people 7 | - Safe data 8 | - Safe settings 9 | - **Safe outputs** 10 | 11 | When we release files from the Level 4 server, we need to take particular care of the 12 | **Safe Outputs** dimension of the Five Safes framework, which assesses any residual risk of disclosure of patient information in outputs requested for release from the secure environment. This risk is minimised by researchers applying **statistical disclosure controls** to their research outputs, followed by **output checking** of these outputs by our team of trained output checkers. 13 | 14 | In OpenSAFELY, there are 4 key “Safe Outputs” activities: 15 | 16 | **1. [Apply statistical disclosure controls](sdc.md)** 17 | 18 | Researchers must apply statistical disclosure controls to their research outputs. 19 | 20 | **2. [Requesting release of outputs](requesting-file-release.md)** 21 | 22 | Researchers must follow a defined procedure for requesting release of outputs from the Level 4 server. This includes: 23 | 24 | - only requesting release of files that are necessary to fulfil the purpose of a project 25 | - describing the context (why the files are requested for release) and statistical disclosure controls applied 26 | - restricting files to specific allowed types 27 | - limits on file size and number of rows in tables 28 | - Airlock, a dedicated OpenSAFELY tool for managing the release request and review process 29 | 30 | **3. [Output checking](output-checking.md)** 31 | 32 | Review of the requested outputs by two trained OpenSAFELY output checkers. 33 | 34 | **4. [Restricted viewing of released files](viewing-released-files.md)** 35 | 36 | Outputs that meet our disclosure rules and have undergone thorough output checking are 37 | released to the relevant workspace on the [Jobs site](../jobs-site.md).
Viewing of 38 | released outputs is restricted to individuals with the relevant roles on the jobs 39 | site, and is not publicly accessible until outputs are published. 40 | -------------------------------------------------------------------------------- /docs/outputs/viewing-released-files.md: -------------------------------------------------------------------------------- 1 | All approved OpenSAFELY outputs are released to the workspace they belong to on the [Jobs site](../jobs-site.md). 2 | 3 | ### Viewing released outputs 4 | 5 | View your released outputs by navigating to "Released Outputs" in the "Releases" section of your workspace on the Jobs site. 6 | 7 | These outputs can be shared with project collaborators and published in line with our [data sharing and publication policy](https://www.opensafely.org/policies-for-researchers/#acknowledgment-and-data-sharing--publication-policy). Please note that you should check this for each dataset that you have used: rules may vary. 8 | 9 | ### Running further analyses on released outputs 10 | 11 | If you have had [aggregated results released](requesting-file-release.md#release-of-aggregated-results-to-be-used-to-generate-final-outputs) and you wish to run further analyses on them, such as reformatting figures, there are a few things to consider. 12 | 13 | 1. You should include the code for these steps in your GitHub repo. 14 | 2. You **should not** commit any of the released outputs (including final processed charts/tables) to your GitHub repo. Make sure to include them in the `.gitignore` file. 15 | 3. Consider adding the code as an action in your project pipeline. 
16 | 17 | ### Reporting a data breach 18 | 19 | If you discover files released to the Jobs site that have been insufficiently redacted and still contain sensitive information, you should immediately contact and email the following (providing as much information as possible): Amir Mehrkar (); Ben Goldacre (); [disclosurecontrol@opensafely.org](mailto:disclosurecontrol@opensafely.org); and your co-pilot. Ensure you do not share these files and if they have already been shared please identify as best as possible with whom they have been shared. 20 | -------------------------------------------------------------------------------- /docs/outputs/viewing-with-airlock.md: -------------------------------------------------------------------------------- 1 | Research outputs and log files can be viewed from within the 2 | secure environment using Airlock. 3 | 4 | The [Airlock documentation](../using-opensafely/viewing-and-releasing-outputs/viewing-and-releasing-with-airlock/index.md) provides information on how to 5 | [log in](../using-opensafely/viewing-and-releasing-outputs/viewing-and-releasing-with-airlock/how-tos/access-airlock.md) to Airlock and how to [view moderately sensitive workspace outputs](../using-opensafely/viewing-and-releasing-outputs/viewing-and-releasing-with-airlock/how-tos/view-workspace-files.md). 6 | 7 | ## Log files 8 | 9 | In addition to workspace output files, you can also view log 10 | files from your jobs on Airlock (including logs from failed 11 | jobs). 12 | 13 | To view log files, [navigate to your workspace](../using-opensafely/viewing-and-releasing-outputs/viewing-and-releasing-with-airlock/how-tos/view-workspace-files.md) in Airlock. 14 | Log files are found in the `metadata/` folder in your workspace, 15 | and are named with the name of the action from the `project.yaml` file. 
16 | -------------------------------------------------------------------------------- /docs/paper_template.txt: -------------------------------------------------------------------------------- 1 | --- 2 | # the full paper title, but remove any extra subtitles added by the journal 3 | title: "Computing and statistics in the eighteenth century" 4 | 5 | # the date the paper was published on the journal's website 6 | date: "1850-06-24" 7 | 8 | # List of authors 9 | authors: 10 | - Ada Lovelace # Enter each authors full name in plain text 11 | - Florence Nightingale # One author per line 12 | 13 | # List of categories 14 | categories: 15 | - OpenSAFELY # Enter each category name in plain text 16 | - OpenPrescribing # One category per line 17 | 18 | # Full citation for the paper - this can usually be obtained from the online version of the paper or from a reference manager 19 | citation: "Lovelace A, Nightingale F. Computing and statistics in the eighteenth century: a Primer. Journal of Theory and Practice, XX(x)X.123 1850" 20 | 21 | # A short description that will display under the paper title across the 22 | # Bennett website and when the link is shared 23 | # For examples see https://www.opensafely.org/research/ 24 | description: "This paper investigated..." 
25 | 26 | # the DOI identifier for the paper *after* the `https://doi.org/` 27 | doi: "10.1111/ABC(12).16030" 28 | 29 | # Paper details from the journal, if published 30 | paper: 31 | # the full title as it is displayed on the journal website 32 | title: "Computing and statistics in the eighteenth century: A Primer" 33 | # the name of the journal if the paper is published 34 | journal: "Journal of Theory and Practice" 35 | 36 | # If the paper is now published, enter information of the preprint version of the paper 37 | preprint: 38 | title: "Computing and statistics in the eighteenth century" # Preprint title 39 | doi: "abc-101232" # Preprint DOI 40 | link: https://www.preprintserver.com/abc123 # Link to preprint version of the paper 41 | 42 | # If the paper used OpenSAFELY, it should be linked to a project and repo 43 | opensafely: 44 | project: 123 # the project number from the Approved Projects page - see https://www.opensafely.org/approved-projects/ 45 | repo: "test-repo" # the name of the repo in the github.com/opensafely org 46 | 47 | # The slug is the second part of the url after the 48 | # The slug should match the DOI, but with the following amendments: 49 | # - If the DOI has brackets in it, remove them from the slug 50 | # - If the DOI has upper case characters, replace them with lowercase 51 | # For example, a DOI of the form "10.1111/ABC(12).16030", becomes "10.1111/abc12.16030" 52 | slug: "10.1111/abc12.16030" 53 | 54 | # Set the status of the paper to "preprint" or "published" 55 | status: "published" 56 | 57 | --- 58 | 59 | ## Markdown-formatted abstract goes here! 60 | -------------------------------------------------------------------------------- /docs/plan-s.md: -------------------------------------------------------------------------------- 1 | ### Academic Journal Destination: Plan S and OpenSAFELY 2 | 3 | [Plan S](https://www.coalition-s.org/) is an initiative for Open Access publishing. 
Plan S requires that scientific publications that result from research funded by public grants must be published in compliant Open Access journals or platforms. Wellcome are a core funder of the OpenSAFELY platform, therefore we ask that academic outputs comply with Wellcome's Plan S requirements for journal publication. It is likely that teams will be required to comply with this for other reasons, such as receiving funding from NIHR / UKRI. For further details see [openaccess.ox.ac.uk/wellcome](https://openaccess.ox.ac.uk/wellcome). 4 | 5 | ### How do I check journal open access compliance? 6 | 7 | Before you submit a paper to a journal, please check open access arrangements. 8 | 9 | [journalcheckertool.org](https://journalcheckertool.org) is our recommended tool for checking journal compliance. 10 | 11 | If you have any questions, please ask your co-pilot and post in the `#opensafely-users` Slack channel. 12 | -------------------------------------------------------------------------------- /docs/project-changes.md: -------------------------------------------------------------------------------- 1 | **Here is some information about the new process for telling us about changes to your project:** 2 | 3 | **Project Update Form** 4 | 5 | When leaving your research position or if you are ending a project, a step by step guide for handing over a **‘single’** OpenSAFELY project will need to be adhered to by completing a [Project Update Form](https://docs.google.com/document/d/1WqABEzk6sfmjO1Fyekj55aChwaVm6qHDw5A1QgR7R84/edit). Completion of this form is required for any project that meets any of the following circumstances: 6 | 7 | 1. **One or more researchers are leaving their research position at their home institution.** 8 | 1. **One or more roles within the project need to be handed over (e.g. study lead, researcher, etc.)** 9 | 1. **The project can no longer continue i.e. the project needs to be either postponed, or retired.** 10 | 1. 
**The project is completed.** 11 | 12 | This form must be returned by the Study Lead. If you are not the study lead, this form must be completed in collaboration with them (or, as a minimum, reviewed by them). For any questions that arise while filling in this form, researchers are advised to contact their Co-pilot in the first instance, or the OpenSAFELY Research Administrator via the applications inbox applications@opensafely.org. 13 | -------------------------------------------------------------------------------- /docs/protocol.md: -------------------------------------------------------------------------------- 1 | ## Pre-specifying your Research Question and Writing a Study Protocol 2 | 3 | !!! note 4 | This section is a work in progress, and will be further developed. 5 | 6 | Briefly, pre-specifying your research question and developing a study protocol which outlines your planned methodology is an important open science principle. Doing so can help reduce 'researcher degrees of freedom', and in turn minimise the risk for questionable research practices (such as ["hypothesising after the results are known" (HARKing)](https://en.wikipedia.org/wiki/HARKing) or [p-hacking](https://en.wikipedia.org/wiki/Data_dredging)). 7 | 8 | Taken together, this can improve both the quality and credibility of your research. Developing a detailed study plan, including figure and table shells, can be particularly helpful when using a federated analytics platform such as OpenSAFELY, as there is less scope for interactively developing these whilst working with the data. 9 | 10 | This page will eventually contain resources for how to develop an effective study protocol, as well as tips for how to pre-register these formally on [OSF](https://osf.io/) or [ENCePP](http://www.encepp.eu/), or informally by uploading "locked" protocol versions to GitHub. 
There is no specific template for a protocol that you should use when working with OpenSAFELY, but you can see examples of protocols we've written for OpenSAFELY studies on most of our public repositories — for example [this inhaled corticosteroid (ICS) research repository](https://github.com/opensafely/ics-research/tree/master/protocol) or [this ethnicity research repository](https://github.com/opensafely/ethnicity-covid-research/tree/master/protocol). 11 | -------------------------------------------------------------------------------- /docs/reports/create-a-draft.md: -------------------------------------------------------------------------------- 1 | 2 | ## Create a draft report 3 | After logging into the [administration area](https://reports.opensafely.org/admin/) click "Add" next to Reports: 4 | 5 | ![Add report button](./images/reports-admin-add-report.jpg) 6 | 7 | ### Organisation 8 | Select your organisation from the list. 9 | If it's missing from the list, please [contact us](../how-to-get-help.md). 10 | 11 | 12 | ### Navigation 13 | Pick the `Category` you want to host your report under in the side nav of the site. 14 | 15 | Then set a `Menu name`. 16 | 17 | 18 | ### Report file details (GitHub) 19 | !!! note 20 | It was previously possible to release outputs to GitHub and the reports site was originally built to use those outputs. 21 | OpenSAFELY has moved away from this method and all outputs are now released to the jobs site. 22 | Please make sure to use the jobs site field in this form for your output location instead of the GitHub ones. 23 | 24 | 25 | ### Report file details (Jobs site) 26 | Find your workspace on [the jobs site](https://jobs.opensafely.org). 
27 | Click `Released Outputs` to view the most recent version of each of your released outputs: 28 | 29 | ![Most recent version of released outputs](./images/job-server-workspace-latest-outputs.jpg) 30 | 31 | Select the file you want to make your report with from the list on the left, and copy the direct URL which shows at the top of the file viewer. This is the URL you will need in the reports admin. 32 | 33 | ![Where to get the direct URL for a file](./images/job-server-direct-output-file-link-release.jpg) 34 | 35 | 36 | ### Front matter 37 | These fields are displayed above your report on the site. 38 | 39 | 40 | ### DOI 41 | Fill this in after you have published your report. 42 | 43 | 44 | ### Visibility 45 | `Is draft` will be ticked by default. 46 | Reports should start as drafts so they are private before being reviewed. 47 | 48 | 49 | ### External 50 | If you are creating a report from an external organisation, then you should fill in this section to explain why the report is being hosted on the OpenSAFELY platform. 51 | 52 | 53 | ### Related links 54 | You should also add a link to the source code on GitHub which generated the outputs your report is built around. 55 | 56 | If the report has an associated paper, preprint or blog, ensure their links are added. 57 | 58 | 59 | ## Next step 60 | [Have your report reviewed](./review-process.md). 
61 | -------------------------------------------------------------------------------- /docs/reports/images/job-server-direct-output-file-link-published.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/reports/images/job-server-direct-output-file-link-published.jpg -------------------------------------------------------------------------------- /docs/reports/images/job-server-direct-output-file-link-release.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/reports/images/job-server-direct-output-file-link-release.jpg -------------------------------------------------------------------------------- /docs/reports/images/job-server-published-outputs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/reports/images/job-server-published-outputs.jpg -------------------------------------------------------------------------------- /docs/reports/images/job-server-workspace-latest-outputs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/reports/images/job-server-workspace-latest-outputs.jpg -------------------------------------------------------------------------------- /docs/reports/images/reports-admin-add-report.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/docs/reports/images/reports-admin-add-report.jpg 
-------------------------------------------------------------------------------- /docs/reports/publish-a-report.md: -------------------------------------------------------------------------------- 1 | The report you've created so far is only a draft. 2 | Edit your report and complete the steps below to make it available for public viewing. 3 | 4 | ## Switch to a published outputs 5 | Published reports require the output they're based on to also be published. 6 | 7 | Follow [the instructions on publishing your output](../jobs-site.md#publishing-outputs). 8 | 9 | Select a set of published outputs from the list: 10 | ![List of published outputs](./images/job-server-published-outputs.jpg) 11 | 12 | !!! note 13 | Published outputs can be in a draft status while they are being reviewed. 14 | A badge saying "published" will be displayed next to those which have moved out of that state. 15 | 16 | Select the file you want to use for your report from the list on the left, and copy the direct URL which shows at the top of the file viewer. 17 | 18 | ![Where to get the direct URL for a file](./images/job-server-direct-output-file-link-published.jpg) 19 | 20 | Replace the `Job server url` you currently have in the reports admin with this URL. 21 | 22 | 23 | ## Create a DOI 24 | All published reports should have a DOI. 25 | You will need your own [CrossRef user credentials](https://www.crossref.org/documentation/member-setup/account-credentials/) in order to register DOIs. 26 | 27 | How to create and add a DOI: 28 | 29 | 1. Make sure your report is published and not still a draft. (CrossRef requires that all DOIs link to a landing page which is publicly accessible.) 30 | 2. Find the suggested DOI on the admin page for your report. 31 | 3. Go to the [CrossRef Web Deposit page](https://apps.crossref.org/webDeposit/) and fill in the basic information: 32 | - Select `Report` as the Data Type. 
33 | - Enter the suggested DOI from your report, prefixed with our organisation code: `10.53764/xxxxxxxxxxxx`. 34 | - Add the URL for the report. 35 | - `OpenSAFELY` as Publisher. 36 | - Authors have to be added one-by-one so you may wish to use `The OpenSAFELY Collaborative` in place of some or all authors, if appropriate. 37 | - Use the report's first publication date as the `online` publication date (`Print` publication date can be left blank). 38 | 4. Submit DOI, you'll then be prompted for your login credentials and then your/an email address. 39 | 5. Enter the DOI URL in the `DOI` field (`https://doi.org/10.53764/xxxxxxxxxxxx`) 40 | 41 | See the [Crossref documentation](https://www.crossref.org/documentation/member-setup/web-deposit-form/) for more information. 42 | 43 | 44 | ## Publish your report 45 | Untick the `Is draft` check box and click Save. 46 | 47 | Your report has now been published and will be visible to the public on the reports site. 48 | -------------------------------------------------------------------------------- /docs/reports/review-process.md: -------------------------------------------------------------------------------- 1 | When you are ready to publish your report, several steps should be followed: 2 | 3 | * It must be checked by NHSE (may simply reference the corresponding paper where applicable). Contact [publications@opensafely.org](mailto:publications@opensafely.org). 4 | * Send your report to your co-pilot so they can review it. 5 | * Make sure the linked code repository is also public and has gone through the [checking process](../project-completion.md). 6 | 7 | 8 | ## Next step 9 | [Publish your report](./publish-a-report.md) 10 | -------------------------------------------------------------------------------- /docs/system-integration.md: -------------------------------------------------------------------------------- 1 | !!! warning 2 | 3 | These notes are a work-in-progress. 
4 | This page provides an overview as guidance only, 5 | and not a series of definite instructions. 6 | We plan to further expand on this information. 7 | 8 | ## Audience 9 | 10 | This page is aimed at people who want to run their own installation of OpenSAFELY 11 | as a proof-of-concept trial prior to integrating into the live system. 12 | 13 | These people are likely to have one of the following professional roles: 14 | 15 | * software developers 16 | * clinical data providers 17 | * system integrators 18 | 19 | ### Assumed knowledge 20 | 21 | It is taken that you have some familiarity with OpenSAFELY. 22 | 23 | If not, you should first refer to: 24 | 25 | * [this general overview of OpenSAFELY](https://www.opensafely.org/about/) 26 | * [the typical researcher workflow](workflow.md) 27 | * [the introductory tutorial](getting-started/tutorial/index.md) 28 | 29 | ## Software components 30 | 31 | The [OpenSAFELY technical architecture diagram](technical-architecture.md) shows all of the platform software components 32 | and how they interact. 33 | 34 | The specific steps required to create a minimal setup are: 35 | 36 | 1. Deploy a [*job runner*](https://github.com/opensafely-core/job-runner) within your secure environment. 37 | A minimal configuration simply runs Docker containers 38 | and stores any resulting container output on a local disk. 39 | 40 | 2. Deploy a [*job server*](https://github.com/opensafely-core/job-server) 41 | that the *job runner* polls for jobs that end users request to be run. 42 | 43 | It is possible to use our existing instance of this server at our [*jobs site*](https://jobs.opensafely.org); 44 | [contact us](how-to-get-help.md#data-providers) 45 | if you would like us to configure this for you. 46 | 47 | 3. Create a secure network with our [*GitHub proxy*](https://github.com/opensafely-core/proxy). 48 | This provides access to repositories with research study code to run 49 | and Docker images used to run the code. 50 | 51 | 4. 
To provide *access to your database* from within your setup, 52 | integrate into our [*ehrQL*](https://github.com/opensafely-core/ehrql) ETL tool: 53 | 54 | * via an implementation of a backend interface; [this is an example for TPP](https://github.com/opensafely-core/ehrql/blob/main/ehrql/backends/tpp.py) 55 | * and, if you are using an as-yet unsupported database, a query engine; [this is an example for Trino](https://github.com/opensafely-core/ehrql/blob/main/ehrql/query_engines/trino.py) 56 | 57 | 5. *Releasing job outputs* requires: 58 | 59 | * the [*release-hatch*](https://github.com/opensafely-core/release-hatch) tool for reviewing outputs 60 | * the [*output-publisher*](https://github.com/opensafely-core/output-publisher) tool for publishing outputs 61 | 62 | ### Deployment 63 | 64 | We use Ubuntu in our deployments. 65 | Our deployments use [deploy scripts](https://github.com/opensafely-core/backend-server) 66 | that you can refer to. 67 | 68 | ## Support 69 | 70 | See our [support page](how-to-get-help.md#data-providers) 71 | for details of how to get more assistance on integration. 72 | -------------------------------------------------------------------------------- /docs/technical-architecture.md: -------------------------------------------------------------------------------- 1 | 2 | This shows the architecture of OpenSAFELY. It is intended for a more technical audience. 3 | 4 | ## System context 5 | 6 | How OpenSAFELY fits into the wider world. 7 | 8 | ![A system context diagram of the OpenSAFELY platform.](./images/c4-system-context.svg) 9 | 10 | ## Container diagram 11 | 12 | This shows the high-level technical building blocks of the system. 
13 | 14 | [![A container-level diagram of the OpenSAFELY platform.](./images/c4-container.svg)](./images/c4-container.svg) 15 | -------------------------------------------------------------------------------- /docs/updating-the-docs.md: -------------------------------------------------------------------------------- 1 | OpenSAFELY is a rapidly changing platform and the user documentation should be updated frequently to keep pace. 2 | If you are an OpenSAFELY user and want to contribute corrections, clarifications, or new materials to the documentation, please do! 3 | You can either: 4 | 5 | * Suggest improvements in an [issue](https://github.com/opensafely/documentation/issues). 6 | * Run the documentation [in GitHub Codespaces](#running-in-github-codespaces) for editing. 7 | * Clone [the repo](https://github.com/opensafely/documentation) locally, make edits on a new branch, then create a pull request for it. 8 | * [Edit directly on GitHub](https://github.com/opensafely/documentation/tree/main/docs) ([instructions](https://docs.github.com/en/github/managing-files-in-a-repository/editing-files-in-your-repository)), making sure to "Create a new branch for this commit and start a pull request". 9 | 10 | Do not commit changes directly to the main branch. 11 | 12 | ## Running in GitHub Codespaces 13 | 14 | Clicking the button below will open a codespace 15 | that allows you to run and edit the site. 16 | 17 | [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/opensafely/documentation) 18 | 19 | When you see "Your application running on port 8910 is available", 20 | you can click "Open in Browser" to see a preview, 21 | and edit the content files in `docs/` to change the content. 22 | It may take a few seconds for changes you make to appear. 23 | 24 | ## Documentation style 25 | 26 | When adding or revising text, use [Semantic Line Breaks](https://sembr.org/) rather than fixed length lines. 
27 | With semantic line breaks, the diff is more concise and easier to interpret than with fixed length lines, 28 | where a single change can propagate through a whole paragraph. 29 | -------------------------------------------------------------------------------- /docs/workflow.md: -------------------------------------------------------------------------------- 1 | This section introduces the typical OpenSAFELY workflow for a single research project. 2 | 3 | The workflow consists of a number of key steps which may be iterated over as the code is developed and the study evolves. 4 | The following assumes that a well-defined and ethically-approved research agenda has been specified, with an accompanying study protocol, and all necessary permissions for accessing the OpenSAFELY platform are in place. 5 | 6 | The workflow for a single study can typically be broken down into the following steps: 7 | 8 | 1. **Create a git repository** from the [template repository provided](https://github.com/opensafely/research-template) and clone it on your local machine. 9 | This repo will contain all the code relating to your project, and a history of its development over time. 10 | 2. **Write a [dataset definition](ehrql/index.md)** that specifies what data you want to extract from the database: 11 | - specify the patient population (dataset rows) and variables (dataset columns) 12 | - specify the expected distributions of these variables for use in dummy data 13 | - specify (or create) the [codelists](codelist-intro.md) required by the dataset definition, hosted by [OpenCodelists](https://www.opencodelists.org), and import them to the repo. 14 | 3. **Generate [dummy data](ehrql/how-to/dummy-data.md)** based on the dataset definition, for writing and testing code. 15 | 4. **Develop analysis scripts** using the dummy data in R, Stata, or Python. 
This will include: 16 | - importing and processing the dataset(s) created by the dataset definition 17 | - importing any other external files needed for analysis 18 | - generating analysis outputs like tables and figures 19 | - generating log files to debug the scripts when they run on the real data. 20 | 5. **Test the code** by running the analysis steps specified in the [_project pipeline_](actions-pipelines.md), which specifies the execution order for data extracts and analyses and the outputs to be released. 21 | 6. **Execute the analysis on the real data** via OpenSAFELY's [jobs site](jobs-site.md). This will generate outputs on the secure server. 22 | 7. **Check the output for [disclosivity](outputs/sdc.md)** within the server, and redact if necessary. 23 | 8. **[Request release](outputs/requesting-file-release.md) of the outputs** 24 | 9. **Repeat and iterate steps 2 to 8 as necessary**. 25 | 26 | These steps should always proceed with frequent git commits and code reviews where appropriate. Steps 2-5 can all be progressed on your local machine without accessing the real data. 27 | 28 | It is possible to automatically test that the analytical pipeline defined in step 5 can be successfully executed on dummy data, using the `opensafely run` command. 29 | This pipeline is also [automatically tested](actions-pipelines.md#running-your-code-with-github-actions) against dummy data every time a new version of the study repository is saved ("pushed") to GitHub. 30 | 31 | As well as your own Python, R or Stata scripts, other non-standard actions are available. 32 | For example, it's possible to run a matching routine that extracts a matched control population to the population defined in the dataset definition, without having to extract all candidate matches into a dataset first. 
33 | -------------------------------------------------------------------------------- /hooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensafely/documentation/afe23da1bcde0212b2d7bd0785fad7346c33928c/hooks/__init__.py -------------------------------------------------------------------------------- /hooks/ehrql_branch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import mkdocs.plugins 4 | 5 | 6 | log = logging.getLogger("mkdocs") 7 | 8 | 9 | @mkdocs.plugins.event_priority(100) 10 | def on_config(config): 11 | """ 12 | Update config with specified ehrQL branch 13 | This is done first, before any imported repos are processed, or nav sections 14 | hidden. If the branch is 'main' the config is returned untouched. Otherwise the 'branch=main' text inside the 'ehrQL' nav entry is swapped for the requested branch. Each nav entry is expected to be a mapping, and one whose first key is 'ehrQL' must exist: next() is called without a default, so a missing entry raises StopIteration. 15 | """ 16 | ehrql_branch = config["extra"]["ehrql_branch"] 17 | if ehrql_branch == "main": 18 | return config 19 | ehrql_nav_index, ehrql_nav_section = next( 20 | (i, section) 21 | for i, section in enumerate(config["nav"]) 22 | if list(section.keys())[0] == "ehrQL" 23 | ) 24 | new_import_string = ehrql_nav_section["ehrQL"].replace( 25 | "branch=main", f"branch={ehrql_branch}" 26 | ) 27 | config["nav"][ehrql_nav_index] = {"ehrQL": new_import_string} 28 | 29 | log.info("ehrQL docs imported from branch '%s'", ehrql_branch) 30 | return config 31 | -------------------------------------------------------------------------------- /hooks/ehrql_css.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pathlib 3 | 4 | from mkdocs.structure.files import File 5 | 6 | 7 | log = logging.getLogger("mkdocs") 8 | 9 | 10 | def on_files(files, config, **kwargs): 11 | """ 12 | Update files available to MkDocs with ehrQL documentation CSS. 
13 | 14 | This hook is necessary because 15 | * paths provided to `extra_css` must be in the `docs_dir` directory 16 | (the default being `docs/`) 17 | * the multi-repo plugin pulls in the ehrQL docs to a different path, 18 | outside of `docs/` 19 | 20 | This approach is one of the suggested workarounds in: 21 | https://github.com/mkdocs/mkdocs/issues/1662 22 | """ 23 | for css_config_entry in config["extra"]["ehrql_imported_css"]: 24 | css_path = pathlib.Path(css_config_entry) 25 | 26 | # This MkDocs API is not well documented anywhere. 27 | # We want to create a MkDocs File object: 28 | # whose source is the `src_dir` concatenated to the ehrQL CSS path 29 | # whose destination is an appropriate CSS directory in the `site_dir`. 30 | # `use_directory_urls` only affects Markdown files, and this is a CSS file. 31 | css_file = File( 32 | path=css_path, 33 | src_dir=config["docs_dir"] + "/../", 34 | dest_dir=config["site_dir"] + "/css", 35 | use_directory_urls=False, 36 | ) 37 | log.info("ehrQL CSS imported from '%s'", css_path) 38 | files.append(css_file) 39 | return files 40 | -------------------------------------------------------------------------------- /hooks/parent_snippets.py: -------------------------------------------------------------------------------- 1 | def on_page_markdown(markdown, page, **kwargs): 2 | """ 3 | Replace parent_snippet markers from imported repos with appropriate snippet notation 4 | 5 | on_page_* methods are called for each Page in a mkdocs site and can modify the 6 | markdown they are given as input. We're using this method to look for the 7 | parent_snippet markers in pages that come from an imported repo, and replace them 8 | with the pymdownx.snippets syntax to retrieve the snippet from this (the parent 9 | repo). 10 | 11 | For example: 12 | !!! parent_snippet:'includes/glossary.md' 13 | 14 | will be replaced with: 15 | ---8<-- 'includes/glossary.md' 16 | 17 | This allows docs imported from other repos (e.g. 
ehrQL) to reference snippets 18 | in the parent docs, such as the glossary. 19 | """ 20 | 21 | return markdown.replace("!!! parent_snippet:", "---8<-- ") 22 | -------------------------------------------------------------------------------- /includes/cohort-extractor-deprecated.md: -------------------------------------------------------------------------------- 1 | !!! warning 2 | cohort-extractor is now deprecated. 3 | All new projects should use [ehrQL](../ehrql/index.md) to extract data from an OpenSAFELY database. 4 | -------------------------------------------------------------------------------- /includes/imd-warning-header.md: -------------------------------------------------------------------------------- 1 | !!! warning 2 | The original IMD ranking is rounded to the nearest 100 in the OpenSAFELY-TPP and OpenSAFELY-EMIS databases. 3 | The rounded IMD ranking ranges from 0 to 32,800. 4 | If there is no original ranking, then the rounded ranking is -1 in the OpenSAFELY-TPP database and `NULL` in the OpenSAFELY-EMIS database. 5 | 6 | !!! warning 7 | Avoid extracting the rounded IMD ranking to a binary format, such as `.feather` or `.dta`. 8 | Either nest it within a variable, 9 | such as when [grouping rounded IMD by quintile](https://docs.opensafely.org/legacy/study-def-tricks/#grouping-imd-by-quintile), 10 | or extract it to a non-binary format, such as `.csv.gz`. 11 | -------------------------------------------------------------------------------- /includes/isaric-warning-header.md: -------------------------------------------------------------------------------- 1 | !!! warning 2 | ISARIC data can only be used in collaboration with ISARIC researchers who must be involved in working on the study and writing it up. 3 | 4 | Please contact your co-pilot if you have any questions.
5 | -------------------------------------------------------------------------------- /includes/vmp-ids-warning.md: -------------------------------------------------------------------------------- 1 | !!! warning 2 | dm+d codes for Virtual Medicinal Products (VMPs) can change. 3 | cohort-extractor handles this by automatically expanding a medication codelist 4 | to include all current and previous codes of any VMPs in the codelist. 5 | However, this means that when a VMP code has changed, a query using 6 | `patients.with_these_medications(codelist, returning="code", ...)` 7 | might return a code that is not in the provided codelist. 8 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | # just has no idiom for setting a default value for an environment variable 2 | # so we shell out, as we need VIRTUAL_ENV in the justfile environment 3 | export VIRTUAL_ENV := `echo ${VIRTUAL_ENV:-.venv}` 4 | 5 | # TODO: make it /scripts on windows? 
6 | export BIN := VIRTUAL_ENV + "/bin" 7 | export PATH := env_var('PATH') + ":" + justfile_directory() + "/" + BIN 8 | export PIP := BIN + "/python -m pip" 9 | # enforce our chosen pip compile flags 10 | export COMPILE := BIN + "/pip-compile --allow-unsafe --generate-hashes" 11 | 12 | 13 | # list available commands 14 | default: 15 | @{{ just_executable() }} --list 16 | 17 | 18 | # clean up temporary files 19 | clean: 20 | rm -rf .venv 21 | 22 | 23 | # ensure valid virtualenv 24 | virtualenv: 25 | #!/usr/bin/env bash 26 | set -euo pipefail 27 | 28 | # allow users to specify python version in .env 29 | PYTHON_VERSION=${PYTHON_VERSION:-python3.11} 30 | 31 | # create venv and upgrade pip 32 | test -d $VIRTUAL_ENV || { $PYTHON_VERSION -m venv $VIRTUAL_ENV && $PIP install --upgrade pip; } 33 | 34 | # ensure we have pip-tools so we can run pip-compile 35 | test -e $BIN/pip-compile || $PIP install pip-tools 36 | 37 | 38 | _compile src dst *args: virtualenv 39 | #!/usr/bin/env bash 40 | set -euo pipefail 41 | 42 | # exit if src file is older than dst file (-nt = 'newer than', but we negate with || to avoid error exit code) 43 | test "${FORCE:-}" = "true" -o {{ src }} -nt {{ dst }} || exit 0 44 | $BIN/pip-compile --allow-unsafe --generate-hashes --output-file={{ dst }} {{ src }} {{ args }} 45 | 46 | 47 | # update requirements.prod.txt if requirements.prod.in has changed 48 | requirements-prod *args: 49 | {{ just_executable() }} _compile requirements.prod.in requirements.prod.txt {{ args }} 50 | 51 | 52 | # update requirements.dev.txt if requirements.dev.in has changed 53 | requirements-dev *args: requirements-prod 54 | {{ just_executable() }} _compile requirements.dev.in requirements.dev.txt {{ args }} 55 | 56 | 57 | # ensure prod requirements installed and up to date 58 | prodenv: requirements-prod fetch-cohort-extractor 59 | #!/usr/bin/env bash 60 | set -euo pipefail 61 | 62 | # exit if .txt file has not changed since we installed them (-nt == "newer than', but we negate 
with || to avoid error exit code) 63 | test requirements.prod.txt -nt $VIRTUAL_ENV/.prod || exit 0 64 | 65 | $PIP install -r requirements.prod.txt 66 | touch $VIRTUAL_ENV/.prod 67 | 68 | 69 | # && dependencies are run after the recipe has run. Needs just>=0.9.9. This is 70 | # a killer feature over Makefiles. 71 | # 72 | # ensure dev requirements installed and up to date 73 | devenv: prodenv requirements-dev && install-precommit 74 | #!/usr/bin/env bash 75 | set -euo pipefail 76 | 77 | # exit if .txt file has not changed since we installed them (-nt == "newer than', but we negate with || to avoid error exit code) 78 | test requirements.dev.txt -nt $VIRTUAL_ENV/.dev || exit 0 79 | 80 | $PIP install -r requirements.dev.txt 81 | touch $VIRTUAL_ENV/.dev 82 | 83 | 84 | # ensure precommit is installed 85 | install-precommit: 86 | #!/usr/bin/env bash 87 | set -euo pipefail 88 | 89 | BASE_DIR=$(git rev-parse --show-toplevel) 90 | test -f $BASE_DIR/.git/hooks/pre-commit || $BIN/pre-commit install 91 | 92 | 93 | # upgrade dev or prod dependencies (specify package to upgrade single package, all by default) 94 | upgrade env package="": virtualenv 95 | #!/usr/bin/env bash 96 | set -euo pipefail 97 | 98 | opts="--upgrade" 99 | test -z "{{ package }}" || opts="--upgrade-package {{ package }}" 100 | FORCE=true {{ just_executable() }} requirements-{{ env }} $opts 101 | 102 | # Fetch cohort-extractor submodule 103 | fetch-cohort-extractor: 104 | git submodule update --init 105 | 106 | # Update cohort-extractor submodule 107 | update-cohort-extractor: 108 | git submodule update --remote src/cohort-extractor 109 | 110 | # Requires Vale: https://github.com/errata-ai/vale 111 | lint-docs: 112 | vale ./docs 113 | 114 | # Run the tests 115 | test: devenv 116 | echo "Not implemented here" 117 | 118 | 119 | format *args=".": devenv 120 | $BIN/ruff format --check {{ args }} 121 | 122 | lint *args=".": devenv 123 | $BIN/ruff check {{ args }} 124 | 125 | # run the various dev checks but does 
not change any files 126 | check: format lint 127 | 128 | 129 | # apply ruff's automatic lint fixes (including import sort ordering) and reformat the code 130 | fix: devenv 131 | $BIN/ruff check --fix . 132 | $BIN/ruff format . 133 | 134 | # Serve the documentation locally at localhost:8910 135 | run: devenv 136 | $BIN/mkdocs serve -a localhost:8910 137 | 138 | # Build the documentation 139 | build: devenv 140 | $BIN/mkdocs build 141 | 142 | # Count words in generated documentation content (within <article>
tags) 143 | wordcount: build 144 | #!/usr/bin/env bash 145 | set -euo pipefail 146 | find site/ -name '*.html' -exec python scripts/wordcount.py {} + | awk '{sum += $1} END {print "Total words in documentation:", sum}' 147 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | def define_env(env): 2 | "Hook function" 3 | 4 | @env.macro 5 | def build_toc(nav, page): 6 | """ 7 | Build a nested list of page links from a navigation section index, to be inserted into the index page itself 8 | """ 9 | assert page.is_index, ( 10 | "`build_toc` macro is only available for navigation index pages" 11 | ) 12 | parent_section = page.parent 13 | links = [make_link(item) for item in parent_section.children if item != page] 14 | html = f"
    {''.join(links)}
" 15 | return html 16 | 17 | 18 | def make_link(item): 19 | """ 20 | Create the html list links for a nav item 21 | `item` may be a Section or a Page 22 | """ 23 | if item.is_page: 24 | # Note that we prepend / so the URL extracted from the navigation is relative to the 25 | # root, and not to the location of this index page. This means we can deal with nested 26 | # nav links from any point in the document structure 27 | return f"
  • {item.title}
  • " 28 | else: 29 | items = [make_link(sub_item) for sub_item in item.children] 30 | return f"
  • {item.title}
      {''.join(items)}
  • " 31 | -------------------------------------------------------------------------------- /overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block fonts %} 4 | 5 | 6 | 11 | 12 | 18 | 19 | 25 | {% endblock %} 26 | -------------------------------------------------------------------------------- /overrides/partials/integrations/analytics/plausible.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "documentation" 3 | requires-python = ">=3.11" 4 | 5 | [tool.ruff] 6 | line-length = 88 7 | exclude = [ 8 | ".direnv", 9 | ".git", 10 | ".github", 11 | ".ipynb_checkpoints", 12 | ".pytest_cache", 13 | ".venv", 14 | "__pycache__", 15 | "docker", 16 | "htmlcov", 17 | "venv", 18 | "src/cohort-extractor", 19 | ] 20 | 21 | [tool.ruff.lint] 22 | extend-select = [ 23 | "A", # flake8-builtins 24 | "I", # isort 25 | "INP", # flake8-no-pep420 26 | "ISC", # flake8-implicit-str-concat 27 | "UP", # pyupgrade 28 | "W", # pycodestyle warning 29 | ] 30 | extend-ignore = [ 31 | "E501", 32 | "E731", 33 | ] 34 | 35 | [tool.ruff.lint.isort] 36 | lines-after-imports = 2 37 | -------------------------------------------------------------------------------- /requirements.dev.in: -------------------------------------------------------------------------------- 1 | --constraint requirements.prod.txt 2 | 3 | # Additional dev requirements 4 | # To generate a requirements file that includes both prod and dev requirements, run: 5 | # pip-compile --generate-hashes --output-file=requirements.dev.txt requirements.dev.in 6 | 7 | pip-tools 8 | pre-commit 9 | ruff 10 | -------------------------------------------------------------------------------- /requirements.prod.in: 
-------------------------------------------------------------------------------- 1 | # Main prod requirements 2 | 3 | # To generate requirements file, run: 4 | # pip-compile --generate-hashes --output-file=requirements.prod.txt requirements.prod.in 5 | 6 | mkdocs 7 | mkdocs-macros-plugin 8 | mkdocs-material 9 | mkdocstrings[python] 10 | mkdocs-multirepo-plugin 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # This file is a shim for CloudFlare Pages which expects a requirements.txt, 2 | # but we want to maintain requirements.prod.txt for our general dev tooling. 3 | -r requirements.prod.txt 4 | -------------------------------------------------------------------------------- /scripts/wordcount.py: -------------------------------------------------------------------------------- 1 | from html.parser import HTMLParser 2 | import sys 3 | 4 | 5 | class ArticleParser(HTMLParser): 6 | def __init__(self): 7 | super().__init__() 8 | self.in_article = False 9 | self.text = [] 10 | 11 | def handle_starttag(self, tag, _): 12 | if tag == "article": 13 | self.in_article = True 14 | 15 | def handle_endtag(self, tag): 16 | if tag == "article": 17 | self.in_article = False 18 | 19 | def handle_data(self, data): 20 | if self.in_article: 21 | self.text.append(data) 22 | 23 | 24 | if __name__ == "__main__": 25 | total = 0 26 | for filename in sys.argv[1:]: 27 | with open(filename, "r", encoding="utf-8") as f: 28 | parser = ArticleParser() 29 | parser.feed(f.read()) 30 | text = " ".join(parser.text) 31 | total += len(text.split()) 32 | print(total) 33 | -------------------------------------------------------------------------------- /styles/OpenSAFELY/Branding.yml: -------------------------------------------------------------------------------- 1 | extends: substitution 2 | scope: text 3 | message: "Consider using '%s' instead of '%s'" 4 | level: 
suggestion 5 | ignorecase: true 6 | # swap maps tokens in form of bad: good 7 | swap: 8 | # NOTE: The left-hand (bad) side can match the right-hand (good) side; Vale 9 | # will ignore any alerts that match the intended form. 10 | 'github(?!.com)': 'GitHub' 11 | 'open[ -]?safely(?!.org)': 'OpenSAFELY' 12 | -------------------------------------------------------------------------------- /styles/OpenSAFELY/HereLinks.yml: -------------------------------------------------------------------------------- 1 | extends: existence 2 | scope: raw 3 | message: 'Consider rewording link text to remove the use of "here"' 4 | level: suggestion 5 | ignorecase: true 6 | raw: 7 | - '(\[|\[.*\s)here(\s.*)?\]' 8 | -------------------------------------------------------------------------------- /styles/OpenSAFELY/InternalLinks.yml: -------------------------------------------------------------------------------- 1 | extends: existence 2 | message: 'Links to other documentation pages should be written with a relative file path, not a link to a specific domain' 3 | link: 'https://www.mkdocs.org/user-guide/writing-your-docs/#linking-to-pages' 4 | scope: raw 5 | level: suggestion 6 | ignorecase: true 7 | raw: 8 | - '[(<].*docs.opensafely.org.*[)>]' 9 | -------------------------------------------------------------------------------- /templates/python/material/parameters.html: -------------------------------------------------------------------------------- 1 |

    Parameters:

    2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | {% for parameter in parameters %} 12 | 13 | 14 | 15 | 16 | 17 | {% endfor %} 18 | 19 |
    NameDescriptionDefault
    {{ parameter.name }}{{ parameter.description|convert_markdown(heading_level, html_id) }}{% if parameter.default %}{{ parameter.default }}{% else %}required{% endif %}
    20 | --------------------------------------------------------------------------------