├── .github
└── workflows
│ └── publish.yaml
├── .gitignore
├── CNAME
├── README.md
├── _quarto.yml
├── asa-biop-swe-wg.Rproj
├── bayesian_mmrm_R_package.qmd
├── blog.qmd
├── blog
├── 2023-gswe4rp-workshops-wrapup
│ ├── IMG_0190.JPG
│ ├── IMG_0192.JPG
│ ├── IMG_0193.JPG
│ ├── IMG_0200.JPG
│ └── index.qmd
├── 20230210-bbs-swe-course-basel
│ ├── Daniel.jpg
│ ├── Friedrich.jpg
│ ├── Kevin.jpg
│ ├── index.qmd
│ ├── testing.jpg
│ └── topics_and_people.jpg
├── _metadata.yml
├── biop_report
│ ├── asa-biop.jpg
│ └── index.qmd
├── efspi_sig
│ ├── efspi_logo.jpeg
│ ├── efspi_logo_square.jpeg
│ └── index.qmd
├── hexwall_post
│ ├── hexwall_logo.svg
│ └── index.qmd
├── julia_ssd_package
│ ├── index.qmd
│ └── julia.svg
├── mmrm-video-nov-2022
│ ├── index.qmd
│ └── mmrm-video.png
├── new-co-chair
│ ├── index.qmd
│ └── openstatsware-hex-300.png
├── new_short_name_and_logo
│ ├── index.qmd
│ └── sticker-new-1200.png
├── psi-2025
│ ├── index.qmd
│ └── psi-banner.png
├── risw2023_session
│ ├── index.qmd
│ ├── panel_photo.jpeg
│ └── panel_photo_2.jpg
├── user-2024
│ ├── index.qmd
│ └── sergio_slide.jpg
└── wg_announcement
│ ├── index.qmd
│ └── sticker-1200.png
├── computational-statistics.csl
├── contact_us.qmd
├── data
├── members.csv
└── news.csv
├── goals.qmd
├── guide.qmd
├── hexwall.qmd
├── hta_page.qmd
├── html
└── openstatsware.scss
├── index.qmd
├── join_us.qmd
├── media
└── bbswLogoDarkBg.png
├── mmrm_R_package.qmd
├── news.qmd
├── people.qmd
├── presentations.qmd
├── publications.qmd
├── references.bib
├── slides
├── 2023-psi-aims-london.qmd
├── aimslogo.png
├── asa-biop-webinar-feb2024-quarto.qmd
├── asa-biop-webinar-feb2024.qmd
├── asa-ssc-mmrm-nov2023.qmd
├── bbs-efspi-mmrm-dec2022.qmd
├── cen-first-year-wg-sep2023.qmd
├── china-R-mmrm-nov2023.qmd
├── converge_speed.png
├── estimate_diff.png
├── jsm-aug2023.qmd
├── mmrm-review-treatment-fev-1.png
├── openstatsguide-poster.pdf
├── pharma-rug-mmrm-mar2023.qmd
├── r-adoption-jan2023.qmd
├── r-govys-jan2023.qmd
├── r-meetup-july2023.qmd
├── risw-first-year-wg-sep2023-quarto.qmd
├── risw-first-year-wg-sep2023.qmd
├── rpharma-apac-2024.qmd
├── rpharma-wg-mmrm-oct2023-quarto.qmd
├── rpharma-wg-mmrm-oct2023.qmd
├── sd_diff.png
├── slide-background-mmrm.png
├── style.css
├── test_flow_mean.png
├── testing_venn.jpeg
├── thank-you.jpg
├── unit_testing_advanced.qmd
├── unit_testing_basic.qmd
└── user-2024-mmrm-jul2024.qmd
├── sponsors.qmd
├── ssd_package.qmd
├── sticker
├── .gitignore
├── developer.png
├── developer.svg
├── hex-round.png
├── hex.png
├── hex_sticker.R
├── laptop.svg
├── openstatsware-hex-1200.png
├── openstatsware-hex-300.png
├── openstatsware-hex-600.png
├── openstatsware-hex-round-1200.png
├── openstatsware-hex-round-300.png
├── openstatsware-hex-round-600.png
├── openstatsware-hex-round.svg
├── openstatsware-hex.svg
├── software-bw.png
├── software.png
├── sticker-new-1200.png
├── sticker-new-300.png
└── sticker-new-600.png
├── utils
├── read_members.R
└── read_news.R
├── videos.qmd
└── webinars.qmd
/.github/workflows/publish.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | # Github Action Workflow to publish
3 | # the Quarto website to GitHub Pages
4 | on:
5 | workflow_dispatch:
6 | push:
7 | branches:
8 | - main
9 |
10 | name: Publish Site
11 |
12 | env:
13 | QUARTO_DIR: "."  # reused below for the render working directory and the publish paths
14 |
15 | jobs:
16 | publish:
17 | name: Build & Deploy
18 | runs-on: ubuntu-latest
19 | container:
20 | image: ghcr.io/insightsengineering/rstudio:latest
21 | permissions:
22 | contents: write  # NOTE(review): presumably needed so the publish step can push the rendered site - confirm
23 | steps:
24 | - name: Check out repository
25 | uses: actions/checkout@v4  # v3 runs on the deprecated Node 16 runtime
26 |
27 | - name: Install CRAN dependencies
28 | run: install.packages(c("pander", "glue", "geomtextpath", "gt"), repos = "https://cloud.r-project.org")
29 | shell: Rscript {0}
30 |
31 | - name: Install GitHub dependencies
32 | run: remotes::install_github(c("coolbutuseless/poissoned", "coolbutuseless/minisvg", "danielinteractive/svgpatternsimple"))
33 | shell: Rscript {0}
34 | env:
35 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
36 |
37 | - name: Render Quarto Project
38 | run: quarto render --output-dir _site
39 | shell: bash
40 | working-directory: ${{ env.QUARTO_DIR }}
41 | env:
42 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
43 |
44 | - name: Publish Project
45 | uses: peaceiris/actions-gh-pages@v4  # v3 runs on the deprecated Node 16 runtime; inputs below are unchanged in v4
46 | with:
47 | github_token: ${{ secrets.GITHUB_TOKEN }}
48 | publish_dir: ./${{ env.QUARTO_DIR }}/_site
49 | destination_dir: ${{ env.QUARTO_DIR }}
50 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # R
2 | .Rproj.user
3 | .Rhistory
4 | .RData
5 | .Ruserdata
6 |
7 | # Quarto
8 | /.quarto/
9 | _cache
10 | cache
11 | *_cache
12 | *_files
13 | libs
14 | _freeze
15 | _site
16 |
17 | # MacOS
18 | .DS_Store
19 |
--------------------------------------------------------------------------------
/CNAME:
--------------------------------------------------------------------------------
1 | www.openstatsware.org
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # `openstatsware`
2 |
3 | This is the GitHub repository for the `openstatsware` working group, also known as the Software Engineering Working Group (SWE WG) under the American Statistical Association (ASA) Biopharmaceutical Section (BIOP) and a Special Interest Group (SIG) of the European Federation of Statisticians in the Pharmaceutical Industry (EFSPI).
4 |
5 | The site is published here: https://www.openstatsware.org
6 |
--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
1 | project:
2 | type: website
3 | resources:
4 | - CNAME
5 | website:
6 |
7 | title: "openstatsware"
8 | favicon: "sticker/openstatsware-hex.svg"
9 | site-url: "https://www.openstatsware.org"
10 | description: "Website for the `openstatsware' Working Group"
11 |
12 | navbar:
13 | background: "#f8f9fa"
14 | pinned: true
15 | title: false
16 | logo: "sticker/openstatsware-hex.svg"
17 | left:
18 | - href: index.qmd
19 | text: "Home"
20 | - text: "About Us"
21 | menu:
22 | - href: goals.qmd
23 | - href: people.qmd
24 | - href: sponsors.qmd
25 | - href: join_us.qmd
26 | - href: contact_us.qmd
27 | - href: news.qmd
28 | text: "News"
29 | - href: blog.qmd
30 | text: "Blog"
31 | - text: "Workstreams"
32 | menu:
33 | - href: mmrm_R_package.qmd
34 | - href: hta_page.qmd
35 | - href: bayesian_mmrm_R_package.qmd
36 | - href: ssd_package.qmd
37 | - href: presentations.qmd
38 | text: "Presentations"
39 | - href: hexwall.qmd
40 | text: "TaskView Hexwall"
41 | - text: "Guides"
42 | menu:
43 | - href: guide.qmd
44 | right:
45 | - icon: github
46 | href: https://github.com/openstatsware/website
47 | - icon: rss
48 | href: blog.xml
49 |
50 | # Giscus comments:
51 | comments:
52 | giscus:
53 | # Reference: https://quarto.org/docs/reference/projects/books.html#giscus
54 | # Also https://giscus.app/ for all options
55 | repo: openstatsware/website
56 | repo-id: "R_kgDOIEo72A"
57 | category: "Ideas"
58 | category-id: "DIC_kwDOIEo72M4CRqLG"
59 | reactions-enabled: true
60 | theme: light
61 | language: en
62 | loading: lazy
63 | mapping: pathname
64 | input-position: "top"
65 |
66 | format:
67 | html:
68 | toc: true
69 | highlight-style: github
70 | theme:
71 | - html/openstatsware.scss
72 |
73 | editor: visual
74 |
--------------------------------------------------------------------------------
/asa-biop-swe-wg.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | UseNativePipeOperator: Yes
19 |
--------------------------------------------------------------------------------
/bayesian_mmrm_R_package.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Bayesian MMRM R Package Development"
3 | ---
4 |
5 | **Lead**: Will Landau (Eli Lilly)
6 |
7 | **Objective**: To develop an R package for Bayesian mixed models with repeated measures (MMRM) to support robust analysis of longitudinal clinical data.
8 |
9 | ```{r, label = "active", echo = FALSE, message = FALSE, warning = FALSE, results = "asis"}
10 | path <- getwd()  # current working directory at render time
11 | source(file = file.path(path, "utils", "read_members.R"))  # file.path() joins the components instead of pasting separators by hand
12 | read.members("bayesian_mmrm")  # read.members() is not defined in this chunk; presumably provided by the sourced helper
13 | ```
14 |
15 | To learn more about our Bayesian MMRM R package using [`brms`](https://paul-buerkner.github.io/brms/), please visit [https://openpharma.github.io/brms.mmrm/](https://openpharma.github.io/brms.mmrm/).
16 |
--------------------------------------------------------------------------------
/blog.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Blog"
3 | listing:
4 | contents: blog
5 | sort: "date desc"
6 | type: default
7 | categories: true
8 | sort-ui: false
9 | filter-ui: false
10 | feed:
11 | categories:
12 | - R
13 | page-layout: full
14 | title-block-banner: true
15 | ---
16 |
17 | We are proudly contributing to [R-Bloggers](https://www.r-bloggers.com).
18 |
--------------------------------------------------------------------------------
/blog/2023-gswe4rp-workshops-wrapup/IMG_0190.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/2023-gswe4rp-workshops-wrapup/IMG_0190.JPG
--------------------------------------------------------------------------------
/blog/2023-gswe4rp-workshops-wrapup/IMG_0192.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/2023-gswe4rp-workshops-wrapup/IMG_0192.JPG
--------------------------------------------------------------------------------
/blog/2023-gswe4rp-workshops-wrapup/IMG_0193.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/2023-gswe4rp-workshops-wrapup/IMG_0193.JPG
--------------------------------------------------------------------------------
/blog/2023-gswe4rp-workshops-wrapup/IMG_0200.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/2023-gswe4rp-workshops-wrapup/IMG_0200.JPG
--------------------------------------------------------------------------------
/blog/2023-gswe4rp-workshops-wrapup/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Celebrating 5 workshops on good SWE practices in 2023"
3 | subtitle: "Thanks to all local and `openstatsware` contributors!"
4 | author: "Daniel"
5 | description: With the last workshop of the year in Montreal, we have run 5 workshops in 2023.
6 | date: "2023-10-18"
7 | categories: [news, R]
8 | image: "IMG_0200.JPG"
9 | ---
10 |
11 | ## Montreal workshop
12 |
13 | On Monday and Tuesday this week (16 and 17 October), Doug Kelkhoff (Roche), Phil Boileau (Analysis Group) and Daniel Sabanés Bové (Roche) held another edition of the Good Software Engineering Practices for R Packages (GSWEP4RP for short) workshop - this time in Montreal, QC, Canada.
14 |
15 | ::: {layout-nrow="2" fig-align="center"}
16 | ![](IMG_0190.JPG){width="33%"}
17 | ![](IMG_0192.JPG){width="33%"}
18 | ![](IMG_0193.JPG){width="33%"}
19 |
20 | ![](IMG_0200.JPG){width="100%"}
21 | :::
22 |
23 | With more than 33 participants this was another success for `openstatsware`!
24 |
25 | We would like to thank the McGill team which made this event possible - the workshop was part of their "McGill initiative in Computational Medicine" training series. Thanks a lot to:
26 |
27 | - Larisa Morales Soto
28 | - Adrien Osakwe
29 | - Georgette Femerling Romero
30 | - Kevin Liang
31 | - Prof. Celia M. T. Greenwood
32 |
33 | Everything worked very smoothly and it was a pleasure to visit McGill University and engage with the curious students and faculty!
34 |
35 | As usual, the updated course materials are online under CC-BY license [here](https://openpharma.github.io/workshop-r-swe-mtl/) and the source code for all materials is available [here](https://github.com/openpharma/workshop-r-swe-mtl).
36 |
37 | ## Celebrating 5 workshops in 2023
38 |
39 | This is a milestone for the `openstatsware` team, because we now have run 5 workshops just in 2023 on this topic in 4 different countries:
40 |
41 | - [10th February 2023, Basel, CH](https://kkmann.github.io/workshop-r-swe/)
42 | - [24th March 2023, Shanghai, CN](https://openpharma.github.io/workshop-r-swe/)
43 | - [20th July 2023, San José, CA, US](https://openpharma.github.io/workshop-r-swe-sf/)
44 | - [26th September 2023, Rockville, MD, US](https://openpharma.github.io/workshop-r-swe-md/)
45 | - [16th and 17th October 2023, Montréal, QC, CA](https://openpharma.github.io/workshop-r-swe-mtl/)
46 |
47 | This is part of achieving our secondary objective on disseminating best practices of software engineering into the biostatistics community.
48 |
49 | Thanks a lot to all the `openstatsware` members and friends who created the materials and presented this year:
50 |
51 | - Friedrich Pahlke
52 | - Kevin Kunzmann
53 | - Liming Li
54 | - Shuang Li
55 | - Joe Zhu
56 | - Matt Secrest
57 | - Laura Harris
58 | - Daniel Sjoberg
59 | - Andrew Bean
60 | - Ryan (Biju) Wang
61 | - Doug Kelkhoff
62 | - Phil Boileau
63 | - Daniel Sabanés Bové
64 |
--------------------------------------------------------------------------------
/blog/20230210-bbs-swe-course-basel/Daniel.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/20230210-bbs-swe-course-basel/Daniel.jpg
--------------------------------------------------------------------------------
/blog/20230210-bbs-swe-course-basel/Friedrich.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/20230210-bbs-swe-course-basel/Friedrich.jpg
--------------------------------------------------------------------------------
/blog/20230210-bbs-swe-course-basel/Kevin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/20230210-bbs-swe-course-basel/Kevin.jpg
--------------------------------------------------------------------------------
/blog/20230210-bbs-swe-course-basel/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Workshop on good software engineering practice for R"
3 | subtitle: "Teaching foundations of SWE for statisticians with R"
4 | author: "SWE WG"
5 | description: Under the umbrella of the Basel Biometric Society, Daniel, Friedrich, and Kevin organized a one-day workshop on good SWE practice in R.
6 | date: "2023-02-13"
7 | categories: [news, R]
8 | image: "topics_and_people.jpg"
9 | ---
10 |
11 | On Friday February 10, Daniel, Friedrich, and Kevin held
12 | a one-day workshop on good software engineering practice for R
13 | (GSWEP4R) in Basel.
14 | In order to achieve the best learning experience for
15 | participants, we only offered this course face to face without a dial-in
16 | option - and the 50 seats had been booked out quickly within days after the
17 | course had been announced.
18 |
19 | ::: {layout-nrow="2" fig-align="center"}
20 | {width="50%"}
21 |
22 | {width="50%"}
23 |
24 | {width="50%"}
25 |
26 | {width="50%"}
27 | :::
28 |
29 | We covered topics from R package basics over development workflows, quality,
30 | and modern collaboration tools to publishing. We consider democratizing these
31 | skills crucial to foster a vibrant R package ecosystem in biostatistics.
32 | Even if you do not intend to write your own package,
33 | a solid understanding of how all of this fits together enables you to
34 | participate in the community and positively influence future development
35 | by reporting bugs, giving feedback, or bringing your methodological expertise
36 | to the table of open-source development.
37 |
38 | We thank the BBS for supporting the workshop, Roche for hosting the event,
39 | and the participants for a lively and inspiring atmosphere.
40 |
41 | The course materials are online under CC-BY license
42 | [here](https://openpharma.github.io/workshop-r-swe/) and the source code for
43 | all materials is available [here](https://github.com/openpharma/workshop-r-swe).
44 |
45 | We also plan to repeat the course in other locations in the future
46 | - please reach out if you are interested in working with the SWE WG to
47 | host a course!
48 |
--------------------------------------------------------------------------------
/blog/20230210-bbs-swe-course-basel/testing.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/20230210-bbs-swe-course-basel/testing.jpg
--------------------------------------------------------------------------------
/blog/20230210-bbs-swe-course-basel/topics_and_people.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/20230210-bbs-swe-course-basel/topics_and_people.jpg
--------------------------------------------------------------------------------
/blog/_metadata.yml:
--------------------------------------------------------------------------------
1 | # options specified here will apply to all posts in this folder
2 |
3 | # freeze computational output
4 | # (see https://quarto.org/docs/projects/code-execution.html#freeze)
5 | freeze: true
6 |
7 | # Enable banner style title blocks
8 | title-block-banner: true
9 |
--------------------------------------------------------------------------------
/blog/biop_report/asa-biop.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/biop_report/asa-biop.jpg
--------------------------------------------------------------------------------
/blog/biop_report/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Software Engineering in Biostatistics - Towards Improving a Critical Competence"
3 | author: "SWE WG"
4 | subtitle: Article introducing the SWE WG in BIOP Report
5 | description: We wrote an article for the Biopharmaceutical Report introducing the SWE WG.
6 | date: "2022-11-16"
7 | categories: [news]
8 | image: "asa-biop.jpg"
9 | ---
10 |
11 | In the [Fall edition of the Biopharmaceutical Report](https://higherlogicdownload.s3.amazonaws.com/AMSTAT/fa4dd52c-8429-41d0-abdf-0011047bfa19/UploadedImages/BIOP%20Report/BioPharm_fall2022FINAL.pdf) we start from the importance of reliable software for statistical analyses, in particular in the pharmaceutical industry. We explain that the growing use of open source software is making this topic more critical, and summarize the progress that has been made by different initiatives around R in particular. Finally, we motivate the creation of our new working group with the fact that little attention seems to have been devoted to good software engineering in biostatistics.
12 |
13 | Read the full article on pages 61 and 62 of the [Biopharmaceutical Report](https://higherlogicdownload.s3.amazonaws.com/AMSTAT/fa4dd52c-8429-41d0-abdf-0011047bfa19/UploadedImages/BIOP%20Report/BioPharm_fall2022FINAL.pdf) and let us know if you have any [comments or feedback](../../contact_us.qmd) - we would love to hear from you!
14 |
--------------------------------------------------------------------------------
/blog/efspi_sig/efspi_logo.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/efspi_sig/efspi_logo.jpeg
--------------------------------------------------------------------------------
/blog/efspi_sig/efspi_logo_square.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/efspi_sig/efspi_logo_square.jpeg
--------------------------------------------------------------------------------
/blog/efspi_sig/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "openstatsware is now an official EFSPI SIG!"
3 | author: "Daniel"
4 | subtitle: Announcing the membership in the EFSPI family
5 | description: "`openstatsware` is now an official EFSPI Special Interest Group (SIG), in addition to being an ASA BIOP scientific working group."
6 | date: "2024-03-11"
7 | categories: [news]
8 | image: "efspi_logo_square.jpeg"
9 | ---
10 |
11 | [EFSPI](https://www.efspi.org/), the European Federation of Statisticians in the Pharmaceutical Industry, is the major European organization in Biopharmaceutical Statistics. The corresponding [European Special Interest Groups (SIGs)](https://www.psiweb.org/sigs-special-interest-groups/sigs) are jointly sponsored by EFSPI and [PSI](https://www.psiweb.org/), which is a member of EFSPI itself.
12 |
13 | We are proud and happy to announce that after a long application process, `openstatsware` is now also an official EFSPI SIG! The corresponding web site is [here](https://www.psiweb.org/sigs-special-interest-groups/software-engineering) and links to our central home page [openstatsware.org](https://openstatsware.org) such that we only need to maintain the latter on a frequent basis.
14 |
15 | As many of the `openstatsware` members and their companies are based in Europe, the association with EFSPI emphasizes nicely the European pillar of `openstatsware`. We will also continue to organize conference sessions, workshops, etc. in Europe.
16 |
17 | The new association does fit well with the existing ASA BIOP association of `openstatsware`. In fact, there are other working groups that are associated with both ASA BIOP and EFSPI (e.g. [Oncology Estimands](http://www.oncoestimand.org)). `openstatsware` continues to be a global working group across continents and countries.
18 |
19 | We are looking forward to the strengthened collaboration with other EFSPI SIGs and corresponding events in Europe!
20 |
--------------------------------------------------------------------------------
/blog/hexwall_post/hexwall_logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/blog/hexwall_post/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Announcing the Packages Hexwall"
3 | subtitle: "Interactive exploration of the Clinical Trials CRAN Task View"
4 | author: "Yoni"
5 | date: "2024-03-05"
6 | categories: [news, R]
7 | image: hexwall_logo.svg
8 | ---
9 |
10 | ```{r, echo = FALSE}
11 | knitr::opts_chunk$set(
12 | message = NA,
13 | echo = FALSE
14 | )
15 | ```
16 |
17 | ```{r}
18 | library(svgpatternsimple)
19 | library(minisvg)
20 | library(gt)
21 | ```
22 |
23 |
24 | ```{r hexwall logo, eval = FALSE}
25 |
26 | logo <- svg_doc(width = 200, height = 200)  # 200 x 200 SVG document that will hold the logo
27 | hex_pattern <- create_pattern_hex(id = 'p5' , fill_fraction = 0.1, colour = "#345678")
28 |
29 | logo$defs(hex_pattern)  # presumably registers the pattern in the document so the fill below can reference it - confirm
30 |
31 | len <- 100  # distance from the centre point (100, 100) to each vertex
32 | angles <- (seq(0, 360, 60) + 90) * pi/180  # seven angles in radians, 60 degrees apart and offset by 90; first and last coincide
33 | xs <- round(len * cos(angles) + 100, 2)
34 | ys <- round(len * sin(angles) + 100, 2)
35 | hex <- stag$polygon(id = 'hex', xs = xs, ys = ys)
36 | hex$update(stroke = '#c0c0c0', fill = hex_pattern)
37 | logo$append(hex)
38 |
39 | logo$show()
40 | logo$save("blog/hexwall_post/hexwall_logo.svg")  # relative path: resolves against the working directory when this chunk is run by hand
41 | ```
42 |
43 | ```{r get views}
44 | # Download Views binary file from CRAN
45 | tmpfile <- tempfile()
46 | repo <- 'https://cloud.r-project.org'  # base URL the Views.rds path is appended to
47 | download.file(paste(repo, "src/contrib/Views.rds", sep = "/"), destfile = tmpfile)
48 |
49 | # Read the downloaded object; the rest of the post uses it via length(), iteration and [[6]]
50 | all_Views <- readRDS(tmpfile)
51 | unlink(tmpfile)  # remove the temporary download once it has been read
52 | ```
53 |
54 | ## CRAN task views
55 |
56 | The [CRAN task views](https://cran.r-project.org/web/views/) are an indispensable source of information for discovering which R packages are the right ones for specific tasks. There are `r length(all_Views)` task views spanning a variety of topics. Each topic has subject matter experts who maintain the lists of packages. Below is a summary table of all the CRAN task views showing the topic (with a link to it), the maintainers, the date it was last updated and the number of packages in the task view.
57 |
58 | ```{r summary table}
59 | purrr::map_df(all_Views, function(x){  # one summary row per task view, combined into a single data frame
60 | tibble::tibble(
61 | topic = glue::glue("{x$topic}"),
62 | maintainers = x$maintainer,
63 | last_update = x$version,  # the view's version field is what gets labelled 'Last Updated' below
64 | n_pkg = nrow(x$packagelist)
65 | ) |>
66 | dplyr::mutate(topic = purrr::map(topic, gt::html))  # wrap topic with gt::html() so gt treats it as HTML
67 | }) |>
68 | gt::gt() |>
69 | gt::tab_header(title = 'CRAN Task Views Summary Table') |>
70 | gt::cols_label(
71 | topic = 'Topic',
72 | maintainers = 'Maintainers',
73 | last_update = 'Last Updated',
74 | n_pkg = 'Number of Packages'
75 | ) |>
76 | gt::tab_options(
77 | container.height = '300px'  # fixes the height of the table container
78 | )
79 |
80 | ```
81 |
82 |
83 | ## Clinical Trials task view
84 |
85 | We are going to focus on the **`r all_Views[[6]]$topic` task view** maintained by `r all_Views[[6]]$maintainer`. Below is the task view itself. The task view layout has in the header summary information of the task view and the body contains the packages. They are categorized into sections: Design and Monitoring, Design and Analysis, Analysis for Specific Designs, Analysis in General and Meta Analysis.
86 |
87 | The next area of the layout lists which packages are "Core" packages to the task view, which are "Regular" and which are archived on CRAN. There is a section listing related links that can include either noteworthy packages on GitHub or topic-specific references. Finally, there are links to other task views that the packages may intersect.
88 |
89 | Each package listed in the task view has a link to the CRAN homepage of the package and a short description of what task the package intends to solve.
90 |
91 | ```{r}
92 | htmltools::tags$iframe(
93 | id = "ctv", width = "100%", height = "300px",
94 | src = "https://cran.r-project.org/web/views/ClinicalTrials.html"
95 | )
96 | ```
97 |
98 | ## The Hexwall
99 |
100 | This resource has a wealth of information for the newly initiated R user and also the expert R user in keeping up to date with the latest packages in the subject.
101 |
102 | This being said the layout of the task view may be a bit daunting. It is strictly text and only gives a short description of each package. To fully understand what the package does and the health of the package the reader needs to click on the package link and then conduct more research to get relevant information.
103 |
104 | This can be for many an entry cost that limits and inhibits the full utility of the task view and the hard work the maintainers do to keep it up to date.
105 |
106 | To remedy these issues we have developed a [new layout](https://www.openstatsware.org/hexwall) to navigate the packages listed in the task view. It is an interactive layout with packages represented as hex images, where we use the package hex sticker when there is one and a general hexagon for packages without. When the user clicks on a hex sticker the CRAN package homepage is displayed on the left-hand side.
107 |
108 | ```{r}
109 | htmltools::tags$iframe(
110 | id = "hexwall", width = "100%", height = "300px",
111 | src = "https://www.openstatsware.org/hexwall"
112 | )
113 | ```
114 |
115 | ## Next Steps
116 |
117 | This is the first release of the hexwall layout and we plan to iterate and add more useful information to it. We invite users to give us feedback on the layout and on what information they would like to see added to make their research into which packages to use for a task more informative and efficient.
118 |
--------------------------------------------------------------------------------
/blog/julia_ssd_package/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "First Julia package from `openstatsware` published"
3 | author: "Daniel"
4 | subtitle: Announcing Bayesian Safety Signal Detection package
5 | description: "`SafetySignalDetection.jl` is our first Julia package, please try it out!"
6 | date: "2024-04-08"
7 | categories: [news]
8 | image: "julia.svg"
9 | ---
10 |
11 | We are happy to announce the first Julia package from `openstatsware`: please welcome [`SafetySignalDetection.jl`](https://openpharma.github.io/SafetySignalDetection.jl/). This package implements Bayesian safety signal detection as proposed by [Brock et al. (2023)](https://openpharma.github.io/SafetySignalDetection.jl/stable/#Brock:2023) using the [Turing.jl](https://github.com/TuringLang/Turing.jl) framework. Please have a look at the corresponding [introduction](https://openpharma.github.io/SafetySignalDetection.jl/stable/introduction/) to learn more.
12 |
13 | We successfully added the package today to the official Julia registry ([link](https://github.com/JuliaRegistries/General/blob/master/S/SafetySignalDetection/Package.toml)), such that you can easily install it in Julia using:
14 |
15 | ```julia
16 | using Pkg
17 | Pkg.add("SafetySignalDetection")
18 | ```
19 |
20 | The corresponding [workstream](../../ssd_package.qmd) is open for new members who are interested in further developing the package, e.g. adding methodological variations etc.
21 |
22 | And if you are interested in a presentation about why Julia is interesting for implementing statistical software, don't miss the [PSI webinar on 17th April](https://www.psiweb.org/events/event-item/2024/04/17/default-calendar/psi-webinar-open-source-solutions---is-it-really-a-free-for-all)!
--------------------------------------------------------------------------------
/blog/julia_ssd_package/julia.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/blog/mmrm-video-nov-2022/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Visualizing successful collaboration"
3 | author: "SWE WG"
4 | subtitle: "A visual time lapse of six months of work on the mmrm R package"
5 | description: Communicating the benefit of open collaboration can be hard - we used the "gource" tool to create a video based on the repository history of the mmrm R package.
6 | date: "2022-11-20"
7 | categories: [news, R]
8 | image: "mmrm-video.png"
9 | ---
10 |
11 | It can be hard to communicate the benefit of open-source collaboration,
12 | the effort that goes into it, and its enormous impact on data science, to
13 | a non-technical audience.
14 | Visualizations can be very helpful here and we created a
15 | [video](https://youtu.be/DAIR_rBV3zQ) showing the
16 | first six months of development of the [`mmrm`](https://openpharma.github.io/mmrm/main/) R package.
17 | The video is based on the cumulative commit history of the git repository where
18 | the package development happens.
19 | It shows contributions to the "main" branch of the repository and how contributors
20 | add or edit files.
21 | Files are visualized by dots and arranged in a tree structure defined by the
22 | folders in the repository.
23 | The video is optimized for full HD quality.
24 |
25 |
26 |
27 | Only direct code contributions are shown. This does not include ideation,
28 | community organization, code reviews, bug reporting, answering questions, or
29 | promoting the package -
30 | all of which are important to make such a project a long-term success.
31 |
32 | If you want to co-star in the next iteration of the video, check out the
33 | [mmrm git repository](https://github.com/openpharma/mmrm), get in touch and
34 | contribute! Our contribution guidelines can be found [here](https://openpharma.github.io/mmrm/main/CONTRIBUTING.html).
35 |
36 | We used [gource](https://gource.io/) to create the video following an
37 | [example and guide](https://youtu.be/_ZUddQAhPFw) by [Nan Xiao](https://nanx.me).
38 |
--------------------------------------------------------------------------------
/blog/mmrm-video-nov-2022/mmrm-video.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/mmrm-video-nov-2022/mmrm-video.png
--------------------------------------------------------------------------------
/blog/new-co-chair/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "New European Working Group Co-Chair"
3 | subtitle: "Announcing a new working group co-chair in the European region"
4 | author: "Alessandro"
5 | date: "2024-04-21"
6 | categories: [news]
7 | image: "openstatsware-hex-300.png"
8 | ---
9 |
10 | Hi everyone,
11 |
12 | Alessandro here.
13 | I am writing to announce that I am taking up the role of co-chair for the `openstatsware` working group.
14 | This is motivated by Daniel relocating to Taiwan, thus we felt we needed a person located in the European region to take up the role (I am based in Sweden).
15 |
16 | Let me quickly introduce myself: my name is Alessandro Gasparini, and I am currently a senior biostatistician and software developer at [Red Door Analytics](https://reddooranalytics.se) in Stockholm, Sweden.
17 | I am trained as a biostatistician, but have been writing code (in a variety of languages) for over a decade now; you can read more about me and my background [here](https://www.ellessenne.xyz).
18 |
19 | Daniel and Ya have done a fantastic job leading the working group so far and I look forward to further contributing to the goals of the working group.
20 |
--------------------------------------------------------------------------------
/blog/new-co-chair/openstatsware-hex-300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/new-co-chair/openstatsware-hex-300.png
--------------------------------------------------------------------------------
/blog/new_short_name_and_logo/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Announcing the new short name and logo"
3 | author: "SWE WG"
4 | description: We got a new short name **openstatsware** and a new logo.
5 | date: "2023-09-27"
6 | categories: [news]
7 | image: "sticker-new-1200.png"
8 | ---
9 |
10 | The SWE WG got a new short name, **openstatsware**, which stands for **open**-source **stat**i**s**tical soft**ware**. The new short name reflects the objectives of the WG, which are to engineer high-quality open-source statistical software and to develop and disseminate the best practices for the process.
11 |
12 | A new logo has been created to align with the new short name. Credit to Alessandro for designing the new logo.
13 |
14 |
15 |
--------------------------------------------------------------------------------
/blog/new_short_name_and_logo/sticker-new-1200.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/new_short_name_and_logo/sticker-new-1200.png
--------------------------------------------------------------------------------
/blog/psi-2025/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: openstatsware at the 2025 PSI conference
3 | description: A session titled _Statistical Software Engineering_ and submitted by the working group has been accepted for the 2025 PSI conference.
4 | author: "Alessandro"
5 | date: "2025-02-05"
6 | categories: [news]
7 | image: "psi-banner.png"
8 | ---
9 |
10 | Hi everyone,
11 |
12 | Alessandro here.
13 | A quick blog post to announce that a single-topic session submitted by the working group and titled _Statistical Software Engineering_ has been accepted for the 2025 PSI conference.
14 | The conference will be at Wembley Stadium in London, UK, between June 8^th^ and 11^th^ 2025.
15 |
16 | ![](psi-banner.png){fig-align="center"}
17 |
18 | The submission was led by Wilmar Igl (who will be chairing the session as well), and the session will be organised as follows:
19 |
20 | * Wilmar Igl, PhD (ICON) will start the session with the topic "The Mythical Man Month (1975-2025) - Planning, Implementing, and Managing Statistical Software Projects".
21 | The talk will use the 50^th^ anniversary of the classic book entitled "The Mythical Man-month: Essays on Software Engineering" by Brooks (1975) as a starting point to explore modern concepts of software engineering and management, thereby, highlighting established truths and novel insights.
22 | The topic is more general and conceptual in nature, but expected to appeal to a broader audience of statisticians, statistical programmers, and their managers.
23 |
24 | * Pravin Madhavan, PhD (Berry Consultants) will continue with the topic "Continuous Integration (CI) practices for statistical software development", thereby exploring conceptual, yet more technical approaches of software engineering.
25 | In this context, he will also explain related techniques including version control, automated testing, and build automation and give recommendations for best practices based on his experience of building large commercial software solutions, which are also applicable to open-source software.
26 |
27 | * Isaac Gravestock, PhD (Roche) will present the talk "Scaling Statistical Innovation and Open Source Collaborations".
28 | He will explain that novel statistical methods also need user-friendly, robust, high-quality software.
29 | Here, the role of the `openstatsware` working group in particular and the OpenPharma community in general is highlighted as platforms for industry and academic collaborations for development of software leading to the adoption of novel statistical methods.
30 |
31 | * Brian Lang, PhD (MSD) will conclude the session with the topic "Analysis Specification to Execution in R/Shiny".
32 | He will present a practical example of the development of software based on R, Shiny and a database for study analysis planning and execution to accurately and efficiently generate tables, listings, and figures.
33 |
34 | We look forward to seeing you all at the conference (come say hi if you are attending!) and I'd like to personally thank Wilmar and all the presenters for putting together such an exciting session.
35 | I'm sure you'll all enjoy it.
36 |
--------------------------------------------------------------------------------
/blog/psi-2025/psi-banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/psi-2025/psi-banner.png
--------------------------------------------------------------------------------
/blog/risw2023_session/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Considerations in developing and using open-source statistical software in Pharma"
3 | author: "Daniel"
4 | subtitle: Summary of our session at this year's Regulatory-Industry Statistics Workshop
5 | description: We shared our working group progress and discussed with the panel and the audience challenges and next steps.
6 | date: "2023-09-29"
7 | categories: [news]
8 | image: "panel_photo.jpeg"
9 | ---
10 |
11 | We were lucky that the `openstatsware` proposed session was accepted in this year's [ASA Biopharmaceutical Section Regulatory-Industry Statistics Workshop 2023](https://ww2.amstat.org/meetings/biop/2023/), see the session details [here](https://ww3.aievolution.com/AMSTATevents/index.cfm?do=ev.viewEv&ev=2045). We would like to thank Ya Wang (Gilead, `openstatsware`) who organized the session, and Ning Leng (Genentech) who moderated the session and helped with the invitations. It was great to have Paul Schuette (FDA CDER) as FDA presenter and Ryan Jung (FDA CDER) as additional FDA panelist. The round was completed by Eric Nantz (Eli Lilly) as additional industry panelist.
12 |
13 | Personally, I feel it was awesome to have a nicely filled auditorium of interested workshop participants to present to, despite the late timing of the session in the workshop (Friday afternoon).
14 |
15 | ## Industry presentation
16 |
17 | In the first presentation "First year of the Software Engineering working group - working together across organizations" and on behalf of `openstatsware`, Daniel Sabanes Bove (Roche, `openstatsware`) introduced the working group (WG) and presented its progress during the first year. The slides are available (in slightly modified `html` format for convenience) [here](https://rconsortium.github.io/asa-biop-swe-wg/slides/risw-first-year-wg-sep2023-quarto.html).
18 |
19 | ## FDA presentation
20 |
21 | Paul Schuette gave the second presentation on "Open-source software for regulatory submissions and regulatory environments", which is available on the R Submissions WG website [here](https://github.com/RConsortium/submissions-wg/blob/main/_Documents/OpenSource1c_Paul_S_2023_ASA_biopharma.pdf). Paul started by reminding us of [FDA's clarifying statement on software](https://fda.report/media/109552/Statistical-Software-Clarifying-Statement-PDF.pdf), which was already published in May 2015. After noting that he could only comment on the CDER side of FDA for submissions, he commented on the Study Data Technical Conformance Guide (SDTCG). The SDTCG in particular mandates that all source code for ADaM data sets, as well as TLGs for primary and secondary efficacy analyses need to be submitted. Paul noted that while ASCII files are required and non-ASCII characters are to be avoided, the files could still be with `.r` extension - not only `.txt` files are allowed. Interestingly even `.jl` i.e. Julia files are allowed for module 5 content (although they have not received any Julia submission), while `.sas` and `.r` are allowed for modules 3 to 5. In the future they would like to be able to support more formats.
22 |
23 | ### History of R
24 |
25 | Looking at the history of R, he noted that it followed S, which was developed by John Chambers et al. in the same building at Bell Labs as C and Unix. R's major innovation in the 1990s was the CRAN repository. RStudio has been available as a popular IDE since 2011. With R Core and the R Foundation, a governance structure is in place. Discussing the various R community based WGs, Paul highlighted the R Submissions WG that has already completed 3 pilot
26 | submissions. Currently, pilot 4 with a container or web assembly based submission is under development. Paul also mentioned Python, which is used in a complementary set of applications such as natural language processing and machine learning, and less in statistics and visualization. He noted that also some commercial software is written in Python. Julia seems to be not (yet?) of great importance in the statistics community.
27 |
28 | ### R for submissions
29 |
30 | Open source software comes with benefits but also challenges. Paul noted the higher pace of changes in the software, and the missing support process when something goes wrong. The need for IT departments supporting the software installations can be another challenge. With almost [20,000 contributed R packages on CRAN](https://cran.r-project.org/web/packages/), it is not trivial to decide which R packages to trust.
31 |
32 | Zooming in on the use of R for submissions, Paul noted that CDER has not yet received a purely R-based submission. So far, hybrid submissions combining R and proprietary software have been successfully received. Statistical reviewers have also been using R, in particular for plots that have in some instances already made it into labels. New graduates often come with R knowledge; however, best practices are not taught in graduate schools. Paul recommended reaching out to the review divisions early before submissions, and keeping in mind the perspective of the statistical reviewers, who are using laptops for their review work.
33 |
34 | Paul finished with the Stan Lee quote "With great power comes great responsibility", reminding us that we need to use the power of open source software responsibly.
35 |
36 | ## Panel discussion
37 |
38 | 
39 |
40 | ### Popularity of R
41 |
42 | Ning started the panel discussion by asking how popular R is in the companies and at the FDA. Paul mentioned that they have been seeing an uptick in recent graduates with an R and Python background during the last 5 years. They also encourage staff to be bilingual in the programming languages to increase flexibility. Ryan added that he personally uses R a lot, e.g. to replicate sponsor analyses, and conduct FDA in-house analyses/visualization. Some reviewers use Python, but only for research purposes, not for regulatory purposes. The FDA's Division of Analytics and Informatics (DAI) also has lots of talented R programmers. Eric added that at Eli Lilly they use R more and more, and Daniel added that at Roche Pharma new molecules from this year onwards start their study data analyses in R.
43 |
44 | ### Quality Assessment
45 |
46 | The next question was how the panel members assessed the quality of the packages. Daniel always looks at the source code of the package, which is nowadays typically available on GitHub - are tests included? If there are tests, are they meaningful and sufficiently granular? If yes, are functions documented? Are vignettes available? Eric added that documentation is very important for him to get started with a package, so it is a key criterion. A great example here is the [`targets` package](https://books.ropensci.org/targets/) from Will Landau. Paul mentioned that often he does not have a lot of time for the assessment, and provenance is an important factor: do you know where the package came from? Ryan added that not all packages meet the requirements of the reviewers; the issue already arose in one case where a package affiliated with LaTeX was utilized to provide improved fonts in R markdown. The package was rejected in the end because it could not be resolved. He added that well-known packages are easier to accept, while proprietary packages that are not open source would take a lot of time to review and test.
47 |
48 | ### Package Dependencies
49 |
50 | It was great to have a lot of the questions subsequently from the audience. The first was around package dependencies and how to deal with them. Eric mentioned that this was a key issue for the R Submission WG and its pilot submissions to the FDA, where [`renv`](https://rstudio.github.io/renv/articles/renv.html) has been used successfully. A key factor here was to be very verbose in the reviewers guide - you cannot be detailed enough because you want to make sure it can be reproduced on any machine. Also doing the work openly on GitHub helps to be transparent to the reviewers.
51 |
52 | ### Commercial Models
53 |
54 | Another question was whether commercial maintenance of open source packages could be an option, similar to Red Hat for Linux. Paul commented that there were different models for monetizing open source software; he is aware of at least three different ones which come with their challenges. Generally, he encourages open source publication of packages and also harmonized cross-company efforts, to avoid surprises in submissions. The open source nature is an opportunity to avoid repeating the company specific macros submissions from sponsors for proprietary software seen in the past.
55 |
56 | ### Containerization
57 |
58 | One question was whether the use of containers could help solve the reproducibility challenge posed by different R and package installations, and whether this could be integrated into the workflow of statistical reviewers at FDA potentially. Paul commented that there are several pain points. One was e.g. that containers work best with Linux operating systems, while reviewers work on Windows systems. Therefore, IT support was required which adds a level of complexity for the setup. Currently, other solutions are being discussed as part of [Pilot 4 of the R Submissions WG](https://rconsortium.github.io/submissions-wg/pilot4.html). One of them is [`webR`](https://docs.r-wasm.org/webr), where it was important that the security requirements are fulfilled because sessions would be sandboxed in the reviewer's browser session, as Eric added.
59 |
60 | ### Newcomers
61 |
62 | A follow-on question to the quality assessment discussion was how newcomers to the biostatistics community could make sure that their packages can be accepted, although they might be unknown in the first place to the community. Daniel started by emphasizing that for him assessing the quality of the packages is not primarily driven by the authors - newcomers could do a great job with using best practices (including methods description in vignettes, tests, documentation, user vignettes, GitHub issues tracking, CRAN publication etc). Paul added that dissemination is important, e.g. Journal of Statistical Software and others are important outlets. Eric added that the [R Weekly site](https://rweekly.org/) and [R Weekly Highlights podcast](https://rweekly.fireside.fm/), which regularly feature new and updated packages released in the R community, are another great way to publicize a new package and increase adoption.
63 |
64 | ### HPC
65 |
66 | The last question was whether extensive computing (e.g. cloud based sponsor submissions) could be handled during an FDA statistical review. Paul mentioned that unfortunately cloud based solutions are not currently compatible with security and other regulatory requirements. However, statistical reviewers have access to an FDA HPC environment, but the access process is not trivial, and can be time intensive.
67 |
68 | 
69 |
--------------------------------------------------------------------------------
/blog/risw2023_session/panel_photo.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/risw2023_session/panel_photo.jpeg
--------------------------------------------------------------------------------
/blog/risw2023_session/panel_photo_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/risw2023_session/panel_photo_2.jpg
--------------------------------------------------------------------------------
/blog/user-2024/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "openstatsware at UseR!2024"
3 | author: "Daniel, Wilmar, Yann, Friedrich, Joseph"
4 | subtitle: Celebrating the first in-person UseR! event since 2019
5 | description: "The first in-person UseR! conference after the Covid pandemic took place between 8 and 11 July 2024 in Salzburg, Austria."
6 | date: "2024-08-28"
7 | categories: [news, R]
8 | image: "sergio_slide.jpg"
9 | ---
10 |
11 | The first in-person UseR! conference after the Covid pandemic took place between 8 and 11 July 2024 in Salzburg, Austria. Hosted by Roche and the Ludwig-Maximilians-Universität München (Munich) with support by the R Consortium and Linux Foundation, this event brought together about 500 in person attendees as well as virtual participants and presenters. See [here](https://events.linuxfoundation.org/user/) for the conference website which also links to the conference [agenda](https://userconf2024.sched.com/) (includes slides for download, where made available by the presenters) and free [recordings](https://www.youtube.com/c/useRConference_global). See also UseR! talks [assembled by `rOpenSci`](https://ropensci.org/blog/2024/07/19/news-july-2024/).
12 |
13 |
14 | The following `openstatsware` members attended:
15 |
16 | - Daniel Sabanés Bové (RCONIS)
17 | - Friedrich Pahlke (RCONIS)
18 | - Gregory Chen (MSD)
19 | - Joseph Rickert (R Consortium)
20 | - Lars Andersen (Boehringer Ingelheim)
21 | - Sergio Olmos (Sanofi)
22 | - Will Landau (Lilly)
23 | - Wilmar Igl (ICON)
24 | - Yann Féat (mainanalytics)
25 |
26 | 
27 |
28 | Here a couple of them will state their highlights from the conference and summarize their contribution to the conference.
29 |
30 | Joseph: "The UseR! 2024 conference, which was organized by the R Foundation, Roche, and the Ludwig-Maximilians-Universität München, and financed by the R Consortium was the first in-person event since 2019. It provided a much needed opportunity for R users to establish friendships and meet face-to-face with luminaries from the R world including Kurt Hornik, Uwe Ligges, and Simon Urbanek from the R Core Group and CRAN and Peter Dalgaard, Tomas Kalibera, Martin Mächler, Luke Tierney from the R Core Group, and Hadley Wickham and others from his development team at Posit."
31 |
32 | Wilmar: "I attended my 1st UseR! conference in 2024 to present a personal project to promote civil participation in political decision-making based on R/Shiny technology. The fact that this talk was accepted illustrates the openness of the community and the wide range of potential applications of R and affiliated open-source technologies in society. The conference covered a wide range of other topics in data science and statistics with too many (personal) highlights to mention. The UseR! conference was also special for me because it allowed me to meet many colleagues in real life, whom I have been meeting up with virtually over the last year. Finally, one cannot write about the UseR! conference without mentioning the special vibe of a friendly, inclusive and fun environment, where you can also rub shoulders with long-time champions and rising stars of the R community. UseR! will definitely be on the list of top conferences for the years to come!"
33 |
34 | Yann: "The energy at this conference was incredible. It brought together famous contributors to the R ecosystem and newcomers around topics that are important to me, such as software engineering, statistics, data visualization, reproducibility, etc. This made for interesting and diverse presentations, and more stimulating discussions than one can count. I myself had the opportunity to present the work of the R Validation Hub's [Regulatory R Package Repository](https://github.com/pharmaR/regulatory-r-repo-wg/), and it was a pleasure to exchange ideas on this topic in person. I look forward to future UseR! conferences!"
35 |
36 | Daniel: "I almost can’t believe it, but indeed this was the first UseR! conference I attended. I feel honored to have been part of the organizing and program committee, so I contributed a tiny part to the overall conference organization. It was absolutely amazing to meet so many R community members in person, many of whom I knew by name or video already. Also it was very special to have many R core members mixing with the crowd, giving comments, asking questions, reminding us how R was born in keynote presentations. I really hope that there will be another UseR! conference soon (not just in 5 years) and that I will be able to attend it again. This time, I gave an [`openstatsware` workshop](https://rconis.github.io/user2024-tutorial-gswep/) on good software engineering practice for R packages together with Friedrich, presented the [`{mmrm}` package](https://userconf2024.sched.com/event/1c8uB/mmrm-a-robust-and-comprehensive-r-package-for-implementing-mixed-models-for-repeated-measures-daniel-sabanes-bove-rconis) built by `openstatsware`, and got feedback on the `openstatsguide` by `openstatsware` in the [poster](../../slides/openstatsguide-poster.pdf) session. It was great to bring in so much `openstatsware` to UseR! 2024 😁"
37 |
38 | Friedrich: "Attending the UseR! 2024 conference in Salzburg was a truly enriching experience. This event marked my first in-person UseR! conference, and it exceeded all my expectations. The conference provided an incredible platform for learning and connecting with fellow R enthusiasts. It was especially rewarding to co-lead a 3.5-hour tutorial on 'Good Software Engineering Practice for R Packages' alongside Daniel Sabanés Bové. Our session focused on equipping participants with essential tools and techniques for developing robust and reliable R packages, which was met with great enthusiasm.
39 | In addition to the tutorial, I had the opportunity to present on 'Engineering a Reliable R Package for Regulatory Use Using `{rpact}` as an Example.' This talk gave me the chance to highlight the six-year journey of developing the [`{rpact}` package](https://rpact-com.github.io/rpact/), a tool crucial for confirmatory adaptive clinical trials. I discussed how our validation framework, inspired by GAMP 5 principles, has been instrumental in achieving high levels of automation and reliability, ensuring compliance with stringent regulatory standards.
40 | As a co-founder of [RCONIS](https://rconis.com), our new joint venture that also sponsored the conference, it was an honor to contribute to this fantastic event. Beyond my official roles, I also enjoyed capturing memorable moments as the conference photographer. The entire experience left me eagerly anticipating future UseR! conferences and the continued growth of the R community."
41 |
--------------------------------------------------------------------------------
/blog/user-2024/sergio_slide.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/user-2024/sergio_slide.jpg
--------------------------------------------------------------------------------
/blog/wg_announcement/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "New ASA BIOP working group on Software Engineering"
3 | author: "SWE WG"
4 | subtitle: Fostering open source code collaboration within Biostatistics
5 | description: We are officially announcing the new ASA BIOP scientific working group on Software Engineering (SWE WG).
6 | date: "2022-10-24"
7 | categories: [news, R]
8 | image: "sticker-1200.png"
9 | ---
10 |
11 | With the shift of biostatistics work from proprietary software (e.g. SAS) to R, it is now even more important to write reliable and reproducible code, and to be selective with the R packages we use. This topic is equally relevant for biostatistics work as it is for programming and other roles. A recent [panel discussion](https://www.linkedin.com/pulse/research-software-engineering-clinical-biostatistics-saban%C3%A9s-bov%C3%A9/) on this topic highlighted this evolving need across companies and academia.
12 |
13 | To foster collaboration within biostatistics across organizational silos we proposed a new scientific working group under the umbrella of the [American Statistical Association (ASA) Biopharmaceutical section (BIOP)](https://community.amstat.org/biop/home) which was quickly approved in August this year: The Software Engineering working group (SWE WG). Now we have launched our [landing page](https://community.amstat.org/biop/workinggroups/swe-wg) and [website](https://rconsortium.github.io/asa-biop-swe-wg/) (hosted by the [R Consortium](https://www.r-consortium.org/about) GitHub organization) and want to share this news broadly to raise awareness and attract contributors. We have more than 10 companies and institutions involved in the SWE WG and welcome new members.
14 |
15 | The primary objective of the SWE WG is to engineer R packages that implement important methods that are often utilized in biostatistics but still missing from our R toolbox. The first product is the recently-published R package implementing mixed models for repeated measures (MMRM): [mmrm](https://cran.r-project.org/package=mmrm). In particular, a critical advantage of this package over existing implementations is that it is faster and converges more reliably. It also provides a comprehensive set of features: users can specify a variety of covariance matrices, weight observations, fit models with restricted or standard maximum likelihood inference, perform hypothesis testing with Satterthwaite adjusted degrees of freedom, and extract the least square means estimates by using the [emmeans package](https://github.com/rvlenth/emmeans). We aim to establish the R package [mmrm](https://cran.r-project.org/package=mmrm) as a new standard for fitting MMRM and will communicate it broadly. We welcome any community contributions to the R package, including feedback, questions and feature requests.
16 |
17 | The secondary objective is to develop best practices for engineering high-quality open-source statistical software, and to promote the use of these best practices in the broader biostatistics community via public training materials.
18 |
19 | ➞ Would you like to try out the new package mmrm? Install it from [CRAN](https://cran.r-project.org/package=mmrm) and read the [documentation](https://openpharma.github.io/mmrm/) (hosted by the [openpharma GitHub organization](https://openpharma.github.io/#manifesto)).
20 |
21 | ➞ What do you think is missing in the statisticians' R toolbox? Comment [here](https://rconsortium.github.io/asa-biop-swe-wg/contact_us.html)!
22 |
23 | ➞ Are you developing R packages and are interested in joining the working group? Please visit [our website](https://rconsortium.github.io/asa-biop-swe-wg/) on [how to join](https://rconsortium.github.io/asa-biop-swe-wg/join_us.html) (no ASA membership required).
24 |
--------------------------------------------------------------------------------
/blog/wg_announcement/sticker-1200.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/blog/wg_announcement/sticker-1200.png
--------------------------------------------------------------------------------
/computational-statistics.csl:
--------------------------------------------------------------------------------
1 |
2 |
20 |
--------------------------------------------------------------------------------
/contact_us.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Contact Us"
3 | ---
4 |
5 | To contact us, please fill out the form below:
6 |
7 |
8 |
9 | Even easier - just login with your GitHub account and leave us a comment in the [discussions page](https://github.com/openstatsware/website/discussions)!
10 |
--------------------------------------------------------------------------------
/data/members.csv:
--------------------------------------------------------------------------------
1 | Firstname,Lastname,Affiliation,SWE_WG_Member,MMRM,HTA,bayesian_mmrm,SSD
2 | Alessandro,Gasparini,Red Door Analytics,1,0,0,0,0
3 | Andy,Miskell,Eli Lilly,1,0,0,0,0
4 | Ben,Arancibia,GSK,1,0,0,0,0
5 | Brian,Lang,MSD,1,1,0,0,0
6 | Christian,Stock,Boehringer Ingelheim,1,0,0,1,0
7 | Daniel,Leibovitz,Independent,1,1,0,1,0
8 | Daniel,Sabanés Bové,RCONIS,1,1,0,0,1
9 | Dong,Xi,Gilead Sciences,1,0,0,0,0
10 | Friedrich,Pahlke,RPACT,1,0,0,0,0
11 | Jian,Wang,Eli Lilly,1,0,0,0,0
12 | Keaven,Anderson,Merck,1,0,0,0,0
13 | Kevin,Kunzmann,Boehringer Ingelheim,1,1,0,1,0
14 | Peikun,Wu,R&G US,1,0,0,0,0
15 | Ron,Yu,Gilead Sciences,1,0,0,0,0
16 | Will,Landau,Eli Lilly,1,0,0,1,0
17 | Ya,Wang,Gilead Sciences,1,1,0,0,0
18 | Liming,Li,AstraZeneca,1,1,0,0,0
19 | Julia,Dedic,Roche,0,1,0,0,0
20 | Joe,Rickert,RStudio & R Consortium,1,0,0,0,0
21 | Michelle,Zhang,Affamed,1,0,0,0,0
22 | Jack,Talboys,Novartis,1,0,0,0,0
23 | Andrew,Bean,Novartis,1,0,0,1,0
24 | Terri,Johnson,Edwards Lifesciences,1,0,0,0,0
25 | Yoni,Sidi,Sanofi,1,1,0,1,0
26 | Vitaly,Druker,AstraZeneca,1,0,0,0,0
27 | Juliane,Manitz,EMD Serono,1,0,0,0,0
28 | Pascal,Kieslich,Merck KGaA,1,0,0,0,0
29 | Songgu,Xie,Regeneron,1,0,0,0,0
30 | Ryan,Wang,Johnson & Johnson,1,0,0,0,0
31 | Gregory,Chen,MSD,1,0,1,0,0
32 | Laura,Harris,Denali,1,0,0,0,0
33 | Colombe,Chappey,Pfizer,1,0,0,0,0
34 | Martin,Kappler,Cytel,1,0,0,0,0
35 | Pravin,Madhavan,Berry Consultants,1,0,0,0,0
36 | Nick,Berry,Berry Consultants,1,0,0,0,0
37 | Quang,Nguyen,Regeneron,1,0,0,0,0
38 | Wilmar,Igl,ICON,1,0,0,0,0
39 | Clara,Beck,Bayer,1,0,0,0,0
40 | Isaac,Gravestock,Roche,0,0,1,0,0
41 | Christos,Kokaliaris,Roche,0,0,1,0,0
42 | Michael,Seo,Roche,0,0,1,0,0
43 | Miranta,Antoniou,Roche,0,0,1,0,0
44 | Rossella,Belleli,Roche,0,0,1,0,0
45 | Cedric,Revil,MSD,0,0,1,0,0
46 | Chrysostomos,Kalyvas,MSD,0,0,1,0,0
47 | Henrik,Thomsen,Novo Nordisk,1,0,0,0,0
48 | Yann,Féat,mainanalytics,1,0,0,0,0
49 | Sergio,Olmos,Sanofi,1,0,0,0,0
50 | Fan,Wu,Sanofi,1,0,0,0,0
51 | Peng,Zhang,CIMS Global,1,0,0,0,0
52 | Elias Laurin,Meyer,Berry Consultants,1,0,0,0,0
53 | Michał,Majka,Erste Group,1,0,0,0,0
54 | Lorin,Towle-Miller,GSK,1,0,0,0,0
55 | Nils,Penard,UCB,1,0,0,0,0
56 | Marianne,Grinberg,UCB,1,0,0,0,0
57 | Kristian,Brock,AstraZeneca,1,0,0,0,1
58 | Jeffrey,Long,GSK,1,0,0,0,0
59 | Serene,Jiang,Johnson & Johnson,1,0,0,0,0
60 | Lars,Andersen,RPACT,1,0,0,0,0
61 | Christian Haargaard,Olsen,Novo Nordisk,0,0,1,0,0
62 | Simon,Clancy,Novo Nordisk,0,0,1,0,0
63 | Thomas,Maltesen,Novo Nordisk,0,0,1,0,0
64 | Matthew,Phelps,Novo Nordisk,0,0,1,0,0
65 | Nicolai Skov,Johnsen,Novo Nordisk,0,0,1,0,0
66 | Keovilay,Chanthavinout,Roche,0,0,1,0,0
67 | Rajesh,Hagalwadi,MaxisIT,1,0,0,0,0
68 | Kamil,Sijko,Transition Technologies Science,1,0,0,0,0
69 | Claudia,Carlucci,Daiichi Sankyo,1,0,1,0,0
70 | Sergei,Krivtov,Daiichi Sankyo,1,0,1,0,0
71 | Isaac,Gravestock,Roche,1,0,1,0,0
72 | Adam,Omidpanah,BMS,1,0,0,0,0
73 | Audrey,Yeo,Independent,1,0,0,0,0
74 | Natalie,Dennis,Daiichi Sankyo,0,0,0,0,0
75 | Tao,Jiang,Genentech,1,0,0,0,0
76 |
--------------------------------------------------------------------------------
/data/news.csv:
--------------------------------------------------------------------------------
1 | Date,Text
2 | 02/05/2025,We are proud to announce that our single-topic session titled Statistical Software Engineering has been accepted for the 2025 PSI conference
3 | 11/14/2024,The HTA-R Pharma workstream is proud to announce `maicplus` v0.1 has been published on CRAN
4 | 09/16/2024,`openstatsguide` is published
5 | 08/28/2024,`openstatsware` at UseR!2024
6 | 04/21/2024,New European working group co-chair
7 | 04/08/2024,First Julia package from `openstatsware` published
8 | 03/12/2024,We are now also an EFSPI and PSI Special Interest Group!
9 | 03/05/2024,Announcing the Clinical Trials Task View Hexwall
10 | 10/18/2023,Celebrating 5 workshops on good SWE practices in 2023
11 | 10/03/2023,Summary of our session at this year's Regulatory-Industry Statistics Workshop
12 | 9/27/2023,Announcing the new short name and logo
13 | 7/25/2023,R Package Workshop in Rockville (MD) on 26th September open for registration
14 | 7/20/2023,{brms.mmrm} R package made public as open-source software (under development).
15 | 4/27/2023,Bayesian MMRM workstream page launched
16 | 3/9/2023,Online workshop in Asia on 24th March open for registration
17 | 3/5/2023,HTA Workstream is here now
18 | 2/13/2023,Workshop on good software engineering practice for R
19 | 1/20/2023,Presentations are now available on this site
20 | 11/16/2022,Article introducing the SWE WG in BIOP Report
21 | 10/24/2022,Announcing the new ASA BIOP working group on Software Engineering
22 | 10/7/2022,Initial publication of the website.
23 | 10/10/2022,Added hex sticker for the working group. Credits to Kalashnyk on Flaticon for creating the sticker icon.
24 |
--------------------------------------------------------------------------------
/goals.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Goals"
3 | ---
4 |
5 | openstatsware is a [scientific working group](https://community.amstat.org/biop/workinggroups/swe-wg) of the [American Statistical Association (ASA) Biopharmaceutical section (BIOP)](https://community.amstat.org/biop/home) and a [European Special Interest Group (SIG)](https://www.psiweb.org/sigs-special-interest-groups/software-engineering) sponsored by [Statisticians in the Pharmaceutical Industry (PSI)](https://www.psiweb.org/about-us) and the [European Federation of Statisticians in the Pharmaceutical Industry (EFSPI)](https://efspi.org/).
6 |
7 | Our goals are to:
8 |
9 | - **Engineer selected R-packages** to fill in gaps in the open-source statistical software landscape, and to promote software tools designed by the working group through publications, conference presentations, workshops, and training courses.
10 |
11 | - **Develop good SWE practices** for engineering high-quality statistical software and promote their use in the broader Biostatistics community via public training materials.
12 |
13 | - **Communicate and collaborate** with other R software initiatives including via the [R Consortium](https://www.r-consortium.org/).
14 |
15 | ```{=html}
16 |
17 |
18 |
--------------------------------------------------------------------------------
/guide.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "openstatsguide"
3 | subtitle: "Minimum Viable Good Practices for High Quality Statistical Software Packages"
4 | author: "0.1-0"
5 | author-title: "Version"
6 | date: "16 Sep 2024"
7 | bibliography: references.bib
8 | nocite: |
9 | @*
10 | csl: computational-statistics.csl
11 | ---
12 |
13 | # Scope
14 |
15 | We encourage developers of statistical software packages to follow this minimum set of good practices around:
16 |
17 | > "**D**ocumentation, **V**ignettes, **T**ests, **F**unctions, **S**tyle, **L**ife cycle"
18 |
19 | These keywords can be easily remembered with the mnemonic bridge sentence:
20 |
21 | > "**D**evelopers **V**alue **T**ests **F**or **S**oftware **L**ongevity"
22 |
23 | While the recommendations are rather generic, we focus on functional programming languages and give links to implementations in R, Python and Julia.
24 |
25 | This guide primarily addresses developers of statistical packages. Users interested in assessing the quality of existing statistical packages will find complementary "validation" focused resources valuable, as listed in [References](#references).
26 |
27 | # Recommendations
28 |
29 | ```{r echo=FALSE}
30 | glue_or_drop <- function(doc, img) { # build one tabset panel: a "## <logo img>" heading followed by markdown links
31 | if (identical(doc, "")) { # "" is the default that guide_tabset() passes when a language has no link
32 | NULL
33 | } else {
34 | paste0(
35 | "## ", htmltools::tags$img(src = img, width = 25, alt = "logo"), "\n",
36 | paste(
37 | paste0(
38 | paste0("[", names(doc), "]"), # link text comes from the names of doc
39 | paste0("(", doc, ")") # link target comes from the values of doc
40 | ),
41 | collapse = "\n" # several links for one language are joined with a newline
42 | )
43 | )
44 | }
45 | }
46 | guide_tabset <- function(r = "", python = "", julia = "") { # print a ".panel-tabset" div with one panel per language; callers pass c(label = url)
47 | contents <- paste(
48 | "::: {.panel-tabset}",
49 | glue_or_drop(r, "https://www.r-project.org/logo/Rlogo.svg"),
50 | glue_or_drop(python, "https://s3.dualstack.us-east-2.amazonaws.com/pythondotorg-assets/media/files/python-logo-only.svg"),
51 | glue_or_drop(julia, "https://raw.githubusercontent.com/JuliaLang/julia-logo-graphics/master/images/julia-dots.svg"),
52 | ":::",
53 | sep = "\n\n"
54 | )
55 | cat(contents, sep = "\n\n") # written to stdout; the calling chunks set results="asis"
56 | }
57 | ```
58 |
59 | ## Documentation
60 |
61 | Documentation is important for both users and developers to understand all objects in your package, without reading and interpreting the underlying source code.
62 |
63 | 1. Use **in-line comments** next to functions, classes and other objects to generate their corresponding documentation.
64 |
65 | ```{r results="asis", echo=FALSE}
66 | guide_tabset(
67 | r = c("roxygen2" = "https://roxygen2.r-lib.org/"),
68 | python = c("docstrings" = "https://peps.python.org/pep-0257/"),
69 | julia = c("docstrings" = "https://docs.julialang.org/en/v1/manual/documentation/")
70 | )
71 | ```
72 |
73 | 2. Do also **document internal functions** and classes for maintenance by future developers.
74 |
75 | 3. Add **code comments** for ambiguous or complex pieces of internal code, but only after optimizing the code design as much as possible.
76 |
77 | ## Vignettes
78 |
79 | Vignettes are documents that complement the object documentation by providing a comprehensive and long-form overview of your package's functionality from a user point of view, with particular emphasis on the connection to the statistical approaches.
80 |
81 | 1. Provide an **introduction vignette** that introduces the package to new users, such that they have an easy entry point for getting started. Make sure to include code examples and automatically compile the vignette to ensure reproducibility.
82 |
83 | 2. Include **deep dive vignettes** that go into depth on specific use cases, functionalities or underlying theory, in particular describing the underlying statistical methodology and how it is implemented in the package.
84 |
85 | 3. Host your vignettes on a **dedicated website**, which allows users to read the vignettes without installing the package, and simplifies citing the vignettes in other publications.
86 |
87 | ```{r results="asis", echo=FALSE}
88 | guide_tabset(
89 | r = c("pkgdown" = "https://pkgdown.r-lib.org/"),
90 | python = c("Sphinx" = "https://www.sphinx-doc.org/en/master/"),
91 | julia = c("Documenter" = "https://documenter.juliadocs.org/")
92 | )
93 | ```
94 |
95 | ## Tests
96 |
97 | Tests are a fundamental safety net and development tool to ensure that your package works as expected, both during development as well as on user systems.
98 |
99 | 1. Write **unit tests** for all functions and classes in your package, to ensure that all building blocks work correctly on their own ("white box" testing). Expect to rewrite tests for internal code when you refactor it.
100 |
101 | 2. Write **functional tests** for all user facing functionality ("black box" testing). These tests ensure that the user API is stable when refactoring internal code.
102 |
103 | ```{r results="asis", echo=FALSE}
104 | guide_tabset(
105 | r = c("testthat" = "https://testthat.r-lib.org/"),
106 | python = c("pytest" = "https://pytest.org/"),
107 | julia = c("Test" = "https://docs.julialang.org/en/v1/stdlib/Test/")
108 | )
109 | ```
110 |
111 |
112 | 3. In addition, ensure a **good coverage** of your code with your tests as a final check, but only after having unit and functional tests on all levels of the code.
113 |
114 | ```{r results="asis", echo=FALSE}
115 | guide_tabset(
116 | r = c("covr" = "https://covr.r-lib.org/"),
117 | python = c("Coverage.py" = "https://coverage.readthedocs.io/"),
118 | julia = c("Coverage.jl" = "https://github.com/JuliaCI/Coverage.jl")
119 | )
120 | ```
121 |
122 | ## Functions
123 |
124 | Function definitions should be short, simple and enforce argument types with assertions.
125 |
126 | 1. Write **short functions** with fewer than 50 lines of code for a single and well-defined purpose, with **few arguments**, and low cyclomatic complexity, in order to reduce the risk of bugs, simplify writing tests and ensure that you can maintain them.
127 |
128 | 2. Use **type hints** or types to explain to the user which argument of the function expects which type of input.
129 |
130 | ```{r results="asis", echo=FALSE}
131 | guide_tabset(
132 | r = c("roxytypes" = "https://openpharma.github.io/roxytypes/"),
133 | python = c("typing" = "https://docs.python.org/3/library/typing.html"),
134 | julia = c("types" = "https://docs.julialang.org/en/v1/manual/types/")
135 | )
136 | ```
137 |
138 | 3. Enforce types and other expected properties of function arguments with **assertions**, which give an early and readable error message to the user instead of failing function code downstream in a less explicable way.
139 |
140 | ```{r results="asis", echo=FALSE}
141 | guide_tabset(
142 | r = c("checkmate" = "https://mllg.github.io/checkmate/"),
143 | python = c("assertpy" = "https://pypi.org/project/assertpy/"),
144 | julia = c("ArgCheck.jl" = "https://github.com/jw3126/ArgCheck.jl")
145 | )
146 | ```
147 |
148 | ## Style
149 |
150 | A consistent and readable code style that is language idiomatic should be used and enforced by style checks.
151 |
152 | 1. Use **language idiomatic** code and follow the **"clean code" rules** (use descriptive and meaningful names, prefer simpler over more complex code, avoid duplication of code, regularly refactor code), while allowing for exceptions only where needed.
153 |
154 | 2. Use a **formatting tool** to automatically implement a consistent and readable code format.
155 |
156 | ```{r results="asis", echo=FALSE}
157 | guide_tabset(
158 | r = c("styler" = "https://styler.r-lib.org/"),
159 | python = c("Autopep8" = "https://pypi.org/project/autopep8/"),
160 | julia = c("JuliaFormatter.jl" = "https://domluna.github.io/JuliaFormatter.jl/")
161 | )
162 | ```
163 |
164 | 3. Use a **style checking** tool to enforce a consistent and readable code style.
165 |
166 | ```{r results="asis", echo=FALSE}
167 | guide_tabset(
168 | r = c("lintr" = "https://lintr.r-lib.org/"),
169 | python = c("Pylint" = "https://pypi.org/project/pylint/"),
170 | julia = c("StaticLint.jl" = "https://github.com/julia-vscode/StaticLint.jl")
171 | )
172 | ```
173 |
174 | ## Life cycle
175 |
176 | Life cycle management is simplified by reducing dependencies, and should comprise a central code repository.
177 |
178 |
179 | 1. **Reduce dependencies** to simplify maintenance of your own package. Only depend on other packages that you trust and deem stable enough for the purpose, in order to avoid reinventing the wheel.
180 |
181 |
182 | 2. **Ensure that you track dependencies and pin their versions** so that other developers who contribute can use the same environment and obtain consistent results and behaviours. Approaches range from system-level ones, such as configuring a snapshot date in the package repository, to language-specific tools that generate a file tracking dependencies and their versions, which serves as a source of truth for all package developers.
183 |
184 |
185 | 3. Give clear information to users about changes in the package API via maintaining the **change log** and first **deprecating functionality** before removing it.
186 |
187 | ```{r results="asis", echo=FALSE}
188 | guide_tabset(
189 | r = c(
190 | "lifecycle" = "https://cran.r-project.org/web/packages/lifecycle/index.html",
191 | "fledge" = "https://fledge.cynkra.com/"
192 | ),
193 | python = c("deprecation" = "https://deprecation.readthedocs.io/"),
194 | julia = c("workflow" = "https://invenia.github.io/blog/2022/06/17/deprecating-in-julia/")
195 | )
196 | ```
197 |
198 | 4. Use a **central repository** for version control, collecting and resolving issues, and managing releases. Include the publication of a **contributing guide** to help onboard new developers and enable community contributions.
199 |
200 | # References
201 |
202 |
--------------------------------------------------------------------------------
/hexwall.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "TaskView Hexwall"
3 | format:
4 | dashboard:
5 | orientation: columns
6 | scrolling: true
7 | ---
8 |
9 | ```{r, echo = FALSE}
10 | knitr::opts_chunk$set(
11 | message = NA,
12 | echo = FALSE
13 | )
14 | ```
15 |
16 | ```{r Get the Task View File}
17 | # Download Views binary file from CRAN
18 | tmpfile <- tempfile()
19 | repo <- 'https://cloud.r-project.org'
20 | download.file(paste(repo, "src/contrib/Views.rds", sep = "/"), destfile = tmpfile)
21 |
22 | # Get packages on the task view
23 | all_Views <- readRDS(tmpfile)
24 | unlink(tmpfile)
25 | ctv_clintrials <- all_Views$ClinicalTrials
26 | ctv_clintrials_packages <- ctv_clintrials$packagelist$name
27 |
28 | ```
29 |
30 | ```{r Compile Helper Functions}
31 | # Construct the GitHub contents-API URI for a path (default: man/figures) inside the cran/<dep> repository
32 | make_cran_uri <- function(dep, endpoint = 'api.github.com', subdir = 'man/figures'){
33 | sprintf('https://%s/repos/cran/%s/contents/%s', endpoint, dep, subdir)
34 | }
35 |
36 | # Construct the GitHub contents-API URI for the root of repository `dep` (presumably an 'owner/repo' path as produced by grep_github())
37 | make_gh_uri <- function(dep, endpoint = 'api.github.com'){
38 | sprintf('https://%s/repos/%s/contents', endpoint, dep)
39 | }
40 |
41 | check_pat <- function(){ # emit a message (not an error) when the GITHUB_PAT environment variable is unset or empty
42 | if(!nzchar(Sys.getenv('GITHUB_PAT'))){
43 | message('Missing GitHub PAT!')
44 | }
45 | }
46 |
47 |
48 | # list the download URLs of the files under man/figures in the cran/<dep> GitHub repository
49 | # need a GITHUB_PAT for multiple hits to the API
50 | get_logo <- function(dep){
51 | check_pat()
52 | res_cran <- httr::GET(
53 | url = make_cran_uri(dep, subdir = 'man/figures'),
54 | httr::add_headers(
55 | Authorization = sprintf('token %s',Sys.getenv('GITHUB_PAT'))
56 | )
57 | )
58 |
59 | if(httr::status_code(res_cran)=='200'){
60 | man_cran <- httr::content(res_cran)
61 | ret <- purrr::map_chr(man_cran, ~.x$download_url) # NOTE(review): presumably fails when an entry has no download_url (find_readme() guards that case) - confirm
62 | ret[!grepl('^(README|lifecycle)',basename(ret))] # drop files whose names start with README or lifecycle
63 | }
64 | else{
65 | message("CRAN API Message: ", httr::status_code(res_cran))
66 | NA_character_ # any non-200 response yields a single NA
67 | }
68 | }
69 |
70 | find_remote <- function(dep){ # fetch DESCRIPTION from the cran/<dep> repository and hand it to parse_desc(); NA_character_ on a non-200 response
71 | check_pat()
72 | res_cran <- httr::GET(
73 | url = make_cran_uri(dep, subdir = 'DESCRIPTION'),
74 | httr::add_headers(
75 | Authorization = sprintf('token %s',Sys.getenv('GITHUB_PAT'))
76 | )
77 | )
78 |
79 | if(httr::status_code(res_cran)=='200'){
80 | desc_cran <- httr::content(res_cran)
81 | tf <- tempfile()
82 | on.exit(unlink(tf),add = TRUE) # remove the temporary DESCRIPTION copy when the function exits
83 | cat(rawToChar(base64enc::base64decode(desc_cran$content)),file = tf) # the response's content field is base64-decoded and written to the temp file
84 | parse_desc(tf)
85 | }
86 | else{
87 | message("CRAN API Message: ", httr::status_code(res_cran))
88 | NA_character_
89 | }
90 | }
91 |
92 | parse_desc <- function(file){ # return the GitHub repo path(s) found in a DESCRIPTION file, possibly character(0)
93 |
94 | # Try the BugReports field first
95 | desc_bugs <- clean_key(desc::desc_get(file = file, keys = 'BugReports'))
96 | uri <- grep_github(desc_bugs)
97 |
98 | # Fall back to the URLs returned by desc::desc_get_urls() when BugReports yields no GitHub link
99 | if(!length(uri)){
100 |
101 | desc_remotes <- desc::desc_get_urls(file = file)
102 | uri <- grep_github(desc_remotes)
103 |
104 | }
105 |
106 | uri
107 | }
108 |
109 | clean_key <- function(key){ # split a comma-separated value and trim the whitespace around each part
110 | trimws(strsplit(key,',')[[1]]) # [[1]]: only the first element of key is used
111 | }
112 |
113 | grep_github <- function(x){ # keep the elements of x that mention github.com and reduce each to the path after the host
114 | gh_raw <- grep('github\\.com',x,value = TRUE)
115 | gh_prefix <- gsub('^http(.*?).com/','',gh_raw) # strip everything from the leading "http" through the first "com/" (the "." before com is an unescaped wildcard)
116 | gh_suffix <- gsub('issues\\/$|issues$','',gh_prefix) # strip a trailing "issues" or "issues/"
117 | gh_suffix <- gsub('\\/$','',gh_suffix) # strip one remaining trailing slash
118 | gh_suffix
119 | }
120 |
121 | find_readme <- function(dep){ # list the root of GitHub repo `dep` and return the download URL of its README.md
122 | res_gh <- httr::GET(
123 | url = make_gh_uri(dep),
124 | httr::add_headers(
125 | Authorization = sprintf('token %s',Sys.getenv('GITHUB_PAT'))
126 | )
127 | )
128 |
129 | if(httr::status_code(res_gh)=='200'){
130 | root_gh <- httr::content(res_gh)
131 | ret <- purrr::map_chr(root_gh, function(x){
132 | if(is.null(x$download_url)){ # entries without a download_url become NA so map_chr() still gets one string each
133 | return(NA_character_)
134 | }else(
135 | x$download_url
136 | )
137 | })
138 |
139 | ret[grepl('^readme.md$',basename(ret),ignore.case = TRUE)] # case-insensitive match on the file name; character(0) when nothing matches
140 | }
141 | else{
142 | message("GitHub API Message: ", httr::status_code(res_gh))
143 | NA_character_
144 | }
145 | }
146 |
147 | parse_gh_md <- function(path){ # path: download URL of a README.md; returns the image URL(s) taken from its first <img line, or NA_character_
148 | res_readme <- httr::GET(
149 | url = path,
150 | httr::add_headers(
151 | Authorization = sprintf('token %s',Sys.getenv('GITHUB_PAT'))
152 | )
153 | )
154 |
155 | if(httr::status_code(res_readme)=='200'){
156 | readme_content <- httr::content(res_readme)
157 | readme_content_split <- strsplit(readme_content,'\n')[[1]]
158 | # Keep only the lines that contain an HTML <img tag (an empty pattern would match every line)
159 | readme_content_split_lgl <- grepl('<img', readme_content_split, fixed = TRUE)
160 | readme_img_tag <- readme_content_split[readme_content_split_lgl]
161 | readme_img_tags <- strsplit(readme_img_tag,'\\s') # split each matching line on whitespace
162 | if(length(readme_img_tags)==0){ # the README has no <img line at all
163 | return(NA_character_)
164 | }
165 | readme_img_attr <- grep('^src',readme_img_tags[[1]], value = TRUE) # only the first matching line is inspected
166 | readme_img_src <- gsub("'",'',gsub('src(.*?)=','',readme_img_attr)) # drop the src= prefix and single quotes
167 | readme_img_src <- gsub('"','',readme_img_src) # drop double quotes
168 | file.path(dirname(path), readme_img_src) # src is appended to the README's directory URL
169 | }
170 | else{
171 | message("README Message: ", httr::status_code(res_readme))
172 |
173 | NA_character_
174 | }
175 | }
176 | ```
177 |
178 | ```{r Invoke the functions}
179 | ctv_clintrials_remotes <- ctv_clintrials_packages |>
180 | purrr::set_names() |>
181 | purrr::map(find_remote)
182 |
183 | ctv_clintrials_readme <- ctv_clintrials_remotes |>
184 | purrr::discard(~length(.x)==0) |>
185 | purrr::discard(~!grepl('/',.x)) |> #malformed repo paths
186 | purrr::map(find_readme) |>
187 | purrr::map(parse_gh_md)
188 |
189 | # Postprocessing
190 | ctv_clintrials_readme_clean <- ctv_clintrials_readme |>
191 | tibble::enframe(name = 'package', value = 'url') |>
192 | tidyr::unnest(c(url)) |>
193 | dplyr::filter(!is.na(url))
194 |
195 | message("Non Missing logo n:", nrow(ctv_clintrials_readme_clean))
196 | ```
197 |
198 | ```{r Construct list of missing logos}
199 | missing_logo <- setdiff(names(ctv_clintrials_remotes), ctv_clintrials_readme_clean$package)
200 | missing_logo_colors <- sample(viridis::inferno(length(missing_logo)*2), length(missing_logo))
201 | message("Missing logo n:", length(missing_logo))
202 | ```
203 |
204 | ```{r define svg}
205 | library(minisvg)
206 | len <- 95
207 | angles <- (seq(0, 360, 60) + 90) * pi/180
208 | xs <- round(len * cos(angles) + 100, 2)
209 | ys <- round(len * sin(angles) + 100, 2)
210 | hex <- stag$polygon(id = 'hex', xs = xs, ys = ys)
211 | hex$update(stroke = '#223344', fill_opacity=0.25, stroke_width = 4)
212 |
213 | def_layer <- svg_doc(style = "display:none", width = 200, height = 200)$update(width=90, height=90)
214 | def_layer$add_css_url("https://fonts.googleapis.com/css?family=Abril%20Fatface")
215 | def_layer$add_css("
216 | .mainfont {
217 | font-size: 17px;
218 | font-family: 'Abril Fatface', sans-serif;
219 | fill: #223344;
220 | }
221 | ")
222 | def_layer$defs(hex)
223 | ```
224 |
225 |
226 | ```{r compile svg constructor function}
227 | mysvg <- function(nm, color){ # build an SVG document with a hexagon filled with `color` and labelled `nm`; returns new_doc$as_character()
228 | new_doc <- svg_doc(width = 200, height = 200)$update(width=90, height=90)
229 | g <- stag$g()
230 | g$update(onclick=sprintf("hover('%s')",file.path("https://cran.r-project.org/web/packages",nm))) # a click calls a JS hover() (not defined in this chunk) with the package's CRAN page URL
231 | g$use(href = "#hex", fill = color) # references the element with id 'hex'
232 |
233 | text <- stag$text(
234 | nm,
235 | class = "mainfont",
236 | x = 100, y = 100,
237 | `text-anchor`="middle",
238 | `alignment-baseline`="middle"
239 | )
240 |
241 | g$append(text)
242 |
243 | new_doc$append(g)
244 | new_doc$as_character()
245 | }
246 |
247 | ```
248 |
249 | ## Column {width=35%}
250 |
251 | ```{r construct iframe}
252 | htmltools::tags$iframe(
253 | id="descIframe", width="100%", height="100%",
254 | src = file.path("https://cran.r-project.org/web/packages",ctv_clintrials_readme_clean$package[1])
255 | )
256 | ```
257 |
258 | ## Column {width=50%}
259 |
260 | ```{r construct hex wall, results="asis"}
261 | def_layer$as_character()
262 | # Construct the wall
263 | htmltools::tags$div(
264 | #style = "background-color: #feeacd; border-style: solid; border-radius: 10px; border-color: #000; border-width: 1px;",
265 | htmltools::tagList(
266 | purrr::map2(
267 | ctv_clintrials_readme_clean$package,
268 | ctv_clintrials_readme_clean$url,
269 | ~htmltools::tags$img(
270 | onclick=sprintf("hover('%s')",file.path("https://cran.r-project.org/web/packages",.x)),
271 | src = .y,
272 | width = "90",
273 | style="padding-left: 5px; padding-right: 5px;padding-top: 2px; padding-bottom: 2px;")
274 | ),
275 | purrr::map2(missing_logo, missing_logo_colors, ~mysvg(.x, .y)))
276 | )
277 | ```
278 |
279 | ```{=html}
280 |
285 | ```
286 |
--------------------------------------------------------------------------------
/hta_page.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "HTA-R workstream"
3 | ---
4 |
5 | **Lead**: Gregory Chen (MSD)
6 |
7 | **Objective**: This workstream of the SWE WG unites voluntary statistical software engineers that are experienced in HTA (Health Technology Assessment) or highly-interested in this area. The team aims to develop open-source R tools of good quality in the right formats (R packages, apps, user guides) that are useful to both manufacturers and payers, to support crucial analytic topics in HTA dossier submission across various countries, particularly the topics with unmet needs in R implementation and/or related to [EUnetHTA](https://www.eunethta.eu/).
8 |
9 | The workstream also works within a broader group of business and methodological SMEs in HTA and Market Access space of our pharma industry, to ensure this open-source HTA pipeline of R tools is relevant over time and the tools can be adopted easily by business and payers.
10 |
11 | Find more details in our GitHub organization: [link](https://github.com/hta-pharma/).
12 |
13 | ```{r, label = "active", echo = FALSE, message = FALSE, warning = FALSE, results = "asis"}
14 | path <- getwd()
15 | source(file = paste(path, "/utils/read_members.R", sep = ""))
16 | read.members("HTA")
17 | ```
18 |
--------------------------------------------------------------------------------
/html/openstatsware.scss:
--------------------------------------------------------------------------------
1 | // Based on Cosmo 5.3.2, by Bootswatch
2 |
3 | /*-- scss:defaults --*/
4 |
5 | $theme: "openstatsware" !default;
6 |
7 | //
8 | // Color system
9 | //
10 |
11 | $white: #fff !default;
12 | $gray-100: #f8f9fa !default;
13 | $gray-200: #e9ecef !default;
14 | $gray-300: #dee2e6 !default;
15 | $gray-400: #ced4da !default;
16 | $gray-500: #adb5bd !default;
17 | $gray-600: #868e96 !default;
18 | $gray-700: #495057 !default;
19 | $gray-800: #373a3c !default;
20 | $gray-900: #212529 !default;
21 | $black: #000 !default;
22 |
23 | $blue: #2780e3 !default;
24 | $indigo: #6610f2 !default;
25 | $purple: #613d7c !default;
26 | $pink: #e83e8c !default;
27 | $red: #ff0039 !default;
28 | $orange: #f0ad4e !default;
29 | $yellow: #ff7518 !default;
30 | $green: #3fb618 !default;
31 | $teal: #20c997 !default;
32 | $cyan: #9954bb !default;
33 |
34 | $osw-black: #000814;
35 | $osw-dblue: #001D3D;
36 | $osw-lblue: #003566;
37 | $osw-gold: #FFC300;
38 | $osw-yellow: #FFD60A;
39 |
40 | $primary: $osw-lblue !default;
41 | $secondary: $osw-gold !default;
42 | $success: $green !default;
43 | $info: $cyan !default;
44 | $warning: $yellow !default;
45 | $danger: $red !default;
46 | $light: $gray-100 !default;
47 | $dark: $gray-800 !default;
48 |
49 | $min-contrast-ratio: 2.6 !default;
50 |
51 | // Options
52 |
53 | $enable-rounded: false !default;
54 |
55 | // Body
56 |
57 | $body-color: $osw-black !default;
58 | $body-bg: $gray-100 !default;
59 |
60 | // Fonts
61 |
62 | //// Main font
63 | $font-family-sans-serif: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", "Noto Sans", "Liberation Sans", Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji" !default;
64 | $font-size-base: 1rem !default;
65 |
66 | //// Headings
67 | $headings-font-family: "Inter Tight", "Inter", sans-serif !default;
68 |
69 | //// Code
70 | $font-family-monospace: "Fira Code", SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !default;
71 | $code-font-size: $font-size-base !default;
72 |
73 | /*-- scss:rules --*/
74 |
75 | // Variables
76 |
77 | $web-font-path: "https://fonts.googleapis.com/css2?family=Inter&family=Inter+Tight&family=Fira+Code:wght@400;700&display=swap";
78 | @if $web-font-path {
79 | @import url($web-font-path);
80 | }
81 |
82 | // Typography
83 |
84 | body {
85 | -webkit-font-smoothing: antialiased;
86 | font-variant-ligatures: common-ligatures;
87 | -moz-osx-font-smoothing: grayscale;
88 | }
89 |
90 | // Table
91 | .table>thead, .table tbody {
92 | border-bottom-color: $osw-dblue;
93 | }
94 |
95 | .table>:not(caption)>*:not(:last-child)>* {
96 | border-bottom-color: $osw-lblue;
97 | }
98 |
99 | // Hyperlinks
100 | a {
101 | text-decoration-color: $osw-gold;
102 | }
103 | a:hover {
104 | color: $osw-gold;
105 | text-decoration: none;
106 | }
107 |
108 | // Titles
109 | .quarto-title {
110 | text-align: center;
111 | }
112 |
113 | // Hex
114 | .sticker {
115 | min-width: 256px;
116 | max-width: 95%;
117 | }
118 |
119 | // Code in titles
120 | .quarto-title, .quarto-post {
121 | code {
122 | background: none;
123 | color: inherit;
124 | font-size: inherit;
125 | font-weight: inherit;
126 | }
127 | }
128 |
--------------------------------------------------------------------------------
/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | pagetitle: "openstatsware"
3 | comments: false
4 | ---
5 |
6 | ```{=html}
7 |
51 | ```
52 |
--------------------------------------------------------------------------------
/join_us.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Join Us"
3 | ---
4 |
5 | Note that you don't have to be a member of ASA BIOP or EFSPI/PSI to join `openstatsware`. \
6 | There are multiple ways to get involved with `openstatsware`:
7 |
8 | - Join as a working group member, who attends the `openstatsware` meetings.
9 |
10 | - Join as a workstream member, who actively contributes to one or several workstream(s).
11 |
12 | If you are interested in joining `openstatsware`, please contact us: [Daniel Sabanes Bove](mailto:daniel@rconis.com), [Ya Wang](mailto:ya.wang10@gilead.com) or [Alessandro Gasparini](mailto:alessandro.gasparini@reddooranalytics.se).
13 |
--------------------------------------------------------------------------------
/media/bbswLogoDarkBg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/media/bbswLogoDarkBg.png
--------------------------------------------------------------------------------
/mmrm_R_package.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "MMRM R Package Development"
3 | ---
4 |
5 | **Lead**: Daniel Sabanes Bove (Roche)
6 |
7 | **Objective**: To develop a comprehensive R package for mixed models with repeated measures (MMRM) that is robust, well documented and thoroughly tested.
8 |
9 | ```{r, label = "active", echo = FALSE, message = FALSE, warning = FALSE, results = "asis"}
10 | path <- getwd()
11 | source(file = paste(path, "/utils/read_members.R", sep = ""))
12 | read.members("MMRM")
13 |
14 | ```
15 |
16 | To learn more about the MMRM R package visit [openpharma.github.io/mmrm](https://openpharma.github.io/mmrm/).
17 |
--------------------------------------------------------------------------------
/news.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "News"
3 | editor_options:
4 | chunk_output_type: console
5 | ---
6 |
7 | ```{r}
8 | #| include = FALSE
9 | source(file = "utils/read_news.R")
10 | ```
11 |
12 | ```{r}
13 | #| message = FALSE,
14 | #| warning = FALSE,
15 | #| results = "asis",
16 | #| echo = FALSE
17 | read.news()
18 | ```
19 |
--------------------------------------------------------------------------------
/people.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "People"
3 | ---
4 |
5 | **Co-Chairs:**
6 |
7 | * Daniel Sabanes Bove (RCONIS)
8 |
9 | * Ya Wang (Gilead)
10 |
11 | * Alessandro Gasparini (Red Door Analytics)
12 |
13 | **Members:**
14 |
15 | ```{r, label = "active", echo = FALSE, message = FALSE, warning = FALSE, results = "asis"}
16 | path <- getwd()
17 | source(file = paste(path, "/utils/read_members.R", sep = ""))
18 | read.members("SWE_WG_Member")
19 | ```
20 |
--------------------------------------------------------------------------------
/presentations.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Presentations"
3 | ---
4 | - *Introducing `openstatsware`: Who we are and what we build together*. R/Pharma APAC Conference, 31 Oct 2024.
5 | - [slides](slides/rpharma-apac-2024.html)
6 | - *`brms.mmrm`: an R package for Bayesian MMRMs*. Bayesian Biostatistics Conference, 24 October 2024.
7 | - [slides](https://bayes-pharma.org/wp-content/uploads/2024/11/Will-Landau.pdf)
8 | - *`{mmrm}`: A Robust and Comprehensive R Package for Implementing Mixed Models for Repeated Measures*. useR! 2024, 9 Jul 2024.
9 | - [slides](slides/user-2024-mmrm-jul2024.html)
10 | - *Introducing `openstatsware`: Who we are and what we build together*. ASA BIOP DL Webinar, 23 Feb 2024.
11 | - [slides](slides/asa-biop-webinar-feb2024-quarto.html)
12 | - *{mmrm}: an Open Source R Package for Mixed Model Repeated Measures*. China-R Conference, 28 Nov 2023.
13 | - [slides](slides/china-R-mmrm-nov2023.html)
14 | - *Introducing `openstatsware` and the R Package `{mmrm}`*. Lightning Session at ASA SSC Mini-Symposium, 04 Nov 2023.
15 | - [slides](slides/asa-ssc-mmrm-nov2023.html)
16 | - *Introducing `openstatsware` and the R Package `{mmrm}`*. R/Pharma Conference, 24 Oct 2023.
17 | - [slides](slides/rpharma-wg-mmrm-oct2023-quarto.html)
18 | - *First year of the Software Engineering working group*. Regulatory-Industry Statistics Workshop 2023, 29 Sep 2023.
19 | - [slides](slides/risw-first-year-wg-sep2023-quarto.html)
20 | - *{mmrm}: A Robust and Comprehensive R Package for Implementing Mixed Models for Repeated Measures*. JSM, 8 Aug 2023.
21 | - [slides](slides/jsm-aug2023.html)
22 | - *Introduction to the ASA BIOP Software Engineering Working Group and the {mmrm} Package*. R Meetup, 21 Jul 2023.
23 | - [slides](slides/r-meetup-july2023.html)
24 | - *Workshop on Good Software Engineering Practice for R*. San Francisco bay area, 20 Jul 2023.
25 | - [slides](https://openpharma.github.io/workshop-r-swe-sf/listing.html)
26 | - *Plugging the gaps: Lessons learned from implementing the mmrm R package*. PSI, 13 Jun 2023.
27 | - [slides](slides/2023-psi-aims-london.html)
28 | - *{mmrm}: an Open Source R Package for Mixed Model Repeated Measures*. Pharma RUG, 31 Mar 2023.
29 | - [slides](slides/pharma-rug-mmrm-mar2023.html)
30 | - *Workshop on Good Software Engineering Practice for R*. Shanghai, 24 Mar 2023.
31 | - [slides](https://openpharma.github.io/workshop-r-swe/listing.html)
32 | - *Workshop on Good Software Engineering Practice for R*. BBS workshop, Basel, 10 Feb 2023.
33 | - [slides](https://kkmann.github.io/workshop-r-swe/listing.html)
34 | - *Introducing the Software Engineering working group and {mmrm}*. R/Adoption Series Webinar, 26 Jan 2023.
35 | - [slides](slides/r-adoption-jan2023.html)
36 | - *Introducing the Software Engineering working group: Who we are and what we build together*. R Govys Meetup, 19 Jan 2023.
37 | - [slides](slides/r-govys-jan2023.html)
38 | - *Statistical software for continuous longitudinal endpoints: Introducing the R package `mmrm`*. Joint EFSPI & BBS virtual event - Addressing intercurrent events: Treatment policy and hypothetical strategies (day 1), 8 Dec 2022.
39 | - [slides](slides/bbs-efspi-mmrm-dec2022.html)
40 | - [recording](https://streamingmedia.roche.com/media/t/1_toj7v6bn) (35:00)
41 | - *Statistical Software Engineering 101*. Video blog series on ASA BIOP YouTube channel.
42 | - Unit Testing for R Developers
43 | - 1: The Basics \[[slides](slides/unit_testing_basic.html), [video](https://www.youtube.com/watch?v=mfLXiP4A9CA)\]
44 | - 2: Advancing your skills \[[slides](slides/unit_testing_advanced.html), [video](https://www.youtube.com/watch?v=n44Lfyt-Rio)\]
45 |
--------------------------------------------------------------------------------
/publications.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Publications"
3 | ---
4 |
5 | To be added later.
6 |
--------------------------------------------------------------------------------
/references.bib:
--------------------------------------------------------------------------------
1 | @software{ropensci_statsoftware_2021,
2 | author = {Mark Padgham and
3 | Maëlle Salmon and
4 | Noam Ross and
5 | Jakub Nowosad and
6 | Rich FitzJohn and
7 | Yilong Zhang and
8 | Christoph Sax and
9 | Francisco Rodriguez-Sanchez and
10 | François Briatte and
11 | Leonardo Collado-Torres},
12 | title = {{rOpenSci} Statistical Software Peer Review},
13 | month = oct,
14 | year = 2021,
15 | publisher = {Zenodo},
16 | version = {v0.1.0},
17 | doi = {10.5281/zenodo.5556756},
18 | url = {https://doi.org/10.5281/zenodo.5556756}
19 | }
20 |
21 | @software{ropensci_devguide_2024,
22 | author = {{rOpenSci} and
23 | Anderson, Brooke and
24 | Chamberlain, Scott and
25 | DeCicco, Laura and
26 | Gustavsen, Julia and
27 | Krystalli, Anna and
28 | Lepore, Mauro and
29 | Mullen, Lincoln and
30 | Ram, Karthik and
31 | Ross, Noam and
32 | Salmon, Maëlle and
33 | Vidoni, Melina and
34 | Riederer, Emily and
35 | Sparks, Adam and
36 | Hollister, Jeff},
37 | title = {{rOpenSci Packages: Development, Maintenance, and
38 | Peer Review}},
39 | month = mar,
40 | year = 2024,
41 | publisher = {Zenodo},
42 | version = {0.9.0},
43 | doi = {10.5281/zenodo.10797633},
44 | url = {https://doi.org/10.5281/zenodo.10797633}
45 | }
46 |
47 | @techreport{pharmar_2020,
48 | author = {Juliane Manitz and Andy Nicholls and Paulo Bargo and Doug Kelkhoff and Yilong Zhang and Lyn Taylor and Joe
49 | Rickert and Marly Gotti and Keaven M Anderson},
50 | title = {A Risk-based approach for assessing {R} package accuracy within a validated infrastructure},
51 | year = {2020},
52 | url = {https://www.pharmar.org/white-paper/}
53 | }
54 |
55 | @manual{usethis_2024,
56 | title = {{usethis}: Automate Package and Project Setup},
57 | author = {Hadley Wickham and Jennifer Bryan and Malcolm Barrett and Andy Teucher},
58 | year = {2024},
59 | note = {R package version 2.2.3, https://github.com/r-lib/usethis},
60 | url = {https://usethis.r-lib.org}
61 | }
62 |
63 | @book{wickham_2023,
64 | title = {R Packages: Organize, Test, Document, and Share Your Code},
65 | author = {Wickham, Hadley and Bryan, Jennifer},
66 | year = {2023},
67 | edition = {2},
68 | publisher = {{O'Reilly Media, Inc.}},
69 | url = {https://r-pkgs.org/}
70 | }
71 |
72 | @book{wickham_design,
73 | title = {Tidy design principles},
74 | author = {Wickham, Hadley},
75 | url = {https://design.tidyverse.org/}
76 | }
77 |
78 | @book{carpentries_2023,
79 | title = {A Software Carpentries lesson on {R} packaging},
80 | author = {Rodriguez-Sanchez, Pablo and Vreede, Barbara and de Boer, Lieke},
81 | year = {2023},
82 | url = {https://carpentries-incubator.github.io/lesson-R-packaging/}
83 | }
84 |
85 | @misc{gswe4rp_2024,
86 | author = {Sabanés Bové, Daniel and Pahlke, Friedrich and Kunzmann, Kevin and Bean, Andrew and
87 | Kelkhoff, Doug and Boileau, Philippe and
88 | Li, Liming and Zhu, Joe and Li, Shuang and Widmer, Lukas A. and Mayer, Michael},
89 | title = {Good Software Engineering Practice for {R} Packages},
90 | year = {2024},
91 | url = {https://rconis.github.io/workshop-r-swe-zrh/}
92 | }
93 |
94 | @misc{validation_2024,
95 | author = {{Wikipedia contributors}},
96 | title = {Software verification and validation --- {Wikipedia}{,} The Free Encyclopedia},
97 | year = {2024},
98 | url = {https://en.wikipedia.org/w/index.php?title=Software_verification_and_validation&oldid=1220844793},
99 | note = {[Online; accessed 31-May-2024]}
100 | }
101 |
102 | @misc{sabanes_2023,
103 | title = {Improving Software Engineering in Biostatistics: Challenges and Opportunities},
104 | author = {Sabanés Bové, Daniel and Heidi Seibold and Anne-Laure Boulesteix and Juliane Manitz and Alessandro Gasparini and Burak K. Günhan and Oliver Boix and Armin Schüler and Sven Fillinger and Sven Nahnsen and Anna E. Jacob and Thomas Jaki},
105 | year = {2023},
106 | eprint = {2301.11791},
107 | archiveprefix = {arXiv},
108 | primaryclass = {stat.CO},
109 | url = {https://arxiv.org/abs/2301.11791}
110 | }
--------------------------------------------------------------------------------
/slides/2023-psi-aims-london.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Plugging the gaps: Lessons learned from implementing the mmrm R package"
3 | author: "Kevin Kunzmann on behalf of the ASA BIOP SWE WG"
4 | institute: Boehringer Ingelheim
5 | date: "June 13 2023"
6 | date-format: long
7 | format:
8 | revealjs:
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
10 | fontsize: 34px
11 | slide-number: true
12 | title-slide-attributes:
13 | data-background-image: https://github.com/openpharma/mmrm/raw/main/man/figures/logo.png
14 | data-background-size: 15%
15 | data-background-opacity: "1.0"
16 | data-background-position: 90% 10%
17 | css: style.css
18 | ---
19 |
20 | ## Package Authors:
21 |
22 | ::: columns
23 | ::: {.column width="50%"}
24 | - Brian Matthew Lang (MSD)
25 | - Christian Stock (Boehringer-Ingelheim)
26 | - Craig Gower-Page (Roche)
27 | - Dan James (AstraZeneca)
28 | - Daniel Sabanes Bove (Roche, lead)
29 | :::
30 |
31 | ::: {.column width="50%"}
32 | - Doug Kelkhoff (Roche)
33 | - Julia Dedic (Roche)
34 | - Kevin Kunzmann (Boehringer-Ingelheim)
35 | - Liming Li (Roche)
36 | - Ya Wang (Gilead)
37 | :::
38 | :::
39 |
40 | ### Acknowledgments & thanks to: {.smaller}
41 |
42 | Ben Bolker (McMaster University), Davide Garolini (Roche), Dinakar Kulkarni (Roche), Gonzalo Duran Pacheco (Roche), [**ASA BIOP Software Engineering working group (SWE WG)**](https://rconsortium.github.io/asa-biop-swe-wg/)
43 |
44 | ## Agenda
45 |
46 | - Overview of Open Source in Biostatistics
47 | - Case Study: `{mmrm}` Package
48 | - Mixed Models for Repeated Measures - Why bother?
49 | - Why this is not "yet another package"
50 | - Lessons Learned
51 | - Next Steps
52 |
53 | ## Open Source in Biostatistics
54 |
55 | - Open-source software is increasingly popular in Biostatistics
56 | - Rapid uptake of novel statistical methods
57 | - Unprecedented opportunities for collaboration
58 | - Transparency of methods and implementation
59 | - Variability in software quality
60 | - No statistical quality assurance on open-source extension package repositories, e.g. CRAN
61 | - No industry standard for assessing quality of R packages
62 | - **Reliable software for core statistical analysis is paramount**
63 |
64 | ## The ASA BIOP SWE Working Group
65 |
66 | - Idea: form industry working group to identify and fill gaps in R package landscape with reliable implementations of crucial methods
67 | - [ASA Biopharmaceutical section software engineering working group](https://rconsortium.github.io/asa-biop-swe-wg/) (ASA BIOP SWE WG)
68 | - Foster good coding practices for easier maintenance, review, and transparency:
69 | - Version control for traceability and structured change management
70 | - Self-sufficient high-quality documentation
71 | - Code style standards for easier review
72 | - Extensive unit testing and CI/CD for test-driven development
73 | - Reproducible outputs, especially for stochastic algorithms
74 |
75 | ## Why do we need a package for MMRMs?
76 |
77 | - MMRM popular for longitudinal continuous outcomes in randomized clinical trials
78 | - Think about long term maintenance and responsibility!
79 | - Try to improve existing package
80 | - extend `glmmTMB` with Satterthwaite adjusted degrees of freedom
81 | - not feasible: `glmmTMB` only random effects representation, no real unstructured model
82 | - No tailored R package with sufficient capabilities/reliability
83 | - `lme4` + `lmerTest`: fails on large data sets (slow, convergence issues)
84 | - `nlme`: no Satterthwaite adjusted degrees of freedom, convergence issues, only approximate `emmeans`
85 |
86 | ## Case Study: The [`{mmrm}`](https://cran.r-project.org/package=mmrm) Package
87 |
88 | - Linear model for dependent observations within independent subjects
89 | - Multiple covariance structures for the dependent observations
90 | - REML or ML estimation, using multiple optimizers if needed
91 | - `emmeans` interface for least square means
92 | - `tidymodels` for easy model fitting
93 | - Satterthwaite and Kenward-Roger adjustments for degrees of freedom
94 | - Designed for needs of pharma applications from ground up
95 | - Ongoing maintenance and support from the pharma industry
96 | - 5 companies involved in the development
97 | - ambition to become standard package within the field
98 |
99 | ## How we develop `{mmrm}`
100 |
101 | - Created initial prototype quickly, open-sourced on [github.com/openpharma/mmrm](https://github.com/openpharma/mmrm)
102 | - In parallel, assembled multiple other companies with shared interest in reliable MMRM for R
103 | - $\leadsto$ Start of Software Engineering working group (SWE WG), with \>35 members from \>25 organizations
104 |
105 | - Focus on core issues and aim for frequent releases of new features instead of waiting for perfect state
106 | - Do not compromise on quality along the way: better to do less, but do it well
107 | - Regular online developer calls to align next steps
108 |
109 | ## Getting started with `{mmrm}`
110 |
111 | - `{mmrm}` is on CRAN - use this as a starting point:
112 |
113 | ```{r, eval = FALSE, echo = TRUE}
114 | install.packages("mmrm")
115 | library(mmrm)
116 | fit <- mmrm(
117 | formula = FEV1 ~ RACE + SEX + ARMCD * AVISIT + us(AVISIT | USUBJID),
118 | data = fev_data
119 | )
120 | summary(fit)
121 | library(emmeans)
122 | emmeans(fit, ~ ARMCD | AVISIT)
123 | ```
124 |
125 | - Visit [openpharma.github.io/mmrm](https://openpharma.github.io/mmrm/) for detailed docs
126 |
127 | ## Comparing `{mmrm}` and SAS
128 |
129 | Comparison to *de-facto* gold standard in SAS with `PROC MIXED`:
130 |
131 | ::: columns
132 | ::: {.column width="50%"}
133 | ### `{mmrm}`
134 | - Documentation: Available online
135 | - Testing: Transparent + coverage
136 | - Covariance structures: 10 non-spatial covariance structures
137 | - Focus on population marginal models
138 | :::
139 |
140 | ::: {.column width="50%"}
141 | ### `PROC MIXED`
142 | - Documentation: Available online
143 | - Testing: Hidden
144 | - Covariance structures: 23 non-spatial covariance structures
145 | - Marginal and individual level random effects
146 | :::
147 | :::
148 |
149 | ## Deviation from SAS
150 |
151 | * negligible differences from SAS in (non-representative) set of examples
152 | * `{mmrm}` orders of magnitude closer than `{glmmTMB}` or `{nlme}`
153 |
154 | ::: columns
155 | ::: {.column width="40%"}
156 | 
157 | :::
158 |
159 | ::: {.column width="40%"}
160 | 
161 | :::
162 | :::
163 |
164 |
165 | ## Speed
166 |
167 | - `{mmrm}` fast due to C++ code via template model builder (TMB) library
168 | - `{mmrm}` uses efficient automatic gradients for the ML/REML objective
169 |
170 | 
171 |
172 |
173 | ## Lessons Learned: Best Practices {.smaller}
174 |
175 | - User interface design (function names etc.)
176 | - Difficult to get right
177 | - Spend enough time on making it convenient/familiar
178 | - Testing:
179 | - Unit tests are essential for preventing regression and assuring quality
180 | - Use continuous integration to catch errors early
181 | - Documentation:
182 | - Lots of work but extremely important
183 | - Needs to be kept up-to-date
184 | - Testing alone is not sufficient - also use examples & vignettes for building trust with users
185 |
186 | ## Lessons Learned: Collaboration
187 |
188 | - Important to go public as soon as possible - you never know who else might be interested/could help
189 | - Version control with git: cornerstone of effective collaboration
190 | - Agree on standards:
191 | - Consistent and readable code style: transparency and easier collaboration
192 | - How to contribute: Written (!) contribution guidelines + lowering the entry hurdle using developer calls
193 | - Leverage network of developers in companies to get feedback from real projects as soon as possible
194 | - Emphasize importance of non-coding contributions like documentation
195 |
196 | ## Long Term Perspective & Next Steps
197 |
198 | - Software engineering is a critical competence in producing high-quality statistical software
199 | - A lot of work needs to be done regarding the establishment, dissemination and adoption of best practices for engineering open-source software
200 | - Improving the way software engineering is done will help improve the efficiency, reliability and innovation within Biostatistics
201 | - Continue to highlight and support cross-industry collaboration through, e.g.: Phuse, R Consortium, openpharma
202 | - Publicize existing open source projects, e.g.: `admiral`, `rtables`, Bayesian `mmrm`
203 |
204 | ## Thank you! Questions?
205 |
206 | ::: columns
207 | ::: {.column width="33%"}
208 | {height="300"}
209 | :::
210 |
211 | ::: {.column width="33%"}
212 | {height="300"}
213 | :::
214 |
215 | ::: {.column width="33%"}
216 | {height="300"}
217 | :::
218 | :::
219 |
--------------------------------------------------------------------------------
/slides/aimslogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/aimslogo.png
--------------------------------------------------------------------------------
/slides/asa-biop-webinar-feb2024-quarto.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introducing `openstatsware`"
3 | subtitle: "Who we are and what we build together"
4 | author: "Ya Wang on behalf of the working group and co-chair Daniel Sabanes Bove"
5 | date: "2024/02/23"
6 | format:
7 | revealjs:
8 | incremental: true
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/openstatsware-hex-1200.png
10 | slide-number: c/t
11 | toc: true
12 | toc-depth: 1
13 | fontsize: 32px
14 | ---
15 |
16 | ```{r setup}
17 | #| include: false
18 | #| echo: false
19 | ```
20 |
21 | # Introducing the Working Group
22 |
23 | ## `openstatsware`
24 |
25 | ```{r calc-stats}
26 | library(readr)
27 | library(dplyr)
28 | members <- read_csv("../data/members.csv") |> filter(SWE_WG_Member == 1)
29 | n_members <- nrow(members)
30 | unique_orgs <- members |> pull("Affiliation") |> unique() |> sort()
31 | ```
32 |
33 | ::: columns
34 | ::: {.column width="70%"}
35 | - Official working group of the American Statistical Association (ASA) Biopharmaceutical Section
36 | - Formed on 19 August 2022
37 | - Cross-industry collaboration (`r n_members` members from `r length(unique_orgs)` organizations)
38 | - Full name: Software Engineering Working Group
39 | - Short name: `openstatsware`
40 | - Homepage: [openstatsware.org](https://www.openstatsware.org/)
41 | - We welcome new members to join!
42 | :::
43 |
44 | ::: {.column width="30%"}
45 | {height="300"}
46 | :::
47 | :::
48 |
49 | ## Motivation
50 |
51 | - Open-source software is increasingly popular in Biostatistics
52 | - Rapid uptake of novel statistical methods
53 | - Unprecedented opportunities for collaboration
54 | - Transparency of methods and implementation
55 | - Variability in software quality
56 | - No statistical quality assurance on open-source extension package repositories, e.g. CRAN
57 | - No industry standard for assessing quality of R packages
58 | - **Reliable software for core statistical analysis is paramount**
59 |
60 | ## Working Group Objectives
61 |
62 | - Primary
63 | - Engineer R packages that implement important statistical methods
64 | - to fill in gaps in the open-source statistical software landscape
65 | - focusing on what is needed for biopharmaceutical applications
66 | - Secondary
67 | - Develop and disseminate best practices for engineering high-quality open-source statistical software
68 | - By actively doing the statistical engineering work together, we align on best practices and can communicate these to others
69 |
70 | ## Workstreams in R Package Development
71 |
72 | - Mixed Models for Repeated Measures (MMRM)
73 | - Develop `mmrm` R package for frequentist inference in MMRM
74 | - Bayesian MMRM
75 | - Develop `brms.mmrm` R package for Bayesian inference in MMRM
76 | - Health Technology Assessment (HTA)
77 | - Develop open-source R tools to be used in HTA submission
78 |
79 | ## Best Practices
80 |
81 | - User interface design
82 | - Code readability
83 | - Unit and integration tests
84 | - Documentation
85 | - Version control
86 | - Reproducibility
87 | - Maintainability
88 | - etc.
89 |
90 | ## Best Practices Dissemination - Workshop
91 |
92 | - Workshop "Good Software Engineering Practice for R Packages" on world tour
93 | - To teach hands-on skills and tools to engineer reliable R packages
94 | - Topics: R package structure, engineering workflow, ensuring quality, version control, collaboration and publication, and shiny development
95 | - 5 events in 2023 at Basel, Shanghai, San José, Rockville, and Montreal
96 |
97 | ## Best Practices Dissemination - Video
98 |
99 | - YouTube video series [Statistical Software Engineering 101](https://www.youtube.com/playlist?list=PL848NFA2PWgCR35n02yn1ZV7JqSu3NMxS)
100 | - To introduce tips and tricks for good statistical software engineering practices
101 | - 2 videos on unit testing for R developers
102 |
103 | # Overview of Active Workstreams
104 |
105 | ## MMRM R Package Development
106 |
107 | - The `mmrm` R package is the first product of `openstatsware`\
108 | - Motivation
109 | - Mixed models for repeated measures (MMRM) is a popular choice for analyzing longitudinal continuous outcomes in randomized clinical trials
110 | - Existing R packages are not great for one of the following reasons
111 | - Model convergence issues
112 | - Limited choices of covariance structures
113 | - Lack of adjusted degrees of freedom methods
114 | - Computational efficiency is not satisfactory
115 |
116 | ## Features of `mmrm`
117 |
118 | - Linear model for dependent observations within independent subjects
119 | - Covariance structures for the dependent observations:
120 | - Unstructured, Toeplitz, AR1, compound symmetry, ante-dependence, spatial exponential
121 | - Allows group specific covariance estimates and weights
122 | - REML or ML estimation, using multiple optimizers if needed
123 | - `emmeans` interface for least square means
124 | - `tidymodels` for easy model fitting
125 | - Satterthwaite and Kenward-Roger adjustments
126 | - Robust sandwich estimator for covariance
127 |
128 | ## Why It's Not Just Another Package
129 |
130 | - Ongoing maintenance and support from the pharmaceutical industry
131 | - 5 companies being involved in the development, on track to become standard package
132 | - Development using best practices as showcase for a high-quality package
133 | - Thorough unit and integration tests (also comparing with SAS results) to ensure accurate results
134 |
135 | ## `mmrm` on CRAN
136 |
137 | ::: columns
138 | ::: {.column width="70%"}
139 | - First available on CRAN in October 2022
140 | - Latest update in January 2024
141 | - Links
142 | - CRAN: [cran.r-project.org/package=mmrm](https://cran.r-project.org/package=mmrm)
143 | - Workstream: [openstatsware.org/mmrm_R\_package.html](https://www.openstatsware.org/mmrm_R_package.html)
144 | :::
145 |
146 | ::: {.column width="30%"}
147 | {height="300"}
148 | :::
149 | :::
150 |
151 | ## Bayesian MMRM R Package Workstream
152 |
153 | - The `brms.mmrm` R package leverages `brms` to run Bayesian MMRM
154 | - `brms` is a powerful and versatile package for fitting Bayesian regression models
155 | - Support a simplified interface and align with the best practices
156 | - Documentation website has a complete function reference and tutorial vignettes
157 | - Rigorous validation using simulation-based calibration and comparisons with the frequentist `mmrm` package on two example datasets
158 |
159 | ## `brms.mmrm` on CRAN
160 |
161 | - First version available in August 2023
162 | - Latest update in February 2024
163 | - Links
164 | - CRAN: [cran.r-project.org/package=brms.mmrm](https://cran.r-project.org/package=brms.mmrm)
165 | - Workstream: [openstatsware.org/bayesian_mmrm_R\_package.html](https://www.openstatsware.org/bayesian_mmrm_R_package.html)
166 |
167 | ## HTA-R Package Workstream
168 |
169 | - Develop and maintain a collection of open-source R tools of high quality in the right format (R packages, apps, user guides) to support crucial analytic topics in HTA
170 | - In close collaboration with [HTA SIG in PSI/EFSPI](https://www.psiweb.org/sigs-special-interest-groups/hta) (a group of HTA SMEs with statistical background, who help to generate pipeline ideas, ensure relevance of developed tools, pilot created tools in a real business setting)
171 | - R package under development: `maicplus`
172 |
173 | ## `maicplus` R package
174 |
175 | - An R package to support analysis and reporting of matching-adjusted indirect comparison (MAIC) for HTA dossiers
176 | - Motivation
177 | - Sponsors are required to submit evidence of the relative effectiveness of their treatment compared to relevant comparators that may not be included in their clinical trial, for health technology assessment (HTA) in different countries
178 | - MAIC is a prevalent and well-accepted method to derive population-adjusted treatment effects in such cases for two trials, one of which has individual patient data and the other has only aggregate data
179 | - There is a lack of open-source R packages following good software engineering practices for conducting and reporting MAIC analyses
180 | - Workstream: [openstatsware.org/hta_page.html](https://www.openstatsware.org/hta_page.html)
181 |
182 | # Lessons Learned on Best Practices
183 |
184 | ## Development process
185 |
186 | - Important to go public as soon as possible
187 | - don't wait for the product to be finished
188 | - you never know who else might be interested/could help
189 | - Version control with git
190 | - cornerstone of effective collaboration
191 | - Building software together works better than alone
192 | - Different perspectives in discussions and code review help to optimize the user interface and thus experience
193 |
194 | ## Coding standards
195 |
196 | - Consistent and readable code style simplifies joint work
197 | - Written (!) contribution guidelines help
198 | - Lowering the entry hurdle using developer calls is important
199 |
200 | ## Robust test suite
201 |
202 | - Unit and integration tests are essential for preventing regression and assuring quality
203 | - Especially with compiled code critical to see if package works correctly
204 | - Use continuous integration during development to make sure nothing breaks along the way
205 |
206 | ## Documentation
207 |
208 | - Lots of work but extremely important
209 | - start with writing up the methods details
210 | - think about the code structure first in a "design doc"
211 | - only then put the code in the package
212 | - Needs to be kept up-to-date
213 | - Need to have examples & vignettes
214 | - Testing alone is not sufficient
215 | - Builds trust with users
216 | - Reference for developers over time
217 |
218 | # Long Term Perspective
219 |
220 | ## Long Term Perspective
221 |
222 | - Software engineering is a critical competence in producing high-quality statistical software
223 | - A lot of work needs to be done regarding the establishment, dissemination and adoption of best practices for engineering open-source software
224 | - Improving the way software engineering is done will help improve the efficiency, reliability and innovation within Biostatistics
225 |
226 | ## Q&A {background-image="thank-you.jpg"}
227 |
228 |
229 |
--------------------------------------------------------------------------------
/slides/asa-biop-webinar-feb2024.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introducing `openstatsware`"
3 | subtitle: "Who we are and what we build together"
4 | author: "Ya Wang on behalf of the working group and co-chair Daniel Sabanes Bove"
5 | date: "2024/02/23"
6 | format:
7 | pptx:
8 | incremental: true
9 | toc: true
10 | toc-depth: 1
11 | toc-title: Outline
12 | ---
13 |
14 | ```{r setup}
15 | #| include: false
16 | #| echo: false
17 | ```
18 |
19 | # Introducing the Working Group
20 |
21 | ## `openstatsware`
22 |
23 | ```{r calc-stats}
24 | library(readr)
25 | library(dplyr)
26 | members <- read_csv("../data/members.csv") |> filter(SWE_WG_Member == 1)
27 | n_members <- nrow(members)
28 | unique_orgs <- members |> pull("Affiliation") |> unique() |> sort()
29 | ```
30 |
31 | ::: columns
32 | ::: {.column width="70%"}
33 | - Official working group of the American Statistical Association (ASA) Biopharmaceutical Section
34 | - Formed on 19 August 2022
35 | - Cross-industry collaboration (`r n_members` members from `r length(unique_orgs)` organizations)
36 | - Full name: Software Engineering Working Group
37 | - Short name: `openstatsware`
38 | - Homepage: [openstatsware.org](https://www.openstatsware.org/)
39 | - We welcome new members to join!
40 | :::
41 |
42 | ::: {.column width="30%"}
43 | {height="300"}
44 | :::
45 | :::
46 |
47 | ## Motivation
48 |
49 | - Open-source software is increasingly popular in Biostatistics
50 | - Rapid uptake of novel statistical methods
51 | - Unprecedented opportunities for collaboration
52 | - Transparency of methods and implementation
53 | - Variability in software quality
54 | - No statistical quality assurance on open-source extension package repositories, e.g. CRAN
55 | - No industry standard for assessing quality of R packages
56 | - **Reliable software for core statistical analysis is paramount**
57 |
58 | ## Working Group Objectives
59 |
60 | - Primary
61 | - Engineer R packages that implement important statistical methods
62 | - to fill in gaps in the open-source statistical software landscape
63 | - focusing on what is needed for biopharmaceutical applications
64 | - Secondary
65 | - Develop and disseminate best practices for engineering high-quality open-source statistical software
66 | - By actively doing the statistical engineering work together, we align on best practices and can communicate these to others
67 |
68 | ## Workstreams in R Package Development
69 |
70 | - Mixed Models for Repeated Measures (MMRM)
71 | - Develop `mmrm` R package for frequentist inference in MMRM
72 | - Bayesian MMRM
73 | - Develop `brms.mmrm` R package for Bayesian inference in MMRM
74 | - Health Technology Assessment (HTA)
75 | - Develop open-source R tools to be used in HTA submission
76 |
77 | ## Best Practices
78 |
79 | - User interface design
80 | - Code readability
81 | - Unit and integration tests
82 | - Documentation
83 | - Version control
84 | - Reproducibility
85 | - Maintainability
86 | - etc.
87 |
88 | ## Best Practices Dissemination - Workshop
89 |
90 | - Workshop "Good Software Engineering Practice for R Packages" on world tour
91 | - To teach hands-on skills and tools to engineer reliable R packages
92 | - Topics: R package structure, engineering workflow, ensuring quality, version control, collaboration and publication, and shiny development
93 | - 5 events in 2023 at Basel, Shanghai, San José, Rockville, and Montreal
94 |
95 | ## Best Practices Dissemination - Video
96 |
97 | - YouTube video series [Statistical Software Engineering 101](https://www.youtube.com/playlist?list=PL848NFA2PWgCR35n02yn1ZV7JqSu3NMxS)
98 | - To introduce tips and tricks for good statistical software engineering practices
99 | - 2 videos on unit testing for R developers
100 |
101 | # Overview of Active Workstreams
102 |
103 | ## MMRM R Package Development {.smaller}
104 |
105 | - The `mmrm` R package is the first product of `openstatsware`\
106 | - Motivation
107 | - Mixed models for repeated measures (MMRM) is a popular choice for analyzing longitudinal continuous outcomes in randomized clinical trials
108 | - Existing R packages are not great for one of the following reasons
109 | - Model convergence issues
110 | - Limited choices of covariance structures
111 | - Lack of adjusted degrees of freedom methods
112 | - Computational efficiency is not satisfactory
113 |
114 | ## Features of `mmrm` {.smaller}
115 |
116 | - Linear model for dependent observations within independent subjects
117 | - Covariance structures for the dependent observations:
118 | - Unstructured, Toeplitz, AR1, compound symmetry, ante-dependence, spatial exponential
119 | - Allows group specific covariance estimates and weights
120 | - REML or ML estimation, using multiple optimizers if needed
121 | - `emmeans` interface for least square means
122 | - `tidymodels` for easy model fitting
123 | - Satterthwaite and Kenward-Roger adjustments
124 | - Robust sandwich estimator for covariance
125 |
126 | ## Why It's Not Just Another Package
127 |
128 | - Ongoing maintenance and support from the pharmaceutical industry
129 | - 5 companies being involved in the development, on track to become standard package
130 | - Development using best practices as a showcase for a high-quality package
131 | - Thorough unit and integration tests (also comparing with SAS results) to ensure accurate results
132 |
133 | ## `mmrm` on CRAN
134 |
135 | ::: columns
136 | ::: {.column width="70%"}
137 | - First available on CRAN in October 2022
138 | - Latest update in January 2024
139 | - Links
140 | - CRAN: [cran.r-project.org/package=mmrm](https://cran.r-project.org/package=mmrm)
141 | - Workstream: [openstatsware.org/mmrm_R\_package.html](https://www.openstatsware.org/mmrm_R_package.html)
142 | :::
143 |
144 | ::: {.column width="30%"}
145 | {height="300"}
146 | :::
147 | :::
148 |
149 | ## Bayesian MMRM R Package Workstream {.smaller}
150 |
151 | - The `brms.mmrm` R package leverages `brms` to run Bayesian MMRM
152 | - `brms` is a powerful and versatile package for fitting Bayesian regression models
153 | - Support a simplified interface and align with the best practices
154 | - Documentation website has a complete function reference and tutorial vignettes
155 | - Rigorous validation using simulation-based calibration and comparisons with the frequentist `mmrm` package on two example datasets
156 |
157 | ## `brms.mmrm` on CRAN
158 |
159 | - First version available in August 2023
160 | - Latest update in February 2024
161 | - Links
162 | - CRAN: [cran.r-project.org/package=brms.mmrm](https://cran.r-project.org/package=brms.mmrm)
163 | - Workstream: [openstatsware.org/bayesian_mmrm_R\_package.html](https://www.openstatsware.org/bayesian_mmrm_R_package.html)
164 |
165 | ## HTA-R Package Workstream
166 |
167 | - Develop and maintain a collection of open-source R tools of high quality in the right format (R packages, apps, user guides) to support crucial analytic topics in HTA
168 | - In close collaboration with [HTA SIG in PSI/EFSPI](https://www.psiweb.org/sigs-special-interest-groups/hta) (a group of HTA SMEs with statistical background, who help to generate pipeline ideas, ensure relevance of developed tools, pilot created tools in real business setting)
169 | - R package under development: `maicplus`
170 |
171 | ## `maicplus` R package
172 |
173 | - An R package to support analysis and reporting of matching-adjusted indirect comparison (MAIC) for HTA dossiers
174 | - Motivation
175 | - Sponsors are required to submit evidence of relative effectiveness of their treatment comparing to relevant comparators that may not be included in their clinical trial, for health technology assessment (HTA) in different countries
176 | - MAIC is a prevalent and well-accepted method to derive population-adjusted treatment effect in such cases for two trials, one of which has individual patient data and the other has only aggregate data
177 | - There is a lack of open-source R packages following good software engineering practices for conducting and reporting MAIC analyses
178 | - Workstream: [openstatsware.org/hta_page.html](https://www.openstatsware.org/hta_page.html)
179 |
180 | # Lessons Learned on Best Practices
181 |
182 | ## Development process
183 |
184 | - Important to go public as soon as possible
185 | - don't wait for the product to be finished
186 | - you never know who else might be interested/could help
187 | - Version control with git
188 | - cornerstone of effective collaboration
189 | - Building software together works better than alone
190 | - Different perspectives in discussions and code review help to optimize the user interface and thus experience
191 |
192 | ## Coding standards
193 |
194 | - Consistent and readable code style simplifies joint work
195 | - Written (!) contribution guidelines help
196 | - Lowering the entry hurdle using developer calls is important
197 |
198 | ## Robust test suite
199 |
200 | - Unit and integration tests are essential for preventing regression and assuring quality
201 | - Especially with compiled code critical to see if package works correctly
202 | - Use continuous integration during development to make sure nothing breaks along the way
203 |
204 | ## Documentation
205 |
206 | - Lots of work but extremely important
207 | - start with writing up the methods details
208 | - think about the code structure first in a "design doc"
209 | - only then put the code in the package
210 | - Needs to be kept up-to-date
211 | - Need to have examples & vignettes
212 | - Testing alone is not sufficient
213 | - Builds trust with users
214 | - Reference for developers over time
215 |
216 | # Long Term Perspective
217 |
218 | ## Long Term Perspective
219 |
220 | - Software engineering is a critical competence in producing high-quality statistical software
221 | - A lot of work needs to be done regarding the establishment, dissemination and adoption of best practices for engineering open-source software
222 | - Improving the way software engineering is done will help improve the efficiency, reliability and innovation within Biostatistics
223 |
224 | ## Q&A {background-image="thank-you.jpg"}
225 |
226 |
227 |
--------------------------------------------------------------------------------
/slides/asa-ssc-mmrm-nov2023.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introducing `openstatsware` and the R Package `{mmrm}`"
3 | subtitle: "ASA-SSC Mini-Symposium on Statistical Computing in Action 2023 "
4 | author: "Jonathan Sidi on behalf of the ASA Software Engineering Working Group"
5 | date: "2023/11/04"
6 | format:
7 | revealjs:
8 | incremental: false
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-new-1200.png
10 | slide-number: c/t
11 | fontsize: 32px
12 | title-slide-attributes:
13 | data-background-image: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-new-1200.png
14 | data-background-size: 20%
15 | data-background-opacity: "0.5"
16 | data-background-position: 98% 10%
17 | ---
18 |
19 | ```{r setup}
20 | #| include: false
21 | #| echo: false
22 | ```
23 |
24 | ## `openstatsware`
25 |
26 | ```{r calc-stats}
27 | library(readr)
28 | library(dplyr)
29 | members <- read_csv("../data/members.csv") |> filter(SWE_WG_Member == 1)
30 | n_members <- nrow(members)
31 | unique_orgs <- members |> pull("Affiliation") |> unique() |> sort()
32 | ```
33 |
34 | ::: columns
35 | ::: {.column width="50%"}
36 | - Official working group of the American Statistical Association (ASA) Biopharmaceutical Section
37 | - Formed on 19 August 2022
38 | - Cross-industry collaboration (`r n_members` members from `r length(unique_orgs)` organizations)
39 | - Full name: Software Engineering Working Group
40 | - Short name: `openstatsware`
41 | - Homepage: [rconsortium.github.io/asa-biop-swe-wg](https://rconsortium.github.io/asa-biop-swe-wg)
42 | :::
43 |
44 | ::: {.column width="50%"}
45 | {height="300"}
46 | :::
47 | :::
48 |
49 | ## Working Group Objectives
50 |
51 | - Primary
52 | - Engineer R packages that implement important statistical methods
53 | - to fill in gaps in the open-source statistical software landscape
54 | - focusing on what is needed for biopharmaceutical applications
55 | - Secondary
56 | - Develop and disseminate best practices for engineering high-quality open-source statistical software
57 | - By actively doing the statistical engineering work together, we align on best practices and can communicate these to others
58 |
59 | ## Workstreams in R Package Development
60 |
61 | - Mixed Models for Repeated Measures (MMRM) [🌎](https://rconsortium.github.io/asa-biop-swe-wg/mmrm_R_package.html)
62 | - Develop `mmrm` R package for frequentist inference in MMRM ([CRAN](https://cran.r-project.org/package=mmrm))
63 | - Bayesian MMRM [🌎](https://rconsortium.github.io/asa-biop-swe-wg/bayesian_mmrm_R_package.html)
64 | - Develop `brms.mmrm` R package for Bayesian inference in MMRM ([CRAN](https://cran.r-project.org/package=brms.mmrm))
65 | - Health Technology Assessment [🌎](https://github.com/hta-pharma/)
66 | - Develop open-source R tools to support HTA dossier submission across various countries
67 | - Particularly topics with unmet needs in R implementation and/or related to EUnetHTA
68 |
69 | # R Package `mmrm` {background-image="slide-background-mmrm.png"}
70 |
71 | ## Motivation
72 |
73 | - MMRM is a popular choice for analyzing longitudinal continuous outcomes in randomized clinical trials
74 | - No great R package
75 | - Initially thought that the MMRM problem was solved by using `lme4` with `lmerTest`, learned that this approach failed on large data sets (slow, did not converge)
76 | - `nlme` does not give Satterthwaite adjusted degrees of freedom, has convergence issues, and with `emmeans` it is only approximate
77 | - Next we tried to extend `glmmTMB` to calculate Satterthwaite adjusted degrees of freedom, but it did not work
78 |
79 | ## Idea
80 |
81 | - We only want to fit a fixed effects model with a structured covariance matrix for each subject
82 | - The idea is then to use the Template Model Builder (`TMB`) directly
83 | - as it is also underlying `glmmTMB`
84 | - but code the exact model we want
85 | - We do this by implementing the log-likelihood in `C++` using the `TMB` provided libraries
86 | - Provide an R solution that
87 | - has fast convergence times
88 | - generates estimates closest to `SAS`
89 |
90 | ## Advantages of `TMB`
91 |
92 | - Fast `C++` framework for defining objective functions (`Rcpp` would have been alternative interface)
93 | - Automatic differentiation of the log-likelihood as a function of the variance parameters
94 | - We get the gradient and Hessian exactly and without additional coding
95 | - This can be used from the R side with the `TMB` interface and plugged into optimizers
96 |
97 | ## Why It's Not Just Another Package
98 |
99 | - Ongoing maintenance and support from the pharmaceutical industry
100 | - 5 companies being involved in the development, on track to become standard package
101 | - Development using best practices as a showcase for a high-quality package
102 | - Thorough unit and integration tests to ensure accurate results
103 | - In depth documentation of methods, functionality and comparison to other packages and languages
104 | - `nlme`, `glmmTMB`, `lme4`, `SAS PROC GLIMMIX`
105 |
106 | ## Highlighted Features of `mmrm`
107 |
108 | - **Covariance structures** for the dependent observations:
109 | - Unstructured, Toeplitz, AR1, compound symmetry, ante-dependence, spatial exponential
110 | - Allows group specific covariance estimates and weights
111 | - **Hypothesis Testing**:
112 | - `emmeans` interface for least square means
113 | - Satterthwaite and Kenward-Roger adjustments
114 | - Robust sandwich estimator for covariance
115 | - **Integrations and extensions**
116 | - `tidymodels` builtin parsnip engine and recipes for streamlined model fitting workflows
117 | - `teal`, `tern`, `rtables` integration for post processing and reporting
118 |
119 | ## Comparison with Other Software
120 |
121 | - We have run comparison analyses with other R packages, namely `nlme`, `glmmTMB` and `lme4`
122 | - Also compared with SAS `PROC GLIMMIX`
123 | - Highlights
124 | - `mmrm` has faster convergence time
125 | - `mmrm` provides closest results to `PROC GLIMMIX`
126 | - Detailed results at the online [comparison vignette](https://openpharma.github.io/mmrm/main/articles/mmrm_review_methods.html)
127 |
128 | ## Q&A {background-image="thank-you.jpg"}
129 |
130 |
131 |
--------------------------------------------------------------------------------
/slides/bbs-efspi-mmrm-dec2022.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "mmrm"
3 | author: "Daniel Sabanes Bove"
4 | institute: (Statistical Engineering,
5 | Pharma Product Development Data Sciences,
6 | Roche)
7 | date: "@BBS/EFSPI Workshop 8 Dec 2022"
8 | format:
9 | revealjs:
10 | incremental: true
11 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
12 | slide-number: c/t
13 | title-slide-attributes:
14 | data-background-image: https://github.com/openpharma/mmrm/raw/main/man/figures/logo.png
15 | data-background-size: 20%
16 | data-background-opacity: "0.5"
17 | data-background-position: 80% 10%
18 | ---
19 |
20 | ## Acknowledgments
21 |
22 | ::: nonincremental
23 | ::: columns
24 | ::: {.column width="50%"}
25 | Authors:
26 |
27 | - Kevin Kunzmann (Boehringer Ingelheim)
28 | - Brian Matthew Lang (MSD)
29 | - Ya Wang (Gilead)
30 | - Julia Dedic (Roche)
31 | - Doug Kelkhoff (Roche)
32 | - Liming Li (Roche)
33 | :::
34 |
35 | ::: {.column width="50%"}
36 | Thanks to:
37 |
38 | - Ben Bolker (McMaster University)
39 | - Software Engineering working group (SWE WG)
40 | - Gonzalo Duran Pacheco (Roche)
41 | - Craig Gower-Page (Roche)
42 | - Dinakar Kulkarni (Roche)
43 | - Davide Garolini (Roche)
44 | :::
45 | :::
46 | :::
47 |
48 | ## Motivation
49 |
50 | - 2 years ago, we thought we had solved the MMRM problem, using `lme4` and `lmerTest`
51 | - However in March this year, Gonzalo and Julia told us the bad news: this approach failed on their Ophthalmology data sets (slow, did not converge)
52 | - `nlme` does not give Satterthwaite adjusted degrees of freedom, has convergence issues, and with `emmeans` it is only approximate
53 | - Next we tried to extend `glmmTMB` to calculate Satterthwaite adjusted degrees of freedom, but it did not work
54 |
55 | ## Idea
56 |
57 | - Because `glmmTMB` is always using a random effects representation, we cannot have a real unstructured model (uses $\sigma = \varepsilon > 0$ trick)
58 | - We only want to fit a fixed effects model with a structured covariance matrix for each subject
59 | - The idea is then to use the Template Model Builder (`TMB`) directly - as it is also underlying `glmmTMB` - but code the exact model we want
60 | - We do this by implementing the log-likelihood in `C++` using the `TMB` provided libraries
61 |
62 | ## Advantages of `TMB`
63 |
64 | - Fast `C++` framework for defining objective functions (`Rcpp` would have been alternative interface)
65 | - Automatic differentiation of the log-likelihood as a function of the variance parameters
66 | - We get the gradient and Hessian exactly and without additional coding
67 | - This can be used from the R side with the `TMB` interface and plugged into optimizers
68 |
69 | ## Cross-industry collaboration
70 |
71 | - We created an initial prototype quickly and open sourced it on [github.com/openpharma/mmrm](https://github.com/openpharma/mmrm)
72 | - In parallel we assembled multiple other companies - many of them had also struggled to implement MMRM in R (crucial gap in toolbox so far)
73 | - This was the start of the Software Engineering working group (SWE WG)
74 | - Read more at [rconsortium.github.io/asa-biop-swe-wg](https://rconsortium.github.io/asa-biop-swe-wg)
75 | - Official working group of the ASA Biopharmaceutical section
76 |
77 | ## Features
78 |
79 | - Linear model for dependent observations within independent subjects
80 | - Covariance structures for the dependent observations:
81 | - Unstructured, Toeplitz, AR1, compound symmetry, ante-dependence, spatial exponential
82 | - Allows group specific covariance estimates and weights
83 | - REML or ML estimation, using multiple optimizers if needed
84 | - `emmeans` interface for least square means
85 | - Satterthwaite adjusted degrees of freedom
86 |
87 | ## Getting started
88 |
89 | - `mmrm` is on CRAN - use this as a starting point:
90 |
91 | . . .
92 |
93 | ```{r, eval = FALSE, echo = TRUE}
94 | install.packages("mmrm")
95 | library(mmrm)
96 | fit <- mmrm(
97 | formula = FEV1 ~ RACE + SEX + ARMCD * AVISIT + us(AVISIT | USUBJID),
98 | data = fev_data
99 | )
100 | summary(fit)
101 | library(emmeans)
102 | emmeans(fit, ~ ARMCD | AVISIT)
103 | ```
104 |
105 | - Visit [openpharma.github.io/mmrm](https://openpharma.github.io/mmrm/) for detailed docs including vignettes
106 | - Consider [tern.mmrm](https://insightsengineering.github.io/tern.mmrm/main/) for high-level clinical reporting interface, incl. standard tables and graphs
107 |
108 | ## Outlook
109 |
110 | - We still have major features on our backlog:
111 | - Kenward-Roger (improved variance-covariance matrix, degrees of freedom)
112 | - Robust sandwich estimator for covariance matrix (important when not using unstructured covariance)
113 | - Type II and type III ANOVA tests
114 | - Working on comparison to other implementations
115 | - Parameter estimates, computation time, convergence, ...
116 | - Please let us know what is missing in `mmrm` for you! And try it out :-)
117 |
118 | ## Thank you! Questions?
119 |
120 | ::: columns
121 | ::: {.column width="50%"}
122 | {height="500"}
123 | :::
124 |
125 | ::: {.column width="50%"}
126 | {height="500"}
127 | :::
128 | :::
129 |
--------------------------------------------------------------------------------
/slides/cen-first-year-wg-sep2023.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "First year of the Software Engineering working group"
3 | author: "Daniel Sabanés Bové (Roche) and Ya Wang (Gilead) on behalf of the working group"
4 | date: "2023/09/06"
5 | format:
6 | pptx:
7 | incremental: true
8 | toc: true
9 | toc-depth: 1
10 | toc-title: Outline
11 | ---
12 |
13 | ```{r setup}
14 | #| include: false
15 | #| echo: false
16 | ```
17 |
18 | # Introducing the WG
19 |
20 | ## Software Engineering Working Group
21 |
22 | ::: columns
23 | ::: {.column width="50%"}
24 | Founded last year:
25 |
26 | - When: 19 August 2022 - just celebrated our 1 year birthday!
27 | - Where: American Statistical Association (ASA) Biopharmaceutical Section (BIOP)
28 | - Who: 11 statisticians from 7 pharma companies developing statistical software
29 | :::
30 |
31 | ::: {.column width="50%"}
32 | {height="300"}
33 | :::
34 | :::
35 |
36 | ## Why a new WG?
37 |
38 | - Started with specific R-package project (more below)
39 | - Makes sense to stay together as a group also for other package projects
40 | - New focus on good engineering practices and collaborative work
41 | - Importance of reliable software for statistical analysis cannot be overstated
42 | - Be rooted in the biostatistics community (rather than statistical programming)
43 |
44 | ## But there are other WGs?
45 |
46 | - Comparing Analysis Method Implementations in Software (CAMIS) of PhUSE
47 | [🌎](https://advance.phuse.global/pages/viewpage.action?pageId=327874)
48 | - Application and Implementation of Methodologies in Statistics (AIMS) Special Interest Group (SIG) of PSI [🌎](https://www.psiweb.org/sigs-special-interest-groups/aims)
49 | - R Submission Working Group of R Consortium [🌎](https://rconsortium.github.io/submissions-wg/)
50 | - R Tables for Regulatory Submissions Working Group of R Consortium [🌎](https://rconsortium.github.io/rtrs-wg/)
51 | - R Certification working group of R Consortium [🌎](https://github.com/RConsortium/R-Certification-WG)
52 | - R Repositories WG of R Consortium [🌎](https://github.com/RConsortium/r-repositories-wg)
53 |
54 | ## WG Objectives
55 |
56 | - Primary:
57 | - Engineer R packages that implement important statistical methods
58 | - ... to fill in gaps in the open-source statistical software landscape
59 | - ... focusing on what is needed for biopharmaceutical applications
60 | - Secondary:
61 | - Develop and disseminate best practices for engineering high-quality open-source statistical software
62 | - By actively doing the statistical engineering work together, we align on best practices and can communicate these to others
63 |
64 | ## Members and meetings
65 |
66 | ```{r calc-stats}
67 | library(readr)
68 | library(dplyr)
69 | members <- read_csv("../data/members.csv") |> filter(SWE_WG_Member == 1)
70 | n_members <- nrow(members)
71 | unique_orgs <- members |> pull("Affiliation") |> unique() |> sort()
72 | ```
73 |
74 | - Currently `r n_members` members
75 | - new members are welcome! (incl. academia/regulators/etc.)
76 | - Currently `r length(unique_orgs)` organizations
77 | - `r paste(unique_orgs, collapse = ", ")`
78 | - Meet every 2 weeks
79 |
80 | ## Workstreams
81 |
82 | - Mixed Models for Repeated Measures (MMRM) [🌎](https://rconsortium.github.io/asa-biop-swe-wg/mmrm_R_package.html)
83 | - Develop `mmrm` (see below) to use frequentist inference in MMRM
84 | - Bayesian MMRM [🌎](https://rconsortium.github.io/asa-biop-swe-wg/bayesian_mmrm_R_package.html)
85 | - Develop `brms.mmrm` (see below)
86 | - Health Technology Assessment [🌎](https://github.com/hta-pharma/)
87 | - Develop open-source R tools to support HTA dossier submission across various countries, particularly the topics with unmet needs in R implementation and/or related to EUnetHTA
88 | - Note: Also "just" contributing to workstreams is great!
89 |
90 | # Achievements in the first year
91 |
92 | ## New R packages released to CRAN
93 |
94 | - `mmrm`
95 | - R package for frequentist inference in MMRM, based on `TMB` (which provides automatic differentiation in `C++` and R frontend)
96 | - See documentation [🌎](https://openpharma.github.io/mmrm/)
97 | - Easiest to install from CRAN [🌎](https://cran.r-project.org/package=mmrm)
98 | - `brms.mmrm`
99 | - R package for Bayesian inference in MMRM, based on `brms` (as Stan frontend for HMC sampling)
100 | - See documentation [🌎](https://openpharma.github.io/brms.mmrm/)
101 | - Easiest to install from CRAN [🌎](https://cran.r-project.org/package=brms.mmrm)
102 |
103 | ## Why was the MMRM topic important?
104 |
105 | - MMRM is a popular analysis method for longitudinal continuous outcomes in randomized clinical trials
106 | - No tailored R package with sufficient capabilities/reliability
107 | - Also used as backbone for more recent methods such as multiple imputation
108 | - Have a look at Gonzalo’s presentation to learn more about `mmrm`
109 | - 10.40 am in the following session
110 | - compares `mmrm` with previous R packages and SAS
111 |
112 | ## Best practices
113 |
114 | - Includes version control, git workflow, code review, unit/integration testing, continuous integration/delivery (CI/CD), reproducibility, traceability (news), documentation, package design, maintainability, publication, etc.
115 | - Workshop "Good Software Engineering Practice for R Packages" on world tour
116 | - Basel, Shanghai, San José, Rockville MD, Montreal [🌎](https://openpharma.github.io/workshop-r-swe-mtl/)
117 | - Start of video series "Statistical Software Engineering 101" [🌎](https://www.youtube.com/playlist?list=PL848NFA2PWgCR35n02yn1ZV7JqSu3NMxS)
118 | - currently 2 videos, hopefully we can still produce more content
119 |
120 | ## Conference contributions and Publications
121 |
122 | - Dedicated sessions with discussions at ISCB, CEN (now), ASA/FDA workshop
123 | - Presentations at PSI, JSM, Pharma RUG, BBS, etc. [🌎](https://rconsortium.github.io/asa-biop-swe-wg/presentations.html)
124 | - BIOP Report [🌎](https://rconsortium.github.io/asa-biop-swe-wg/blog/biop_report/)
125 | - Blog [🌎](https://rconsortium.github.io/asa-biop-swe-wg/blog.html)
126 |
127 | # Ingredients for successful and sustainable collaboration
128 |
129 | ## Human factors
130 |
131 | - Mutual interest
132 | - Mutual trust
133 | - Prerequisite is getting to know each other
134 | - Although mostly just online, biweekly calls help a lot with this
135 | - Reciprocity mindset
136 | - "Reciprocity means that in response to friendly actions, people are frequently much nicer and much more cooperative than predicted by the self-interest model"
137 | - Personal experience: If you first give away something, more will come back to you.
138 |
139 | ## Development process
140 |
141 | - Important to go public as soon as possible
142 | - don't wait for the product to be finished
143 | - you never know who else might be interested/could help
144 | - Version control with git
145 | - cornerstone of effective collaboration
146 | - Building software together works better than alone
147 | - Different perspectives in discussions and code review help to optimize the user interface and thus experience
148 |
149 | ## Coding standards
150 |
151 | - Consistent and readable code style simplifies joint work
152 | - Written (!) contribution guidelines help
153 | - Lowering the entry hurdle using developer calls is important
154 |
155 | ## Robust test suite
156 |
157 | - Unit and integration tests are essential for preventing regression and assuring quality
158 | - Especially with compiled code critical to see if package works correctly
159 | - Use continuous integration during development to make sure nothing breaks along the way
160 |
161 | ## Documentation
162 |
163 | - Lots of work but extremely important
164 | - start with writing up the methods details
165 | - think about the code structure first in a "design doc"
166 | - only then put the code in the package
167 | - Needs to be kept up-to-date
168 | - Need to have examples & vignettes
169 | - Testing alone is not sufficient
170 | - Builds trust with users
171 | - Reference for developers over time
172 |
173 | # Long term vision
174 |
175 | ## Vision: Statisticians have software engineering skills
176 |
177 | - These skills are taught at university
178 | - Statisticians can use basic practices in their daily work ...
179 | - ... to ensure reproducibility of statistical analyses and research results
180 |
181 | ## Vision: Innovation does not stop with publication
182 |
183 | - Methods research does not end when the first methods paper is published
184 | - Initial prototype code as paper supplement is not sufficient
185 | - Continue developing open-source and reliably tested software packages ...
186 | - ... to enable users to easily use the new methodology in their own applications
187 |
188 | ## Vision: Industry develops common code base
189 |
190 | - Increasingly companies work in the open source as much as possible
191 | - Rather than repeating similar developments internally ...
192 | - ... to become more cost-effective and transparent towards society
193 |
194 | # Next steps
195 |
196 | ## R Packages
197 |
198 | - New workstream on covariate adjustment is starting up
199 | - Think more strategically about identifying gaps in the statistical software landscape
200 | - Help maintain the CRAN Task View on Clinical Trials
201 |
202 | ## Branding and Collaboration
203 |
204 | - Find a new short and memorable name
205 | - Proposed to associate also with EFSPI to emphasize global nature of the group
206 | - Ensure a strong connection to the new pan-pharma methodology group
207 |
208 | ## Communication and Outreach
209 |
210 | - Add more content to our video series
211 | - Start a chat channel to start informal discussions within larger community
212 | - Organize hackathons, e.g. to work together on workstream packages
213 |
214 | ## Q&A {background-image="thank-you.jpg"}
215 |
216 |
217 |
218 |
--------------------------------------------------------------------------------
/slides/converge_speed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/converge_speed.png
--------------------------------------------------------------------------------
/slides/estimate_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/estimate_diff.png
--------------------------------------------------------------------------------
/slides/mmrm-review-treatment-fev-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/mmrm-review-treatment-fev-1.png
--------------------------------------------------------------------------------
/slides/openstatsguide-poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/openstatsguide-poster.pdf
--------------------------------------------------------------------------------
/slides/r-govys-jan2023.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introducing the Software Engineering working group"
3 | subtitle: "Who we are and what we build together"
4 | author: "Daniel Sabanes Bove, Ya Wang"
5 | date: "R Govys meeting, 19 Jan 2023"
6 | format:
7 | revealjs:
8 | incremental: true
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
10 | slide-number: c/t
11 | toc: true
12 | toc-depth: 1
13 | title-slide-attributes:
14 | data-background-image: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
15 | data-background-size: 20%
16 | data-background-opacity: "0.5"
17 | data-background-position: 98% 10%
18 | ---
19 |
20 | # What is the SWE WG?
21 |
22 | ## ASA BIOP Software Engineering working group (SWE WG)
23 |
24 | - An official working group of the ASA Biopharmaceutical Section
25 | - Formed in August 2022
26 | - Cross-industry collaboration with more than 30 members from over 20 organizations
27 | - Home page at [rconsortium.github.io/asa-biop-swe-wg](https://rconsortium.github.io/asa-biop-swe-wg)
28 | - Open for new members!
29 |
30 | ## Motivation
31 |
32 | - Importance of reliable software for statistical analysis cannot be overstated
33 | - In the past a lot of statistical analyses have been performed with proprietary software
34 | - Open-source software has gained increasing popularity in Biostatistics over the last two decades
35 | - Pros: rapid uptake of novel methods and great opportunities for collaboration and innovation
36 | - Cons: users face huge variability in software quality (reliability, efficiency and maintainability)
37 |
38 | ## Motivation (cont'd)
39 |
40 | - Current repositories (GitHub, CRAN):
41 | - Do not require any statistical quality assurance
42 | - It's harder to adopt packages without good documentation and maintenance
43 | - Developing high-quality software is critical to inform clinical and regulatory decisions:
44 | - Good coding practices
45 | - Reproducible outputs
46 | - Self-sufficient documentation
47 |
48 | ## Goals
49 |
50 | - **Primary Goal**: Collaborate to engineer R packages that implement important statistical methods to fill in critical gaps
51 |
52 | - **Secondary Goal**: Develop and disseminate best practices for engineering high-quality open-source statistical software
53 |
54 |
55 | ## SWE WG Activities
56 |
57 | - First R package `mmrm` was published on CRAN in October 2022 and updated in December
58 | - We aim to establish this package as a new standard for fitting mixed models for repeated measures (MMRM)
59 | - We have been developing and adopting best practices for software in the `mmrm` package, and open sourced it at [github.com/openpharma/mmrm](https://github.com/openpharma/mmrm)
60 | - Currently under active development to add more features
61 |
62 |
63 | ## SWE WG Activities (cont'd)
64 |
65 | - Prepare public training materials to disseminate best practice for software engineering in the Biostatistics community
66 | - At the beginning of February, a face-to-face workshop will take place in Basel, Switzerland with a focus on open-source software for clinical trials
67 | - Organize conference sessions with a focus on statistical software engineering at CEN, JSM and ASA/FDA Workshop
68 | - [YouTube series](https://www.youtube.com/playlist?list=PL848NFA2PWgCR35n02yn1ZV7JqSu3NMxS) with a focus on best practices for software engineering
69 |
70 | ## Best Practices for Software Engineering
71 |
72 | - User interface design
73 | - Version control with git
74 | - Unit and integration tests
75 | - Consistent and readable code style
76 | - Documentation
77 | - Continuous integration setup
78 | - Publication on website and repositories
79 | - etc.
80 |
81 | # `mmrm` package example
82 |
83 | ## Motivation
84 |
85 | - Mixed Models for Repeated Measures (MMRM) is a popular choice for analyzing longitudinal continuous outcomes in randomized clinical trials
86 | - No great R Package - initially thought that it was solved by using `lme4` with `lmerTest`
87 | - But this approach failed on large data (slow, did not converge)
88 | - `nlme` does not give Satterthwaite adjusted degrees of freedom, has convergence issues, and with `emmeans` it is only approximate
89 |
90 | ## Before creating a new package
91 |
92 | - First try to improve existing package
93 | - Here we tried to extend `glmmTMB` to calculate Satterthwaite adjusted degrees of freedom
94 | - But it did not work
95 | - Think about long term maintenance and responsibility
96 |
97 | ## `mmrm` Package overview
98 |
99 | - Linear model for dependent observations within independent subjects
100 | - Multiple covariance structures available for the dependent observations
101 | - REML or ML estimation, using multiple optimizers if needed
102 | - `emmeans` interface for least square means, `tidymodels` for easy model fitting
103 | - Satterthwaite and Kenward-Roger adjustments
104 |
105 | ## Why it's not just another package
106 |
107 | - Ongoing maintenance and support from the pharmaceutical industry
108 | - 5 companies being involved in the development, on track to become standard package
109 | - Development using best practices as a showcase for a high-quality package
110 | - Thorough unit and integration tests (also comparing with SAS results) to ensure accurate results
111 |
112 | ## Covariance structures
113 |
114 | | Covariance structures | `{mmrm}` | `PROC MIXED` |
115 | |:----------------------------------------------|:------------:|:-----:|
116 | | Unstructured (Unweighted/Weighted) | X/X | X/X |
117 | | Toeplitz (hetero/homo) | X/X | X/X |
118 | | Compound symmetry (hetero/homo) | X/X | X/X |
119 | | Auto-regressive (hetero/homo) | X/X | X/X |
120 | | Ante-dependence (hetero/homo) | X/X | X |
121 | | Spatial exponential | X | X |
122 |
123 | ## Degrees of Freedom Methods
124 |
125 | | Method | `{mmrm}` | `PROC MIXED` |
126 | |:---------------- |:----------:|:---:|
127 | | Contain | X* | X |
128 | | Between/Within | X* | X |
129 | | Residual | X* | X |
130 | | Satterthwaite | X | X |
131 | | Kenward-Roger | X | X |
132 | | Kenward-Roger (Linear)** | X| X |
133 |
134 | *Available through the `emmeans` package.
135 |
136 | **This is not equivalent to the KR2 setting in `PROC MIXED`
137 |
138 | ## Contrasts/LSMEANS
139 |
140 | Contrasts and least square means estimates are available in `mmrm` using:
141 |
142 | - `df_1d`, `df_md`
143 | - S3 method that is compatible with `emmeans`
144 | - LS means difference can be produced through `emmeans` (`pairs` method)
145 | - Degrees of freedom method is passed from `mmrm` to `emmeans`
146 | - By default `PROC MIXED` and `mmrm` do not adjust for multiplicity, whereas `emmeans` does.
147 |
148 | ## Demo
149 |
150 | # Closing and Next Steps
151 |
152 | ## Long term perspective
153 |
154 | - Software engineering is a critical competence in producing high-quality statistical software
155 | - A lot of work needs to be done regarding the establishment, dissemination and adoption of best practices for engineering open-source software
156 | - Improving the way software engineering is done will help improve the efficiency, reliability and innovation within Biostatistics
157 |
158 | ## New packages coming up
159 |
160 | - sasr
161 | - HTA packages
162 | - Bayesian MMRM
163 |
164 | ## Thank you! Questions?
165 |
166 | ::: columns
167 | ::: {.column width="50%"}
168 | {height="500"}
169 | :::
170 |
171 | ::: {.column width="50%"}
172 | {height="500"}
173 | :::
174 | :::
175 |
--------------------------------------------------------------------------------
/slides/r-meetup-july2023.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introduction to the ASA BIOP Software Engineering Working Group and the {mmrm} Package"
3 | author: "Laura Harris "
4 | date: "21 July 2023"
5 | format:
6 | revealjs:
7 | incremental: true
8 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
9 | slide-number: c/t
10 | toc: true
11 | toc-depth: 1
12 | title-slide-attributes:
13 | data-background-image: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
14 | data-background-size: 20%
15 | data-background-opacity: "0.5"
16 | data-background-position: 98% 10%
17 | ---
18 |
19 | # What is the SWE WG?
20 |
21 | ## ASA BIOP Software Engineering working group (SWE WG)
22 |
23 | - An official working group of the ASA Biopharmaceutical Section
24 | - Formed in August 2022
25 | - Cross-industry collaboration with more than 30 members from over 20 organizations
26 | - Home page at [rconsortium.github.io/asa-biop-swe-wg](https://rconsortium.github.io/asa-biop-swe-wg)
27 | - Open for new members!
28 |
29 | ## Motivation
30 |
31 | - Open-source software has gained increasing popularity in Biostatistics over the last two decades
32 | - Current repositories (GitHub, CRAN) don't require any statistical quality assurance
33 | - Developing high-quality software is critical to inform clinical and regulatory decisions:
34 | - Good coding practices
35 | - Reproducible outputs
36 | - Self-sufficient documentation
37 |
38 | ## Goals
39 |
40 | - **Primary Goal**: Collaborate to engineer R packages that implement important statistical methods to fill in critical gaps
41 |
42 | - **Secondary Goal**: Develop and disseminate best practices for engineering high-quality open-source statistical software
43 |
44 | ## SWE WG Activities
45 |
46 | - Engineer selected R packages to fill in gaps in the open-source statistical software landscape
47 | - First R package [`mmrm`](https://cran.r-project.org/web/packages/mmrm/index.html) was published on CRAN in October 2022 and updated in December
48 | - Disseminate best practice for software engineering in the Biostatistics community
49 | - Organize 1-day [workshop](https://openpharma.github.io/workshop-r-swe-sf/) on best R package development practices
50 | - Organize conference sessions with a focus on statistical software engineering at CEN, JSM and ASA/FDA Workshop
51 | - [YouTube series](https://www.youtube.com/playlist?list=PL848NFA2PWgCR35n02yn1ZV7JqSu3NMxS) with a focus on best practices for software engineering
52 |
53 | ## Best Practices for Software Engineering
54 |
55 | - User interface design
56 | - Version control with git
57 | - Unit and integration tests
58 | - Consistent and readable code style
59 | - Documentation
60 | - Continuous integration setup
61 | - Publication on website and repositories
62 | - etc.
63 |
64 | # `mmrm` package
65 |
66 | ## Motivation
67 |
68 | - Mixed Models for Repeated Measures (MMRM) is a popular choice for analyzing longitudinal continuous outcomes in randomized clinical trials
69 | - No great R Package - initially thought that it was solved by using `lme4` with `lmerTest`
70 | - But this approach failed on large data (slow, did not converge)
71 | - `nlme` does not give Satterthwaite adjusted degrees of freedom, has convergence issues, and with `emmeans` it is only approximate
72 |
73 | ## Before creating a new package
74 |
75 | - First try to improve existing package
76 | - Here we tried to extend `glmmTMB` to calculate Satterthwaite adjusted degrees of freedom
77 | - But it did not work
78 | - Think about long term maintenance and responsibility
79 |
80 | ## `mmrm` Package overview
81 |
82 | - Linear model for dependent observations within independent subjects
83 | - Multiple covariance structures available for the dependent observations
84 | - REML or ML estimation, using multiple optimizers if needed
85 | - `emmeans` interface for least square means, `tidymodels` for easy model fitting
86 | - Satterthwaite and Kenward-Roger adjustments
87 |
88 | ## Why it's not just another package
89 |
90 | - Ongoing maintenance and support from the pharmaceutical industry
91 | - 5 companies being involved in the development, on track to become standard package
92 | - Development using best practices as a showcase for a high-quality package
93 | - Thorough unit and integration tests (also comparing with SAS results) to ensure accurate results
94 |
95 | ## Covariance structures
96 |
97 | | Covariance structures | `{mmrm}` | `PROC MIXED` |
98 | |:----------------------------------------------|:------------:|:-----:|
99 | | Unstructured (Unweighted/Weighted) | X/X | X/X |
100 | | Toeplitz (hetero/homo) | X/X | X/X |
101 | | Compound symmetry (hetero/homo) | X/X | X/X |
102 | | Auto-regressive (hetero/homo) | X/X | X/X |
103 | | Ante-dependence (hetero/homo) | X/X | X |
104 | | Spatial exponential | X | X |
105 |
106 | ## Degrees of Freedom Methods
107 |
108 | | Method | `{mmrm}` | `PROC MIXED` |
109 | |:---------------- |:----------:|:---:|
110 | | Contain | X* | X |
111 | | Between/Within | X* | X |
112 | | Residual | X* | X |
113 | | Satterthwaite | X | X |
114 | | Kenward-Roger | X | X |
115 | | Kenward-Roger (Linear)** | X| X |
116 |
117 | *Available through the `emmeans` package.
118 |
119 | **This is not equivalent to the KR2 setting in `PROC MIXED`
120 |
121 | ## Contrasts/LSMEANS
122 |
123 | Contrasts and least square means estimates are available in `mmrm` using:
124 |
125 | - `df_1d`, `df_md`
126 | - S3 method that is compatible with `emmeans`
127 | - LS means difference can be produced through `emmeans` (`pairs` method)
128 | - Degrees of freedom method is passed from `mmrm` to `emmeans`
129 | - By default `PROC MIXED` and `mmrm` do not adjust for multiplicity, whereas `emmeans` does.
130 |
131 | ## Thank you! Questions?
132 |
133 | ::: columns
134 | ::: {.column width="50%"}
135 | {height="500"}
136 | :::
137 |
138 | ::: {.column width="50%"}
139 | {height="500"}
140 | :::
141 | :::
142 |
--------------------------------------------------------------------------------
/slides/rpharma-apac-2024.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introducing openstatsware"
3 | subtitle: "Who we are and what we build together"
4 | author: "Daniel Sabanés Bové on behalf of `openstatsware`"
5 | date: "2024/10/30"
6 | format:
7 | revealjs:
8 | incremental: true
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/openstatsware-hex-1200.png
10 | slide-number: c/t
11 | toc: true
12 | toc-depth: 1
13 | fontsize: 32px
14 | ---
15 |
16 | ```{r setup}
17 | #| include: false
18 | #| echo: false
19 | ```
20 |
21 | # Introducing openstatsware
22 |
23 | ## openstatsware
24 |
25 | ```{r calc-stats}
26 | library(readr)
27 | library(dplyr)
28 | members <- read_csv("../data/members.csv") |> filter(SWE_WG_Member == 1)
29 | n_members <- nrow(members)
30 | unique_orgs <- members |> pull("Affiliation") |> unique() |> sort()
31 | ```
32 |
33 | ::: columns
34 | ::: {.column width="70%"}
35 | - Formed on 19 August 2022
36 | - Official working group of the [American Statistical Association (ASA) Biopharmaceutical section (BIOP)](https://community.amstat.org/biop/home)
37 | - Special Interest Group (SIG) of the [European Federation of Statisticians in the Pharmaceutical Industry (EFSPI)](https://www.efspi.org/).
38 | - Cross-industry collaboration (`r n_members` members from `r length(unique_orgs)` organizations)
39 | - Homepage: [openstatsware.org](https://www.openstatsware.org/)
40 | - We welcome new members to join!
41 | :::
42 |
43 | ::: {.column width="30%"}
44 | {height="300"}
45 | :::
46 | :::
47 |
48 | ## Motivation
49 |
50 | - Open-source software increasingly popular in Biostatistics
51 | - Rapid uptake of novel statistical methods
52 | - Unprecedented opportunities for collaboration
53 | - Transparency of methods and implementation
54 | - Variability in software quality
55 | - No statistical quality assurance on open-source extension package repositories, e.g. CRAN
56 | - No industry standard for assessing quality of R packages
57 | - **Reliable software for core statistical analyses is paramount**
58 |
59 | # Our work
60 |
61 | ## openstatsware objectives
62 |
63 | - **Engineer selected packages** to fill in gaps in the open-source statistical software landscape, and to promote software tools designed by the working group through publications, conference presentations, workshops, and training courses.
64 |
65 | - **Develop good SWE practices** for engineering high-quality statistical software and promote their use in the broader Biostatistics community via public training materials.
66 |
67 | - **Communicate and collaborate** with other R software initiatives including via the [R Consortium](https://www.r-consortium.org/).
68 |
69 |
70 | ## Workstreams in Package Development
71 |
72 | - Mixed Models for Repeated Measures (MMRM)
73 | - Developed the [`mmrm`](https://cran.r-project.org/package=mmrm) R package for frequentist inference in MMRM
74 | - Bayesian MMRM
75 | - Developed the [`brms.mmrm`](https://cran.r-project.org/package=brms.mmrm) R package for Bayesian inference in MMRM
76 | - Health Technology Assessment (HTA)
77 | - Developed the [`maicplus`](https://hta-pharma.github.io/maicplus/) R package for matching-adjusted indirect comparison (MAIC)
78 | - Bayesian Safety Signal Detection
79 | - Developed the [`SafetySignalDetection.jl`](https://openpharma.github.io/SafetySignalDetection.jl/) Julia package
80 |
81 | ## Best Practices Dissemination - Workshop
82 |
83 | - Workshop "Good Software Engineering Practice for R Packages" on world tour
84 | - To teach hands-on skills and tools to engineer reliable R packages
85 | - Topics: R package structure, engineering workflow, ensuring quality, version control, collaboration and publication, and shiny development
86 | - 5 events in 2023 in Basel, Shanghai, San José, Rockville, and Montreal
87 | - 4 events in 2024 in Zurich, Salzburg, Beijing, and this Monday [online at R/Pharma APAC](https://openpharma.github.io/workshop-r-swe-rinpharma-2024/)!
88 |
89 | ## Best Practices Dissemination - openstatsguide
90 |
91 | - Small and concise set of recommendations for package developers
92 | - Opinionated, but aims to be based on experienced majority opinions
93 | - Focus is on developers, while users might find complementary "validation" frameworks valuable
94 | - Primarily for statistical packages (not plotting, data wrangling, etc.)
95 | - Generic principles which can be used across functional data science languages R, Python, and Julia
96 | - Concrete tools are mentioned as examples
97 |
98 | ## {background-iframe="https://openstatsware.org/guide.html"}
99 |
100 | # Outlook
101 |
102 | ## Long Term Perspective
103 |
104 | - Software engineering is a critical competence in producing high-quality statistical software
105 | - A lot of work needs to be done regarding the establishment, dissemination and adoption of best practices for engineering open-source software
106 | - Improving the way software engineering is done will help improve the efficiency, reliability and innovation within Biostatistics
107 |
108 | ## Next Steps
109 |
110 | - Join the Effective Statistician conference invited session organized by `openstatsware`
111 | - We will relaunch the CRAN Task View on Clinical Trials
112 | - We are considering writing a Shiny app development guide
113 |
114 | ## Q&A {background-image="thank-you.jpg"}
115 |
116 |
117 |
--------------------------------------------------------------------------------
/slides/rpharma-wg-mmrm-oct2023-quarto.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introducing `openstatsware` and the R Package `{mmrm}`"
3 | subtitle: "R/Pharma 2023 Conference"
4 | author: "Ya Wang on behalf of the working group"
5 | date: "2023/10/24"
6 | format:
7 | revealjs:
8 | incremental: true
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-new-1200.png
10 | slide-number: c/t
11 | toc: true
12 | toc-depth: 1
13 | fontsize: 32px
14 | title-slide-attributes:
15 | data-background-image: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-new-1200.png
16 | data-background-size: 20%
17 | data-background-opacity: "0.5"
18 | data-background-position: 98% 10%
19 | ---
20 |
21 | ```{r setup}
22 | #| include: false
23 | #| echo: false
24 | ```
25 |
26 | # Introducing the Working Group
27 |
28 | ## `openstatsware`
29 |
30 | ```{r calc-stats}
31 | library(readr)
32 | library(dplyr)
33 | members <- read_csv("../data/members.csv") |> filter(SWE_WG_Member == 1)
34 | n_members <- nrow(members)
35 | unique_orgs <- members |> pull("Affiliation") |> unique() |> sort()
36 | ```
37 |
38 | ::: columns
39 | ::: {.column width="50%"}
40 | - Official working group of the American Statistical Association (ASA) Biopharmaceutical Section
41 | - Formed on 19 August 2022
42 | - Cross-industry collaboration (`r n_members` members from `r length(unique_orgs)` organizations)
43 | - Full name: Software Engineering Working Group
44 | - Short name: `openstatsware`
45 | - Homepage: [rconsortium.github.io/asa-biop-swe-wg](https://rconsortium.github.io/asa-biop-swe-wg)
46 | :::
47 |
48 | ::: {.column width="50%"}
49 | {height="300"}
50 | :::
51 | :::
52 |
53 | ## Working Group Objectives
54 |
55 | - Primary
56 | - Engineer R packages that implement important statistical methods
57 | - to fill in gaps in the open-source statistical software landscape
58 | - focusing on what is needed for biopharmaceutical applications
59 | - Secondary
60 | - Develop and disseminate best practices for engineering high-quality open-source statistical software
61 | - By actively doing the statistical engineering work together, we align on best practices and can communicate these to others
62 |
63 | ## Workstreams in R Package Development
64 |
65 | - Mixed Models for Repeated Measures (MMRM) [🌎](https://rconsortium.github.io/asa-biop-swe-wg/mmrm_R_package.html)
66 | - Develop `mmrm` R package for frequentist inference in MMRM ([CRAN](https://cran.r-project.org/package=mmrm))
67 | - Bayesian MMRM [🌎](https://rconsortium.github.io/asa-biop-swe-wg/bayesian_mmrm_R_package.html)
68 | - Develop `brms.mmrm` R package for Bayesian inference in MMRM ([CRAN](https://cran.r-project.org/package=brms.mmrm))
69 | - Health Technology Assessment [🌎](https://github.com/hta-pharma/)
70 | - Develop open-source R tools to support HTA dossier submission across various countries
71 | - Particularly topics with unmet needs in R implementation and/or related to EUnetHTA
72 |
73 | ## Best Practices Dissemination
74 |
75 | - Workshop "Good Software Engineering Practice for R Packages" on world tour
76 | - to teach hands-on skills and tools to engineer reliable R packages
77 | - 5 events so far at Basel, Shanghai, San José, Rockville, and Montreal
78 | - YouTube video series "Statistical Software Engineering 101" [🌎](https://www.youtube.com/playlist?list=PL848NFA2PWgCR35n02yn1ZV7JqSu3NMxS)
79 | - to introduce tips and tricks for good statistical software engineering practices
80 | - 2 videos so far
81 |
82 | # R Package `mmrm` {background-image="slide-background-mmrm.png"}
83 |
84 | ## Motivation
85 |
86 | - MMRM is a popular choice for analyzing longitudinal continuous outcomes in randomized clinical trials
87 | - No great R package
88 | - Initially thought that the MMRM problem was solved by using `lme4` with `lmerTest`, learned that this approach failed on large data sets (slow, did not converge)
89 | - `nlme` does not give Satterthwaite adjusted degrees of freedom, has convergence issues, and with `emmeans` it is only approximate
90 | - Next we tried to extend `glmmTMB` to calculate Satterthwaite adjusted degrees of freedom, but it did not work
91 |
92 | ## Idea
93 |
94 | - We only want to fit a fixed effects model with a structured covariance matrix for each subject
95 | - The idea is then to use the Template Model Builder (`TMB`) directly - as it is also underlying `glmmTMB` - but code the exact model we want
96 | - We do this by implementing the log-likelihood in `C++` using the `TMB` provided libraries
97 |
98 | ## Advantages of `TMB`
99 |
100 | - Fast `C++` framework for defining objective functions (`Rcpp` would have been an alternative interface)
101 | - Automatic differentiation of the log-likelihood as a function of the variance parameters
102 | - We get the gradient and Hessian exactly and without additional coding
103 | - This can be used from the R side with the `TMB` interface and plugged into optimizers
104 |
105 | ## Why It's Not Just Another Package
106 |
107 | - Ongoing maintenance and support from the pharmaceutical industry
108 | - 5 companies being involved in the development, on track to become standard package
109 | - Development using best practices as a showcase for a high-quality package
110 | - Thorough unit and integration tests (also comparing with SAS results) to ensure accurate results
111 |
112 | ## Features of `mmrm`
113 |
114 | - Linear model for dependent observations within independent subjects
115 | - Covariance structures for the dependent observations:
116 | - Unstructured, Toeplitz, AR1, compound symmetry, ante-dependence, spatial exponential
117 | - Allows group specific covariance estimates and weights
118 | - REML or ML estimation, using multiple optimizers if needed
119 | - `emmeans` interface for least square means
120 | - `tidymodels` for easy model fitting
121 | - Satterthwaite and Kenward-Roger adjustments
122 | - Robust sandwich estimator for covariance
123 |
124 | ## Comparison with Other Software
125 |
126 | - We have run comparison analyses with other R packages, namely `nlme`, `glmmTMB` and `lme4`
127 | - Also compared with SAS `PROC GLIMMIX`
128 | - Highlights
129 | - `mmrm` has faster convergence time
130 | - `mmrm` provides closest results to `PROC GLIMMIX`
131 | - Detailed results at the online [comparison vignette](https://openpharma.github.io/mmrm/main/articles/mmrm_review_methods.html)
132 |
133 | ## Q&A {background-image="thank-you.jpg"}
134 |
135 |
136 |
137 |
--------------------------------------------------------------------------------
/slides/rpharma-wg-mmrm-oct2023.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introducing `openstatsware` and the R Package `{mmrm}`"
3 | subtitle: "R/Pharma 2023 Conference"
4 | author: "Ya Wang on behalf of the working group"
5 | date: "2023/10/24"
6 | format:
7 | pptx:
8 | incremental: true
9 | toc: true
10 | toc-depth: 1
11 | toc-title: Outline
12 | ---
13 |
14 | ```{r setup}
15 | #| include: false
16 | #| echo: false
17 | ```
18 |
19 | # Introducing the Working Group
20 |
21 | ## `openstatsware`
22 |
23 | ```{r calc-stats}
24 | library(readr)
25 | library(dplyr)
26 | members <- read_csv("../data/members.csv") |> filter(SWE_WG_Member == 1)
27 | n_members <- nrow(members)
28 | unique_orgs <- members |> pull("Affiliation") |> unique() |> sort()
29 | ```
30 |
31 | ::: columns
32 | ::: {.column width="50%"}
33 | - Official working group of the American Statistical Association (ASA) Biopharmaceutical Section
34 | - Formed on 19 August 2022
35 | - Cross-industry collaboration (`r n_members` members from `r length(unique_orgs)` organizations)
36 | - Full name: Software Engineering Working Group
37 | - Short name: `openstatsware`
38 | - Homepage: [rconsortium.github.io/asa-biop-swe-wg](https://rconsortium.github.io/asa-biop-swe-wg)
39 | :::
40 |
41 | ::: {.column width="50%"}
42 | {height="300"}
43 | :::
44 | :::
45 |
46 | ## Working Group Objectives
47 |
48 | - Primary
49 | - Engineer R packages that implement important statistical methods
50 | - to fill in gaps in the open-source statistical software landscape
51 | - focusing on what is needed for biopharmaceutical applications
52 | - Secondary
53 | - Develop and disseminate best practices for engineering high-quality open-source statistical software
54 | - By actively doing the statistical engineering work together, we align on best practices and can communicate these to others
55 |
56 | ## Workstreams in R Package Development
57 |
58 | - Mixed Models for Repeated Measures (MMRM) [🌎](https://rconsortium.github.io/asa-biop-swe-wg/mmrm_R_package.html)
59 | - Develop `mmrm` R package for frequentist inference in MMRM ([CRAN](https://cran.r-project.org/package=mmrm))
60 | - Bayesian MMRM [🌎](https://rconsortium.github.io/asa-biop-swe-wg/bayesian_mmrm_R_package.html)
61 | - Develop `brms.mmrm` R package for Bayesian inference in MMRM ([CRAN](https://cran.r-project.org/package=brms.mmrm))
62 | - Health Technology Assessment [🌎](https://github.com/hta-pharma/)
63 | - Develop open-source R tools to support HTA dossier submission across various countries
64 | - Particularly topics with unmet needs in R implementation and/or related to EUnetHTA
65 |
66 | ## Best Practices Dissemination
67 |
68 | - Workshop "Good Software Engineering Practice for R Packages" on world tour
69 | - to teach hands-on skills and tools to engineer reliable R packages
70 | - 5 events so far at Basel, Shanghai, San José, Rockville, and Montreal
71 | - YouTube video series "Statistical Software Engineering 101" [🌎](https://www.youtube.com/playlist?list=PL848NFA2PWgCR35n02yn1ZV7JqSu3NMxS)
72 | - to introduce tips and tricks for good statistical software engineering practices
73 | - 2 videos so far
74 |
75 | # R Package `mmrm` {background-image="slide-background-mmrm.png"}
76 |
77 | ## Motivation
78 |
79 | - MMRM is a popular choice for analyzing longitudinal continuous outcomes in randomized clinical trials
80 | - No great R package
81 | - Initially thought that the MMRM problem was solved by using `lme4` with `lmerTest`, learned that this approach failed on large data sets (slow, did not converge)
82 | - `nlme` does not give Satterthwaite adjusted degrees of freedom, has convergence issues, and with `emmeans` it is only approximate
83 | - Next we tried to extend `glmmTMB` to calculate Satterthwaite adjusted degrees of freedom, but it did not work
84 |
85 | ## Idea
86 |
87 | - We only want to fit a fixed effects model with a structured covariance matrix for each subject
88 | - The idea is then to use the Template Model Builder (`TMB`) directly - as it is also underlying `glmmTMB` - but code the exact model we want
89 | - We do this by implementing the log-likelihood in `C++` using the `TMB` provided libraries
90 |
91 | ## Advantages of `TMB`
92 |
93 | - Fast `C++` framework for defining objective functions (`Rcpp` would have been an alternative interface)
94 | - Automatic differentiation of the log-likelihood as a function of the variance parameters
95 | - We get the gradient and Hessian exactly and without additional coding
96 | - This can be used from the R side with the `TMB` interface and plugged into optimizers
97 |
98 | ## Why It's Not Just Another Package
99 |
100 | - Ongoing maintenance and support from the pharmaceutical industry
101 | - 5 companies being involved in the development, on track to become standard package
102 | - Development using best practices as a showcase for a high-quality package
103 | - Thorough unit and integration tests (also comparing with SAS results) to ensure accurate results
104 |
105 | ## Features of `mmrm` {.smaller}
106 |
107 | - Linear model for dependent observations within independent subjects
108 | - Covariance structures for the dependent observations:
109 | - Unstructured, Toeplitz, AR1, compound symmetry, ante-dependence, spatial exponential
110 | - Allows group specific covariance estimates and weights
111 | - REML or ML estimation, using multiple optimizers if needed
112 | - `emmeans` interface for least square means
113 | - `tidymodels` for easy model fitting
114 | - Satterthwaite and Kenward-Roger adjustments
115 | - Robust sandwich estimator for covariance
116 |
117 | ## Comparison with Other Software
118 |
119 | - We have run comparison analyses with other R packages, namely `nlme`, `glmmTMB` and `lme4`
120 | - Also compared with SAS `PROC GLIMMIX`
121 | - Highlights
122 | - `mmrm` has faster convergence time
123 | - `mmrm` provides closest results to `PROC GLIMMIX`
124 | - Detailed results at the online [comparison vignette](https://openpharma.github.io/mmrm/main/articles/mmrm_review_methods.html)
125 |
126 | ## Q&A {background-image="thank-you.jpg"}
127 |
128 |
129 |
130 |
--------------------------------------------------------------------------------
/slides/sd_diff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/sd_diff.png
--------------------------------------------------------------------------------
/slides/slide-background-mmrm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/slide-background-mmrm.png
--------------------------------------------------------------------------------
/slides/style.css:
--------------------------------------------------------------------------------
1 | .title {
2 | font-size: 48px !important;
3 | }
4 |
5 | .quarto-title-author-name {
6 | font-size: 32px;
7 | }
8 |
9 | .date {
10 | font-size: 32px;
11 | }
12 |
--------------------------------------------------------------------------------
/slides/test_flow_mean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/test_flow_mean.png
--------------------------------------------------------------------------------
/slides/testing_venn.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/testing_venn.jpeg
--------------------------------------------------------------------------------
/slides/thank-you.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstatsware/website/14174adff848ba251c1bac354578358642ce04b0/slides/thank-you.jpg
--------------------------------------------------------------------------------
/slides/unit_testing_advanced.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Unit Testing for R Developers"
3 | subtitle: "2: Advancing your skills"
4 | author: "Jonathan Sidi, Daniel Sabanes Bove"
5 | date: ""
6 | format:
7 | revealjs:
8 | incremental: true
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
10 | slide-number: c/t
11 | margin: 0.05
12 | fontsize: 25px
13 | title-slide-attributes:
14 | data-background-image: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
15 | data-background-size: 10%
16 | data-background-opacity: "0.9"
17 | data-background-position: 92% 30%
18 | editor_options:
19 | chunk_output_type: console
20 | ---
21 |
22 | ## What are we going to talk about?
23 |
24 | ::: columns
25 | ::: {.column width="40%"}
26 | - Understand the philosophy behind unit testing and how it relates to other tests
27 | - Gain more confidence in your testing framework by examining coverage
28 | :::
29 |
30 | ::: {.column width="60%"}
31 | 
32 | :::
33 | :::
34 |
35 | ## What is the structure of unit tests?
36 |
37 | - **Setup**: Set up the inputs for the test.
38 | - **Compute**: Compute the result which will be tested.
39 | - **Expect**: Define the expected result.
40 | - **Compare**: Compare the actual with the expected result.
41 |
42 | . . .
43 |
44 | The code should test a specific characteristic or functionality of the package.
45 |
46 | Because if your action does too many things at once, you have to search longer for the bug.
47 |
48 | ## Who do you write tests for?
49 |
50 | Unit testing is a method to communicate the package requirements to various stakeholders.
51 |
52 | {.absolute top="200" left="250" width="55%"}
53 |
54 | ## Test Flavors
55 |
56 | There are different flavors of unit tests that we write:
57 |
58 | - **Developer Tests**
59 | - Tests that help the software developer speed up and iterate over different versions
60 | - **Quality Assurance (QA) Tests**
61 | - Proving that the package isn't broken or that functions are returning the expected answers
62 |
63 | ## Test Flavors (cont'd)
64 |
65 | ::: nonincremental
66 | ::: columns
67 | ::: {.column width="50%"}
68 | **Developer Tests**
69 |
70 | *Remove Fear of Change*
71 |
72 | - Assist with package design
73 | - Guide for refactoring
74 |
75 | *Reduce Development Time*
76 |
77 | - Explain code to others
78 | - Pinpoint errors
79 | - Minimize debugging/manual testing
80 | :::
81 |
82 | ::: {.column width="50%"}
83 | **QA Tests**
84 |
85 | - Encode object requirements
86 | - Catch performance degradation
87 | :::
88 | :::
89 | :::
90 |
91 | ## Planning tests for success
92 |
93 | - Write tests to evaluate the exported behavior that the package user would invoke.
94 | - Sets up the ability to refactor code.
95 | - Tests will not have to be rewritten as the package iterates and improves.
96 |
97 | ## Planning tests for success (cont'd)
98 |
99 | - Write tests that you will not need to update or maintain because of a dependency.
100 | - Because you can't control how other maintainers are developing packages and managing their lifecycle.
101 | - Note: Having unit tests allows dependency changes to be detected early enough - e.g. CRAN is running reverse dependency checks before accepting new package versions
102 |
103 | ## Testing surface areas
104 |
105 | ::: columns
106 | ::: {.column width="50%"}
107 | - When writing unit tests for functions it is important to take into account the surface area of the tests
108 | - In this example we have three unit tests on `base::mean.default`
109 | - Tests B and C have large surface areas; if they fail, you will still need to investigate the cause of their failure.
110 | :::
111 |
112 | ::: {.column width="50%"}
113 | {width="75%"}
114 | :::
115 | :::
116 |
117 | ## Adding new features with a safety net
118 |
119 | When you are developing new features in a package make sure to prepare the area with unit tests for the expected behavior of the feature.
120 |
121 | This will serve two main purposes:
122 |
123 | ::: columns
124 | ::: {.column width="35%"}
125 | 
126 | :::
127 |
128 | ::: {.column width="65%"}
129 | 1. **Communicate the goals** of the feature to others and to yourself.
130 | 2. **Free to develop** the idea while writing messy code and refactoring it while still controlling for the basic requirements.
131 | :::
132 | :::
133 |
134 | ## Some strategies for learning
135 |
136 | 1. **Pair up** with experienced developer buddies.
137 | 2. **Help out** with refactoring or features on GitHub repositories:
138 | - Developers usually list what they are working on and are happy to get an extra pair of hands to tackle issues.
139 | - The process of merging a pull request will teach you hands on.
140 | 3. **Clone repositories** of packages you are familiar with:
141 | - Run the tests you find there.
142 | - You will learn a lot about testing, coding and strategies.
143 |
144 | ## What can and should be unit tested?
145 |
146 | 1. Script
147 | - Single files with functions can have unit tests
148 | 2. Packages
149 | - This is the standard object to write tests for
150 | 3. Shiny apps
151 | - Shiny app UI and reactivity can be tested
152 | 4. Data derivation
153 | - Testing data preprocessing pipelines for expected characteristics of columns
154 |
155 | ## Overview of unit testing frameworks
156 |
157 | 1. script/packages: `testthat`, `tinytest`, `box`, `RUnit`
158 | 2. shiny apps: `shinytest2`, `reactor`, `crrry`
159 | 3. data: `pointblank`, `assertr`, `validate`, `dataReporter`
160 | 4. plots: `vdiffr`
161 |
162 | ## Testing plot functions
163 |
164 | - The `vdiffr` package allows you to save a vector graphics file of the expected plot outcome
165 | - works for both base plots as well as `ggplot`s
166 | - Beware of false positive test failures though
167 | - operating systems can lead to slightly different plot outcomes
168 | - requires manual visual checks
169 |
170 | ## Testing plot functions (cont'd)
171 |
172 | - Alternative for `ggplot`:
173 | - use `ggplot2::layer_data()` function to extract layer information
174 | - use that for comparisons in tests instead of vector graphics
175 |
176 | ## Communicating Tests: Coverage
177 |
178 | 1. [`covr`](https://cran.r-project.org/package=covr)
179 | - R package that evaluates the % of lines of code that are tested
180 | - Use `covr::package_coverage()` to analyze current package coverage
181 |
182 | 2. [`covrpage`](https://github.com/yonicd/covrpage)
183 | - R package that summarizes `covr` statistics into simple reports that can be shared
184 | - Use `covrpage::covrpage()` to create the report page
185 |
186 | ## Mapping the Logic
187 |
188 | ```{r}
189 | library(ggplot2)
190 | # Background raster: every xColor/yColor pair (100 x 100 rows) bound column-wise to every x/y pair.
191 | n_grid <- 100
192 | shade_grid <- tidyr::expand_grid(xColor = seq(0, 1, length.out = n_grid), yColor = seq(0, 1, length.out = n_grid))
193 | position_grid <- tidyr::expand_grid(x = seq(3, 1, length.out = n_grid), y = seq(1, 3, length.out = n_grid))
194 | color_data <- dplyr::mutate(
195 |   dplyr::bind_cols(shade_grid, position_grid),
196 |   zColor = xColor^3 + yColor  # value mapped to the raster fill below
197 | )
198 |
199 | # Axis captions: one row per arrow segment.
200 | axis_labels <- tibble::tribble(
201 |   ~x, ~xend, ~y, ~yend, ~label,
202 |   1.25, 2.75, 1.10, 1.10, 'Less Surface Area is Better',
203 |   2.95, 2.95, 2.75, 1.25, 'More Coverage is Better'
204 | )
205 |
206 | # Curved captions: one row per label drawn on the map.
207 | path_labels <- tibble::tribble(
208 |   ~x, ~xend, ~y, ~yend, ~label,
209 |   1.05, 1.35, 2.25, 2.85, 'bad tests',
210 |   1.25, 1.85, 1.50, 2.75, 'snapshots',
211 |   1.35, 2.00, 2.65, 2.65, 'no tests',
212 |   1.45, 2.00, 1.40, 2.30, 'examples',
213 |   2.50, 1.50, 2.80, 1.20, 'unit tests'
214 | )
215 |
216 | # Both text layers map the same start/end coordinates and label column.
217 | span_aes <- aes(x = x, xend = xend, y = y, yend = yend, label = label)
218 | closed_arrow <- grid::arrow(type = 'closed', length = unit(0.1, "inches"))
219 | ggplot() +
220 |   geom_raster(mapping = aes(x, y, fill = zColor), data = color_data, show.legend = FALSE) +
221 |   scale_fill_distiller(type = "div", palette = 8, direction = -1) +
222 |   scale_y_reverse() +
223 |   geomtextpath::geom_textcurve(mapping = span_aes, data = path_labels, linetype = NA, size = 5) +
224 |   geomtextpath::geom_textsegment(mapping = span_aes, data = axis_labels, arrow = closed_arrow, size = 4) +
225 |   theme_void()
226 | ```
227 |
228 |
229 |
230 | ## Summary
231 |
232 | - Philosophy of testing:
233 | - Aggregate of all tests should cover the whole functionality
234 | - But each test on its own should be specific and only have small surface
235 | - Developer vs. Quality Assurance Tests
236 | - Not just R packages: also scripts, Shiny apps, data
237 | - How to communicate test coverage:
238 | - Use `covr` to calculate coverage
239 | - Use `covrpage` to create a summary report
240 |
241 | ## Outlook: Automating Tests
242 |
243 | ::: columns
244 | ::: {.column width="65%"}
245 | When developing through a version control platform like GitHub, GitLab, or Bitbucket you can automatically run your tests through CI/CD for each commit or when working with other developers via pull requests.
246 | :::
247 |
248 | ::: {.column width="35%"}
249 | 
250 | :::
251 | :::
252 |
253 | ## Brought to you by the Software Engineering Working Group
254 |
255 | {height="500" fig-align="center"}
256 |
257 | ::: {style="text-align: center"}
258 | [rconsortium.github.io/asa-biop-swe-wg](https://rconsortium.github.io/asa-biop-swe-wg/)
259 | :::
260 |
--------------------------------------------------------------------------------
/slides/unit_testing_basic.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Unit Testing for R Developers"
3 | subtitle: "1: The Basics"
4 | author: "Daniel Sabanes Bove, Jonathan Sidi"
5 | date: ""
6 | format:
7 | revealjs:
8 | incremental: true
9 | logo: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
10 | slide-number: c/t
11 | margin: 0.05
12 | title-slide-attributes:
13 | data-background-image: https://github.com/RConsortium/asa-biop-swe-wg/raw/main/sticker/sticker-1200.png
14 | data-background-size: 10%
15 | data-background-opacity: "0.9"
16 | data-background-position: 92% 30%
17 | editor_options:
18 | chunk_output_type: console
19 | ---
20 |
21 | ## What are we going to talk about?
22 | :::: {.columns}
23 | ::: {.column width="40%"}
24 | - Introduce unit tests for R packages
25 | - Show that writing unit tests is easy
26 | :::
27 | ::: {.column width="60%"}
28 | 
29 | :::
30 | ::::
31 |
32 | ## Wikipedia Definition of "Unit testing"
33 |
34 | - Unit tests \[...\] ensure that a section of an application (known as the "unit") meets its design and behaves as intended.
35 | - In procedural programming, a "unit" could be \[...\] an individual function or procedure.
36 | - A unit test provides a strict, written contract that the piece of code must satisfy.
37 | - By writing tests first for the smallest testable units, then the compound behaviors between those, one can build up comprehensive tests for complex applications.
38 |
39 | ## What is the structure of unit tests?
40 |
41 | - **Setup**: Set up the inputs for the test.
42 | - **Compute**: Compute the result which will be tested.
43 | - **Expect**: Define the expected result.
44 | - **Compare**: Compare the actual with the expected result.
45 |
46 | ## What do unit tests for R packages look like?
47 |
48 | The most popular testing framework for R packages is [`{testthat}`](https://testthat.r-lib.org/).
49 |
50 | Therefore we show here the `{testthat}` syntax, but the structure is similar in other frameworks.
51 |
52 | . . .
53 |
54 | ``` r
55 | test_that("my_fun can do xyz as expected", {
56 | input <- … # setup: prepare input for xyz
57 | result <- my_fun(input, …) # compute: do xyz
58 | expected <- … # expect: hardcode what the result of xyz should be
59 | expect_identical(result, expected) # compare result with expectation
60 | })
61 | ```
62 |
63 | ## What comparisons can I use with `{testthat}`?
64 |
65 | - All comparisons start with `expect_` prefix and take result and expected as arguments.
66 |
67 | - They will throw an error if the comparison evaluates to a value different than what is expected.
68 |
69 | . . .
70 |
71 | ``` r
72 | expect_identical() # exact identity
73 | expect_equal() # equal up to numerical tolerance
74 | expect_match() # character matches regular expression
75 | expect_silent() # no message, warning, error is produced
76 | expect_warning() # (specific) warning occurs
77 | expect_error() # (specific) error occurs
78 | expect_is() # object is of specific class
79 | expect_true(), expect_false() # general usage
80 | ```
81 |
82 | ## Can I not just use examples?
83 |
84 | "Wait a second ...
85 |
86 | R packages contain example code for documented functions or objects.
87 |
88 | These are automatically executed by `R CMD CHECK`.
89 |
90 | So this sounds sufficient, right? ..."
91 |
92 | . . .
93 |
94 | → **Not really!**
95 |
96 | ## Why can I not just use examples?
97 |
98 | - **Misses Bugs**: If a code change causes a bug that does not lead to an error, this bug will not be detected.
99 | * E.g., wrong or no output of a function.
100 | * Because examples (usually) don't compare vs. expected results or behavior!
101 | - **Misses Internals**: Examples cannot test internal functions (i.e. units) that build up the API for users or developers.
102 | * Manual debugging becomes necessary to track down root cause of error down to internal functions.
103 |
104 | ## So why should I write unit tests?
105 |
106 | - **Faster Debugging**: Only need to search narrow (unit) scope for the root cause.
107 | - **Faster Development**: Have confidence that new code does not introduce side-bugs.
108 | - **Better Design**: Encourages aggressive refactoring into small maintainable units.
109 | - **Better Documentation**: Developers can look at the unit tests to understand a function's usage and behavior.
110 | - **Reduce Future Cost**: Writing unit tests is an investment that pays off long-term.
111 |
112 | ## When should I write unit tests?
113 |
114 | - **Before coding**: In test-driven development (TDD), unit tests are created before the code itself is written.
115 | - **During coding**: When developing new functions, you need to do some (interactive) testing anyway.
116 | - **In PR**: Unit tests should be included in the PR that merges the new or modified code.
117 | - **When bug bites**: When a bug is detected, add unit test(s) that reproduce the bug, fix the code and confirm that the corresponding unit tests pass now.
118 |
119 | ## How should I write unit tests?
120 |
121 | - **Isolatable**: Can be run on its own.
122 | - **Repeatable**: Deterministic behavior (e.g. use `set.seed()`).
123 | - **Readable**: Keep it simple.
124 | - **Small**: Only test one behavior with each unit test.
125 | - **Fast**: Because it will be run in automation.
126 | - **Coverage**: Test all relevant features.
127 |
128 | ## Summary
129 |
130 | Unit tests ...
131 |
132 | - ... **are required** and daily business in professional software development.
133 | - ... **take time** to write.
134 | - ... **pay off** though, by speeding up development and debugging, improving design and documentation, and enabling refactoring.
135 | - ... **avoid bugs** that can be orders of magnitude more expensive.
136 | - ... **are complemented** by higher level tests (integration tests).
137 |
138 | ## Brought to you by the Software Engineering Working Group
139 |
140 | {height="500" fig-align="center"}
141 |
142 |