├── py
├── terra_widgets
│ ├── __init__.py
│ ├── tests
│ │ ├── __init__.py
│ │ └── test_workspace_paths.py
│ ├── workspace_metadata.py
│ ├── workspace_paths.py
│ └── html_snapshots.py
├── requirements.txt
├── setup.py
├── README.md
└── py_cromwell_setup.py
├── storage-snippets
│ ├── snippets_setup.R
│ ├── snippets_setup.py
│ ├── interact_with_html_snapshots.py
│ ├── list_objects_in_bucket.R
│ ├── list_objects_in_bucket.py
│ ├── copy_file_from_workspace_bucket.R
│ ├── copy_file_from_workspace_bucket.py
│ ├── copy_data_to_workspace_bucket.py
│ ├── copy_data_to_workspace_bucket.R
│ └── README.md
├── .gitignore
├── sql-snippets
│ ├── total_number_of_participants.sql
│ ├── number_of_participants_with_measurements.sql
│ ├── number_of_participants_with_med_conditions.sql
│ ├── measurement_of_interest_by_site.ggplot
│ ├── measurement_of_interest_by_site.plotnine
│ ├── measurement_of_interest_by_sex_at_birth.plotnine
│ ├── most_recent_measurement_of_interest_by_site.ggplot
│ ├── measurement_of_interest_by_sex_at_birth.ggplot
│ ├── most_recent_measurement_of_interest_by_sex_at_birth.plotnine
│ ├── most_recent_measurement_of_interest_by_site.plotnine
│ ├── most_recent_measurement_of_interest_by_sex_at_birth.ggplot
│ ├── measurement_of_interest_by_age_and_sex_at_birth.ggplot
│ ├── most_recent_measurement_of_interest_by_age_and_sex_at_birth.ggplot
│ ├── snippets_setup.py
│ ├── measurement_of_interest_by_age_and_sex_at_birth.plotnine
│ ├── measurement_of_interest.sql
│ ├── most_recent_measurement_of_interest_by_age_and_sex_at_birth.plotnine
│ ├── snippets_setup.R
│ ├── most_recent_measurement_of_interest.sql
│ ├── measurements_of_interest_summary.sql
│ ├── measurements_of_interest_summary_test.py
│ ├── most_recent_measurement_of_interest_test.py
│ ├── measurement_of_interest_test.py
│ └── README.md
├── dataset-snippets
│ ├── summarize_a_dataframe.R
│ ├── summarize_a_dataframe.py
│ ├── add_age_to_demographics.py
│ ├── join_dataframes.py
│ ├── add_age_to_demographics.R
│ ├── snippets_setup.py
│ ├── join_dataframes.R
│ ├── snippets_setup.R
│ ├── summarize_a_survey_module.R
│ ├── summarize_a_survey_module.py
│ ├── measurement_by_sex_at_birth.ggplot
│ ├── measurement_by_sex_at_birth.plotnine
│ ├── measurement_by_age_and_sex_at_birth.ggplot
│ ├── summarize_a_survey_by_question_concept_id.py
│ ├── summarize_a_survey_by_question_concept_id.R
│ ├── measurement_by_age_and_sex_at_birth.plotnine
│ └── README.md
├── .github
│ └── PULL_REQUEST_TEMPLATE.md
├── README.md
├── LICENSE.txt
├── r
│ └── r_cromwell_setup.R
└── CONTRIBUTING.md
/py/terra_widgets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/py/terra_widgets/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/py/requirements.txt:
--------------------------------------------------------------------------------
1 | firecloud
2 | ipython
3 | ipywidgets
4 | multiprocess
5 | pandas
6 |
--------------------------------------------------------------------------------
/storage-snippets/snippets_setup.R:
--------------------------------------------------------------------------------
1 | library(tidyverse) # Data wrangling packages.
2 |
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | *smoke_test.R
6 | *smoke_test.py
7 | *.json
8 | *.html
9 |
--------------------------------------------------------------------------------
/storage-snippets/snippets_setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
7 |
--------------------------------------------------------------------------------
/storage-snippets/interact_with_html_snapshots.py:
--------------------------------------------------------------------------------
1 | from terra_widgets.html_snapshots import display_html_snapshots_widget
2 |
3 | # This will display a user interface to interact with HTML snapshots stored in the workspace bucket.
4 | display_html_snapshots_widget()
5 |
--------------------------------------------------------------------------------
/sql-snippets/total_number_of_participants.sql:
--------------------------------------------------------------------------------
1 |
2 | -- Compute the count of unique participants in our All of Us cohort.
3 | SELECT
4 | COUNT(DISTINCT person_id) AS total_number_of_participants
5 | FROM
6 | `{CDR}.person`
7 | WHERE
8 | person_id IN ({COHORT_QUERY})
9 |
--------------------------------------------------------------------------------
/storage-snippets/list_objects_in_bucket.R:
--------------------------------------------------------------------------------
1 | # This snippet assumes that you run setup first
2 |
3 | # This code lists objects in your Google Bucket
4 |
5 | # Get the bucket name
6 | my_bucket <- Sys.getenv('WORKSPACE_BUCKET')
7 |
8 | # List objects in the bucket
9 | system(paste0("gsutil ls -r ", my_bucket), intern=T)
10 |
11 |
12 |
--------------------------------------------------------------------------------
/sql-snippets/number_of_participants_with_measurements.sql:
--------------------------------------------------------------------------------
1 |
2 | -- Compute the count of unique participants in our All of Us cohort
3 | -- that have at least one measurement.
4 | SELECT
5 | COUNT(DISTINCT person_id) AS number_of_participants_with_measurements
6 | FROM
7 | `{CDR}.measurement`
8 | WHERE
9 | person_id IN ({COHORT_QUERY})
10 |
--------------------------------------------------------------------------------
/sql-snippets/number_of_participants_with_med_conditions.sql:
--------------------------------------------------------------------------------
1 |
2 | -- Compute the count of unique participants in our All of Us cohort
3 | -- that have at least one condition.
4 | SELECT
5 | COUNT(DISTINCT person_id) AS number_of_participants_with_med_conditions
6 | FROM
7 | `{CDR}.condition_occurrence`
8 | WHERE
9 | person_id IN ({COHORT_QUERY})
10 |
--------------------------------------------------------------------------------
/storage-snippets/list_objects_in_bucket.py:
--------------------------------------------------------------------------------
1 | # This snippet assumes that you run setup first
2 |
3 | # This code lists objects in your Google Bucket
4 |
5 | # Get the bucket name
6 | my_bucket = os.getenv('WORKSPACE_BUCKET')
7 |
8 | # List objects in the bucket
9 | print(subprocess.check_output(f"gsutil ls -r {my_bucket}", shell=True).decode('utf-8'))
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dataset-snippets/summarize_a_dataframe.R:
--------------------------------------------------------------------------------
1 | # Use snippet 'summarize_a_dataframe' to display summary statistics for a dataframe.
2 | # It assumes snippet 'Setup' has been executed.
3 | # See also https://www.rdocumentation.org/packages/skimr/versions/1.0.7/topics/skim
4 |
5 |
6 | ## -----[ CHANGE THE DATAFRAME NAME(S) TO MATCH YOURS FROM DATASET BUILDER] -----
7 | print(skim(YOUR_DATASET_NAME_person_df))
8 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | \
2 |
3 | Unfortunately we don't have automated testing configured for the code in this
4 | repository yet so we set up this checklist as an *automatic reminder*:
5 |
6 | - [ ] Ensure that the smoke tests pass using the current (or upcoming) CDR
7 | - [ ] Update documentation relevant to this pull request
8 |
9 | Questions? See [CONTRIBUTING.md](https://github.com/all-of-us/workbench-snippets/blob/master/CONTRIBUTING.md)
10 | or file an issue so that we can get it documented!
11 |
--------------------------------------------------------------------------------
/dataset-snippets/summarize_a_dataframe.py:
--------------------------------------------------------------------------------
1 | # Use snippet 'summarize_a_dataframe' to display summary statistics for a dataframe.
2 | # It assumes snippet 'Setup' has been executed.
3 | # See also https://towardsdatascience.com/exploring-your-data-with-just-1-line-of-python-4b35ce21a82d
4 |
5 |
6 | ## -----[ CHANGE THE DATAFRAME NAME(S) TO MATCH YOURS FROM DATASET BUILDER] -----
7 | YOUR_DATASET_NAME_person_df.loc[:10000,:].profile_report() # Examine up to the first 10,000 rows. Larger
8 | # dataframes can be profiled, but it takes more time.
9 |
--------------------------------------------------------------------------------
/dataset-snippets/add_age_to_demographics.py:
--------------------------------------------------------------------------------
1 | # Use snippet 'add_age_to_demographics' to calculate the age of people in your demographics.
2 | # It assumes the 'Setup' snippet has been executed.
3 | # It also assumes that you got your demographics dataframe from Dataset Builder
4 |
5 | # Note: This snippet calculates current age and does not take into account whether the person is already dead
6 |
7 |
8 | ## -----[ CHANGE THE DATAFRAME NAME(S) `YOUR_DATASET_NAME_person_df` TO MATCH YOURS FROM DATASET BUILDER] -----
9 | YOUR_DATASET_NAME_person_df['age'] = pd.to_datetime('today').year - YOUR_DATASET_NAME_person_df['date_of_birth'].dt.year
10 |
--------------------------------------------------------------------------------
/dataset-snippets/join_dataframes.py:
--------------------------------------------------------------------------------
1 | # Use snippet 'join_dataframes' to join together two dataframes.
2 | # It assumes the 'Setup' snippet has been executed.
3 | #
4 | # In the example below, it joins Demographics '_person_df' and Measurements '_measurement_df' using
5 | # any columns they have in common, which in this case should only be 'person_id'.
6 | #
7 | # See also https://pandas.pydata.org/pandas-docs/version/0.25.1/reference/api/pandas.merge.html
8 |
9 |
10 | ## -----[ CHANGE THE DATAFRAME NAME(S) TO MATCH YOURS FROM DATASET BUILDER] -----
11 | measurement_df = pd.merge(left=YOUR_DATASET_NAME_person_df, right=YOUR_DATASET_NAME_measurement_df, how='inner')
12 |
13 | measurement_df.shape
14 |
--------------------------------------------------------------------------------
/dataset-snippets/add_age_to_demographics.R:
--------------------------------------------------------------------------------
1 | # Use snippet 'add_age_to_demographics' to calculate the age of people in your demographics.
2 | # It assumes the 'Setup' snippet has been executed.
3 | # It also assumes that you got your demographics dataframe from Dataset Builder
4 |
5 | # Note: This snippet calculates current age and does not take into account whether the person is already dead
6 |
7 |
8 | ## -----[ CHANGE THE DATAFRAME NAME(S) `YOUR_DATASET_NAME_person_df` TO MATCH YOURS FROM DATASET BUILDER] -----
9 | YOUR_DATASET_NAME_person_df <- YOUR_DATASET_NAME_person_df %>%
10 | mutate_if(is.list, as.character) %>%
11 | mutate(age = year(today()) - year(YOUR_DATASET_NAME_person_df$date_of_birth))
12 |
--------------------------------------------------------------------------------
/sql-snippets/measurement_of_interest_by_site.ggplot:
--------------------------------------------------------------------------------
1 | # This plot assumes that measurement_of_interest.sql has been run.
2 | options(repr.plot.height = 8, repr.plot.width = 16)
3 |
4 | measurement_of_interest_df %>%
5 | filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
6 | ggplot(aes(x = src_id, y = value_as_number)) +
7 | geom_boxplot() +
8 | stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 4,
9 | position = position_dodge(width = 0.9), vjust = -0.8) +
10 | # scale_y_log10() + # Uncomment if the data looks skewed.
11 | coord_flip() +
12 | ylab(str_glue('{UNIT_NAME}')) +
13 | labs(title = str_glue('All {MEASUREMENT_NAME} measurements, by site'),
14 | caption = 'Source: All Of Us Data')
15 |
--------------------------------------------------------------------------------
/sql-snippets/measurement_of_interest_by_site.plotnine:
--------------------------------------------------------------------------------
1 | # This plot assumes that measurement_of_interest.sql has been run.
2 |
3 | # meas_filter is a column of True and False
4 | meas_filter = measurement_of_interest_df['value_as_number'] < 9999999 # Get rid of nonsensical outliers.
5 | (ggplot(measurement_of_interest_df[meas_filter], aes(x = 'src_id', y = 'value_as_number')) +
6 | geom_boxplot() +
7 | stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
8 | position = position_dodge(width = 0.9), va = 'top') +
9 | # scale_y_log10() + # Uncomment if the data looks skewed.
10 | coord_flip() +
11 | ylab(f'{UNIT_NAME}') +
12 | ggtitle(f'All {MEASUREMENT_NAME} measurements, by site\nSource: All Of Us Data') +
13 | theme(figure_size=(12, 6)))
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Workbench snippets
2 |
3 | ## How to use the workbench snippets
4 |
5 | Please see the welcome page for the [All of Us Researcher Workbench](https://workbench.researchallofus.org/). It has both a tutorial video and several articles in user support documentation.
6 |
7 | ## How to add/update a workbench snippet
8 |
9 | First see [CONTRIBUTING](./CONTRIBUTING.md) for general getting started instructions.
10 |
11 | If you want to add/modify a snippet that uses a dataframe from Dataset Builder as its input, then see [dataset-snippets/README](./dataset-snippets/README.md).
12 |
13 | Otherwise, see the other snippets collections such as
14 |
15 | * [sql-snippets/README](./sql-snippets/README.md)
16 | * [storage-snippets/README](./storage-snippets/README.md)
17 | * [terra-widgets/README](./py/README.md)
18 |
--------------------------------------------------------------------------------
/dataset-snippets/snippets_setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import pandas_profiling
5 | import plotnine
6 | from plotnine import * # Provides a ggplot-like interface to matplotlib.
7 | from IPython.display import display
8 |
9 | ## Plot setup.
10 | theme_set(theme_bw(base_size = 11)) # Default theme for plots.
11 |
12 | def get_boxplot_fun_data(df):
13 | """Returns a data frame with a y position and a label, for use annotating ggplot boxplots.
14 |
15 | Args:
16 |     df: A data frame.
17 | Returns:
18 | A data frame with column y as max and column label as length.
19 | """
20 | d = {'y': max(df), 'label': f'N = {len(df)}'}
21 | return(pd.DataFrame(data=d, index=[0]))
22 |
23 | # NOTE: if you get any errors from this cell, restart your kernel and run it again.
24 |
--------------------------------------------------------------------------------
/dataset-snippets/join_dataframes.R:
--------------------------------------------------------------------------------
1 | # Use snippet 'join_dataframes' to join together two dataframes.
2 | # It assumes the 'Setup' snippet has been executed.
3 | #
4 | # In the example below, it joins Demographics '_person_df' and Measurements '_measurement_df' using
5 | # any columns they have in common, which in this case should only be 'person_id'.
6 | #
7 | # See also https://dplyr.tidyverse.org/reference/join.html and https://r4ds.had.co.nz/relational-data.html#understanding-joins
8 |
9 |
10 | ## -----[ CHANGE THE DATAFRAME NAME(S) TO MATCH YOURS FROM DATASET BUILDER] -----
11 | measurement_df <- inner_join(YOUR_DATASET_NAME_person_df,
12 | YOUR_DATASET_NAME_measurement_df) %>%
13 | mutate_if(is.list, as.character) # Convert column type list as character.
14 |
15 | dim(measurement_df)
16 |
--------------------------------------------------------------------------------
/sql-snippets/measurement_of_interest_by_sex_at_birth.plotnine:
--------------------------------------------------------------------------------
1 | # This plot assumes that measurement_of_interest.sql has been run.
2 |
3 | # meas_filter is a column of True and False
4 | meas_filter = measurement_of_interest_df['value_as_number'] < 9999999 # Get rid of nonsensical outliers.
5 | (ggplot(measurement_of_interest_df[meas_filter], aes(x = 'sex_at_birth', y = 'value_as_number')) +
6 | geom_boxplot() +
7 | stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
8 | position = position_dodge(width = 0.9), va = 'top') +
9 | # scale_y_log10() + # Uncomment if the data looks skewed.
10 | ylab(f'{UNIT_NAME}') +
11 |   ggtitle(f'All {MEASUREMENT_NAME} measurements, by sex_at_birth\nSource: All Of Us Data') +
12 | theme(figure_size=(12, 6), axis_text_x = element_text(angle=25, hjust=1)))
13 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest_by_site.ggplot:
--------------------------------------------------------------------------------
1 | # This plot assumes that most_recent_measurement_of_interest.sql has been run.
2 | options(repr.plot.height = 8, repr.plot.width = 16)
3 |
4 | most_recent_measurement_of_interest_df %>%
5 | filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
6 | ggplot(aes(x = src_id, y = value_as_number)) +
7 | geom_boxplot() +
8 | stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 4,
9 | position = position_dodge(width = 0.9), vjust = -0.8) +
10 | # scale_y_log10() + # Uncomment if the data looks skewed.
11 | coord_flip() +
12 | ylab(str_glue('{UNIT_NAME}')) +
13 | labs(title = str_glue('Most recent {MEASUREMENT_NAME} measurement\nper person, by site'),
14 | caption = 'Source: All Of Us Data')
15 |
--------------------------------------------------------------------------------
/sql-snippets/measurement_of_interest_by_sex_at_birth.ggplot:
--------------------------------------------------------------------------------
1 | # This plot assumes that measurement_of_interest.sql has been run.
2 | options(repr.plot.height = 8, repr.plot.width = 16)
3 |
4 | measurement_of_interest_df %>%
5 | filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
6 | ggplot(aes(x = sex_at_birth, y = value_as_number)) +
7 | geom_boxplot() +
8 | stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 4,
9 | position = position_dodge(width = 0.9), vjust = -0.8) +
10 | # scale_y_log10() + # Uncomment if the data looks skewed.
11 | ylab(str_glue('{UNIT_NAME}')) +
12 | labs(title = str_glue('All {MEASUREMENT_NAME} measurements, by sex_at_birth'),
13 | caption = 'Source: All Of Us Data') +
14 | theme(axis.text.x = element_text(angle=25, hjust=1))
15 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest_by_sex_at_birth.plotnine:
--------------------------------------------------------------------------------
1 | # This plot assumes that most_recent_measurement_of_interest.sql has been run.
2 |
3 | meas_filter = most_recent_measurement_of_interest_df['value_as_number'] < 9999999 # Get rid of nonsensical outliers.
4 | (ggplot(most_recent_measurement_of_interest_df[meas_filter], aes(x = 'sex_at_birth', y = 'value_as_number')) +
5 | geom_boxplot() +
6 | stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
7 | position = position_dodge(width = 0.9), va = 'top') +
8 | # scale_y_log10() + # Uncomment if the data looks skewed.
9 | ylab(f'{UNIT_NAME}') +
10 | ggtitle(f'Most recent {MEASUREMENT_NAME} measurement\nper person, by sex_at_birth\nSource: All Of Us Data') +
11 | theme(figure_size=(12, 6), axis_text_x = element_text(angle=25, hjust=1)))
12 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest_by_site.plotnine:
--------------------------------------------------------------------------------
1 | # This plot assumes that most_recent_measurement_of_interest.sql has been run.
2 |
3 | # meas_filter is a column of True and False
4 | meas_filter = most_recent_measurement_of_interest_df['value_as_number'] < 9999999 # Get rid of nonsensical outliers.
5 | (ggplot(most_recent_measurement_of_interest_df[meas_filter], aes(x = 'src_id', y = 'value_as_number')) +
6 | geom_boxplot() +
7 | stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
8 | position = position_dodge(width = 0.9), va = 'top') +
9 | # scale_y_log10() + # Uncomment if the data looks skewed.
10 | coord_flip() +
11 | ylab(f'{UNIT_NAME}') +
12 | ggtitle(f'Most recent {MEASUREMENT_NAME} measurement\nper person, by site\nSource: All Of Us Data') +
13 | theme(figure_size=(12, 6)))
14 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest_by_sex_at_birth.ggplot:
--------------------------------------------------------------------------------
1 | # This plot assumes that most_recent_measurement_of_interest.sql has been run.
2 | options(repr.plot.height = 8, repr.plot.width = 16)
3 |
4 | most_recent_measurement_of_interest_df %>%
5 | filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
6 | ggplot(aes(x = sex_at_birth, y = value_as_number)) +
7 | geom_boxplot() +
8 | stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 4,
9 | position = position_dodge(width = 0.9), vjust = -0.8) +
10 | # scale_y_log10() + # Uncomment if the data looks skewed.
11 | ylab(str_glue('{UNIT_NAME}')) +
12 | labs(title = str_glue('Most recent {MEASUREMENT_NAME} measurement\nper person, by sex_at_birth'),
13 | caption = 'Source: All Of Us Data') +
14 | theme(axis.text.x = element_text(angle=25, hjust=1))
15 |
--------------------------------------------------------------------------------
/storage-snippets/copy_file_from_workspace_bucket.R:
--------------------------------------------------------------------------------
1 | # This snippet assumes that you run setup first
2 |
3 | # This code copies a file from your Google Bucket into a dataframe
4 |
5 | # replace 'test.csv' with the name of the file in your google bucket (don't delete the quotation marks)
6 | name_of_file_in_bucket <- 'test.csv'
7 |
8 | ########################################################################
9 | ##
10 | ################# DON'T CHANGE FROM HERE ###############################
11 | ##
12 | ########################################################################
13 |
14 | # Get the bucket name
15 | my_bucket <- Sys.getenv('WORKSPACE_BUCKET')
16 |
17 | # Copy the file from the bucket to the current workspace
18 | system(paste0("gsutil cp ", my_bucket, "/data/", name_of_file_in_bucket, " ."), intern=T)
19 |
20 | # Load the file into a dataframe
21 | my_dataframe <- read_csv(name_of_file_in_bucket)
22 | head(my_dataframe)
23 |
--------------------------------------------------------------------------------
/dataset-snippets/snippets_setup.R:
--------------------------------------------------------------------------------
1 | lapply(c('viridis', 'ggthemes', 'skimr'),
2 | function(pkg_name) { if(! pkg_name %in% installed.packages()) { install.packages(pkg_name)} } )
3 |
4 | library(viridis) # A nice color scheme for plots.
5 | library(ggthemes) # Common themes to change the look and feel of plots.
6 | library(scales) # Graphical scales map data to aesthetics in plots.
7 | library(skimr) # Better summaries of data.
8 | library(lubridate) # Date library from the tidyverse.
9 | library(tidyverse) # Data wrangling packages.
10 | library(bigrquery) # Data extraction from Google BigQuery
11 |
12 | ## Plot setup.
13 | theme_set(theme_bw(base_size = 14)) # Default theme for plots.
14 |
15 | #' Returns a data frame with a y position and a label, for use annotating ggplot boxplots.
16 | #'
17 | #' @param d A data frame.
18 | #' @return A data frame with column y as max and column label as length.
19 | get_boxplot_fun_data <- function(df) {
20 | return(data.frame(y = max(df), label = stringr::str_c('N = ', length(df))))
21 | }
22 |
--------------------------------------------------------------------------------
/storage-snippets/copy_file_from_workspace_bucket.py:
--------------------------------------------------------------------------------
1 | # This snippet assumes you run setup first
2 |
3 | # This code copies a file from your Google Bucket and loads it into a dataframe
4 |
5 | # Replace 'test.csv' with THE NAME of the file you're going to download from the bucket (don't delete the quotation marks)
6 | name_of_file_in_bucket = 'test.csv'
7 |
8 | ########################################################################
9 | ##
10 | ################# DON'T CHANGE FROM HERE ###############################
11 | ##
12 | ########################################################################
13 |
14 | # get the bucket name
15 | my_bucket = os.getenv('WORKSPACE_BUCKET')
16 |
17 | # copy csv file from the bucket to the current working space
18 | os.system(f"gsutil cp '{my_bucket}/data/{name_of_file_in_bucket}' .")
19 |
20 | print(f'[INFO] {name_of_file_in_bucket} is successfully downloaded into your working space')
21 | # save dataframe in a csv file in the same workspace as the notebook
22 | my_dataframe = pd.read_csv(name_of_file_in_bucket)
23 | my_dataframe.head()
24 |
--------------------------------------------------------------------------------
/sql-snippets/measurement_of_interest_by_age_and_sex_at_birth.ggplot:
--------------------------------------------------------------------------------
1 | # This plot assumes that measurement_of_interest.sql has been run.
2 | options(repr.plot.height = 20, repr.plot.width = 16)
3 |
4 | measurement_of_interest_df %>%
5 | filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
6 | mutate(age_at_measurement = year(as.period(interval(start = birth_datetime, end = measurement_date)))) %>%
7 | ggplot(aes(x = cut_width(age_at_measurement, width = 10, boundary = 0), y = value_as_number)) +
8 | geom_boxplot() +
9 | stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 4,
10 | position = position_dodge(width = 0.9), vjust = -0.8) +
11 | # scale_y_log10() + # Uncomment if the data looks skewed.
12 | coord_flip() +
13 | facet_wrap(~ sex_at_birth, nrow = length(unique(measurement_of_interest_df$sex_at_birth))) +
14 | xlab('age') +
15 | ylab(str_glue('{UNIT_NAME}')) +
16 | labs(title = str_glue('All {MEASUREMENT_NAME} measurements, by age, faceted by sex_at_birth'),
17 | caption = 'Source: All Of Us Data')
18 |
--------------------------------------------------------------------------------
/storage-snippets/copy_data_to_workspace_bucket.py:
--------------------------------------------------------------------------------
1 | # This snippet assumes you run setup first
2 |
3 | # This code saves your dataframe into a csv file in a "data" folder in Google Bucket
4 |
5 | # Replace df with THE NAME OF YOUR DATAFRAME
6 | my_dataframe = df
7 |
8 | # Replace 'test.csv' with THE NAME of the file you're going to store in the bucket (don't delete the quotation marks)
9 | destination_filename = 'test.csv'
10 |
11 | ########################################################################
12 | ##
13 | ################# DON'T CHANGE FROM HERE ###############################
14 | ##
15 | ########################################################################
16 |
17 | # save dataframe in a csv file in the same workspace as the notebook
18 | my_dataframe.to_csv(destination_filename, index=False)
19 |
20 | # get the bucket name
21 | my_bucket = os.getenv('WORKSPACE_BUCKET')
22 |
23 | # copy csv file to the bucket
24 | args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
25 | output = subprocess.run(args, capture_output=True)
26 |
27 | # print output from gsutil
28 | output.stderr
29 |
--------------------------------------------------------------------------------
/storage-snippets/copy_data_to_workspace_bucket.R:
--------------------------------------------------------------------------------
1 | # This snippet assumes that you run setup first
2 |
3 | # This code saves your dataframe into a csv file in a "data" folder in Google Bucket
4 |
5 | # Replace df with THE NAME OF YOUR DATAFRAME
6 | my_dataframe <- df
7 |
8 | # Replace 'test.csv' with THE NAME of the file you're going to store in the bucket (don't delete the quotation marks)
9 | destination_filename <- 'test.csv'
10 |
11 | ########################################################################
12 | ##
13 | ################# DON'T CHANGE FROM HERE ###############################
14 | ##
15 | ########################################################################
16 |
17 | # store the dataframe in current workspace
18 | write_excel_csv(my_dataframe, destination_filename)
19 |
20 | # Get the bucket name
21 | my_bucket <- Sys.getenv('WORKSPACE_BUCKET')
22 |
23 | # Copy the file from current workspace to the bucket
24 | system(paste0("gsutil cp ./", destination_filename, " ", my_bucket, "/data/"), intern=T)
25 |
26 | # Check if file is in the bucket
27 | system(paste0("gsutil ls ", my_bucket, "/data/*.csv"), intern=T)
28 |
--------------------------------------------------------------------------------
/py/setup.py:
--------------------------------------------------------------------------------
1 | """A setuptools based module for PIP installation of the Terra widgets package."""
2 |
3 | import pathlib
4 | from setuptools import find_packages
5 | from setuptools import setup
6 |
7 | here = pathlib.Path(__file__).parent.resolve()
8 | # Get the requirements from the requirements file
9 | requirements = (here / 'requirements.txt').read_text(encoding='utf-8')
10 | # Get the long description from the README file
11 | long_description = (here / 'README.md').read_text(encoding='utf-8')
12 |
13 | setup(
14 | name='terra-widgets',
15 | version='0.0.1',
16 | license='BSD',
17 |
18 | description='Terra Notebook widgets',
19 | long_description=long_description,
20 | long_description_content_type='text/markdown',
21 |
22 | python_requires='>=3.7',
23 | install_requires=requirements,
24 | packages=find_packages(),
25 |
26 | url='https://github.com/all-of-us/workbench-snippets',
27 | project_urls={
28 | 'Bug Reports': 'https://github.com/all-of-us/workbench-snippets/issues',
29 | 'Source': 'https://github.com/all-of-us/workbench-snippets/blob/main/py',
30 | },
31 | )
32 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest_by_age_and_sex_at_birth.ggplot:
--------------------------------------------------------------------------------
# This plot assumes that most_recent_measurement_of_interest.sql has been run.
# It also expects globals defined in snippets_setup.R: get_boxplot_fun_data,
# MEASUREMENT_NAME, and UNIT_NAME.
options(repr.plot.height = 20, repr.plot.width = 16)

most_recent_measurement_of_interest_df %>%
  filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
  # Age (in whole years) at the time of the measurement, via lubridate.
  mutate(age_at_measurement = year(as.period(interval(start = birth_datetime, end = measurement_date)))) %>%
  # Bin ages into 10-year-wide groups starting at 0.
  ggplot(aes(x = cut_width(age_at_measurement, width = 10, boundary = 0), y = value_as_number)) +
  geom_boxplot() +
  # Annotate each box with its group size, placed at the top of the box
  # (get_boxplot_fun_data returns y = max and label = N).
  stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 4,
               position = position_dodge(width = 0.9), vjust = -0.8) +
  # scale_y_log10() + # Uncomment if the data looks skewed.
  coord_flip() +
  # One row of facets per distinct sex_at_birth value.
  facet_wrap(~ sex_at_birth, nrow = length(unique(most_recent_measurement_of_interest_df$sex_at_birth))) +
  xlab('age') +
  ylab(str_glue('{UNIT_NAME}')) +
  labs(title = str_glue('Most recent {MEASUREMENT_NAME} measurement\nper person, by age, faceted by sex_at_birth'),
       caption = 'Source: All Of Us Data')
18 |
--------------------------------------------------------------------------------
/dataset-snippets/summarize_a_survey_module.R:
--------------------------------------------------------------------------------
1 | # Use snippet 'summarize_a_survey_module' to print a table of participant counts by question in a module
2 | # The snippet assumes that a dataframe containing survey questions and answers already exists
3 |
4 | # Update the next 3 lines
5 |
6 | survey_df <- YOUR_DATASET_NAME_survey_df
7 | module_name <- 'The Basics'
8 | denominator <- NULL
9 |
10 | ####################################################################################
11 | # DON'T CHANGE FROM HERE
12 | ####################################################################################
# Count the number of distinct participants per survey question, optionally
# restricted to one survey module and optionally with a response rate relative
# to `denominator`.
#
# df: dataframe of survey responses with columns survey, question_concept_id,
#     question, and person_id
# module: if non-NULL, a survey module name matched case-insensitively
# denominator: if non-NULL, participant count used to compute a percentage
#     response rate
summarize_a_module <- function(df, module=NULL, denominator=NULL) {
    if (!is.null(module)){
        # Case-insensitive match on the survey module name.
        df <- df %>% filter(tolower(survey) == tolower(module))
    }
    data <- df %>% group_by(survey, question_concept_id, question) %>%
        summarize(n_participant = n_distinct(person_id))
    if (!is.null(denominator)) {
        # Response rate as a formatted percentage string, e.g. '42.5%'.
        data <- data %>% mutate(response_rate = paste0(round(100*n_participant/denominator,2),'%'))
    }
    data
}
24 |
25 | summarize_a_module(survey_df, module_name, denominator)
26 |
27 |
--------------------------------------------------------------------------------
/dataset-snippets/summarize_a_survey_module.py:
--------------------------------------------------------------------------------
1 | # Use snippet 'summarize_a_survey_module' to print a table of participant counts by question in a module
2 | # The snippet assumes that a dataframe containing survey questions and answers already exists
3 |
4 | # Update the next 3 lines
5 | survey_df = YOUR_DATASET_NAME_survey_df
6 | module_name = 'The Basics' # e.g: 'The Basics', 'Lifestyle', 'Overall Health', etc.
7 | denominator = None # e.g: 200000
8 |
9 | ####################################################################################
10 | # DON'T CHANGE FROM HERE
11 | ####################################################################################
12 |
def summarize_a_module(df, module=None, denominator=None):
    """Count distinct participants per survey question.

    Args:
      df: dataframe of survey responses with columns survey,
          question_concept_id, question, and person_id.
      module: optional survey module name, matched case-insensitively.
      denominator: optional participant count; when given, a percentage
          response_rate column is added.
    Returns:
      A dataframe with one row per (survey, question_concept_id, question)
      and an n_participant count of distinct person_ids.
    """
    subset = df
    if module:
        # Case-insensitive match on the survey module name.
        module_mask = subset['survey'].str.lower() == module.lower()
        subset = subset[module_mask].copy()
    participant_counts = (
        subset
        .groupby(['survey', 'question_concept_id', 'question'])['person_id']
        .nunique()
    )
    data = participant_counts.reset_index().rename(columns={'person_id': 'n_participant'})
    if denominator:
        # Percentage of the denominator that answered each question.
        data['response_rate'] = (100 * data['n_participant'] / denominator).round(2)
    return data
22 |
23 | summarize_a_module(df=survey_df, module=module_name, denominator=denominator)
24 |
25 |
--------------------------------------------------------------------------------
/dataset-snippets/measurement_by_sex_at_birth.ggplot:
--------------------------------------------------------------------------------
1 | # Use snippet 'measurement_by_sex_at_birth' to plot joined demographics and measurements dataframes.
2 | # This plot assumes 'measurement_df' was created using snippet 'Basic operations -> join_dataframes' to
3 | # join together demographics and measurements dataframes.
4 | # See also https://r4ds.had.co.nz/data-visualisation.html
5 |
6 |
7 | options(repr.plot.height = 10, repr.plot.width = 16)
8 |
9 | # There could be many different measurements in the dataframe. By default, plot the first one.
10 | measurement_to_plot <- unique(measurement_df$standard_concept_name)[1]
11 |
12 | measurement_df %>%
13 | filter(standard_concept_name == measurement_to_plot) %>%
14 | filter(!unit_concept_name %in% c('No matching concept', 'NULL')) %>%
15 | filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
16 | ggplot(aes(x = sex_at_birth, y = value_as_number)) +
17 | geom_boxplot() +
18 | stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 4,
19 | position = position_dodge(width = 0.9), vjust = -0.8) +
20 | # scale_y_log10() + # Uncomment if the data looks skewed.
21 | facet_wrap(standard_concept_name ~ unit_concept_name, ncol = 2, scales = 'free') +
22 | labs(title = str_glue('Numeric values of measurements, by sex_at_birth'), caption = 'Source: All Of Us Data') +
23 | theme(axis.text.x = element_text(angle=25, hjust=1))
24 |
--------------------------------------------------------------------------------
/sql-snippets/snippets_setup.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 | import pandas as pd
5 | import plotnine
6 | from plotnine import * # Provides a ggplot-like interface to matplotlib.
7 |
8 | # Get the BigQuery curated dataset for the current workspace context.
9 | CDR = os.environ['WORKSPACE_CDR']
10 |
11 | ## Plot setup.
12 | theme_set(theme_bw(base_size = 11)) # Default theme for plots.
13 |
def get_boxplot_fun_data(df):
    """Returns a data frame with a y position and a label, for use annotating ggplot boxplots.

    Args:
      df: A data frame (or series) of the numeric values in one boxplot group.
    Returns:
      A data frame with column y as max and column label as length.
    """
    # Place the label at the top of the box group (y = max) and annotate with
    # the group size. (Docstring previously documented a nonexistent 'd' arg.)
    return pd.DataFrame({'y': max(df), 'label': f'N = {len(df)}'}, index=[0])
24 |
25 | ## ---------------[ CHANGE THESE AS NEEDED] ---------------------------------------
26 | # Set default parameter values so that all snippets run successfully with no edits needed.
27 | COHORT_QUERY = f'SELECT person_id FROM `{CDR}.person`' # Default to all participants.
28 | MEASUREMENT_OF_INTEREST = 'hemoglobin'
29 | # Tip: the next four parameters could be set programmatically using one row from
30 | # the result of measurements_of_interest_summary.sql
31 | MEASUREMENT_CONCEPT_ID = 3004410 # Hemoglobin A1c
32 | UNIT_CONCEPT_ID = 8554 # percent
33 | MEASUREMENT_NAME = ''
34 | UNIT_NAME = ''
35 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2019 All of Us Research Program
2 |
3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
4 |
5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
6 |
7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8 |
9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10 |
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 |
--------------------------------------------------------------------------------
/dataset-snippets/measurement_by_sex_at_birth.plotnine:
--------------------------------------------------------------------------------
1 | # Use snippet 'measurement_by_sex_at_birth' to plot joined demographics and measurements dataframes.
2 | # This plot assumes 'measurement_df' was created using snippet 'Basic operations -> join_dataframes' to
3 | # join together demographics and measurements dataframes.
4 | # See also https://plotnine.readthedocs.io/en/stable/
5 |
6 |
# There could be many different measurements in the dataframe. By default, plot the first one.
measurement_to_plot = measurement_df.standard_concept_name.unique()[0]

# meas_filter is a column of True and False.
# Keep only the chosen measurement and drop rows with a missing or unmapped unit.
meas_filter = ((measurement_df.standard_concept_name == measurement_to_plot)
               & (measurement_df.unit_concept_name != 'No matching concept')
               & (measurement_df.unit_concept_name.notna())
               & (measurement_df.value_as_number < 9999999) # Get rid of nonsensical outliers.
              )

(ggplot(measurement_df[meas_filter], aes(x = 'sex_at_birth', y = 'value_as_number')) +
 geom_boxplot() +
 # Annotate each box with its group size (see get_boxplot_fun_data in the setup snippet).
 stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
              position = position_dodge(width = 0.9), va = 'top') +
 # scale_y_log10() + # Uncomment if the data looks skewed.
 # One facet per (measurement name, unit) pair.
 facet_wrap(('standard_concept_name', 'unit_concept_name'), ncol = 2, scales = 'free') +
 ggtitle(f'Numeric values of measurements, by sex_at_birth\nSource: All Of Us Data') +
 theme(figure_size=(12, 6), panel_spacing = .5, axis_text_x = element_text(angle=25, hjust=1)))
25 |
--------------------------------------------------------------------------------
/sql-snippets/measurement_of_interest_by_age_and_sex_at_birth.plotnine:
--------------------------------------------------------------------------------
# This plot assumes that measurement_of_interest.sql has been run.

# Approximate age in whole years at the time of measurement; 365.24 accounts for leap years.
# tz_localize(None) drops the timezone so the two datetimes can be subtracted.
measurement_of_interest_df['age_at_measurement'] = ((pd.to_datetime(measurement_of_interest_df['measurement_date'])
                                                     - measurement_of_interest_df['birth_datetime'].dt.tz_localize(None)).dt.days)//365.24
# Bin ages into four groups; values outside the bins become NaN and are dropped below.
measurement_of_interest_df['age_group'] = pd.cut(measurement_of_interest_df['age_at_measurement'],
                                                 [-np.inf, 34.5, 49.5, 64.5, np.inf],
                                                 labels=["<35", "35-49", "50-64", "65+"])
# meas_filter is a column of True and False
meas_filter = measurement_of_interest_df['value_as_number'] < 9999999 # Get rid of nonsensical outliers.
age_group_not_null = (measurement_of_interest_df['age_group'].notnull())

(ggplot(measurement_of_interest_df[meas_filter & age_group_not_null], aes(x = 'age_group', y = 'value_as_number')) +
 geom_boxplot() +
 # Annotate each box with its group size (see get_boxplot_fun_data in the setup snippet).
 stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
              position = position_dodge(width = 0.9), va = 'top') +
 # scale_y_log10() + # Uncomment if the data looks skewed.
 coord_flip() +
 # One row of facets per distinct sex_at_birth value.
 facet_wrap('~ sex_at_birth', nrow = len(measurement_of_interest_df.sex_at_birth.unique())) +
 xlab('age') +
 ylab(f'{UNIT_NAME}') +
 ggtitle(f'All {MEASUREMENT_NAME} measurements, by age, faceted by sex_at_birth\nSource: All Of Us Data') +
 theme(figure_size=(12, 12)))
23 |
--------------------------------------------------------------------------------
/sql-snippets/measurement_of_interest.sql:
--------------------------------------------------------------------------------
1 |
2 | -- Return row level data for a measurement for our cohort.
3 | --
4 | -- PARAMETERS:
-- MEASUREMENT_CONCEPT_ID: for example 3004410 # Hemoglobin A1c
-- UNIT_CONCEPT_ID: for example 8554 # percent
-- CDR: the BigQuery dataset holding the curated data repository
-- COHORT_QUERY: a query returning the person_ids of the cohort of interest
7 |
8 | WITH
9 | --
10 | -- Retrieve participants birthdate and sex_at_birth.
11 | --
12 | persons AS (
13 | SELECT
14 | person_id,
15 | birth_datetime,
16 | concept_name AS sex_at_birth
17 | FROM
18 | `{CDR}.person`
19 | LEFT JOIN `{CDR}.concept` ON concept_id = sex_at_birth_concept_id),
20 | --
21 | -- Retrieve the row-level data for our measurement of interest.
22 | --
23 | measurements AS (
24 | SELECT
25 | person_id,
26 | measurement_id,
27 | measurement_concept_id,
28 | measurement_date,
29 | measurement_datetime,
30 | measurement_type_concept_id,
31 | operator_concept_id,
32 | value_as_number,
33 | value_as_concept_id,
34 | unit_concept_id,
35 | range_low,
36 | range_high
37 | FROM
38 | `{CDR}.measurement`
39 | WHERE
40 | measurement_concept_id = {MEASUREMENT_CONCEPT_ID}
41 | AND unit_concept_id = {UNIT_CONCEPT_ID}
42 | AND person_id IN ({COHORT_QUERY}))
43 | --
44 | -- Lastly, JOIN all this data together so that we have the birthdate, sex_at_birth and site for each measurement.
45 | --
46 | SELECT
47 | persons.*,
48 | src_id,
49 | measurements.* EXCEPT(person_id, measurement_id)
50 | FROM
51 | measurements
52 | LEFT JOIN
53 | persons USING (person_id)
54 | LEFT JOIN
55 | `{CDR}.measurement_ext` USING (measurement_id)
56 | ORDER BY
57 | person_id,
58 | measurement_id
59 |
60 |
--------------------------------------------------------------------------------
/dataset-snippets/measurement_by_age_and_sex_at_birth.ggplot:
--------------------------------------------------------------------------------
1 | # Use snippet 'measurement_by_age_and_sex_at_birth' to plot joined demographics and measurements dataframes.
2 | # This plot assumes 'measurement_df' was created using snippet 'Basic operations -> join_dataframes' to
3 | # join together demographics and measurements dataframes.
4 | # See also https://r4ds.had.co.nz/data-visualisation.html
5 |
6 |
options(repr.plot.height = 16, repr.plot.width = 16)

# There could be many different measurements in the dataframe. By default, plot the first one.
measurement_to_plot <- unique(measurement_df$standard_concept_name)[1]

measurement_df %>%
  filter(standard_concept_name == measurement_to_plot) %>%
  # Drop rows with a missing or unmapped unit.
  filter(!unit_concept_name %in% c('No matching concept', 'NULL')) %>%
  filter(sex_at_birth != 'No matching concept') %>%
  filter(value_as_number < 9999999) %>% # Get rid of nonsensical outliers.
  # Age (in whole years) at the time of measurement, via lubridate.
  mutate(age_at_measurement = year(as.period(interval(start = date_of_birth, end = measurement_datetime)))) %>%
  # Bin ages into 5-year-wide groups starting at 0.
  ggplot(aes(x = cut_width(age_at_measurement, width = 5, boundary = 0), y = value_as_number)) +
  geom_boxplot() +
  # Annotate each box with its group size (see get_boxplot_fun_data in the setup snippet).
  stat_summary(fun.data = get_boxplot_fun_data, geom = 'text', size = 2,
               position = position_dodge(width = 0.9), vjust = -0.8) +
  # scale_y_log10() + # Uncomment if the data looks skewed.
  coord_flip() +
  facet_wrap(standard_concept_name + unit_concept_name ~ sex_at_birth, ncol = 2, scales = 'free') +
  xlab('age group') +
  labs(title = str_glue('Numeric values of measurements by age and sex_at_birth'), caption = 'Source: All Of Us Data')
27 |
--------------------------------------------------------------------------------
/py/README.md:
--------------------------------------------------------------------------------
1 | # Terra widgets
2 |
A Python package for ipywidget-based user interfaces for performing tasks within the context of Python Jupyter notebooks running in either the Terra or All of Us workbench environments.
4 |
5 |
6 | ## Create and view HTML snapshots of notebooks
7 |
8 | The workbench takes care of saving the current version of your notebooks for you. But what if you want to know **what your notebook looked like two weeks ago?** Use `display_html_snapshots_widget()` to display a widget which can save snapshots of a notebook for later review, allowing users to track changes to results in notebooks over time. To do this, it:
9 |
10 | 1. Converts the selected notebook to an HTML file (without re-running the notebook).
1. And then copies that HTML file to a subfolder within the same workspace bucket where the notebook file is stored.
12 |
13 | Use this interface to create an HTML snapshot each time you make a major change to your notebook. You can choose notebooks from **any of your workspaces!**
14 |
15 | Implementation details:
16 |
17 | * The user interface controls are implemented using the [ipywidgets](https://ipywidgets.readthedocs.io/en/latest/) Python package.
18 |
19 | * Notebooks are converted from `.ipynb` to `.html` using [nbconvert](https://nbconvert.readthedocs.io/en/latest/).
20 |
* Files are transferred back and forth from the workspace bucket using both:
22 | * [gsutil](https://cloud.google.com/storage/docs/gsutil)
23 | * [Tensorflow GFile](https://www.tensorflow.org/api_docs/python/tf/io/gfile/GFile).
24 |
25 | * The few files of code implementing this interface are preinstalled as a [Python library](https://github.com/all-of-us/workbench-snippets/blob/main/py/setup.py) on the AoU workbench.
26 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest_by_age_and_sex_at_birth.plotnine:
--------------------------------------------------------------------------------
# This plot assumes that most_recent_measurement_of_interest.sql has been run.

# Approximate age in whole years at the time of measurement; 365.24 accounts for leap years.
# tz_localize(None) drops the timezone so the two datetimes can be subtracted.
most_recent_measurement_of_interest_df['age_at_measurement'] = ((pd.to_datetime(most_recent_measurement_of_interest_df['measurement_date'])
                                                                 - most_recent_measurement_of_interest_df['birth_datetime'].dt.tz_localize(None)).dt.days)//365.24
# Bin ages into the same four groups used by the other measurement snippets.
most_recent_measurement_of_interest_df['age_group'] = pd.cut(most_recent_measurement_of_interest_df['age_at_measurement'],
                                                             [-np.inf, 34.5, 49.5, 64.5, np.inf],
                                                             labels=["<35", "35-49", "50-64", "65+"])
# meas_filter is a column of True and False
meas_filter = most_recent_measurement_of_interest_df['value_as_number'] < 9999999 # Get rid of nonsensical outliers.
age_group_not_null = (most_recent_measurement_of_interest_df['age_group'].notnull())

(ggplot(most_recent_measurement_of_interest_df[meas_filter & age_group_not_null], aes(x = 'age_group', y = 'value_as_number')) +
 geom_boxplot() +
 # Annotate each box with its group size (see get_boxplot_fun_data in the setup snippet).
 stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
              position = position_dodge(width = 0.9), va = 'top') +
 # scale_y_log10() + # Uncomment if the data looks skewed.
 coord_flip() +
 # One row of facets per distinct sex_at_birth value.
 facet_wrap('~ sex_at_birth', nrow = len(most_recent_measurement_of_interest_df.sex_at_birth.unique())) +
 xlab('age') +
 ylab(f'{UNIT_NAME}') +
 ggtitle(f'Most recent {MEASUREMENT_NAME} measurement\nper person, by age, faceted by sex_at_birth\nSource: All Of Us Data') +
 theme(figure_size=(12, 6)))
23 |
--------------------------------------------------------------------------------
/sql-snippets/snippets_setup.R:
--------------------------------------------------------------------------------
1 | lapply(c('viridis', 'ggthemes', 'skimr'),
2 | function(pkg_name) { if(! pkg_name %in% installed.packages()) { install.packages(pkg_name)} } )
3 |
4 | library(viridis) # A nice color scheme for plots.
5 | library(ggthemes) # Common themes to change the look and feel of plots.
6 | library(scales) # Graphical scales map data to aesthetics in plots.
7 | library(skimr) # Better summaries of data.
8 | library(lubridate) # Date library from the tidyverse.
9 | library(bigrquery) # BigQuery R client.
10 | library(tidyverse) # Data wrangling packages.
11 |
12 | ## BigQuery setup.
13 | BILLING_PROJECT_ID <- Sys.getenv('GOOGLE_PROJECT')
14 | # Get the BigQuery curated dataset for the current workspace context.
15 | CDR <- Sys.getenv('WORKSPACE_CDR')
16 |
17 | ## Plot setup.
18 | theme_set(theme_bw(base_size = 14)) # Default theme for plots.
19 |
#' Returns a data frame with a y position and a label, for use annotating ggplot boxplots.
#'
#' @param df A data frame (or vector) of the values in one boxplot group.
#' @return A data frame with column y as max and column label as length.
get_boxplot_fun_data <- function(df) {
    return(data.frame(y = max(df), label = stringr::str_c('N = ', length(df))))
}
27 |
28 | ## ---------------[ CHANGE THESE AS NEEDED] ---------------------------------------
29 | # Set default parameter values so that all snippets run successfully with no edits needed.
30 | COHORT_QUERY <- str_glue('SELECT person_id FROM `{CDR}.person`') # Default to all participants.
31 | MEASUREMENT_OF_INTEREST <- 'hemoglobin'
32 | # Tip: the next four parameters could be set programmatically using one row from
33 | # the result of measurements_of_interest_summary.sql
34 | MEASUREMENT_CONCEPT_ID <- 3004410 # Hemoglobin A1c
35 | UNIT_CONCEPT_ID <- 8554 # percent
36 | MEASUREMENT_NAME <- ''
37 | UNIT_NAME <- ''
38 |
--------------------------------------------------------------------------------
/dataset-snippets/summarize_a_survey_by_question_concept_id.py:
--------------------------------------------------------------------------------
# Use snippet 'summarize_a_survey_by_question_concept_id' to output a table and a graph of
2 | # participant counts by response for one question_concept_id
3 | # The snippet assumes that a dataframe containing survey questions and answers already exists
4 | # The snippet also assumes that setup has been run
5 |
6 | # Update the next 3 lines
7 | survey_df = YOUR_DATASET_NAME_survey_df
8 | question_concept_id = 1585940
9 | denominator = None # e.g: 200000
10 |
11 | ####################################################################################
12 | # DON'T CHANGE FROM HERE
13 | ####################################################################################
def summarize_a_question_concept_id(df, question_concept_id, denominator=None):
    """Print the response distribution (table and bar chart) for one survey question.

    Args:
      df: dataframe of survey responses with columns question_concept_id,
          question, answer_concept_id, answer, and person_id.
      question_concept_id: the concept id of the question to summarize.
      denominator: optional participant count; when given, a percentage
          response_rate column is added.
    """
    # Restrict to the requested question.
    df = df.loc[df['question_concept_id'] == question_concept_id].copy()
    # Distinct participant count per answer; answer_concept_id cast to int for display.
    new_df = df.groupby(['answer_concept_id', 'answer'])['person_id']\
        .nunique()\
        .reset_index()\
        .rename(columns=dict(person_id='n_participant'))\
        .assign(answer_concept_id = lambda x: np.int32(x.answer_concept_id))
    if denominator:
        new_df['response_rate'] = round(100*new_df['n_participant']/denominator,2)
    # After the filter above, df is non-empty only when the id was present in the data.
    if question_concept_id in df['question_concept_id'].unique():
        print(f"Distribution of response to {df.loc[df['question_concept_id'] == question_concept_id, 'question'].unique()[0]}")
        # show table
        # NOTE(review): display() and ggplot() are assumed to come from the
        # notebook environment (IPython and plotnine setup) -- confirm setup has run.
        display(new_df)
        # show graph
        display(ggplot(data=new_df) +
                geom_bar(aes(x='answer', y='n_participant'), stat='identity') +
                coord_flip() +
                labs(y="Participant count", x="") +
                theme_bw())
    else:
        print("There is an error with your question_concept_id")
35 |
36 | summarize_a_question_concept_id(survey_df, question_concept_id, denominator)
37 |
38 |
39 |
--------------------------------------------------------------------------------
/dataset-snippets/summarize_a_survey_by_question_concept_id.R:
--------------------------------------------------------------------------------
# Use snippet 'summarize_a_survey_by_question_concept_id' to output a table and a graph of
2 | # participant counts by response for one question_concept_id
3 | # The snippet assumes that a dataframe containing survey questions and answers already exists
4 | # The snippet also assumes that setup has been run
5 |
6 | # Update the next 3 lines
7 | survey_df <- YOUR_DATASET_NAME_survey_df
8 | question_concept_id <- 1585940
9 | denominator <- NULL
10 |
11 | ####################################################################################
12 | # DON'T CHANGE FROM HERE
13 | ####################################################################################
# Print the response distribution (table and graph) for one survey question.
#
# df: dataframe of survey responses with columns question_concept_id, question,
#     answer_concept_id, answer, and person_id
# q_concept_id: the concept id of the question to summarize
# denominator: if non-NULL, participant count used to compute a percentage
#     response rate
summarize_a_question_concept_id <- function(df, q_concept_id, denominator=NULL){
    # Restrict to the requested question; coerce the column to numeric first so
    # the comparison with the numeric q_concept_id behaves consistently.
    df <- df %>%
        mutate(question_concept_id = as.numeric(question_concept_id)) %>%
        filter(question_concept_id == q_concept_id)

    # Distinct participant count per answer; answer_concept_id cast to integer for display.
    new_df <- df %>% group_by(answer_concept_id, answer) %>%
        summarize(n_participant = n_distinct(person_id)) %>%
        ungroup() %>%
        mutate(answer_concept_id = as.integer(answer_concept_id))
    if (!is.null(denominator)) {
        # Response rate as a formatted percentage string, e.g. '42.5%'.
        new_df <- new_df %>% mutate(response_rate = paste0(round(100*n_participant/denominator,2),'%'))
    }

    # After the filter above, df contains rows only when the id was present in the data.
    if (q_concept_id %in% as.vector(unique(df[['question_concept_id']]))){
        question_name <- as.vector(unique(df$question))
        print(str_glue("Distribution of response to {question_name}"))

        # show table
        print(new_df)

        # show graph
        options(repr.plot.width=12, repr.plot.height=6)
        ggplot(new_df) +
            geom_bar(aes(x=answer, y=n_participant), stat='identity') +
            coord_flip() +
            labs(y="Participant count", x="") +
            theme_bw()
    }
    else {
        print("There is an error with your question_concept_id")
    }
}
46 |
47 | summarize_a_question_concept_id(survey_df, question_concept_id, denominator)
48 |
49 |
50 |
--------------------------------------------------------------------------------
/dataset-snippets/measurement_by_age_and_sex_at_birth.plotnine:
--------------------------------------------------------------------------------
1 | # Use snippet 'measurement_by_age_and_sex_at_birth' to plot joined demographics and measurements dataframes.
2 | # This plot assumes 'measurement_df' was created using snippet 'Basic operations -> join_dataframes' to
3 | # join together demographics and measurements dataframes.
4 | # See also https://plotnine.readthedocs.io/en/stable/
5 |
6 |
# There could be many different measurements in the dataframe. By default, plot the first one.
measurement_to_plot = measurement_df.standard_concept_name.unique()[0]

# Create a derived variable for age group.
# Approximate age in whole years at measurement time; 365.24 accounts for leap years.
# tz_localize(None) drops the timezones so the two datetimes can be subtracted.
measurement_df['age_at_measurement'] = ((measurement_df['measurement_datetime'].dt.tz_localize(None)
                                         - measurement_df['date_of_birth'].dt.tz_localize(None)).dt.days)//365.24
measurement_df['age_group'] = pd.cut(measurement_df['age_at_measurement'],
                                     [-np.inf, 34.5, 49.5, 64.5, np.inf],
                                     labels=["<35", "35-49", "50-64", "65+"])

# meas_filter is a column of True and False
meas_filter = ((measurement_df.standard_concept_name == measurement_to_plot)
               & (measurement_df.unit_concept_name != 'No matching concept')
               & (measurement_df.unit_concept_name.notna())
               & (measurement_df.sex_at_birth != 'No matching concept')
               & (measurement_df.value_as_number < 9999999) # Get rid of nonsensical outliers.
               & (measurement_df['age_at_measurement'].notnull()) # Drop rows where age could not be derived.
              )

(ggplot(measurement_df[meas_filter], aes(x = 'age_group', y = 'value_as_number')) +
 geom_boxplot() +
 # Annotate each box with its group size (see get_boxplot_fun_data in the setup snippet).
 stat_summary(fun_data = get_boxplot_fun_data, geom = 'text', size = 10,
              position = position_dodge(width = 0.9), va = 'top') +
 # scale_y_log10() + # Uncomment if the data looks skewed.
 coord_flip() +
 # One facet per ("measurement: unit", sex_at_birth) combination.
 facet_wrap(['standard_concept_name + ": " + unit_concept_name', 'sex_at_birth'], ncol = 2, scales = 'free') +
 xlab('age group') +
 ggtitle('Numeric values of measurements by age and sex_at_birth\nSource: All Of Us Data') +
 theme(figure_size = (12, 12), panel_spacing = .5))
36 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest.sql:
--------------------------------------------------------------------------------
1 |
2 | -- Return row level data for a measurement, limited to only the most recent result per person in our cohort.
3 | --
4 | -- PARAMETERS:
-- MEASUREMENT_CONCEPT_ID: for example 3004410 # Hemoglobin A1c
-- UNIT_CONCEPT_ID: for example 8554 # percent
-- CDR: the BigQuery dataset holding the curated data repository
-- COHORT_QUERY: a query returning the person_ids of the cohort of interest
7 |
8 | WITH
9 | --
10 | -- Retrieve participants birthdate and sex_at_birth.
11 | --
12 | persons AS (
13 | SELECT
14 | person_id,
15 | birth_datetime,
16 | concept_name AS sex_at_birth
17 | FROM
18 | `{CDR}.person`
19 | LEFT JOIN `{CDR}.concept` ON concept_id = sex_at_birth_concept_id),
20 | --
21 | -- Retrieve the row-level data for our measurement of interest. Also compute
22 | -- a new column for the recency rank of the measurement per person, a rank of
23 | -- of 1 being the most recent lab result for that person.
24 | --
25 | measurements AS (
26 | SELECT
27 | person_id,
28 | measurement_id,
29 | measurement_concept_id,
30 | unit_concept_id,
31 | measurement_date,
32 | measurement_datetime,
33 | measurement_type_concept_id,
34 | operator_concept_id,
35 | value_as_number,
36 | value_as_concept_id,
37 | range_low,
38 | range_high,
39 | ROW_NUMBER() OVER (PARTITION BY person_id
40 | ORDER BY measurement_date DESC,
41 | measurement_datetime DESC,
42 | measurement_id DESC) AS recency_rank
43 |
44 | FROM
45 | `{CDR}.measurement`
46 | WHERE
47 | measurement_concept_id = {MEASUREMENT_CONCEPT_ID}
48 | AND unit_concept_id = {UNIT_CONCEPT_ID}
49 | AND person_id IN ({COHORT_QUERY}))
50 | --
51 | -- Lastly, JOIN all this data together so that we have the birthdate, sex_at_birth and site for each
52 | -- measurement, retaining only the most recent result per person.
53 | --
54 | SELECT
55 | persons.*,
56 | src_id,
57 | measurements.* EXCEPT(person_id, measurement_id, recency_rank)
58 | FROM
59 | measurements
60 | LEFT JOIN
61 | persons USING (person_id)
62 | LEFT JOIN
63 | `{CDR}.measurement_ext` USING (measurement_id)
64 | WHERE
65 | recency_rank = 1
66 | ORDER BY
67 | person_id,
68 | measurement_id
69 |
70 |
--------------------------------------------------------------------------------
/sql-snippets/measurements_of_interest_summary.sql:
--------------------------------------------------------------------------------
1 |
2 | -- Compute summary information for our measurements of interest for our cohort.
3 | --
4 | -- PARAMETERS:
-- MEASUREMENT_OF_INTEREST: a case-insensitive string, such as "hemoglobin", to be compared
-- to all measurement concept names to identify those of interest
-- CDR: the BigQuery dataset holding the curated data repository
-- COHORT_QUERY: a query returning the person_ids of the cohort of interest
7 |
8 | WITH
9 | --
10 | -- Use a case insensitive string to search the measurement concept names of those
11 | -- measurements we do have in the measurements table.
12 | --
13 | labs_of_interest AS (
14 | SELECT
15 | measurement_concept_id,
16 | measurement_concept.concept_name AS measurement_name,
17 | unit_concept_id,
18 | unit_concept.concept_name AS unit_name
19 | FROM
20 | `{CDR}.measurement`
21 | LEFT JOIN `{CDR}.concept` AS measurement_concept
22 | ON measurement_concept.concept_id = measurement_concept_id
23 | LEFT JOIN `{CDR}.concept` AS unit_concept
24 | ON unit_concept.concept_id = unit_concept_id
25 | WHERE
26 | REGEXP_CONTAINS(measurement_concept.concept_name, r"(?i){MEASUREMENT_OF_INTEREST}")
27 | GROUP BY
28 | measurement_concept_id,
29 | unit_concept_id,
30 | measurement_concept.concept_name,
31 | unit_concept.concept_name
32 | )
33 | --
34 | -- Summarize the information about each measurement concept of interest that our
35 | -- prior query identified.
36 | --
37 | SELECT
38 | measurement_name AS measurement,
39 | IFNULL(unit_name, "NA") AS unit,
40 | COUNT(1) AS N,
41 | COUNTIF(value_as_number IS NULL
42 | AND (value_as_concept_id IS NULL
43 | OR value_as_concept_id = 0)) AS missing,
44 | MIN(value_as_number) AS min,
45 | MAX(value_as_number) AS max,
46 | AVG(value_as_number) AS avg,
47 | STDDEV(value_as_number) AS stddev,
48 | APPROX_QUANTILES(value_as_number, 4) AS quantiles,
49 | COUNTIF(value_as_number IS NOT NULL) AS num_numeric_values,
50 | COUNTIF(value_as_concept_id IS NOT NULL
51 | AND value_as_concept_id != 0) AS num_concept_values,
52 | COUNTIF(operator_concept_id IS NOT NULL) AS num_operators,
53 | IF(src_id = "PPI/PM", "PPI", "EHR") AS measurement_source,
54 | measurement_concept_id,
55 | unit_concept_id
56 | FROM
57 | `{CDR}.measurement`
58 | INNER JOIN
59 | labs_of_interest USING(measurement_concept_id, unit_concept_id)
60 | LEFT JOIN
61 | `{CDR}.measurement_ext` USING(measurement_id)
62 | WHERE
63 | person_id IN ({COHORT_QUERY})
64 | GROUP BY
65 | measurement_concept_id,
66 | measurement_name,
67 | measurement_source,
68 | unit_concept_id,
69 | unit_name
70 | ORDER BY
71 | N DESC
72 |
73 |
--------------------------------------------------------------------------------
/sql-snippets/measurements_of_interest_summary_test.py:
--------------------------------------------------------------------------------
1 | """Tests for query measurements_of_interest_summary.sql.
2 |
3 | See https://github.com/verilylifesciences/analysis-py-utils for more details
4 | about the testing framework.
5 | """
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from ddt import ddt
12 | import os
13 | import unittest
14 | from verily.bigquery_wrapper import bq_test_case
15 |
16 | SQL_TEMPLATE = "measurements_of_interest_summary.sql"
17 |
18 |
@ddt
class QueryTest(bq_test_case.BQTestCase):
    """Runs measurements_of_interest_summary.sql against mocked CDR tables."""

    @classmethod
    def setUpClass(cls):
        """Set up class: load the SQL template that lives next to this test file."""
        super(QueryTest, cls).setUpClass(use_mocks=False)
        cls.sql_to_test = open(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         SQL_TEMPLATE), "r").read()

    @classmethod
    def create_mock_tables(cls):
        """Create mock tables."""
        # NOTE(review): the STRUCT<...> column-definition lines inside the
        # queries below appear truncated in this copy of the file; the data
        # rows are kept verbatim. Verify column names/types against the
        # original file before editing the mock data.

        # Mock `person` rows — presumably (person_id, birth_datetime,
        # sex-at-birth concept id); TODO confirm against the full file.
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            (1001, '1990-12-31 00:00:00 UTC', 501),
            (1002, '1950-08-01 00:00:00 UTC', 500),
            (1003, '1965-06-30 00:00:00 UTC', 500)
        ])
        """, cls.client.path("person"))

        # Mock `concept` rows — presumably (concept_id, concept_name,
        # vocabulary_id); TODO confirm.
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            (123, 'Hemoglobin', 'LOINC'),
            (456, 'gram per deciliter', 'UCUM')
        ])
        """, cls.client.path("concept"))

        # Mock `measurement_ext` rows — (measurement_id, src_id); 'PPI/PM'
        # identifies program measurements as opposed to EHR sites.
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            (1, 'EHR site1'),
            (2, 'EHR site1'),
            (3, 'EHR site1'),
            (4, 'EHR site2'),
            (5, 'EHR site2'),
            (6, 'PPI/PM')
        ])
        """, cls.client.path("measurement_ext"))

        # Mock `measurement` rows; measurement 6 belongs to person 1003, who
        # is excluded by the cohort query used in test() below.
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            (1, 1001, 123, 123, 456, NULL, 42.0, NULL),
            (2, 1001, 123, 123, 456, NULL, 13.5, NULL),
            (3, 1002, 123, 123, 456, NULL, NULL, 100),
            (4, 1002, 123, 123, 456, NULL, NULL, NULL),
            (5, 1002, 123, 123, 456, 789, 7.2, NULL),
            # This measurement is for someone not in our cohort.
            (6, 1003, 123, 123, 456, NULL, 500, NULL)
        ])
        """, cls.client.path("measurement"))

        # Get the project id and dataset name where the temp tables are stored.
        (project_id, dataset_id, _) = cls.client.parse_table_path(
            cls.client.path("any_temp_table"))
        cls.src_dataset = ".".join([project_id, dataset_id])

    def test(self):
        """Fills in the template and checks the single aggregated result row."""
        sql = self.sql_to_test.format(
            CDR=self.src_dataset,
            COHORT_QUERY="SELECT person_id FROM `{}.person` WHERE person_id <= 1002".format(self.src_dataset),
            MEASUREMENT_OF_INTEREST="hemoglobin")

        expected = [
            # measurement unit N missing min max avg stddev quantiles num_numeric_values num_concept_values num_operators measurement_source measurement_concept_id unit_concept_id
            ("Hemoglobin", "gram per deciliter", 5, 1, 7.2, 42.0, 20.9, 18.542653531789888, [7.2, 7.2, 13.5, 42.0, 42.0], 3, 1, 1, "EHR", 123, 456)
        ]
        self.expect_query_result(query=sql, expected=expected)
104 |
# Allows running this test module directly with `python`.
if __name__ == "__main__":
    unittest.main()
107 |
108 |
--------------------------------------------------------------------------------
/storage-snippets/README.md:
--------------------------------------------------------------------------------
1 | # Cloud Storage snippets
2 |
The snippets in this subdirectory are for workbench users who directly use the workspace bucket.
4 |
5 | # Get setup for GitHub
6 | The instructions are identical for all of the snippets collections. See [CONTRIBUTING](../CONTRIBUTING.md#get-setup-for-github) for the details. If you are new to `git`, please see the example commands there.
7 |
8 | # How to contribute a snippet to the Cloud Storage snippets menu group
9 |
10 | 1. Write your snippet of code in your preferred language, R or Python.
11 | * Try to make your snippet consistent with other snippets in this collection.
12 | * For data wrangling, use [dplyr](https://dplyr.tidyverse.org/) for R and [pandas](https://pandas.pydata.org/) for Python.
13 | * Choose a good prefix and suffix for your snippet file name.
14 | * See the names of the other files for examples.
15 | * The file name helps users decide whether the snippet will be useful to them.
16 | * Put some comments at the top of your snippet to explain its purpose and any assumptions.
17 | 1. After you are happy with your new snippet, port it to the other language or file a GitHub issue asking for help from someone else to do this.
18 | 1. If your snippet has any inputs or parameters, add default values for those parameters to both [`snippets_setup.R`](./snippets_setup.R) and [`snippets_setup.py`](./snippets_setup.py) so that your snippet will work as-is.
1. Update [r_gcs_snippets_menu_config.yml](../build/r_gcs_snippets_menu_config.yml) and [py_gcs_snippets_menu_config.yml](../build/py_gcs_snippets_menu_config.yml) to add your snippet wherever you would like it to be displayed within the menu.
20 | 1. Send your pull request!
21 |
22 | Don't like these conventions? We can change them! This is just a starting point. Keep in mind we'll need to reflect those changes in the auto-generation script described in the next section.
23 |
24 | # Auto-generation of Jupyter 'Snippets Menu' configuration
25 |
26 | The instructions are identical for all of the snippets collections. See [CONTRIBUTING](../CONTRIBUTING.md#auto-generation-of-jupyter-snippets-menu-configuration) for the details.
27 |
28 | # Testing
29 |
30 | ## Snippet tests
31 | To test individual snippets, the best thing to do is copy and paste them into a notebook on the workbench.
32 |
33 | ## Integration 'smoke tests'
34 |
If the smoke tests are run from the workbench environment and there are no obvious bugs in the snippets, they will run start-to-finish without error. (This won't necessarily catch all bugs, but it's a good start.)
36 |
37 | * The script to auto-generate the Jupyter Snippets Menu configuration also emits both `r_gcs_snippets_menu_config_smoke_test.R` and `py_gcs_snippets_menu_config_smoke_test.py`.
38 | * Those scripts each include, respectively, all the R Cloud Storage snippets and all the Python Cloud Storage snippets.
39 | * Additional configuration needed for the smoke tests can be defined in [r_gcs_snippets_menu_config.smoke_test_setup](../build/r_gcs_snippets_menu_config.smoke_test_setup) and [py_gcs_snippets_menu_config.smoke_test_setup](../build/py_gcs_snippets_menu_config.smoke_test_setup), respectively. Update it as needed.
40 |
41 | After opening a notebook in the production workbench environment, upload these smoke test files into Jupyter and then execute the following code from the Jupyter terminal or a Python notebook in the same directory. They will emit _"Smoke test complete!"_ when they have completed successfully.
42 |
43 | To run the R Cloud Storage snippets smoke tests:
44 | ```
45 | %%bash
46 |
47 | Rscript r_gcs_snippets_menu_config_smoke_test.R # There will be output, but there should be no errors.
48 | ```
49 |
50 | To run the Python Cloud Storage snippets smoke tests:
51 | ```
52 | %%bash
53 |
54 | python3 py_gcs_snippets_menu_config_smoke_test.py # There will be output, but there should be no errors.
55 | ```
56 |
57 | # Deployment
58 | The instructions are identical for all of the snippets collections. See [CONTRIBUTING](../CONTRIBUTING.md#deployment) for the details.
59 |
--------------------------------------------------------------------------------
/py/terra_widgets/tests/test_workspace_paths.py:
--------------------------------------------------------------------------------
1 | """Tests for the WorkspacePaths class."""
2 |
3 | import os
4 | import unittest
5 | from terra_widgets.workspace_paths import WorkspacePaths
6 |
7 |
class TestWorkspacePaths(unittest.TestCase):
    """Tests for WorkspacePaths destination formulation and glob handling."""

    def setUp(self):
        # Set OWNER_EMAIL before constructing WorkspacePaths, in case the
        # constructor reads it; the tests below expect this user to appear
        # in the formulated paths.
        os.environ['OWNER_EMAIL'] = 'testUser@somecompany.com'
        self.wp = WorkspacePaths(workspace_bucket='fc-fake-bucket')

    def tearDown(self):
        # Bug fix: os.unsetenv() does not update the os.environ mapping (the
        # documented way to remove a variable is to delete it from os.environ),
        # so the variable used to leak into subsequent tests in this process.
        os.environ.pop('OWNER_EMAIL', None)

    def test_destinations(self):
        """Each notebook maps to an html/comment pair under reports/<user>/<date>/<time>/."""
        notebook_paths = ['gs://fc-fake-bucket/notebooks/test1.ipynb',
                          'gs://fc-fake-bucket/notebooks/test2.ipynb']
        destinations = self.wp.formulate_destination_paths(notebooks=notebook_paths)
        self.assertSetEqual(set(destinations.keys()), set(notebook_paths))
        # Dots are escaped so '.' cannot match an arbitrary character.
        self.assertRegex(
            destinations[notebook_paths[0]].html_file,
            r'gs://fc-fake-bucket/reports/testUser@somecompany\.com/\d{8}/\d{6}/test1\.html')
        self.assertRegex(
            destinations[notebook_paths[0]].comment_file,
            r'gs://fc-fake-bucket/reports/testUser@somecompany\.com/\d{8}/\d{6}/test1\.comment\.txt')
        self.assertRegex(
            destinations[notebook_paths[1]].html_file,
            r'gs://fc-fake-bucket/reports/testUser@somecompany\.com/\d{8}/\d{6}/test2\.html')
        self.assertRegex(
            destinations[notebook_paths[1]].comment_file,
            r'gs://fc-fake-bucket/reports/testUser@somecompany\.com/\d{8}/\d{6}/test2\.comment\.txt')

    def test_fail_destinations(self):
        """A source path outside notebooks/*.ipynb is rejected."""
        with self.assertRaisesRegex(
            ValueError,
            r'"gs://fc-fake-bucket/reports/test@researchallofus.org/20200701/120000/test1.html" does not match "gs://fc-fake-bucket/notebooks/\*\.ipynb"'):
            self.wp.formulate_destination_paths(notebooks=['gs://fc-fake-bucket/reports/test@researchallofus.org/20200701/120000/test1.html'])

    def test_glob_for_aou(self):
        """A complete reports/<user>/<date>/<time> prefix gets '*.html' appended."""
        input_path = 'gs://fc-fake-bucket/reports/test@researchallofus.org/20200701/120000'
        expected = os.path.join(input_path, '*.html')
        self.assertEqual(self.wp.add_html_glob_to_path(input_path), expected)

    def test_glob_for_terra(self):
        """Same as test_glob_for_aou but for a non-AoU (Terra) user email."""
        wp = WorkspacePaths(workspace_bucket='fc-fake-bucket')
        input_path = 'gs://fc-fake-bucket/reports/test@somecompany.com/20200701/120000'
        expected = os.path.join(input_path, '*.html')
        self.assertEqual(wp.add_html_glob_to_path(input_path), expected)

    def test_glob_path_already_complete(self):
        # Pass a complete path to an HTML file when instead we should pass a partial path to it.
        with self.assertRaisesRegex(ValueError, '"gs://fc-fake-bucket/reports/test@researchallofus.org/20200701/120000/test1.html" does not match'):
            self.wp.add_html_glob_to_path('gs://fc-fake-bucket/reports/test@researchallofus.org/20200701/120000/test1.html')

    def test_glob_path_missing_time(self):
        with self.assertRaisesRegex(ValueError, 'does not match'):
            self.wp.add_html_glob_to_path('gs://fc-fake-bucket/reports/test@researchallofus.org/20200701/')

    def test_glob_path_missing_date(self):
        with self.assertRaisesRegex(ValueError, 'does not match'):
            self.wp.add_html_glob_to_path('gs://fc-fake-bucket/reports/test@researchallofus.org/120000/')

    def test_glob_path_missing_user(self):
        with self.assertRaisesRegex(ValueError, 'does not match'):
            self.wp.add_html_glob_to_path('gs://fc-fake-bucket/reports/20200701/120000/')

    def test_glob_path_missing_report_folder(self):
        with self.assertRaisesRegex(ValueError, 'does not match'):
            self.wp.add_html_glob_to_path('gs://fc-fake-bucket/test@researchallofus.org/20200701/120000/')

    def test_glob_wrong_path(self):
        # Pass a path to a notebook when instead we should pass a partial path to a report.
        with self.assertRaisesRegex(ValueError, '"gs://fc-fake-bucket/notebooks/test1.ipynb" does not match'):
            self.wp.add_html_glob_to_path('gs://fc-fake-bucket/notebooks/test1.ipynb')
77 |
78 |
# Allows running this test module directly with `python`.
if __name__ == '__main__':
    unittest.main()
81 |
82 |
--------------------------------------------------------------------------------
/dataset-snippets/README.md:
--------------------------------------------------------------------------------
1 | # Dataset Builder snippets
2 |
The snippets in this subdirectory are for workbench users who use Dataset Builder to retrieve their data.
4 |
5 | # Get setup for GitHub
6 | The instructions are identical for all of the snippets collections. See [CONTRIBUTING](../CONTRIBUTING.md#get-setup-for-github) for the details. If you are new to `git`, please see the example commands there.
7 |
8 | # How to contribute a snippet to the Dataset Builder snippets menu group
9 |
10 | 1. Write your snippet of code in your preferred language, R or Python.
11 | * Try to make your snippet consistent with other snippets in this collection.
12 | * For data wrangling, use [dplyr](https://dplyr.tidyverse.org/) for R and [pandas](https://pandas.pydata.org/) for Python.
13 | * For static plots, use [ggplot2](https://ggplot2.tidyverse.org/) for R and [plotnine](https://plotnine.readthedocs.io/en/stable/) for Python.
14 | * Choose a good prefix and suffix for your snippet file name.
15 | * See the names of the other files for examples.
16 | * The file name helps users decide whether the snippet will be useful to them.
17 | * Put some comments at the top of your snippet to explain its purpose and any assumptions.
18 | 1. After you are happy with your new snippet, port it to the other language or file a GitHub issue asking for help from someone else to do this.
19 | 1. If your snippet has any inputs or parameters other than a dataframe created by Dataset Builder, add default values for those parameters to both [`snippets_setup.R`](./snippets_setup.R) and [`snippets_setup.py`](./snippets_setup.py) so that your snippet will work as-is.
1. Update [r_dataset_snippets_menu_config.yml](../build/r_dataset_snippets_menu_config.yml) and [py_dataset_snippets_menu_config.yml](../build/py_dataset_snippets_menu_config.yml) to add your snippet wherever you would like it to be displayed within the menu.
21 | 1. Send your pull request!
22 |
23 | Don't like these conventions? We can change them! This is just a starting point. Keep in mind we'll need to reflect those changes in the auto-generation script described in the next section.
24 |
25 | # Auto-generation of Jupyter 'Snippets Menu' configuration
26 |
27 | The instructions are identical for all of the snippets collections. See [CONTRIBUTING](../CONTRIBUTING.md#auto-generation-of-jupyter-snippets-menu-configuration) for the details.
28 |
29 | # Testing
30 |
31 | ## Snippet tests
32 | To test individual snippets such as plots, the best thing to do is copy and paste them into a notebook on the workbench.
33 |
34 | ## Integration 'smoke tests'
35 |
If the smoke tests are run from the workbench environment and there are no obvious bugs in the snippets, they will run start-to-finish without error. (This won't necessarily catch all bugs, but it's a good start.)
37 |
38 | * The script to auto-generate the Jupyter Snippets Menu configuration also emits both `r_dataset_snippets_menu_config_smoke_test.R` and `py_dataset_snippets_menu_config_smoke_test.py`.
39 | * Those scripts each include, respectively, all the R Dataset Builder snippets and all the Python Dataset Builder snippets.
40 | * The Dataset from Dataset Builder is defined in [r_dataset_snippets_menu_config.smoke_test_setup](../build/r_dataset_snippets_menu_config.smoke_test_setup) and [py_dataset_snippets_menu_config.smoke_test_setup](../build/py_dataset_snippets_menu_config.smoke_test_setup), respectively. Update it as needed.
41 |
42 | After opening a notebook in the production workbench environment, upload these smoke test files into Jupyter and then execute the following code from the Jupyter terminal or a Python notebook in the same directory. They will emit _"Smoke test complete!"_ when they have completed successfully.
43 |
44 | To run the R Dataset Builder snippets smoke tests:
45 | ```
46 | %%bash
47 |
48 | Rscript r_dataset_snippets_menu_config_smoke_test.R # There will be output, but there should be no errors.
49 | ```
50 |
51 | To run the Python Dataset Builder snippets smoke tests:
52 | ```
53 | %%bash
54 |
55 | # Any notebook '!' commands won't work in this context. Comment them out and run them explicitly first.
56 | perl -i -pe 's/!pip/#!pip/g' py_dataset_snippets_menu_config_smoke_test.py
57 | pip3 install --user pandas_profiling
58 |
59 | python3 py_dataset_snippets_menu_config_smoke_test.py # There will be output, but there should be no errors.
60 | ```
61 |
62 | # Deployment
63 | The instructions are identical for all of the snippets collections. See [CONTRIBUTING](../CONTRIBUTING.md#deployment) for the details.
64 |
--------------------------------------------------------------------------------
/py/terra_widgets/workspace_metadata.py:
--------------------------------------------------------------------------------
1 | """Methods to obtain workspace metadata for the current user in various formats."""
2 |
3 | import json
4 | import os
5 | from typing import Dict
6 |
7 | import firecloud.api as fapi
8 | from IPython import get_ipython
9 |
10 |
class WorkspaceMetadata:
    """Encapsulate all logic for obtaining workspace metadata."""

    # Email domain identifying All of Us researchers.
    AOU_DOMAIN = '@researchallofus.org'
    # Terra access levels that grant edit rights on a workspace.
    EDIT_ACCESS_LEVELS = ['WRITER', 'OWNER', 'PROJECT_OWNER']
    # Default AoU workspaces endpoint, used when RW_API_BASE_URL is unset.
    AOU_PROD_API = 'https://api.workbench.researchallofus.org/v1/workspaces'

    def __init__(self):
        self.user = os.getenv('OWNER_EMAIL')
        self.terra_workspaces = fapi.list_workspaces().json()
        # Robustness fix: os.getenv returns None when OWNER_EMAIL is unset, and
        # None.endswith(...) would raise AttributeError; treat that as non-AoU.
        if self.user and self.user.endswith(self.AOU_DOMAIN):
            aou_api = os.getenv('RW_API_BASE_URL') or self.AOU_PROD_API
            # Use the All of Us API to get the human-readable workspace names. For
            # All of Us workspaces, in the Terra workspace metadata the workspace
            # names are actually the AoU workspace ids.
            aou_response = get_ipython().getoutput(f'''curl -H "Content-Type: application/json" \
                -H "Authorization: Bearer $(gcloud auth print-access-token)" \
                "{aou_api}" 2>/dev/null | jq .''')
            self.aou_workspaces = json.loads(''.join(aou_response))['items']
        else:
            self.aou_workspaces = None

    def get_workspace_name_to_id_mapping(self, include_private_readonly: bool = False, include_all: bool = False) -> Dict[str, str]:
        """Retrieve a mapping of workspace names to ids.

        Args:
          include_private_readonly: whether to include private workspaces for which the current user has only read access.
          include_all: whether to include all workspaces visible to the user.
        Returns:
          A dictionary of workspace names to workspace ids.
        """
        if self.aou_workspaces:
            return {ws['workspace']['name']: ws['workspace']['id'] for ws in self.aou_workspaces
                    if include_all
                    or (include_private_readonly and not ws['workspace']['published'])
                    or ws['accessLevel'] in self.EDIT_ACCESS_LEVELS}
        # Terra-only environment: names and ids come straight from the Terra metadata.
        return {ws['workspace']['name']: ws['workspace']['workspaceId'] for ws in self.terra_workspaces
                if include_all
                or (include_private_readonly and not ws['public'])
                or ws['accessLevel'] in self.EDIT_ACCESS_LEVELS}

    def get_workspace_name_to_bucket_mapping(self, include_private_readonly: bool = False, include_all: bool = False) -> Dict[str, str]:
        """Retrieve a mapping of workspace names to Cloud Storage bucket names.

        Args:
          include_private_readonly: whether to include private workspaces for which the current user has only read access.
          include_all: whether to include all workspaces visible to the user.
        Returns:
          A dictionary of workspace names to workspace bucket names.
        """
        ws_mapping = self.get_workspace_name_to_id_mapping(include_private_readonly=include_private_readonly,
                                                           include_all=include_all)
        if self.aou_workspaces:
            # For All of Us workspaces, in the Terra workspace metadata the
            # workspace names are actually the AoU workspace ids.
            # NOTE(review): on this path the returned keys are therefore AoU
            # workspace ids rather than human-readable names — confirm callers
            # expect that.
            terra_ws_names = ws_mapping.values()
        else:
            terra_ws_names = ws_mapping.keys()
        return {ws['workspace']['name']: ws['workspace']['bucketName'] for ws in self.terra_workspaces
                if ws['workspace']['name'] in terra_ws_names}

    def get_workspace_id_to_bucket_mapping(self, include_private_readonly: bool = False, include_all: bool = False) -> Dict[str, str]:
        """Retrieve a mapping of workspace ids to Cloud Storage bucket names.

        Args:
          include_private_readonly: whether to include private workspaces for which the current user has only read access.
          include_all: whether to include all workspaces visible to the user.
        Returns:
          A dictionary of workspace ids to workspace bucket names.
        """
        ws_mapping = self.get_workspace_name_to_id_mapping(include_private_readonly=include_private_readonly,
                                                           include_all=include_all)
        if self.aou_workspaces:
            # For All of Us workspaces, in the Terra workspace metadata the
            # workspace names are actually the AoU workspace ids.
            terra_metadata_key = 'name'
        else:
            terra_metadata_key = 'workspaceId'
        return {ws['workspace'][terra_metadata_key]: ws['workspace']['bucketName'] for ws in self.terra_workspaces
                if ws['workspace'][terra_metadata_key] in ws_mapping.values()}
93 |
--------------------------------------------------------------------------------
/sql-snippets/most_recent_measurement_of_interest_test.py:
--------------------------------------------------------------------------------
1 | """Tests for query most_recent_measurement_of_interest.sql.
2 |
3 | See https://github.com/verilylifesciences/analysis-py-utils for more details
4 | about the testing framework.
5 | """
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from datetime import date
12 | from datetime import datetime
13 | from dateutil import tz
14 | from ddt import ddt
15 | import os
16 | import unittest
17 | from verily.bigquery_wrapper import bq_test_case
18 |
# Name of the SQL template file under test; it lives next to this test file.
SQL_TEMPLATE = "most_recent_measurement_of_interest.sql"
20 |
21 |
@ddt
class QueryTest(bq_test_case.BQTestCase):
    """Runs most_recent_measurement_of_interest.sql against mocked CDR tables."""

    @classmethod
    def setUpClass(cls):
        """Set up class: load the SQL template that lives next to this test file."""
        super(QueryTest, cls).setUpClass(use_mocks=False)
        cls.sql_to_test = open(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         SQL_TEMPLATE), "r").read()

    @classmethod
    def create_mock_tables(cls):
        """Create mock tables."""
        # NOTE(review): the STRUCT<...> column-definition lines inside the
        # queries below appear truncated in this copy of the file; the data
        # rows are kept verbatim. Verify column names/types against the
        # original file before editing the mock data.

        # Mock `person` rows — presumably (person_id, birth_datetime,
        # sex-at-birth concept id); TODO confirm against the full file.
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            (1001, '1990-12-31 00:00:00 UTC', 501),
            (1002, '1950-08-01 00:00:00 UTC', 500),
            (1003, '1965-06-30 00:00:00 UTC', 500)
        ])
        """, cls.client.path("person"))

        # Mock `concept` rows — presumably (concept_id, concept_name).
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            ( 0, 'No matching concept'),
            (123, 'Hemoglobin'),
            (456, 'gram per deciliter'),
            (500, 'FEMALE'),
            (501, 'MALE')
        ])
        """, cls.client.path("concept"))

        # Mock `measurement_ext` rows — (measurement_id, src_id).
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            (1, 'EHR site1'),
            (2, 'EHR site1'),
            (3, 'PPI/PM'),
            (4, 'EHR site2'),
            (5, 'EHR site2'),
            (6, 'EHR site2')
        ])
        """, cls.client.path("measurement_ext"))

        # Mock `measurement` rows; rows 2 and 3 share the most recent date for
        # person 1001 (different times), and measurement 6 belongs to person
        # 1003, who is excluded by the cohort query used in test() below.
        cls.client.create_table_from_query("""
        SELECT * FROM UNNEST([
          STRUCT
            (1, 1001, 123, 456, NULL, '2005-12-31', '2005-12-31 10:30:00 UTC', NULL, 42.0, NULL, 0, 999),
            (2, 1001, 123, 456, NULL, '2007-09-11', '2007-09-11 08:00:00 UTC', NULL, 13.5, NULL, 0, 999),
            (3, 1001, 123, 456, NULL, '2007-09-11', '2007-09-11 20:59:00 UTC', NULL, NULL, 100, 0, 999),
            (4, 1002, 123, 456, NULL, '2008-02-10', '2008-02-10 23:30:00 UTC', NULL, NULL, NULL, 0, 999),
            (5, 1002, 123, 456, 789, '2008-02-10', '2008-02-10 23:30:00 UTC', NULL, 7.2, NULL, 0, 999),
            # This measurement is for someone not in our cohort.
            (6, 1003, 123, 456, 789, '2010-01-01', '2010-10-01 23:30:00 UTC', NULL, 500, NULL, 0, 999)
        ])
        """, cls.client.path("measurement"))

        # Get the project id and dataset name where the temp tables are stored.
        (project_id, dataset_id, _) = cls.client.parse_table_path(
            cls.client.path("any_temp_table"))
        cls.src_dataset = ".".join([project_id, dataset_id])

    def test(self):
        """Fills in the template and checks the most recent row per person in the cohort."""
        sql = self.sql_to_test.format(
            CDR=self.src_dataset,
            COHORT_QUERY="SELECT person_id FROM `{}.person` WHERE person_id <= 1002".format(self.src_dataset),
            MEASUREMENT_CONCEPT_ID=123,
            UNIT_CONCEPT_ID=456)

        expected = [
            # person_id birth_datetime sex_at_birth src_id measurement_concept_id unit_concept_id measurement_date measurement_datetime measurement_type_concept_id operator_concept_id value_as_number value_as_concept_id range_low range_high
            (1001, datetime(1990, 12, 31, 0, 0, tzinfo=tz.gettz("UTC")), "MALE", "PPI/PM", 123, 456, date(2007, 9, 11), datetime(2007, 9, 11, 20, 59, tzinfo=tz.gettz("UTC")), None, None, None, 100, 0, 999),
            (1002, datetime(1950, 8, 1, 0, 0, tzinfo=tz.gettz("UTC")), "FEMALE", "EHR site2", 123, 456, date(2008, 2, 10), datetime(2008, 2, 10, 23, 30, tzinfo=tz.gettz("UTC")), None, 789, 7.2, None, 0, 999)
        ]
        self.expect_query_result(query=sql, expected=expected)
115 |
# Allows running this test module directly with `python`.
if __name__ == "__main__":
    unittest.main()
118 |
119 |
--------------------------------------------------------------------------------
/py/terra_widgets/workspace_paths.py:
--------------------------------------------------------------------------------
1 | """Methods to obtains paths to files within the workspace bucket."""
2 |
3 | import datetime
4 | import fnmatch
5 | import os
6 | from typing import Dict
7 | from typing import List
8 | from typing import NamedTuple
9 |
10 |
class WorkspaceDestination(NamedTuple):
    """Destination paths for one notebook: its HTML snapshot and its comment file."""
    html_file: str
    comment_file: str
12 |
13 |
14 | class WorkspacePaths:
15 | """Encapsulate all logic for manipulating workspace paths.
16 |
17 | Paths are of the form:
18 | gs:///reports///
148 | '''),
149 | workspace_chooser, notebook_chooser, commenter, submit_button],
150 | layout=widgets.Layout(width='auto', border='solid 1px grey'))
151 |
152 |
def create_view_files_widget(ws_names2id: Dict[str, str], ws_paths: Dict[str, WorkspacePaths], output):
    """Create an ipywidget UI to view HTML snapshots and their associated comment files."""
    # NOTE(review): this copy of the function appears truncated (parts of the
    # handler bodies and the return statement are missing); verify against the
    # original file before editing.
    # Cascading dropdowns: workspace -> user -> date -> time -> file.
    workspace_chooser = widgets.Dropdown(
        options=ws_names2id,
        value=None,
        description='Choose the workspace:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='900px')
    )
    user_chooser = widgets.Dropdown(
        options=[],
        value=None,
        description='Choose the user:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='900px')
    )
    date_chooser = widgets.Dropdown(
        options=[],
        value=None,
        description='Choose the date:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='900px')
    )
    time_chooser = widgets.Dropdown(
        options=[],
        value=None,
        description='Choose the time:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='900px')
    )
    file_chooser = widgets.Dropdown(
        options=[],
        value=None,
        description='Choose the file:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='900px')
    )
    view_comment_button = widgets.Button(
        description='View the comment for the HTML snapshot',
        disabled=False,
        button_style='success',
        layout=widgets.Layout(width='300px'),
        tooltip='Click the button to view the comment associated with the HTML snapshot of the notebook.'
    )
    view_html_button = widgets.Button(
        description='View the HTML snapshot',
        disabled=False,
        button_style='success',
        layout=widgets.Layout(width='250px'),
        tooltip='Click the button to view the HTML snapshot of the notebook.'
    )

    # Button handler: show the comment for the currently selected snapshot.
    def on_view_comment_button_clicked(_):
        with output:
            output.clear_output()
            if not file_chooser.value:
                display(HTML('''
No comment files found for HTML snapshots in this workspace.
Use the dropdowns to select the workspace, user, date, time, and particular HTML snapshot.
Then click on the 'view' buttons to see either the comment for the snapshot or the actual snapshot.

'''),
workspace_chooser, user_chooser, date_chooser, time_chooser, file_chooser,
widgets.HBox([view_comment_button, view_html_button])],
layout=widgets.Layout(width='auto', border='solid 1px grey'))
278 |
279 |
def create_view_all_comments_widget(ws_names2id: Dict[str, str], ws_paths: Dict[str, WorkspacePaths], output):
    """Create an ipywidget UI to display the contents of all comment files within a particular workspace."""
    # NOTE(review): this copy of the function appears truncated (part of the
    # handler body and the return statement are missing); verify against the
    # original file before editing.
    workspace_chooser = widgets.Dropdown(
        options=ws_names2id,
        value=None,
        description='Choose a workspace to view:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='900px')
    )

    # Dropdown handler: glob and display all comment files in the chosen workspace.
    def on_choose_workspace(changed):
        with output:
            output.clear_output()
            workspace_paths = ws_paths[changed['new']]
            try:
                comment_files = tf.io.gfile.glob(pattern=workspace_paths.get_comment_file_glob())
            except tf.errors.PermissionDeniedError as e:
                # Reverse-lookup the human-readable workspace name for the message.
                target_workspace = [name for name, id in ws_names2id.items() if id == changed['new']]
                display(HTML(f'''
Warning: Unable to view HTML snapshots in workspace {target_workspace} from this workspace.

{e.message}

'''))
                return
            if not comment_files:
                display(HTML('''
No comment files found for HTML snapshots in this workspace.
Use the dropdown to choose a workspace. Then this will display the contents of all comment files for the selected workspace.
The user, date, time, and notebook name are shown in the left column. The comment is shown in the right column.

'''),
workspace_chooser],
layout=widgets.Layout(width='auto', border='solid 1px grey'))
335 |
336 |
def display_html_snapshots_widget():
    """Create an ipywidget UI encapsulating all three UIs related to HTML snapshots."""
    if not get_ipython():
        # Bug fix: corrected grammar in the user-facing message
        # ('cannot be display' -> 'cannot be displayed').
        print('The HTML snapshot widget cannot be displayed in environments other than IPython.')
        return

    # Configure notebook display preferences to better suit this UI. These display settings
    # will be in effect for all cells in the notebook run after this one is run.
    pd.set_option('display.max_colwidth', None)
    pd.set_option('display.max_rows', None)
    get_ipython().run_cell_magic(
        'javascript',
        '',
        '''// Display cell outputs to full height (no vertical scroll bar)
        IPython.OutputArea.auto_scroll_threshold = 9999;''')

    # Retrieve the workspace metadata for the current user and environment.
    ws_meta = WorkspaceMetadata()
    # The 'Create' tab only offers workspaces the user can write to; the two
    # 'View' tabs also include private workspaces with read-only access.
    workspace_names2id = collections.OrderedDict(sorted(
        ws_meta.get_workspace_name_to_id_mapping().items()))
    workspace_names2id_include_readonly = collections.OrderedDict(sorted(
        ws_meta.get_workspace_name_to_id_mapping(include_private_readonly=True).items()))
    workspace_ids2bucket_include_readonly = ws_meta.get_workspace_id_to_bucket_mapping(include_private_readonly=True)
    workspace_paths = {k: WorkspacePaths(workspace_bucket=v)
                       for k, v in workspace_ids2bucket_include_readonly.items()}

    # All three tabs share a single output area so only one result is shown at a time.
    ui_output = widgets.Output()

    ui_tabs = widgets.Tab()
    ui_tabs.children = [create_html_snapshot_widget(ws_names2id=workspace_names2id,
                                                    ws_paths=workspace_paths,
                                                    output=ui_output),
                        create_view_files_widget(ws_names2id=workspace_names2id_include_readonly,
                                                 ws_paths=workspace_paths,
                                                 output=ui_output),
                        create_view_all_comments_widget(ws_names2id=workspace_names2id_include_readonly,
                                                        ws_paths=workspace_paths,
                                                        output=ui_output)]
    ui_tabs.set_title(title='Create', index=0)
    ui_tabs.set_title(title='View one', index=1)
    ui_tabs.set_title(title='View all', index=2)

    display(ui_tabs, ui_output)
380 |
--------------------------------------------------------------------------------