├── .gitignore
├── .dockerignore
├── Dockerfile
├── images
    └── pr.png
├── prebuild.Dockerfile
├── Pipfile
├── bootstrap.sh
├── LICENSE
├── action.yml
├── .github
    └── workflows
    │   └── test_action.yaml
├── wandb_get_runs.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | .env


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | .env
3 | Pipfile*
4 | bootstrap.sh


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM hamelsmu/wandb-action
2 | ENTRYPOINT ["python",  "/wandb_get_runs.py"]
3 | 


--------------------------------------------------------------------------------
/images/pr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/machine-learning-apps/wandb-action/HEAD/images/pr.png


--------------------------------------------------------------------------------
/prebuild.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.7.4
2 | 
3 | RUN pip install wandb tabulate pandas
4 | COPY wandb_get_runs.py /wandb_get_runs.py
5 | RUN  chmod u+x /wandb_get_runs.py
6 | 
7 | ENTRYPOINT ["python",  "/wandb_get_runs.py"]
8 | 


--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | wandb = "*"
10 | tabulate = "*"
11 | pandas = "*"
12 | [requires]
13 | python_version = "3.7"
14 | 


--------------------------------------------------------------------------------
/bootstrap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | docker build -t hamelsmu/wandb .
 3 | # Sample inputs for a local run; the action script reads them as INPUT_* environment variables.
 4 | INPUT_PROJECT_NAME="hamelsmu/test-wandb-action"
 5 | GITHUB_WORKSPACE="/data"
 6 | GITHUB_SHA="testsha1234"
 7 | INPUT_DISPLAY_METRICS="['acc', 'loss', 'val_acc', 'val_loss']"
 8 | INPUT_DISPLAY_CONFIG_VARS="['secondary_sha']"
 9 | INPUT_BASELINE_TAGS="['baseline']"
10 | INPUT_DEBUG="y"
11 | 
12 | # INPUT_WANDB_API_KEY must already be exported in the calling shell (wandb_get_runs.py reads that name).
13 | docker run \
14 | -e INPUT_FILTER_GITHUB_SHA="$GITHUB_SHA" \
15 | -e INPUT_FILTER_SECONDARY_SHA="" \
16 | -e INPUT_PROJECT_NAME="$INPUT_PROJECT_NAME" \
17 | -e INPUT_BASELINE_TAGS="$INPUT_BASELINE_TAGS" \
18 | -e INPUT_DISPLAY_METRICS="$INPUT_DISPLAY_METRICS" \
19 | -e INPUT_DISPLAY_CONFIG_VARS="$INPUT_DISPLAY_CONFIG_VARS" \
20 | -e INPUT_WANDB_API_KEY="$INPUT_WANDB_API_KEY" \
21 | -e GITHUB_WORKSPACE="$GITHUB_WORKSPACE" \
22 | -e INPUT_DEBUG="$INPUT_DEBUG" \
23 | -v "${PWD}:/data/" \
24 | hamelsmu/wandb


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 ML Apps
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/action.yml:
--------------------------------------------------------------------------------
 1 | name: 'Weights & Biases'
 2 | description: Get Runs From Weights & Biases
 3 | author: Hamel Husain
 4 | inputs:
 5 |   WANDB_API_KEY:
 6 |     description: your W&B api key.
 7 |     required: true
 8 |   PROJECT_NAME:
 9 |     description: The entity/project name associated with your wandb project.  Example - 'github/predict-issue-labels'
10 |     required: true
11 |   RUN_ID:
12 |     description: the run id, which can be found in the url https://app.wandb.ai/{entity_name}/{project_name}/runs/{run_ID}.  When supplying this input, FILTER_GITHUB_SHA and FILTER_SECONDARY_SHA are ignored and only the run corresponding to this id (along with any baselines corresponding to the input BASELINE_TAGS) are returned.
13 |     required: false
14 |   FILTER_GITHUB_SHA:
15 |     description: The git SHA that you want to filter runs by.  This assumes you have logged a configuration variable named 'github_sha' to your runs. A common usage pattern is to supply the built-in environment variable $GITHUB_SHA, to get the commit SHA that triggered the workflow.  Note that this argument is ignored if RUN_ID is specified.
16 |     required: false
17 |   FILTER_SECONDARY_SHA:
18 |     description: This is an optional field you can filter your runs by.  This assumes you have logged a configuration variable named 'secondary_sha' to your model runs.  You might use this field for data versioning.  Note that this argument is ignored if RUN_ID is specified.
19 |     required: false
20 |     default: ""
21 |   BASELINE_TAGS:
22 |     description: A list of tags that correspond to runs you want to retrieve in addition to those that correspond to the FILTER_GITHUB_SHA.  You would typically use this field to obtain baseline runs to compare your current runs against.  Example - "['baseline']"
23 |     required: false
24 |     default: "[]"
25 |   DISPLAY_METRICS:
26 |     description: A list of summary metrics you want to retain for the csv file that is written to the actions environment.  Example - "['acc', 'loss', 'val_acc', 'val_loss']"
27 |     required: false
28 |     default: "[]"
29 |   DISPLAY_CONFIG_VARS:
30 |     description: A list of configuration variables you want to retain for the csv file written to the actions environment.  Example - "['learning_rate', 'num_layers']"
31 |     required: false
32 |     default: "[]"
33 |   DEBUG:
34 |     description: Setting this variable to any value will turn debug mode on.
35 |     required: false
36 |     default: ""
37 | outputs:
38 |   BOOL_COMPLETE:
39 |     description: True if there is at least 1 finished run and no runs that have a state of 'running' else False
40 |   BOOL_SINGLE_RUN:
41 |     description: True if there is only 1 run returned from the query else False
42 |   NUM_FINISHED:
43 |     description: The number of non-baseline runs with a state of 'finished'
44 |   NUM_RUNNING:
45 |     description: The number of non-baseline runs with a state of 'running'
46 |   NUM_CRASHED:
47 |     description: The number of non-baseline runs with a state of 'crashed'
48 |   NUM_ABORTED:
49 |     description: The number of non-baseline runs with a state of 'aborted'
50 |   NUM_BASELINES:
51 |     description: The number of baseline runs returned.
52 | branding:
53 |   color: 'yellow'
54 |   icon: 'bar-chart-2'
55 | runs:
56 |   using: 'docker'
57 |   image: 'Dockerfile'
58 | 


--------------------------------------------------------------------------------
/.github/workflows/test_action.yaml:
--------------------------------------------------------------------------------
  1 | name: Tests
  2 | on: [push]
  3 | 
  4 | jobs:
  5 |   with-sha:
  6 |     needs: pre-build-container
  7 |     runs-on: ubuntu-latest
  8 |     steps:
  9 |       - name: Get Runs Using SHA
 10 |         uses: machine-learning-apps/wandb-action@master
 11 |         id: wandb1
 12 |         with:
 13 |           GITHUB_SHA: 'testsha1234'
 14 |           PROJECT_NAME: 'hamelsmu/test-wandb-action'
 15 |           FILTER_GITHUB_SHA: "testsha1234"
 16 |           BASELINE_TAGS: "['baseline', 'reference']"
 17 |           DISPLAY_METRICS: "['acc', 'loss', 'val_acc', 'val_loss']"
 18 |           DISPLAY_CONFIG_VARS: "['secondary_sha']"
 19 |           WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
 20 |           DEBUG: "true"
 21 |       - name: test outputs
 22 |         run: |
 23 |           python -c "assert '${NUM_FINISHED}' == '3'"
 24 |           python -c "assert '${NUM_BASELINES}' == '2'"
 25 |           python -c "assert '${BOOL_COMPLETE}' == 'True'"
 26 |           python -c "assert '${NUM_CRASHED}' == '0'"
 27 |           python -c "assert '${NUM_ABORTED}' == '0'"
 28 |           python -c "assert '${NUM_RUNNING}' == '0'"
 29 |         env:
 30 |           BOOL_COMPLETE: ${{ steps.wandb1.outputs.BOOL_COMPLETE }}
 31 |           BOOL_SINGLE_RUN: ${{ steps.wandb1.outputs.BOOL_SINGLE_RUN }}
 32 |           NUM_FINISHED: ${{ steps.wandb1.outputs.NUM_FINISHED }}
 33 |           NUM_RUNNING: ${{ steps.wandb1.outputs.NUM_RUNNING }}
 34 |           NUM_CRASHED: ${{ steps.wandb1.outputs.NUM_CRASHED }}
 35 |           NUM_ABORTED: ${{ steps.wandb1.outputs.NUM_ABORTED }}
 36 |           NUM_BASELINES: ${{ steps.wandb1.outputs.NUM_BASELINES }}
 37 |   with-sha-without-config:
 38 |     needs: pre-build-container
 39 |     runs-on: ubuntu-latest
 40 |     steps:
 41 |       - name: Get Runs Using SHA
 42 |         uses: machine-learning-apps/wandb-action@master
 43 |         id: wandb1
 44 |         with:
 45 |           GITHUB_SHA: 'testsha1234'
 46 |           PROJECT_NAME: 'hamelsmu/test-wandb-action'
 47 |           FILTER_GITHUB_SHA: "testsha1234"
 48 |           BASELINE_TAGS: "['baseline', 'reference']"
 49 |           DISPLAY_METRICS: "['acc', 'loss', 'val_acc', 'val_loss']"
 50 |           WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
 51 |           DEBUG: "true"
 52 |       - name: test outputs
 53 |         run: |
 54 |           python -c "assert '${NUM_FINISHED}' == '3'"
 55 |           python -c "assert '${NUM_BASELINES}' == '2'"
 56 |           python -c "assert '${BOOL_COMPLETE}' == 'True'"
 57 |           python -c "assert '${NUM_CRASHED}' == '0'"
 58 |           python -c "assert '${NUM_ABORTED}' == '0'"
 59 |           python -c "assert '${NUM_RUNNING}' == '0'"
 60 |         env:
 61 |           BOOL_COMPLETE: ${{ steps.wandb1.outputs.BOOL_COMPLETE }}
 62 |           BOOL_SINGLE_RUN: ${{ steps.wandb1.outputs.BOOL_SINGLE_RUN }}
 63 |           NUM_FINISHED: ${{ steps.wandb1.outputs.NUM_FINISHED }}
 64 |           NUM_RUNNING: ${{ steps.wandb1.outputs.NUM_RUNNING }}
 65 |           NUM_CRASHED: ${{ steps.wandb1.outputs.NUM_CRASHED }}
 66 |           NUM_ABORTED: ${{ steps.wandb1.outputs.NUM_ABORTED }}
 67 |           NUM_BASELINES: ${{ steps.wandb1.outputs.NUM_BASELINES }}
 68 |           
 69 |   with-run-id:
 70 |     needs: pre-build-container
 71 |     runs-on: ubuntu-latest
 72 |     steps:
 73 |       - name: Get Runs Using SHA
 74 |         uses: machine-learning-apps/wandb-action@master
 75 |         id: wandb2
 76 |         with:
 77 |           RUN_ID: 'k271zzd3'
 78 |           PROJECT_NAME: 'hamelsmu/test-wandb-action'
 79 |           FILTER_GITHUB_SHA: "testsha1234"
 80 |           BASELINE_TAGS: "['baseline', 'reference']"
 81 |           DISPLAY_METRICS: "['acc', 'loss', 'val_acc', 'val_loss']"
 82 |           DISPLAY_CONFIG_VARS: "['secondary_sha']"
 83 |           WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
 84 |           DEBUG: "true"
 85 |       - name: test outputs
 86 |         run: |
 87 |           python -c "assert '${NUM_FINISHED}' == '1'"
 88 |           python -c "assert '${NUM_BASELINES}' == '1'"
 89 |           python -c "assert '${BOOL_COMPLETE}' == 'True'"
 90 |           python -c "assert '${NUM_CRASHED}' == '0'"
 91 |           python -c "assert '${NUM_ABORTED}' == '0'"
 92 |           python -c "assert '${NUM_RUNNING}' == '0'"
 93 |         env:
 94 |           BOOL_COMPLETE: ${{ steps.wandb2.outputs.BOOL_COMPLETE }}
 95 |           BOOL_SINGLE_RUN: ${{ steps.wandb2.outputs.BOOL_SINGLE_RUN }}
 96 |           NUM_FINISHED: ${{ steps.wandb2.outputs.NUM_FINISHED }}
 97 |           NUM_RUNNING: ${{ steps.wandb2.outputs.NUM_RUNNING }}
 98 |           NUM_CRASHED: ${{ steps.wandb2.outputs.NUM_CRASHED }}
 99 |           NUM_ABORTED: ${{ steps.wandb2.outputs.NUM_ABORTED }}
100 |           NUM_BASELINES: ${{ steps.wandb2.outputs.NUM_BASELINES }}
101 |   with-secondary-sha:
102 |     needs: pre-build-container
103 |     runs-on: ubuntu-latest
104 |     steps:
105 |       - name: Get Runs Using SHA
106 |         uses: machine-learning-apps/wandb-action@master
107 |         id: wandb3
108 |         with:
109 |           PROJECT_NAME: 'hamelsmu/test-wandb-action'
110 |           FILTER_GITHUB_SHA: "testsha5678"
111 |           FILTER_SECONDARY_SHA: "testsecondarysha1234"
112 |           BASELINE_TAGS: "['reference']"
113 |           DISPLAY_METRICS: "['acc', 'loss', 'val_acc', 'val_loss']"
114 |           DISPLAY_CONFIG_VARS: "['secondary_sha']"
115 |           WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
116 |           DEBUG: "true"
117 |       - name: test outputs
118 |         run: |
119 |           python -c "assert '${NUM_FINISHED}' == '1'"
120 |           python -c "assert '${NUM_BASELINES}' == '1'"
121 |           python -c "assert '${BOOL_COMPLETE}' == 'True'"
122 |           python -c "assert '${NUM_CRASHED}' == '0'"
123 |           python -c "assert '${NUM_ABORTED}' == '0'"
124 |           python -c "assert '${NUM_RUNNING}' == '0'"
125 |         env:
126 |           BOOL_COMPLETE: ${{ steps.wandb3.outputs.BOOL_COMPLETE }}
127 |           BOOL_SINGLE_RUN: ${{ steps.wandb3.outputs.BOOL_SINGLE_RUN }}
128 |           NUM_FINISHED: ${{ steps.wandb3.outputs.NUM_FINISHED }}
129 |           NUM_RUNNING: ${{ steps.wandb3.outputs.NUM_RUNNING }}
130 |           NUM_CRASHED: ${{ steps.wandb3.outputs.NUM_CRASHED }}
131 |           NUM_ABORTED: ${{ steps.wandb3.outputs.NUM_ABORTED }}
132 |           NUM_BASELINES: ${{ steps.wandb3.outputs.NUM_BASELINES }}
133 |   pre-build-container:
134 |     runs-on: ubuntu-latest
135 |     steps:
136 |     - uses: actions/checkout@master
137 |     - name: Prebuild Image
138 |       run: |
139 |         cd $GITHUB_WORKSPACE
140 |         echo ${PASSWORD} | docker login -u $USERNAME --password-stdin
141 |         docker build -t hamelsmu/wandb-action -f prebuild.Dockerfile .
142 |         docker push hamelsmu/wandb-action
143 |       env:
144 |         USERNAME: ${{ secrets.DOCKER_USERNAME }}
145 |         PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
146 |         
147 | 


--------------------------------------------------------------------------------
/wandb_get_runs.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Retrieves all runs from wandb that either:
  3 | - correspond to a Git SHA
  4 | - have specific tags.
  5 | 
  6 | The purpose is to compare runs from a given SHA to runs you may have tagged as baselines.
  7 | """
  8 | 
  9 | 
 10 | import os
 11 | os.environ["WANDB_API_KEY"] = os.getenv('INPUT_WANDB_API_KEY')
 12 | import wandb
 13 | import logging
 14 | import pandas as pd
 15 | 
 16 | logging.root.setLevel(logging.DEBUG)
 17 | 
 18 | api = wandb.Api()
 19 | 
 20 | # Read Inputs
 21 | project_name = os.getenv('INPUT_PROJECT_NAME')
 22 | run_id = os.getenv('INPUT_RUN_ID')
 23 | save_folder = os.getenv('GITHUB_WORKSPACE')
 24 | debug = True if os.getenv('INPUT_DEBUG') else False
 25 | 
 26 | # Read Query Parameters
 27 | secondary_sha = os.getenv('INPUT_FILTER_SECONDARY_SHA')
 28 | github_sha = os.getenv('INPUT_FILTER_GITHUB_SHA')
 29 | tags = eval(os.getenv('INPUT_BASELINE_TAGS'))
 30 | 
 31 | print(f'Debug Mode On: {debug}')
 32 | 
 33 | if debug:
 34 |     logging.debug(f'RUN_ID: {run_id}')
 35 |     logging.debug(f'BASELINE_TAGS: {tags}')
 36 |     logging.debug(f'FILTER_GITHUB_SHA: {github_sha}')
 37 |     logging.debug(f'FILTER_SECONDARY_SHA: {secondary_sha}')
 38 |     logging.debug(f'PROJECT_NAME: {project_name}')
 39 |     logging.debug(f"DISPLAY_CONFIG_VARS: {os.getenv('INPUT_DISPLAY_CONFIG_VARS')}")
 40 |     logging.debug(f"DISPLAY_METRICS: {os.getenv('INPUT_DISPLAY_METRICS')}")
 41 | 
 42 |     
 43 | metrics = eval(os.getenv('INPUT_DISPLAY_METRICS'))
 44 | config_vars = eval(os.getenv('INPUT_DISPLAY_CONFIG_VARS'))    
 45 |     
 46 | # validate inputs
 47 | def check_list(var, name):
 48 |     assert isinstance(var, list), f"{name} input must evaluate to a python list"
 49 |     if var:
 50 |         assert max([isinstance(x, str) for x in var]), f"{name} input must be a list of strings"
 51 | 
 52 | check_list(tags, "BASELINE_TAGS")
 53 | check_list(metrics, "METRICS")
 54 | check_list(config_vars, "CONFIG_VARS")
 55 | 
 56 | assert run_id or github_sha, "You must supply an input for either FILTER_GITHUB_SHA or RUN_ID.  Both of these inputs are not specified."
 57 | 
 58 | if secondary_sha and not github_sha:
 59 |     raise Exception("If input FILTER_SECONDARY_SHA is supplied you must also supply an input for FILTER_GITHUB_SHA")
 60 | 
 61 | 
 62 | 
 63 | if run_id:
 64 |     runs=api.runs(project_name, filters={"name":f"{run_id}"})
 65 |     baseline_runs=api.runs(project_name, filters={"$and": [{"tags": {"$in": tags}},
 66 |                                                            {"name": {"$ne": f"{run_id}"}}]
 67 |                                                  }
 68 |                            )
 69 |     if github_sha:
 70 |         logging.info("You have supplied both inputs FILTER_GITHUB_SHA and RUN_ID.  Runs matching FILTER_GITHUB_SHA will be ignored and only the run corresponding to RUN_ID will be returned.")
 71 | 
 72 | #run a query for all runs matching the github sha AND optionally the secondary sha
 73 | if not run_id and github_sha and secondary_sha:
 74 |     runs = api.runs(project_name, {"$and": [{"config.github_sha": f"{github_sha}"},
 75 |                                             {"config.secondary_sha": f"{secondary_sha}"}]
 76 |                                   }
 77 |                    )
 78 |     # baseline runs should be mutually exclusive from the experimental runs
 79 |     # the only time the github_sha is allowed to not exist is for baseline runs             
 80 |     baseline_runs=api.runs(project_name, {"$and": [{"tags": {"$in": tags}},
 81 |                                                    {"$or": [{"config.github_sha": { "$ne": f"{github_sha}"}},
 82 |                                                             {"config.github_sha": { "$exists": False}},
 83 |                                                             {"config.secondary_sha": { "$ne": f"{secondary_sha}"}},
 84 |                                                             {"config.secondary_sha": { "$exists": False}}]
 85 |                                                    }]
 86 |                                          }
 87 |                            )
 88 | 
 89 | if not run_id and github_sha and not secondary_sha:
 90 |     runs = api.runs(project_name, {"config.github_sha": f"{github_sha}"})
 91 |     # baseline runs should be mutually exclusive from the experimental runs 
 92 |     # the only time the github_sha is allowed to not exist is for baseline runs
 93 |     baseline_runs = api.runs(project_name, {"$and": [{"tags": {"$in": tags}},
 94 |                                                             { "$or": [{"config.github_sha": { "$ne": f"{github_sha}"}},
 95 |                                                                       {"config.github_sha": { "$exists": False}}]
 96 |                                                             },
 97 |                                                     ]
 98 |                                             }
 99 |                             )
100 | 
101 | runs = list(runs)
102 | baseline_runs = list(baseline_runs)
103 | 
104 | finished_runs = [run for run in runs if runs and run.state == 'finished']
105 | running_runs = [run for run in runs if runs and run.state == 'running']
106 | crashed_runs = [run for run in runs if run.state == 'crashed']
107 | aborted_runs = [run for run in runs if run.state == 'aborted']
108 | 
109 | # emit variables as outputs for other actions
110 | print(f'::set-output name=BOOL_COMPLETE::{True if finished_runs and not running_runs else False}')
111 | print(f'::set-output name=BOOL_SINGLE_RUN::{True if len(runs) == 1 else False}')
112 | print(f'::set-output name=NUM_FINISHED::{len(finished_runs)}')
113 | print(f'::set-output name=NUM_RUNNING::{len(running_runs)}')
114 | print(f'::set-output name=NUM_CRASHED::{len(crashed_runs)}')
115 | print(f'::set-output name=NUM_ABORTED::{len(aborted_runs)}')
116 | print(f'::set-output name=NUM_BASELINES::{len(baseline_runs)}')
117 | 
118 | 
119 | def summarize_runs(runs, eval_category_label, debug, metrics=[], config_vars=[]):
120 |     """
121 |     Summarize a sequence of wandb runs into a table
122 |     
123 |     Parameters:
124 |     ----------
125 |     runs: a wandb run object
126 |         this is the object you receive when you query a run with the wandb api using the python client
127 |     eval_category_label: str
128 |         this will create a column in the dataframe called __eval.category that = eval_category
129 |     debug: bool
130 |         whether or not to show debuging information
131 |     metrics: List[str]
132 |         metrics names provided as list of strings.  Ex ['accuracy', 'loss']
133 |     config_vars: List[str]
134 |         list of configuration variable names. Ex ['learning_rate', 'num_epochs']
135 |     """
136 |     summary_dict = dict()
137 |     
138 |     for run in runs:
139 |         summary_dict['run.url'] = summary_dict.get('run.url', []) + [run.url]
140 |         summary_dict['run.name'] = summary_dict.get('run.name', []) + [run.name]
141 |         summary_dict['run.tags'] = summary_dict.get('run.tags', []) + [run.tags]
142 |         summary_dict['run.id'] = summary_dict.get('run.id', []) + [run.id]
143 |         summary_dict['run.entity'] = summary_dict.get('run.entity', []) + [run.entity]
144 |         summary_dict['run.project'] = summary_dict.get('run.project', []) + [run.project]
145 |         summary_dict['github_sha'] = summary_dict.get('github_sha', []) + [run.config.get('github_sha')]
146 |         summary_dict['run.description'] = summary_dict.get('run.description', []) + [run.description]          
147 | 
148 |         for metric in metrics:
149 |             summary_dict[metric] = summary_dict.get(metric, []) + [run.summary_metrics.get(metric)]
150 |                                                                                                                 
151 |         for var in config_vars:            
152 |             # configuration variables preceded with _ to avoid name collisions with metrics
153 |             summary_dict[f"_{var}"] =  summary_dict.get(f"_{var}", []) + [run.config.get(var)]
154 |     
155 |     
156 |     df = pd.DataFrame(summary_dict)
157 |     # debugging information
158 |     if debug:
159 |         logging.debug(f"=== Debugging information for: {eval_category_label} runs ===")
160 |         logging.debug(f"Missing value summary:")
161 |         logging.debug(df.isna().sum())
162 |         logging.debug(f"Preview of Data:")
163 |         logging.debug(df.head(1).T)
164 | 
165 |     # assign eval_category column
166 |     df['__eval.category'] = eval_category_label
167 |     return df
168 | 
169 | if finished_runs:
170 |     e_df = summarize_runs(runs=finished_runs, debug=debug, eval_category_label='candidate', metrics=metrics, config_vars=config_vars)
171 |     b_df = summarize_runs(runs=baseline_runs, debug=debug, eval_category_label='baseline', metrics=metrics, config_vars=config_vars)
172 |     df = pd.concat([e_df, b_df])
173 |     report_filename = os.path.join(save_folder, 'wandb_report.csv')
174 |     df.to_csv(report_filename, index=False)
175 |     print(f'{df.shape[0]} runs written to {report_filename}')
176 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![Actions Status](https://github.com/machine-learning-apps/wandb-action/workflows/Tests/badge.svg)
  2 | 
  3 | 
  4 | # GitHub Action That Retrieves Model Runs From Weights & Biases
  5 | 
  6 | Weights & Biases [homepage](https://www.wandb.com/)
  7 | 
  8 | <!-- TOC depthFrom:1 depthTo:6 withLinks:1 updateOnSave:1 orderedList:0 -->
  9 | 
 10 | - [Usage](#usage)
 11 |     - [Example](#example)
 12 |     - [Inputs](#inputs)
 13 |         - [Mandatory Inputs](#mandatory-inputs)
 14 |         - [Optional Inputs](#optional-inputs)
 15 |     - [Outputs](#outputs)
 16 | - [Features of This Action](#features-of-this-action)
 17 |     - [Querying Model Runs](#querying-model-runs)
 18 |     - [Querying Additonal Baseline Runs](#querying-additonal-baseline-runs)
 19 |     - [Saving & Displaying Model Run Data](#saving-displaying-model-run-data)
 20 | 
 21 | 
 22 | <!-- /TOC -->
 23 | 
 24 | 
 25 | 
 26 | ## Usage
 27 | 
 28 | ### Example
 29 | 
 30 | ```yaml
 31 | name: Get WandB Runs
 32 | on: [issue_comment]
 33 | 
 34 | jobs:
 35 |   get-runs:
 36 |     if: (github.event.issue.pull_request != null) &&  contains(github.event.comment.body, '/get-runs')
 37 |     runs-on: ubuntu-latest
 38 | 
 39 |     steps:
 40 |       - name: Get the latest SHA for the PR that was commented on
 41 |         id: chatops
 42 |         uses: machine-learning-apps/actions-chatops@master
 43 |         with:
 44 |           TRIGGER_PHRASE: "/get-runs"
 45 |         env:
 46 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 47 | 
 48 |       - name: Get Runs Using SHA
 49 |         uses: machine-learning-apps/wandb-action@master
 50 |         with:
 51 |           PROJECT_NAME: ${{ format('{0}/{1}', secrets.WANDB_ENTITY, secrets.WANDB_PROJECT) }}
 52 |           FILTER_GITHUB_SHA: ${{ steps.chatops.outputs.SHA }}
 53 |           BASELINE_TAGS: "['baseline', 'reference']"
 54 |           DISPLAY_METRICS: "['accuracy', 'loss', 'best_val_acc', 'best_val_loss', '_runtime']"
 55 |           WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
 56 |           DEBUG: 'true'
 57 | ```
 58 | 
 59 | ### Inputs
 60 | 
 61 | #### Mandatory Inputs
 62 |   1. `WANDB_API_KEY`: your W&B api key.
 63 |   2. `PROJECT_NAME`:  The entity/project name associated with your wandb project.  Example - 'github/predict-issue-labels'
 64 |   3. Either `RUN_ID` or `FILTER_GITHUB_SHA` must be specified, even though these are both optional inputs.  See below for more details:
 65 | 
 66 | 
 67 | #### Optional Inputs
 68 | 
 69 |   1. `RUN_ID`: the run id, which can be found in the url https://app.wandb.ai/{entity_name}/{project_name}/runs/{run_ID}.  When supplying this input, `FILTER_GITHUB_SHA` and `FILTER_SECONDARY_SHA` are ignored and only the run corresponding to this id (along with any baselines corresponding to the input BASELINE_TAGS) are returned.
 70 |   2. `FILTER_GITHUB_SHA`: The git SHA that you want to filter runs by.  This assumes you have logged a configuration variable named 'github_sha' to your runs. A common usage pattern is to supply the built-in environment variable $GITHUB_SHA, to get the commit SHA that triggered the workflow.  Note that this argument is ignored if `RUN_ID` is specified.
 71 |   3. `FILTER_SECONDARY_SHA`: This is an optional field you can filter your runs by.  This assumes you have logged a configuration variable named 'secondary_sha' to your model runs.  You might use this field for data versioning.  Note that this argument is ignored if `RUN_ID` is specified.
 72 |   4. `BASELINE_TAGS`:  A list of tags that correspond to runs you want to retrieve in addition to those that correspond to the FILTER_GITHUB_SHA.  You would typically use this field to obtain baseline runs to compare your current runs against.  Example - `"['baseline']"`
 73 |   5. `DISPLAY_METRICS`:  A list of summary metrics you want to retain for the csv file that is written to the actions environment.  Example - `"['acc', 'loss', 'val_acc', 'val_loss']"`
 74 |   6. `DISPLAY_CONFIG_VARS`: A list of configuration variables you want to retain for the csv file written to the actions environment.  Example - `"['learning_rate', 'num_layers']"`
 75 |   7. `DEBUG`: Setting this variable to any value will turn debug mode on.
 76 | 
 77 | ### Outputs
 78 | 
 79 | You can reference the outputs of an action using [expression syntax](https://help.github.com/en/articles/contexts-and-expression-syntax-for-github-actions).
 80 | 
 81 | 1. `BOOL_COMPLETE`: True if there is at least 1 finished run and no runs that have a state of 'running' else False
 82 | 2. `BOOL_SINGLE_RUN`: True if there is only 1 run returned from the query else False
 83 | 3. `NUM_FINISHED`: The number of non-baseline runs with a state of 'finished'
 84 | 4. `NUM_RUNNING`: The number of non-baseline runs with a state of 'running'
 85 | 5. `NUM_CRASHED`: The number of non-baseline runs with a state of 'crashed'
 86 | 6. `NUM_ABORTED`: The number of non-baseline runs with a state of 'aborted'
 87 | 7. `NUM_BASELINES`: The number of baseline runs returned.  See [this section](#querying-additonal-baseline-runs) for more context regarding baseline runs.
 88 | 
 89 | ## Features of This Action
 90 | 
 91 | ### Querying Model Runs
 92 | 
 93 | This action fetches all model runs that either:
 94 | 
 95 | 1. **Correspond to a git commit SHA**, for example the commit SHA that triggered the Action.  See the documentation for the built-in environment variable [GITHUB_SHA](https://help.github.com/en/articles/virtual-environments-for-github-actions#environment-variables). Typical use cases for querying model runs with a git commit SHA:
 96 |     - There are many experiments generated from the same code, such as hyper-parameter tuning.
 97 |     - You want to automatically fetch experiment results that correspond to the commit SHA that triggered the GitHub Action.  This can help ensure that your experiment results are not stale relative to your code.
 98 | 
 99 |       Querying by git commit SHA **assumes you have logged a config variable named `github_sha`** to your [config variables](https://docs.wandb.com/wandb/config).  Example:
100 |     
101 |         ```py
102 |         import wandb, os
103 |         # You set the environment variable before running this script programmatically with the SHA
104 |         github_sha = os.getenv('GITHUB_SHA')
105 |         wandb.config.github_sha = github_sha
106 |         ```
107 | 
108 |         In addition to querying by commit SHA, you can apply **an additional filter for a secondary SHA**. You might use this filter in addition to the commit SHA when you have other variables external to the code, such as a data version, that you want to track.  For example, you can version your data with [Pachyderm](https://www.pachyderm.io), which gives you a SHA corresponding to your data version.  Similar to the github SHA, **supplying an argument for secondary SHA assumes you have logged a config variable named `secondary_sha`** to your experiment in W&B.
109 | 
110 | 2. **Match a run id**:  The run id corresponds to the unique identifier found in the URL when viewing the run on W&B: `https://app.wandb.ai/{entity_name}/{project_name}/runs/{run_id}`.
111 | 
112 | ### Querying Additonal Baseline Runs
113 | 
114 | It is often useful to compare model runs against baseline runs or your current best models in order to properly assess model performance.  Therefore, in addition to the runs described above, you can also query runs by tag, which is a label you can assign either programmatically or in the W&B user interface.  You can supply a list of tags as additional runs that will be queried (see the `BASELINE_TAGS` input in the [Inputs](#inputs) section above).  Two properties of baseline runs that are important:
115 | 
116 | - Baseline runs will be marked as `baseline` in the output csv file in a column named `__eval.category`.  See the [outputs](#outputs) section for more detail.
117 | - Baseline runs are mutually exclusive with other runs that you are querying.  If there are runs that are in both the baseline set and the candidate set, this will be resolved by moving those runs into the candidate set. The candidate set refers to any runs returned by methods 1 or 2 in Querying Model Runs.
118 | 
119 | ### Saving & Displaying Model Run Data
120 | 
121 | **This Action saves a csv file called `wandb_report.csv` into the path specified by the [default environment variable](https://help.github.com/en/articles/virtual-environments-for-github-actions#environment-variables) `GITHUB_WORKSPACE` set for you in GitHub Actions**,  which allows this data to be accessed by subsequent Actions.  Information in this CSV can be displayed in a variety of ways, such as a markdown formatted comment in a pull request or via the [GitHub Checks](https://developer.github.com/v3/checks/) API.
122 | 
123 | This csv file always has the following fields:
124 | - `run.url`: the url for the run in the W&B api.
125 | - `run.name`: the name of the run. This is automatically set by wandb if not specified by the user.
126 | - `run.tags`: a list with all of the tags assigned to the run.
127 | - `run.id`: the id associated with the run.  This corresponds to the input `RUN_ID`
128 | - `run.entity`: the name of the entity that contains the project the run can be found in.  This is similar to an org in GitHub.
129 | - `run.project`: the name of the project that contains the run.  This is similar to a repo in GitHub.
130 | - `github_sha`: the config variable `github_sha`.
131 | - `__eval.category`: this field will contain either the value `candidate` or `baseline`, depending on how the run was queried.
132 | 
133 | In addition to the above fields the user can specify the following additional fields from model runs.  See the [Inputs](#inputs) section for more information on how to supply these inputs.
134 | 
135 | - [summary_metrics](https://docs.wandb.com/wandb/log#summary-metrics): you can specify a list of summary metrics, for example:  `"['acc', 'loss', 'val_acc', 'val_loss']"`
136 | 
137 | - [config variables](https://docs.wandb.com/wandb/config): specify a list of configuration variables, for example: `"['learning_rate', 'num_layers']"`.  These fields will be prefixed with an underscore in the output csv file.
138 | 
139 | Below is an example of the contents of the csv file:
140 | 
141 | | run.url                                                        | run.name       | run.tags             | run.id   | run.entity | run.project          | github_sha |    acc |   loss | val_acc | val_loss | _docker_digest | __eval.category |
142 | | -------------------------------------------------------------- | -------------- | -------------------- | -------- | ---------- | -------------------- | ---------- | ------ | ------ | ------- | -------- | -------------- | --------------- |
143 | | https://app.wandb.ai/github/predict-issue-labels/runs/e6lo523p | dashing-leaf-4 | []                   | e6lo523p | github     | predict-issue-labels | 86edd034aaba1498dbae6465cf994de90be6a4b2       | 0.896… | 0.540… |   1.000 |   1.054… |                | candidate       |
144 | | https://app.wandb.ai/github/predict-issue-labels/runs/15u8cbod | happy-frog-3   | ['baseline', 'test'] | 15u8cbod | github     | predict-issue-labels | 86edd034aaba1498dbae6465cf994de90be6a4b2       | 0.881… | 0.605… |   0.500 |   1.080… |                | candidate       |
145 | | https://app.wandb.ai/github/predict-issue-labels/runs/cqigzoxc | dandy-river-1  | ['baseline']         | cqigzoxc | github     | predict-issue-labels | 86edd034aaba1498dbae6465cf994de90be6a4b2           | 0.925… | 0.441… |   0.375 |   1.095… |                | baseline        |
146 | 
147 | 
148 | ## Keywords
149 |  MLOps, Machine Learning, Data Science
150 | 


--------------------------------------------------------------------------------