├── .cloud └── .azure │ ├── compute.json │ ├── registermodel.json │ └── workspace.json ├── .github └── workflows │ ├── azureml.yml │ ├── data.yml │ └── shiny.yml ├── .gitignore ├── LICENSE ├── README.md ├── Setup-DSVM.md ├── accident-app.png ├── accident-app └── app.R ├── architecture.png ├── model ├── accident-glm.R ├── accident_predict_caret.R ├── accidents.Rd ├── data.R ├── deploy-model.R ├── nassCDS.csv ├── test-endpoint.R └── train-model.R ├── shiny-server.conf └── slides.pdf /.cloud/.azure/compute.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rcluster", 3 | "compute_type": "amlcluster", 4 | "min_nodes": 0, 5 | "max_nodes": 4, 6 | "idle_seconds_before_scaledown": 600 7 | } 8 | -------------------------------------------------------------------------------- /.cloud/.azure/registermodel.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_file_name": "model.Rd", 3 | "webservice_name": "accident-gha" 4 | } -------------------------------------------------------------------------------- /.cloud/.azure/workspace.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tailwind", 3 | "resource_group": "rstudioconf", 4 | "create_workspace": true 5 | } 6 | -------------------------------------------------------------------------------- /.github/workflows/azureml.yml: -------------------------------------------------------------------------------- 1 | # Train and Deploy model for Shiny app 2 | 3 | name: Train and Deploy Model 4 | 5 | # Runs on any push, except mods to the Shiny app code 6 | on: 7 | push: 8 | branches: [ main ] 9 | paths: 10 | - 'model/**' 11 | - '.cloud/.azure/**' 12 | pull_request: 13 | branches: [ main ] 14 | paths: 15 | - 'model/**' 16 | - '.cloud/.azure/**' 17 | 18 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 19 | jobs: 20 | # Connect to Azure ML 
workspace, create if needed 21 | train: 22 | runs-on: ubuntu-latest 23 | steps: 24 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 25 | - name: Check Out Repository 26 | id: checkout_repository 27 | uses: actions/checkout@v2 28 | 29 | # Connect or Create the Azure Machine Learning Workspace 30 | - name: Connect/Create Azure Machine Learning Workspace 31 | id: aml_workspace 32 | uses: Azure/aml-workspace@v1 33 | with: 34 | azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} 35 | 36 | # Connect or Create a Compute Target in Azure Machine Learning 37 | - name: Connect/Create Azure Machine Learning Compute Target 38 | id: aml_compute_training 39 | uses: Azure/aml-compute@v1 40 | with: 41 | azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} 42 | 43 | # Update code on server. This also updates the Shiny app in the app/ folder 44 | - name: Pull source on VM 45 | uses: JimCronqvist/action-ssh@master 46 | env: 47 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 48 | with: 49 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 50 | privateKey: ${{ secrets.SHINYKEY }} 51 | command: | 52 | cd mlops-r-gha 53 | git pull 54 | 55 | # Connect to the Shiny VM to train the model 56 | - name: Train model 57 | uses: JimCronqvist/action-ssh@master 58 | env: 59 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 60 | with: 61 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 62 | privateKey: ${{ secrets.SHINYKEY }} 63 | command: | 64 | cd mlops-r-gha/model 65 | export AZURE_CREDENTIALS 66 | Rscript train-model.R 67 | 68 | # Connect to the Shiny VM to register and deploy the model 69 | - name: Deploy model 70 | uses: JimCronqvist/action-ssh@master 71 | env: 72 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 73 | with: 74 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 75 | privateKey: ${{ secrets.SHINYKEY }} 76 | command: | 77 | cd mlops-r-gha/model 78 | export AZURE_CREDENTIALS 79 | Rscript 
deploy-model.R 80 | -------------------------------------------------------------------------------- /.github/workflows/data.yml: -------------------------------------------------------------------------------- 1 | # Upload data -- only needs to be rerun once 2 | 3 | name: Upload data 4 | 5 | # Runs if data file modified 6 | on: 7 | push: 8 | branches: [ main ] 9 | paths: 10 | - 'model/nassCDS.csv' 11 | - 'model/data.R' 12 | pull_request: 13 | branches: [ main ] 14 | paths: 15 | - 'model/nassCDS.csv' 16 | - 'model/data.R' 17 | 18 | jobs: 19 | data: 20 | runs-on: ubuntu-latest 21 | steps: 22 | # Upload data file 23 | - name: Upload data 24 | uses: JimCronqvist/action-ssh@master 25 | env: 26 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 27 | with: 28 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 29 | privateKey: ${{ secrets.SHINYKEY }} 30 | command: | 31 | cd mlops-r-gha/model 32 | export AZURE_CREDENTIALS 33 | Rscript data.R 34 | -------------------------------------------------------------------------------- /.github/workflows/shiny.yml: -------------------------------------------------------------------------------- 1 | ## Deploy contents of app folder to Shiny Server VM 2 | 3 | name: Quick Deploy to Shiny 4 | 5 | # runs only when there have been changes in the Shiny app 6 | 7 | on: 8 | push: 9 | paths: 10 | - 'accident-app/**' 11 | branches: [ main ] 12 | pull_request: 13 | paths: 14 | - 'accident-app/**' 15 | branches: [ main ] 16 | 17 | jobs: 18 | 19 | quickdeploy: 20 | name: Quick Deploy to Shiny Server 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | 25 | ### If you need to open the SSH port temporarily on the Shiny Server, uncomment this section 26 | 27 | # - name: dig +short myip.opendns.com @resolver1.opendns.com 28 | # run: dig +short myip.opendns.com @resolver1.opendns.com 29 | 30 | # - name: Add NSG Rule 31 | # uses: venura9/manage-nsg@master 32 | # id: rule 33 | # with: 34 | # azure-credentials: ${{ secrets.AZURE_CREDENTIALS 
}} 35 | # rule-nsg-resource-group-name: ${{ secrets.SHINY_RG }} 36 | # rule-nsg-name: ${{ secrets.SHINY_NSG }} 37 | # rule-inbound-port: 22 38 | 39 | # - name: Print Created NSG Rule Name 40 | # run: echo "Rule Name ${{ steps.rule.outputs.rule_name }}" 41 | 42 | - uses: actions/checkout@master 43 | 44 | - name: copy files via ssh key 45 | uses: appleboy/scp-action@master 46 | with: 47 | host: ${{ secrets.SHINYHOST }} 48 | username: ${{ secrets.SHINYUSERNAME }} 49 | key: ${{ secrets.SHINYKEY }} 50 | source: "accident-app/app.R" 51 | target: "~" 52 | 53 | # Re-close temporarily opened SSH port 54 | # - name: Remove NSG Rule 55 | # uses: venura9/manage-nsg@master 56 | # with: 57 | # azure-credentials: ${{ secrets.AZURE_CREDENTIALS }} 58 | # rule-id-for-removal: ${{ steps.rule.outputs.rule_name }} 59 | # rule-nsg-resource-group-name: ${{ secrets.SHINY_RG }} 60 | # rule-nsg-name: ${{ secrets.SHINY_NSG }} 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Azure ML 2 | config.json 3 | model/outputs 4 | outputs/ 5 | _generated_score.py 6 | accidentdata/ 7 | secrets.txt 8 | 9 | # History files 10 | .Rhistory 11 | .Rapp.history 12 | 13 | # Session Data files 14 | .RData 15 | 16 | # User-specific files 17 | .Ruserdata 18 | 19 | # Example code in package build process 20 | *-Ex.R 21 | 22 | # Output files from R CMD build 23 | /*.tar.gz 24 | 25 | # Output files from R CMD check 26 | /*.Rcheck/ 27 | 28 | # RStudio files 29 | .Rproj.user/ 30 | 31 | # produced vignettes 32 | vignettes/*.html 33 | vignettes/*.pdf 34 | 35 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 36 | .httr-oauth 37 | 38 | # knitr and R markdown default cache directories 39 | *_cache/ 40 | /cache/ 41 | 42 | # Temporary files created by R markdown 43 | *.utf8.md 44 | *.knit.md 45 | 46 | # R Environment Variables 47 | .Renviron 48 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 David Smith 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLOPS with R: An end-to-end process for building machine learning applications 2 | 3 | This repository contains resources for the talk "MLOPS with R: An end-to-end process for building machine learning applications". 4 | 5 | In addition to the slides (see below), this repository contains the complete set of code and GitHub Actions to deploy a Shiny application for calculating the probability of a fatal road accident. 
See below for instructions on how to deploy this application yourself. 6 | 7 | ![Screenshot of Shiny app](accident-app.png) 8 | 9 | ## Talk Abstract 10 | 11 | As predictive models and machine learning become key components of production applications in every industry, an end-to-end Machine Learning Operations (MLOPS) process becomes critical for reliable and efficient deployment of applications that depend on R-based models. In this talk, I’ll outline the basics of the DevOps process and focus on the areas where MLOPS diverges. The talk will show the complete process of building and deploying an application driven by a machine learning model implemented with R. We will show the process of developing models, triggering model training on code changes, and triggering the CI/CD process for an application when a new version of a model is registered. We will use the Azure Machine Learning service and the “azuremlsdk” package to orchestrate the model training and management process, but the principles will apply to MLOPS processes generally, especially for applications that involve large amounts of data or require significant computing resources. 12 | 13 | ## Presentations (Slides) 14 | 15 | Aug 2020: New York R Conference (online). 16 | MLOPS with R: An end-to-end process for building machine learning applications: [slides (PDF)](slides.pdf) | [Video Recording](https://youtu.be/hCCZZyHz-ko) 17 | 18 | ## Resources 19 | 20 | Links and other useful resources from the talk. 21 | 22 | Azure Machine Learning service: 23 | * [Documentation](https://docs.microsoft.com/azure/machine-learning/overview-what-is-azure-ml?WT.mc_id=aiml-2093-davidsmi) 24 | * Free azure credits: [register here](https://aka.ms/AML-NYR). (Credit card required, but won't be charged until you remove limits to allow it.) 
25 | 26 | azuremlsdk R package: 27 | 28 | * [CRAN](https://cran.r-project.org/package=azuremlsdk) 29 | * [GitHub Repository](https://github.com/azure/azureml-sdk-for-r) 30 | * [Documentation](https://azure.github.io/azureml-sdk-for-r/reference/index.html). 31 | * [Tutorial: Create a logistic regression model in R with Azure Machine Learning](https://docs.microsoft.com/azure/machine-learning/tutorial-1st-r-experiment?WT.mc_id=aiml-2093-davidsmi) 32 | 33 | GitHub Actions: 34 | * [Documentation](https://docs.github.com/en/actions) 35 | * [An Unintentionally Comprehensive Introduction to GitHub Actions CI](https://dev.to/bnb/an-unintentionally-comprehensive-introduction-to-github-actions-ci-blm) 36 | * [ML Ops with GitHub Actions and Azure Machine Learning](https://github.com/machine-learning-apps/ml-template-azure) 37 | * [GitHub Actions for the R Language](https://github.com/r-lib/actions) 38 | 39 | Visual Studio Code: 40 | 41 | * [Remote Development](https://code.visualstudio.com/docs/remote/remote-overview?WT.mc_id=aiml-2093-davidsmi) 42 | * [Remote Containers](https://code.visualstudio.com/docs/remote/create-dev-container?WT.mc_id=aiml-2093-davidsmi) 43 | * [R extension](https://marketplace.visualstudio.com/items?itemName=Ikuyadeu.r&WT.mc_id=opensource-0000-davidsmi) 44 | 45 | Data file `nassCDS.csv`: 46 | 47 | * The app uses data from the US [National Highway Traffic Safety Administration](https://cdan.nhtsa.gov/tsftables/tsfar.htm) 48 | (with thanks to [Mary C. Meyer and Tremika Finney](https://www.stat.colostate.edu/~meyer/airbags.htm)). 49 | This dataset includes data from over 25,000 car crashes in the US, with variables you can use to predict the likelihood of a fatality. 50 | 51 | 52 | ## Related Presentations 53 | 54 | [Machine Learning Operations with R](https://github.com/revodavid/mlops-r) (January, 2020) 55 | 56 | ## Application Architecture 57 | 58 | The application runs as a Shiny app, running on an instance of the Azure Data Science VM. 
Azure ML service is used to train and deploy the scoring endpoint from R scripts, and GitHub Actions orchestrates the app deployment. 59 | 60 | ![Architecture](architecture.png) 61 | 62 | ## Instructions for deploying the "Accident" app 63 | 64 | 1. Fork this repository. 65 | 66 | 2. Follow the directions in [ML Ops with GitHub Actions and Azure Machine Learning](https://github.com/machine-learning-apps/ml-template-azure) to: 67 | 68 | * Create a resource group in your Azure subscription. (If you don't have one, create an [Azure Free Subscription](https://azure.microsoft.com/free/?WT.mc_id=aiml-2093-davidsmi) and get $200 in free Azure credits.) 69 | * Create a service principal 70 | * Add secrets to your forked repository 71 | * Configure the `.cloud\.azure\workspace.json` file. You can use an existing Azure ML Workspace, or if none by the specified name exists it will be created for you. 72 | 73 | 3. Deploy an Azure Data Science Virtual Machine and configure it as the Shiny Server by [following these instructions](Setup-DSVM.md). 74 | 75 | -------------------------------------------------------------------------------- /Setup-DSVM.md: -------------------------------------------------------------------------------- 1 | # Set up Azure Data Science VM as Shiny Server 2 | 3 | In this architecture, we deploy an Azure Data Science VM (DSVM) to: 4 | 5 | * Host Shiny Server 6 | * Run R scripts using the azuremlsdk package 7 | 8 | This document describes the process of setting up the DSVM, which is currently a series of manual steps. 9 | I plan to automate this process in the future. 10 | 11 | NOTE: Using the DSVM is not a requirement, it's just convenient because many of the tools we need (git, R, Python etc.) come pre-installed. You can use any VM or even an on-premises server, as long as it supports shiny server. Here are instructions for [configuring a basic VM on Azure](https://canovasjm.netlify.app/2020/01/08/deploy-you-own-shiny-server-on-azure/). 
12 | 13 | ## Deployment Process 14 | 15 | 1. Fork this `mlops-r-gha` repository to your GitHub account 16 | 17 | 1. (OPTIONAL) If you wish to use an existing Azure ML workspace, edit `.cloud/.azure/workspace.json` accordingly, otherwise a new workspace will be created for you. 18 | 19 | 1. Add the AZURE_CREDENTIALS secret to the repository, as described step 3 of [this file](https://github.com/machine-learning-apps/ml-template-azure/blob/master/README.md). 20 | 21 | 1. Deploy an instance of the Azure Data Science Virtual Machine for Ubuntu. Call it "shinyserver". Use "azureuser" for the default account, and [enable SSH access](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/mac-create-ssh-keys?WT.mc_id=aiml-2093-davidsmi). Save the private SSH key generated as `shinyserver.pem`. Once the VM is deployed, note the server IP address: you'll need it later (we will refer to it as SHINYSERVERIP below). [Detailed Instructions](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro?WT.mc_id=aiml-2093-davidsmi) 22 | 23 | 1. Open port 3838 on shinyserver by adding a rule to the network security group that was created when you set up the DSVM. Also verify that Port 22 (SSH) is not blocked by the rules. [Detailed Instructions](https://docs.microsoft.com/azure/virtual-network/manage-network-security-group?WT.mc_id=aiml-2093-davidsmi). 24 | 25 | 1. Add secrets to your forked repo in GitHub under Setting >> secrets with the private SSH key needed to access shinyserver, the SHINYSERVERIP address, user name, and port. The new secret name should be the ALL CAPS name and the value of the secret should be as described: 26 | - For SHINYKEY, paste in the entire contents of your SSH private key file for the shinyserver VM deployed in step 4. 27 | - For SHINYHOST, paste in the IP address of the shinyserver VM (in the format AAA.AAA.AAA.AAA). 
28 | - For SHINYUSERNAME, set to `azureuser` 29 | - For SHINYPORT, set to `3838` 30 | 31 | 1. SSH to shinyserver using the private key: `ssh -i shinyserver.pem azureuser@SHINYSERVERIP` 32 | 33 | 1. (OPTIONAL) Suppress login banner. This makes the Actions logs easier to read. 34 | ```bash 35 | touch .hushlogin 36 | ``` 37 | 38 | 9. Install shiny-server: [Download for Ubuntu here](https://rstudio.com/products/shiny/download-server/ubuntu/). [Start the Shiny server](https://docs.rstudio.com/shiny-server/#stopping-and-starting). Visit the default Shiny homepage at http://SHINYSERVERIP:3838/ 39 | 40 | 41 | 1. Clone the mlops-r-gha repository on shinyserver 42 | ```bash 43 | git clone https://github.com/revodavid/mlops-r-gha 44 | ``` 45 | 46 | 11. Replace /etc/shiny-server/shiny-server.conf with the file in this repository. This configures Shiny to deliver a single application from the "mlops-r-gha/accident-app" folder, and we can update files here via the configured SSH. Restart the Shiny server. 47 | ``` 48 | sudo cp shiny-server.conf /etc/shiny-server/shiny-server.conf 49 | sudo systemctl restart shiny-server 50 | ``` 51 | 52 | 12. Launch R and install the `azuremlsdk` package from GitHub (not from CRAN) as described in the [`azuremlsdk` repository](https://github.com/Azure/azureml-sdk-for-r). Don't forget the `azuremlsdk::install_azureml()` step. You do not need to install Conda as it's provided by the DSVM. It's ok to answer "yes" to "Would you like to use a personal library instead?". 53 | 54 | 1. Trigger the "Train and Deploy Model" GitHub Action in your repository. You can do this by touching a file in the `model` folder, or by browsing the Actions tab and using the "Re-Run Jobs" feature. 55 | 56 | 14. 
Wait for Actions to complete successfully, and then try out your Shiny app at http://SHINYSERVERIP:3838/accident/ 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /accident-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/accident-app.png -------------------------------------------------------------------------------- /accident-app/app.R: -------------------------------------------------------------------------------- 1 | # 2 | # This is a Shiny web application. You can run the application by clicking 3 | # the 'Run App' button above. 4 | # 5 | # Find out more about building applications with Shiny here: 6 | # 7 | # http://shiny.rstudio.com/ 8 | # 9 | 10 | library(shiny) 11 | library(httr) 12 | 13 | ## Replace this with the endpoint for your published model. 
14 | ## You can get this from the "Endpoints" section in ml.azure.com 15 | ## or via the R SDK with get_webservice(ws, "accident-pred")$scoring_uri 16 | ## If you don't specify a value here, the global "accident.endpoint" object will be used 17 | 18 | accident.endpoint <- readRDS("/home/azureuser/endpoint.Rd") # file placed by deploy-model.R 19 | 20 | # Define UI for application that draws a histogram 21 | ui <- fluidPage( 22 | 23 | # Application title 24 | titlePanel("Accident Fatality Probability Estimator"), 25 | 26 | # Sidebar with a slider input for number of bins 27 | sidebarLayout( 28 | sidebarPanel( 29 | sliderInput("age", 30 | "Occupant Age:", 31 | min = 16, 32 | max = 95, 33 | value = 16), 34 | selectInput("sex", 35 | "Occupant gender:", 36 | c("f","m")), 37 | selectInput("occRole", 38 | "Occupant role:", 39 | c("driver","pass")), 40 | sliderInput("yearVeh", 41 | "Vehicle Year:", 42 | min = 1955, 43 | max = 2005, 44 | value = 2002), 45 | selectInput("seatbelt", 46 | "Seatbelt:", 47 | c("none","belted")), 48 | selectInput("airbag", 49 | "Airbag:", 50 | c("none","airbag")), 51 | selectInput("dvcat", 52 | "Impact speed:", 53 | c("1-9km/h","10-24","25-39","40-54","55+")), 54 | selectInput("frontal", 55 | "Collision type:", 56 | c("notfrontal","frontal")) 57 | ), 58 | 59 | # Show a plot of the generated distribution 60 | mainPanel( 61 | plotOutput("barchart") 62 | ) 63 | ) 64 | ) 65 | 66 | # Define server logic required to draw a histogram 67 | server <- function(input, output) { 68 | 69 | newdata <- data.frame( # valid values shown below 70 | dvcat="55+", # "1-9km/h" "10-24" "25-39" "40-54" "55+" 71 | seatbelt="none", # "none" "belted" 72 | frontal="frontal", # "notfrontal" "frontal" 73 | sex="f", # "f" "m" 74 | ageOFocc=16, # age in years, 16-97 75 | yearVeh=2002, # year of vehicle, 1955-2003 76 | airbag="none", # "none" "airbag" 77 | occRole="pass" # "driver" "pass" 78 | ) 79 | 80 | pred <- reactive({ 81 | 82 | newdata$yearVeh <- input$yearVeh 83 | 
newdata$ageOFocc <- input$age 84 | newdata$dvcat <- input$dvcat 85 | newdata$seatbelt <- input$seatbelt 86 | newdata$frontal <- input$frontal 87 | newdata$sex <- input$sex 88 | newdata$airbag <- input$airbag 89 | newdata$occRole <- input$occRole 90 | 91 | v <- POST(accident.endpoint, body=newdata, encode="json") 92 | content(v)[[1]]*100 93 | }) 94 | 95 | output$prediction <- renderText({pred()}) 96 | 97 | output$barchart <- renderPlot({ 98 | p <- pred() 99 | pp <- formatC(p, format="f", digits=2, width=5) 100 | barplot(p, ylim=c(0,100), ylab="Probability (%)", col="#aa3600", names.arg=pp, cex.names=2.5) 101 | }) 102 | } 103 | 104 | # Run the application 105 | shinyApp(ui = ui, server = server) 106 | -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/architecture.png -------------------------------------------------------------------------------- /model/accident-glm.R: -------------------------------------------------------------------------------- 1 | #' Copyright(c) Microsoft Corporation. 2 | #' Licensed under the MIT license. 
3 | 4 | library(azuremlsdk) 5 | library(optparse) 6 | library(caret) 7 | 8 | options <- list( 9 | make_option(c("-d", "--data_folder")), 10 | make_option(c("-p", "--percent_train")) 11 | ) 12 | 13 | opt_parser <- OptionParser(option_list = options) 14 | opt <- parse_args(opt_parser) 15 | 16 | ## Print data folder to log 17 | paste(opt$data_folder) 18 | 19 | accidents <- readRDS(file.path(opt$data_folder, "accidents.Rd")) 20 | summary(accidents) 21 | 22 | ## Create data partition for use with caret 23 | train.pct <- as.numeric(opt$percent_train) 24 | if(length(train.pct)==0 || (train.pct<0) || (train.pct>1)) train.pct <- 0.75 25 | accident_idx <- createDataPartition(accidents$dead, p = train.pct, list = FALSE) 26 | accident_trn <- accidents[accident_idx, ] 27 | accident_tst <- accidents[-accident_idx, ] 28 | ## utility function to calculate accuracy in test set 29 | calc_acc = function(actual, predicted) { 30 | mean(actual == predicted) 31 | } 32 | 33 | ## Caret GLM model on training set with 5-fold cross validation 34 | accident_glm_mod <- train( 35 | form = dead ~ ., 36 | data = accident_trn, 37 | trControl = trainControl(method = "cv", number = 5), 38 | method = "glm", 39 | family = "binomial" 40 | ) 41 | summary(accident_glm_mod) 42 | 43 | log_metric_to_run("Accuracy", 44 | calc_acc(actual = accident_tst$dead, 45 | predicted = predict(accident_glm_mod, newdata = accident_tst)) 46 | ) 47 | log_metric_to_run("Method","GLM") 48 | log_metric_to_run("TrainPCT",train.pct) 49 | 50 | output_dir = "outputs" 51 | if (!dir.exists(output_dir)){ 52 | dir.create(output_dir) 53 | } 54 | saveRDS(accident_glm_mod, file = "./outputs/model.rds") 55 | 56 | message("Model saved") -------------------------------------------------------------------------------- /model/accident_predict_caret.R: -------------------------------------------------------------------------------- 1 | #' Copyright(c) Microsoft Corporation. 2 | #' Licensed under the MIT license. 
3 | 4 | library(jsonlite) 5 | 6 | init <- function() 7 | { 8 | model_path <- Sys.getenv("AZUREML_MODEL_DIR") 9 | model <- readRDS(file.path(model_path, "model.rds")) 10 | method <- model$method 11 | message(paste(method, "model loaded")) 12 | 13 | function(data) 14 | { 15 | vars <- as.data.frame(fromJSON(data)) 16 | prediction <- predict(model, newdata=vars, type="prob")[,"dead"] 17 | toJSON(prediction) 18 | } 19 | } -------------------------------------------------------------------------------- /model/accidents.Rd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/model/accidents.Rd -------------------------------------------------------------------------------- /model/data.R: -------------------------------------------------------------------------------- 1 | library(azuremlsdk) 2 | library(jsonlite) 3 | 4 | ## Read data from CSV file, clean, and save to .Rd file 5 | 6 | nassCDS <- read.csv("nassCDS.csv", 7 | colClasses=c("factor","numeric","factor", 8 | "factor","factor","numeric", 9 | "factor","numeric","numeric", 10 | "numeric","character","character", 11 | "numeric","numeric","character")) 12 | accidents <- na.omit(nassCDS[,c("dead","dvcat","seatbelt","frontal","sex","ageOFocc","yearVeh","airbag","occRole")]) 13 | accidents$frontal <- factor(accidents$frontal, labels=c("notfrontal","frontal")) 14 | accidents$occRole <- factor(accidents$occRole) 15 | accidents$dvcat <- ordered(accidents$dvcat, 16 | levels=c("1-9km/h","10-24","25-39","40-54","55+")) 17 | 18 | saveRDS(accidents, file="accidents.Rd") 19 | 20 | ## Upload .Rd file to Azure ML storage 21 | 22 | AZURE_CREDENTIALS=Sys.getenv("AZURE_CREDENTIALS") 23 | if(nchar(AZURE_CREDENTIALS)==0) stop("No AZURE_CREDENTIALS") 24 | 25 | creds <- fromJSON(AZURE_CREDENTIALS) 26 | if(length(creds)==0) stop("Malformed AZURE_CREDENTIALS") 27 | 28 | TENANT_ID <- creds$tenantId 29 | SP_ID <- 
creds$clientId 30 | SP_SECRET <- creds$clientSecret 31 | SUBSCRIPTION_ID <- creds$subscriptionId 32 | 33 | workspace.json <- fromJSON("../.cloud/.azure/workspace.json") 34 | WSRESOURCEGROUP <- workspace.json$resource_group 35 | WSNAME <- workspace.json$name 36 | 37 | compute.json <- fromJSON("../.cloud/.azure/compute.json") 38 | CLUSTER_NAME <- compute.json$name 39 | 40 | svc_pr <- service_principal_authentication(tenant_id=TENANT_ID, 41 | service_principal_id=SP_ID, 42 | service_principal_password=SP_SECRET) 43 | 44 | ws <- get_workspace(WSNAME, 45 | SUBSCRIPTION_ID, 46 | WSRESOURCEGROUP, auth=svc_pr) 47 | 48 | cat("Found workspace\n") 49 | 50 | ## Upload data file to datastore 51 | 52 | ds <- get_default_datastore(ws) 53 | target_path <- "accidentdata" 54 | upload_files_to_datastore(ds, 55 | list("./accidents.Rd"), 56 | target_path = target_path, 57 | overwrite = TRUE) 58 | 59 | -------------------------------------------------------------------------------- /model/deploy-model.R: -------------------------------------------------------------------------------- 1 | library(azuremlsdk) 2 | 3 | library(jsonlite) 4 | AZURE_CREDENTIALS=Sys.getenv("AZURE_CREDENTIALS") 5 | if(nchar(AZURE_CREDENTIALS)==0) stop("No AZURE_CREDENTIALS") 6 | 7 | creds <- fromJSON(AZURE_CREDENTIALS) 8 | if(length(creds)==0) stop("Malformed AZURE_CREDENTIALS") 9 | 10 | TENANT_ID <- creds$tenantId 11 | SP_ID <- creds$clientId 12 | SP_SECRET <- creds$clientSecret 13 | SUBSCRIPTION_ID <- creds$subscriptionId 14 | 15 | workspace.json <- fromJSON("../.cloud/.azure/workspace.json") 16 | WSRESOURCEGROUP <- workspace.json$resource_group 17 | WSNAME <- workspace.json$name 18 | 19 | register.json <- fromJSON("../.cloud/.azure/registermodel.json") 20 | MODEL_FILE_NAME <- register.json$model_file_name ## TODO: Use this 21 | WEBSERVICE_NAME <- register.json$webservice_name ## TODO: Use this 22 | 23 | svc_pr <- service_principal_authentication(tenant_id=TENANT_ID, 24 | service_principal_id=SP_ID, 25 | 
service_principal_password=SP_SECRET) 26 | 27 | ws <- get_workspace(WSNAME, 28 | SUBSCRIPTION_ID, 29 | WSRESOURCEGROUP, auth=svc_pr) 30 | 31 | cat("Found workspace\n") 32 | 33 | accident_model <- readRDS("outputs/model.rds") 34 | 35 | model <- register_model(ws, 36 | model_path = "outputs/model.rds", 37 | model_name = "accidents_gha", 38 | description = "Predict probability of auto accident using caret") 39 | 40 | cat("Model registered.\n") 41 | 42 | ## Delete the existing webservice, if it exists 43 | cat("If this is your first deploy, ignore any WebServiceNotFound error that follows.\n") 44 | try({ 45 | old_service <- get_webservice(ws, 'accidents-gha') 46 | delete_webservice(old_service) 47 | }) 48 | 49 | ## Deploy the updated model 50 | 51 | r_env <- r_environment(name = "basic_env") 52 | 53 | inference_config <- inference_config( 54 | entry_script = "accident_predict_caret.R", 55 | source_directory = ".", 56 | environment = r_env) 57 | 58 | aci_config <- aci_webservice_deployment_config(cpu_cores = 1, memory_gb = 0.5) 59 | 60 | aci_service <- deploy_model(ws, 61 | 'accidents-gha', 62 | list(model), 63 | inference_config, 64 | aci_config) 65 | wait_for_deployment(aci_service, show_output = TRUE) 66 | 67 | cat("Model deployed.\n") 68 | 69 | ## Save endpoint for file for use when run on Shiny server 70 | accident.endpoint <- get_webservice(ws, "accidents-gha")$scoring_uri 71 | saveRDS(accident.endpoint, "~/endpoint.Rd") -------------------------------------------------------------------------------- /model/test-endpoint.R: -------------------------------------------------------------------------------- 1 | ## This file isn't used by the model or the app, but you can use it to check if the endpoint is working 2 | 3 | library(httr) 4 | 5 | ## Run this on the shinyserver, where the /home/azureuser/endpoint.Rd file should be pointing to the endpoint URL 6 | ## if not, set it manually by checking the endpoint URL in the Azure ML studio 7 | 8 | accident.endpoint <- 
readRDS("/home/azureuser/endpoint.Rd") # file placed by deploy-model.R 9 | 10 | newdata <- data.frame( # valid values shown below 11 | dvcat="10-24", # "1-9km/h" "10-24" "25-39" "40-54" "55+" 12 | seatbelt="none", # "none" "belted" 13 | frontal="frontal", # "notfrontal" "frontal" 14 | sex="f", # "f" "m" 15 | ageOFocc=16, # age in years, 16-97 16 | yearVeh=2002, # year of vehicle, 1955-2003 17 | airbag="none", # "none" "airbag" 18 | occRole="pass" # "driver" "pass" 19 | ) 20 | 21 | v <- POST(accident.endpoint, body=newdata, encode="json") 22 | cat("Prediction: ") 23 | cat(content(v)[[1]]*100) 24 | cat("\n") 25 | 26 | -------------------------------------------------------------------------------- /model/train-model.R: -------------------------------------------------------------------------------- 1 | library(azuremlsdk) 2 | cat("Completed package load\n") 3 | 4 | library(jsonlite) 5 | AZURE_CREDENTIALS=Sys.getenv("AZURE_CREDENTIALS") 6 | if(nchar(AZURE_CREDENTIALS)==0) stop("No AZURE_CREDENTIALS") 7 | 8 | creds <- fromJSON(AZURE_CREDENTIALS) 9 | if(length(creds)==0) stop("Malformed AZURE_CREDENTIALS") 10 | 11 | TENANT_ID <- creds$tenantId 12 | SP_ID <- creds$clientId 13 | SP_SECRET <- creds$clientSecret 14 | SUBSCRIPTION_ID <- creds$subscriptionId 15 | 16 | workspace.json <- fromJSON("../.cloud/.azure/workspace.json") 17 | WSRESOURCEGROUP <- workspace.json$resource_group 18 | WSNAME <- workspace.json$name 19 | 20 | compute.json <- fromJSON("../.cloud/.azure/compute.json") 21 | CLUSTER_NAME <- compute.json$name 22 | 23 | svc_pr <- service_principal_authentication(tenant_id=TENANT_ID, 24 | service_principal_id=SP_ID, 25 | service_principal_password=SP_SECRET) 26 | 27 | ws <- get_workspace(WSNAME, 28 | SUBSCRIPTION_ID, 29 | WSRESOURCEGROUP, auth=svc_pr) 30 | 31 | cat("Found workspace\n") 32 | 33 | compute_target <- get_compute(ws, cluster_name = CLUSTER_NAME) 34 | if (is.null(compute_target)) { 35 | vm_size <- "STANDARD_D2_V2" 36 | compute_target <- 
create_aml_compute(workspace = ws, 37 | cluster_name = CLUSTER_NAME, 38 | vm_size = vm_size, 39 | min_nodes = 0, 40 | max_nodes = 2) 41 | 42 | wait_for_provisioning_completion(compute_target, show_output = TRUE) 43 | } 44 | 45 | cat("Found cluster\n") 46 | 47 | ds <- get_default_datastore(ws) 48 | target_path <- "accidentdata" 49 | 50 | download_from_datastore(ds, target_path=".", prefix="accidentdata") 51 | 52 | exp <- experiment(ws, "accident") 53 | 54 | cat("Submitting training run\n") 55 | 56 | est <- estimator(source_directory=".", 57 | entry_script = "accident-glm.R", 58 | script_params = list("--data_folder" = ds$path(target_path)), 59 | compute_target = compute_target) 60 | run <- submit_experiment(exp, est) 61 | 62 | wait_for_run_completion(run, show_output = TRUE) 63 | 64 | cat("Training run complete.\n") 65 | 66 | download_files_from_run(run, prefix="outputs/") 67 | -------------------------------------------------------------------------------- /shiny-server.conf: -------------------------------------------------------------------------------- 1 | # Instruct Shiny Server to run applications as the user "shiny" 2 | run_as shiny; 3 | 4 | # Define a server that listens on port 3838 5 | server { 6 | listen 3838; 7 | 8 | # Define the location '/accident' 9 | location /accident { 10 | app_dir /home/azureuser/accident-app; 11 | 12 | # Log all Shiny output to files in this directory 13 | log_dir /var/log/shiny-server; 14 | 15 | # When a user visits the base URL rather than a particular application, 16 | # an index of the applications available in this directory will be shown. 
17 | directory_index on; 18 | } 19 | } -------------------------------------------------------------------------------- /slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/slides.pdf --------------------------------------------------------------------------------