├── .cloud └── .azure │ ├── compute.json │ ├── registermodel.json │ └── workspace.json ├── .github └── workflows │ ├── azureml.yml │ ├── data.yml │ └── shiny.yml ├── .gitignore ├── LICENSE ├── README.md ├── Setup-DSVM.md ├── accident-app.png ├── accident-app └── app.R ├── architecture.png ├── model ├── accident-glm.R ├── accident_predict_caret.R ├── accidents.Rd ├── data.R ├── deploy-model.R ├── nassCDS.csv ├── test-endpoint.R └── train-model.R ├── shiny-server.conf └── slides.pdf /.cloud/.azure/compute.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rcluster", 3 | "compute_type": "amlcluster", 4 | "min_nodes": 0, 5 | "max_nodes": 4, 6 | "idle_seconds_before_scaledown": 600 7 | } 8 | -------------------------------------------------------------------------------- /.cloud/.azure/registermodel.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_file_name": "model.Rd", 3 | "webservice_name": "accident-gha" 4 | } -------------------------------------------------------------------------------- /.cloud/.azure/workspace.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tailwind", 3 | "resource_group": "rstudioconf", 4 | "create_workspace": true 5 | } 6 | -------------------------------------------------------------------------------- /.github/workflows/azureml.yml: -------------------------------------------------------------------------------- 1 | # Train and Deploy model for Shiny app 2 | 3 | name: Train and Deploy Model 4 | 5 | # Runs on any push, except mods to the Shiny app code 6 | on: 7 | push: 8 | branches: [ main ] 9 | paths: 10 | - 'model/**' 11 | - '.cloud/.azure/**' 12 | pull_request: 13 | branches: [ main ] 14 | paths: 15 | - 'model/**' 16 | - '.cloud/.azure/**' 17 | 18 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 19 | jobs: 20 | # Connect to Azure ML 
workspace, create if needed 21 | train: 22 | runs-on: ubuntu-latest 23 | steps: 24 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 25 | - name: Check Out Repository 26 | id: checkout_repository 27 | uses: actions/checkout@v2 28 | 29 | # Connect or Create the Azure Machine Learning Workspace 30 | - name: Connect/Create Azure Machine Learning Workspace 31 | id: aml_workspace 32 | uses: Azure/aml-workspace@v1 33 | with: 34 | azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} 35 | 36 | # Connect or Create a Compute Target in Azure Machine Learning 37 | - name: Connect/Create Azure Machine Learning Compute Target 38 | id: aml_compute_training 39 | uses: Azure/aml-compute@v1 40 | with: 41 | azure_credentials: ${{ secrets.AZURE_CREDENTIALS }} 42 | 43 | # Update code on server. This also updates the Shiny app in the app/ folder 44 | - name: Pull source on VM 45 | uses: JimCronqvist/action-ssh@master 46 | env: 47 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 48 | with: 49 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 50 | privateKey: ${{ secrets.SHINYKEY }} 51 | command: | 52 | cd mlops-r-gha 53 | git pull 54 | 55 | # Connect to the Shiny VM to train the model 56 | - name: Train model 57 | uses: JimCronqvist/action-ssh@master 58 | env: 59 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 60 | with: 61 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 62 | privateKey: ${{ secrets.SHINYKEY }} 63 | command: | 64 | cd mlops-r-gha/model 65 | export AZURE_CREDENTIALS 66 | Rscript train-model.R 67 | 68 | # Connect to the Shiny VM to register and deploy the model 69 | - name: Deploy model 70 | uses: JimCronqvist/action-ssh@master 71 | env: 72 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 73 | with: 74 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 75 | privateKey: ${{ secrets.SHINYKEY }} 76 | command: | 77 | cd mlops-r-gha/model 78 | export AZURE_CREDENTIALS 79 | Rscript 
deploy-model.R 80 | -------------------------------------------------------------------------------- /.github/workflows/data.yml: -------------------------------------------------------------------------------- 1 | # Upload data -- only needs to be rerun once 2 | 3 | name: Upload data 4 | 5 | # Runs if data file modified 6 | on: 7 | push: 8 | branches: [ main ] 9 | paths: 10 | - 'model/nassCDS.csv' 11 | - 'model/data.R' 12 | pull_request: 13 | branches: [ main ] 14 | paths: 15 | - 'model/nassCDS.csv' 16 | - 'model/data.R' 17 | 18 | jobs: 19 | data: 20 | runs-on: ubuntu-latest 21 | steps: 22 | # Upload data file 23 | - name: Upload data 24 | uses: JimCronqvist/action-ssh@master 25 | env: 26 | AZURE_CREDENTIALS: '${{ secrets.AZURE_CREDENTIALS}}' 27 | with: 28 | hosts: ${{ secrets.SHINYUSERNAME }}@${{ secrets.SHINYHOST }} 29 | privateKey: ${{ secrets.SHINYKEY }} 30 | command: | 31 | cd mlops-r-gha/model 32 | export AZURE_CREDENTIALS 33 | Rscript data.R 34 | -------------------------------------------------------------------------------- /.github/workflows/shiny.yml: -------------------------------------------------------------------------------- 1 | ## Deploy contents of app folder to Shiny Server VM 2 | 3 | name: Quick Deploy to Shiny 4 | 5 | # runs only when there have been changes in the Shiny app 6 | 7 | on: 8 | push: 9 | paths: 10 | - 'accident-app/**' 11 | branches: [ main ] 12 | pull_request: 13 | paths: 14 | - 'accident-app/**' 15 | branches: [ main ] 16 | 17 | jobs: 18 | 19 | quickdeploy: 20 | name: Quick Deploy to Shiny Server 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | 25 | ### If you need to open the SSH port temporarily on the Shiny Server, uncomment this section 26 | 27 | # - name: dig +short myip.opendns.com @resolver1.opendns.com 28 | # run: dig +short myip.opendns.com @resolver1.opendns.com 29 | 30 | # - name: Add NSG Rule 31 | # uses: venura9/manage-nsg@master 32 | # id: rule 33 | # with: 34 | # azure-credentials: ${{ secrets.AZURE_CREDENTIALS 
}} 35 | # rule-nsg-resource-group-name: ${{ secrets.SHINY_RG }} 36 | # rule-nsg-name: ${{ secrets.SHINY_NSG }} 37 | # rule-inbound-port: 22 38 | 39 | # - name: Print Created NSG Rule Name 40 | # run: echo "Rule Name ${{ steps.rule.outputs.rule_name }}" 41 | 42 | - uses: actions/checkout@master 43 | 44 | - name: copy files via ssh key 45 | uses: appleboy/scp-action@master 46 | with: 47 | host: ${{ secrets.SHINYHOST }} 48 | username: ${{ secrets.SHINYUSERNAME }} 49 | key: ${{ secrets.SHINYKEY }} 50 | source: "accident-app/app.R" 51 | target: "~" 52 | 53 | # Re-close temporarily opened SSH port 54 | # - name: Remove NSG Rule 55 | # uses: venura9/manage-nsg@master 56 | # with: 57 | # azure-credentials: ${{ secrets.AZURE_CREDENTIALS }} 58 | # rule-id-for-removal: ${{ steps.rule.outputs.rule_name }} 59 | # rule-nsg-resource-group-name: ${{ secrets.SHINY_RG }} 60 | # rule-nsg-name: ${{ secrets.SHINY_NSG }} 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Azure ML 2 | config.json 3 | model/outputs 4 | outputs/ 5 | _generated_score.py 6 | accidentdata/ 7 | secrets.txt 8 | 9 | # History files 10 | .Rhistory 11 | .Rapp.history 12 | 13 | # Session Data files 14 | .RData 15 | 16 | # User-specific files 17 | .Ruserdata 18 | 19 | # Example code in package build process 20 | *-Ex.R 21 | 22 | # Output files from R CMD build 23 | /*.tar.gz 24 | 25 | # Output files from R CMD check 26 | /*.Rcheck/ 27 | 28 | # RStudio files 29 | .Rproj.user/ 30 | 31 | # produced vignettes 32 | vignettes/*.html 33 | vignettes/*.pdf 34 | 35 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 36 | .httr-oauth 37 | 38 | # knitr and R markdown default cache directories 39 | *_cache/ 40 | /cache/ 41 | 42 | # Temporary files created by R markdown 43 | *.utf8.md 44 | *.knit.md 45 | 46 | # R Environment Variables 47 | .Renviron 48 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 David Smith 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLOPS with R: An end-to-end process for building machine learning applications 2 | 3 | This repository contains resources for the talk "MLOPS with R: An end-to-end process for building machine learning applications". 4 | 5 | In addition to the slides (see below), this repository contains the complete set of code and GitHub Actions to deploy a Shiny application for calculating the probability of a fatal road accident. 
See below for instructions on how to deploy this application yourself. 6 | 7 | ![Screenshot of Shiny app](accident-app.png) 8 | 9 | ## Talk Abstract 10 | 11 | As predictive models and machine learning become key components of production applications in every industry, an end-to-end Machine Learning Operations (MLOPS) process becomes critical for reliable and efficient deployment of applications that depend on R-based models. In this talk, I’ll outline the basics of the DevOps process and focus on the areas where MLOPS diverges. The talk will show the complete process of building and deploying an application driven by a machine learning model implemented with R. We will show the process of developing models, triggering model training on code changes, and triggering the CI/CD process for an application when a new version of a model is registered. We will use the Azure Machine Learning service and the “azuremlsdk” package to orchestrate the model training and management process, but the principles will apply to MLOPS processes generally, especially for applications that involve large amounts of data or require significant computing resources. 12 | 13 | ## Presentations (Slides) 14 | 15 | Aug 2020: New York R Conference (online). 16 | MLOPS with R: An end-to-end process for building machine learning applications: [slides (PDF)](slides.pdf) | [Video Recording](https://youtu.be/hCCZZyHz-ko) 17 | 18 | ## Resources 19 | 20 | Links and other useful resources from the talk. 21 | 22 | Azure Machine Learning service: 23 | * [Documentation](https://docs.microsoft.com/azure/machine-learning/overview-what-is-azure-ml?WT.mc_id=aiml-2093-davidsmi) 24 | * Free azure credits: [register here](https://aka.ms/AML-NYR). (Credit card required, but won't be charged until you remove limits to allow it.) 
25 | 26 | azuremlsdk R package: 27 | 28 | * [CRAN](https://cran.r-project.org/package=azuremlsdk) 29 | * [GitHub Repository](https://github.com/azure/azureml-sdk-for-r) 30 | * [Documentation](https://azure.github.io/azureml-sdk-for-r/reference/index.html). 31 | * [Tutorial: Create a logistic regression model in R with Azure Machine Learning](https://docs.microsoft.com/azure/machine-learning/tutorial-1st-r-experiment?WT.mc_id=aiml-2093-davidsmi) 32 | 33 | GitHub Actions: 34 | * [Documentation](https://docs.github.com/en/actions) 35 | * [An Unintentionally Comprehensive Introduction to GitHub Actions CI](https://dev.to/bnb/an-unintentionally-comprehensive-introduction-to-github-actions-ci-blm) 36 | * [ML Ops with GitHub Actions and Azure Machine Learning](https://github.com/machine-learning-apps/ml-template-azure) 37 | * [GitHub Actions for the R Language](https://github.com/r-lib/actions) 38 | 39 | Visual Studio Code: 40 | 41 | * [Remote Development](https://code.visualstudio.com/docs/remote/remote-overview?WT.mc_id=aiml-2093-davidsmi) 42 | * [Remote Containers](https://code.visualstudio.com/docs/remote/create-dev-container?WT.mc_id=aiml-2093-davidsmi) 43 | * [R extension](https://marketplace.visualstudio.com/items?itemName=Ikuyadeu.r&WT.mc_id=opensource-0000-davidsmi) 44 | 45 | Data file `nassCDS.csv`: 46 | 47 | * The app uses data from the US [National Highway Traffic Safety Administration](https://cdan.nhtsa.gov/tsftables/tsfar.htm) 48 | (with thanks to [Mary C. Meyer and Tremika Finney](https://www.stat.colostate.edu/~meyer/airbags.htm)). 49 | This dataset includes data from over 25,000 car crashes in the US, with variables you can use to predict the likelihood of a fatality. 50 | 51 | 52 | ## Related Presentations 53 | 54 | [Machine Learning Operations with R](https://github.com/revodavid/mlops-r) (January, 2020) 55 | 56 | ## Application Architecture 57 | 58 | The application runs as a Shiny app, running on an instance of the Azure Data Science VM. 
Azure ML service is used to train and deploy the scoring endpoint from R scripts, and GitHub Actions orchestrates the app deployment. 59 | 60 | ![Architecture](architecture.png) 61 | 62 | ## Instructions for deploying the "Accident" app 63 | 64 | 1. Fork this repository. 65 | 66 | 2. Follow the directions in [ML Ops with GitHub Actions and Azure Machine Learning](https://github.com/machine-learning-apps/ml-template-azure) to: 67 | 68 | * Create a resource group in your Azure subscription. (If you don't have one, create an [Azure Free Subscription](https://azure.microsoft.com/free/?WT.mc_id=aiml-2093-davidsmi) and get $200 in free Azure credits.) 69 | * Create a service principal 70 | * Add secrets to your forked repository 71 | * Configure the `.cloud\.azure\workspace.json` file. You can use an existing Azure ML Workspace, or if none by the specified name exists it will be created for you. 72 | 73 | 3. Deploy an Azure Data Science Virtual Machine and configure it as the Shiny Server by [following these instructions](Setup-DSVM.md). 74 | 75 | -------------------------------------------------------------------------------- /Setup-DSVM.md: -------------------------------------------------------------------------------- 1 | # Set up Azure Data Science VM as Shiny Server 2 | 3 | In this architecture, we deploy an Azure Data Science VM (DSVM) to: 4 | 5 | * Host Shiny Server 6 | * Run R scripts using the azuremlsdk package 7 | 8 | This document describes the process of setting up the DSVM, which is currently a series of manual steps. 9 | I plan to automate this process in the future. 10 | 11 | NOTE: Using the DSVM is not a requirement, it's just convenient because many of the tools we need (git, R, Python etc.) come pre-installed. You can use any VM or even an on-premises server, as long as it supports shiny server. Here are instructions for [configuring a basic VM on Azure](https://canovasjm.netlify.app/2020/01/08/deploy-you-own-shiny-server-on-azure/). 
12 | 13 | ## Deployment Process 14 | 15 | 1. Fork this `mlops-r-gha` repository to your GitHub account 16 | 17 | 1. (OPTIONAL) If you wish to use an existing Azure ML workspace, edit `.cloud/.azure/workspace.json` accordingly, otherwise a new workspace will be created for you. 18 | 19 | 1. Add the AZURE_CREDENTIALS secret to the repository, as described step 3 of [this file](https://github.com/machine-learning-apps/ml-template-azure/blob/master/README.md). 20 | 21 | 1. Deploy an instance of the Azure Data Science Virtual Machine for Ubuntu. Call it "shinyserver". Use "azureuser" for the default account, and [enable SSH access](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/mac-create-ssh-keys?WT.mc_id=aiml-2093-davidsmi). Save the private SSH key generated as `shinyserver.pem`. Once the VM is deployed, note the server IP address: you'll need it later (we will refer to it as SHINYSERVERIP below). [Detailed Instructions](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro?WT.mc_id=aiml-2093-davidsmi) 22 | 23 | 1. Open port 3838 on shinyserver by adding a rule to the network security group that was created when you set up the DSVM. Also verify that Port 22 (SSH) is not blocked by the rules. [Detailed Instructions](https://docs.microsoft.com/azure/virtual-network/manage-network-security-group?WT.mc_id=aiml-2093-davidsmi). 24 | 25 | 1. Add secrets to your forked repo in GitHub under Setting >> secrets with the private SSH key needed to access shinyserver, the SHINYSERVERIP address, user name, and port. The new secret name should be the ALL CAPS name and the value of the secret should be as described: 26 | - For SHINYKEY, paste in the entire contents of your SSH private key file for the shinyserver VM deployed in step 4. 27 | - For SHINYHOST, paste in the IP address of the shinyserver VM (in the format AAA.AAA.AAA.AAA). 
28 | - For SHINYUSERNAME, set to `azureuser` 29 | - For SHINYPORT, set to `3838` 30 | 31 | 1. SSH to shinyserver using the private key: `ssh -i shinyserver.pem azureuser@SHINYSERVERIP` 32 | 33 | 1. (OPTIONAL) Suppress login banner. This makes the Actions logs easier to read. 34 | ```bash 35 | touch .hushlogin 36 | ``` 37 | 38 | 9. Install shiny-server: [Download for Ubuntu here](https://rstudio.com/products/shiny/download-server/ubuntu/). [Start the Shiny server](https://docs.rstudio.com/shiny-server/#stopping-and-starting). Visit the default Shiny homepage at http://SHINYSERVERIP:3838/ 39 | 40 | 41 | 1. Clone the mlops-r-gha repository on shinyserver 42 | ```bash 43 | git clone https://github.com/revodavid/mlops-r-gha 44 | ``` 45 | 46 | 11. Replace /etc/shiny-server/shiny-server.conf with the file in this repository. This configures Shiny to deliver a single application from the "mlops-r-gha/accident-app" folder, and we can update files here via the configured SSH. Restart the Shiny server. 47 | ``` 48 | sudo cp shiny-server.conf /etc/shiny-server/shiny-server.conf 49 | sudo systemctl restart shiny-server 50 | ``` 51 | 52 | 12. Launch R and install the `azuremlsdk` package from GitHub (not from CRAN) as described in the [`azuremlsdk` repository](https://github.com/Azure/azureml-sdk-for-r). Don't forget the `azuremlsdk::install_azureml()` step. You do not need to install Conda as it's provided by the DSVM. It's ok to answer "yes" to "Would you like to use a personal library instead?". 53 | 54 | 1. Trigger the "Train and Deploy Model" GitHub Action in your repository. You can do this by touching a file in the `model` folder, or by browsing the Actions tab and using the "Re-Run Jobs" feature. 55 | 56 | 14. 
Wait for Actions to complete successfully, and then try out your Shiny app at http://SHINYSERVERIP:3838/accident/ 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /accident-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/accident-app.png -------------------------------------------------------------------------------- /accident-app/app.R: -------------------------------------------------------------------------------- 1 | # 2 | # This is a Shiny web application. You can run the application by clicking 3 | # the 'Run App' button above. 4 | # 5 | # Find out more about building applications with Shiny here: 6 | # 7 | # http://shiny.rstudio.com/ 8 | # 9 | 10 | library(shiny) 11 | library(httr) 12 | 13 | ## Replace this with the endpoint for your published model. 
14 | ## You can get this from the "Endpoints" section in ml.azure.com 15 | ## or via the R SDK with get_webservice(ws, "accident-pred")$scoring_uri 16 | ## If you don't specify a value here, the global "accident.endpoint" object will be used 17 | 18 | accident.endpoint <- readRDS("/home/azureuser/endpoint.Rd") # file placed by deploy-model.R 19 | 20 | # Define UI for application that draws a histogram 21 | ui <- fluidPage( 22 | 23 | # Application title 24 | titlePanel("Accident Fatality Probability Estimator"), 25 | 26 | # Sidebar with a slider input for number of bins 27 | sidebarLayout( 28 | sidebarPanel( 29 | sliderInput("age", 30 | "Occupant Age:", 31 | min = 16, 32 | max = 95, 33 | value = 16), 34 | selectInput("sex", 35 | "Occupant gender:", 36 | c("f","m")), 37 | selectInput("occRole", 38 | "Occupant role:", 39 | c("driver","pass")), 40 | sliderInput("yearVeh", 41 | "Vehicle Year:", 42 | min = 1955, 43 | max = 2005, 44 | value = 2002), 45 | selectInput("seatbelt", 46 | "Seatbelt:", 47 | c("none","belted")), 48 | selectInput("airbag", 49 | "Airbag:", 50 | c("none","airbag")), 51 | selectInput("dvcat", 52 | "Impact speed:", 53 | c("1-9km/h","10-24","25-39","40-54","55+")), 54 | selectInput("frontal", 55 | "Collision type:", 56 | c("notfrontal","frontal")) 57 | ), 58 | 59 | # Show a plot of the generated distribution 60 | mainPanel( 61 | plotOutput("barchart") 62 | ) 63 | ) 64 | ) 65 | 66 | # Define server logic required to draw a histogram 67 | server <- function(input, output) { 68 | 69 | newdata <- data.frame( # valid values shown below 70 | dvcat="55+", # "1-9km/h" "10-24" "25-39" "40-54" "55+" 71 | seatbelt="none", # "none" "belted" 72 | frontal="frontal", # "notfrontal" "frontal" 73 | sex="f", # "f" "m" 74 | ageOFocc=16, # age in years, 16-97 75 | yearVeh=2002, # year of vehicle, 1955-2003 76 | airbag="none", # "none" "airbag" 77 | occRole="pass" # "driver" "pass" 78 | ) 79 | 80 | pred <- reactive({ 81 | 82 | newdata$yearVeh <- input$yearVeh 83 | 
newdata$ageOFocc <- input$age 84 | newdata$dvcat <- input$dvcat 85 | newdata$seatbelt <- input$seatbelt 86 | newdata$frontal <- input$frontal 87 | newdata$sex <- input$sex 88 | newdata$airbag <- input$airbag 89 | newdata$occRole <- input$occRole 90 | 91 | v <- POST(accident.endpoint, body=newdata, encode="json") 92 | content(v)[[1]]*100 93 | }) 94 | 95 | output$prediction <- renderText({pred()}) 96 | 97 | output$barchart <- renderPlot({ 98 | p <- pred() 99 | pp <- formatC(p, format="f", digits=2, width=5) 100 | barplot(p, ylim=c(0,100), ylab="Probability (%)", col="#aa3600", names.arg=pp, cex.names=2.5) 101 | }) 102 | } 103 | 104 | # Run the application 105 | shinyApp(ui = ui, server = server) 106 | -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/architecture.png -------------------------------------------------------------------------------- /model/accident-glm.R: -------------------------------------------------------------------------------- 1 | #' Copyright(c) Microsoft Corporation. 2 | #' Licensed under the MIT license. 
3 | 4 | library(azuremlsdk) 5 | library(optparse) 6 | library(caret) 7 | 8 | options <- list( 9 | make_option(c("-d", "--data_folder")), 10 | make_option(c("-p", "--percent_train")) 11 | ) 12 | 13 | opt_parser <- OptionParser(option_list = options) 14 | opt <- parse_args(opt_parser) 15 | 16 | ## Print data folder to log 17 | paste(opt$data_folder) 18 | 19 | accidents <- readRDS(file.path(opt$data_folder, "accidents.Rd")) 20 | summary(accidents) 21 | 22 | ## Create data partition for use with caret 23 | train.pct <- as.numeric(opt$percent_train) 24 | if(length(train.pct)==0 || (train.pct<0) || (train.pct>1)) train.pct <- 0.75 25 | accident_idx <- createDataPartition(accidents$dead, p = train.pct, list = FALSE) 26 | accident_trn <- accidents[accident_idx, ] 27 | accident_tst <- accidents[-accident_idx, ] 28 | ## utility function to calculate accuracy in test set 29 | calc_acc = function(actual, predicted) { 30 | mean(actual == predicted) 31 | } 32 | 33 | ## Caret GLM model on training set with 5-fold cross validation 34 | accident_glm_mod <- train( 35 | form = dead ~ ., 36 | data = accident_trn, 37 | trControl = trainControl(method = "cv", number = 5), 38 | method = "glm", 39 | family = "binomial" 40 | ) 41 | summary(accident_glm_mod) 42 | 43 | log_metric_to_run("Accuracy", 44 | calc_acc(actual = accident_tst$dead, 45 | predicted = predict(accident_glm_mod, newdata = accident_tst)) 46 | ) 47 | log_metric_to_run("Method","GLM") 48 | log_metric_to_run("TrainPCT",train.pct) 49 | 50 | output_dir = "outputs" 51 | if (!dir.exists(output_dir)){ 52 | dir.create(output_dir) 53 | } 54 | saveRDS(accident_glm_mod, file = "./outputs/model.rds") 55 | 56 | message("Model saved") -------------------------------------------------------------------------------- /model/accident_predict_caret.R: -------------------------------------------------------------------------------- 1 | #' Copyright(c) Microsoft Corporation. 2 | #' Licensed under the MIT license. 
3 | 4 | library(jsonlite) 5 | 6 | init <- function() 7 | { 8 | model_path <- Sys.getenv("AZUREML_MODEL_DIR") 9 | model <- readRDS(file.path(model_path, "model.rds")) 10 | method <- model$method 11 | message(paste(method, "model loaded")) 12 | 13 | function(data) 14 | { 15 | vars <- as.data.frame(fromJSON(data)) 16 | prediction <- predict(model, newdata=vars, type="prob")[,"dead"] 17 | toJSON(prediction) 18 | } 19 | } -------------------------------------------------------------------------------- /model/accidents.Rd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/model/accidents.Rd -------------------------------------------------------------------------------- /model/data.R: -------------------------------------------------------------------------------- 1 | library(azuremlsdk) 2 | library(jsonlite) 3 | 4 | ## Read data from CSV file, clean, and save to .Rd file 5 | 6 | nassCDS <- read.csv("nassCDS.csv", 7 | colClasses=c("factor","numeric","factor", 8 | "factor","factor","numeric", 9 | "factor","numeric","numeric", 10 | "numeric","character","character", 11 | "numeric","numeric","character")) 12 | accidents <- na.omit(nassCDS[,c("dead","dvcat","seatbelt","frontal","sex","ageOFocc","yearVeh","airbag","occRole")]) 13 | accidents$frontal <- factor(accidents$frontal, labels=c("notfrontal","frontal")) 14 | accidents$occRole <- factor(accidents$occRole) 15 | accidents$dvcat <- ordered(accidents$dvcat, 16 | levels=c("1-9km/h","10-24","25-39","40-54","55+")) 17 | 18 | saveRDS(accidents, file="accidents.Rd") 19 | 20 | ## Upload .Rd file to Azure ML storage 21 | 22 | AZURE_CREDENTIALS=Sys.getenv("AZURE_CREDENTIALS") 23 | if(nchar(AZURE_CREDENTIALS)==0) stop("No AZURE_CREDENTIALS") 24 | 25 | creds <- fromJSON(AZURE_CREDENTIALS) 26 | if(length(creds)==0) stop("Malformed AZURE_CREDENTIALS") 27 | 28 | TENANT_ID <- creds$tenantId 29 | SP_ID <- 
creds$clientId 30 | SP_SECRET <- creds$clientSecret 31 | SUBSCRIPTION_ID <- creds$subscriptionId 32 | 33 | workspace.json <- fromJSON("../.cloud/.azure/workspace.json") 34 | WSRESOURCEGROUP <- workspace.json$resource_group 35 | WSNAME <- workspace.json$name 36 | 37 | compute.json <- fromJSON("../.cloud/.azure/compute.json") 38 | CLUSTER_NAME <- compute.json$name 39 | 40 | svc_pr <- service_principal_authentication(tenant_id=TENANT_ID, 41 | service_principal_id=SP_ID, 42 | service_principal_password=SP_SECRET) 43 | 44 | ws <- get_workspace(WSNAME, 45 | SUBSCRIPTION_ID, 46 | WSRESOURCEGROUP, auth=svc_pr) 47 | 48 | cat("Found workspace\n") 49 | 50 | ## Upload data file to datastore 51 | 52 | ds <- get_default_datastore(ws) 53 | target_path <- "accidentdata" 54 | upload_files_to_datastore(ds, 55 | list("./accidents.Rd"), 56 | target_path = target_path, 57 | overwrite = TRUE) 58 | 59 | -------------------------------------------------------------------------------- /model/deploy-model.R: -------------------------------------------------------------------------------- 1 | library(azuremlsdk) 2 | 3 | library(jsonlite) 4 | AZURE_CREDENTIALS=Sys.getenv("AZURE_CREDENTIALS") 5 | if(nchar(AZURE_CREDENTIALS)==0) stop("No AZURE_CREDENTIALS") 6 | 7 | creds <- fromJSON(AZURE_CREDENTIALS) 8 | if(length(creds)==0) stop("Malformed AZURE_CREDENTIALS") 9 | 10 | TENANT_ID <- creds$tenantId 11 | SP_ID <- creds$clientId 12 | SP_SECRET <- creds$clientSecret 13 | SUBSCRIPTION_ID <- creds$subscriptionId 14 | 15 | workspace.json <- fromJSON("../.cloud/.azure/workspace.json") 16 | WSRESOURCEGROUP <- workspace.json$resource_group 17 | WSNAME <- workspace.json$name 18 | 19 | register.json <- fromJSON("../.cloud/.azure/registermodel.json") 20 | MODEL_FILE_NAME <- register.json$model_file_name ## TODO: Use this 21 | WEBSERVICE_NAME <- register.json$webservice_name ## TODO: Use this 22 | 23 | svc_pr <- service_principal_authentication(tenant_id=TENANT_ID, 24 | service_principal_id=SP_ID, 25 | 
service_principal_password=SP_SECRET) 26 | 27 | ws <- get_workspace(WSNAME, 28 | SUBSCRIPTION_ID, 29 | WSRESOURCEGROUP, auth=svc_pr) 30 | 31 | cat("Found workspace\n") 32 | 33 | accident_model <- readRDS("outputs/model.rds") 34 | 35 | model <- register_model(ws, 36 | model_path = "outputs/model.rds", 37 | model_name = "accidents_gha", 38 | description = "Predict probability of auto accident using caret") 39 | 40 | cat("Model registered.\n") 41 | 42 | ## Delete the existing webservice, if it exists 43 | cat("If this is your first deploy, ignore any WebServiceNotFound error that follows.\n") 44 | try({ 45 | old_service <- get_webservice(ws, 'accidents-gha') 46 | delete_webservice(old_service) 47 | }) 48 | 49 | ## Deploy the updated model 50 | 51 | r_env <- r_environment(name = "basic_env") 52 | 53 | inference_config <- inference_config( 54 | entry_script = "accident_predict_caret.R", 55 | source_directory = ".", 56 | environment = r_env) 57 | 58 | aci_config <- aci_webservice_deployment_config(cpu_cores = 1, memory_gb = 0.5) 59 | 60 | aci_service <- deploy_model(ws, 61 | 'accidents-gha', 62 | list(model), 63 | inference_config, 64 | aci_config) 65 | wait_for_deployment(aci_service, show_output = TRUE) 66 | 67 | cat("Model deployed.\n") 68 | 69 | ## Save endpoint for file for use when run on Shiny server 70 | accident.endpoint <- get_webservice(ws, "accidents-gha")$scoring_uri 71 | saveRDS(accident.endpoint, "~/endpoint.Rd") -------------------------------------------------------------------------------- /model/test-endpoint.R: -------------------------------------------------------------------------------- 1 | ## This file isn't used by the model or the app, but you can use it to check if the endpoint is working 2 | 3 | library(httr) 4 | 5 | ## Run this on the shinyserver, where the /home/azureuser/endpoint.Rd file should be pointing to the endpoint URL 6 | ## if not, set it manually by checking the endpoint URL in the Azure ML studio 7 | 8 | accident.endpoint <- 
readRDS("/home/azureuser/endpoint.Rd") # file placed by deploy-model.R 9 | 10 | newdata <- data.frame( # valid values shown below 11 | dvcat="10-24", # "1-9km/h" "10-24" "25-39" "40-54" "55+" 12 | seatbelt="none", # "none" "belted" 13 | frontal="frontal", # "notfrontal" "frontal" 14 | sex="f", # "f" "m" 15 | ageOFocc=16, # age in years, 16-97 16 | yearVeh=2002, # year of vehicle, 1955-2003 17 | airbag="none", # "none" "airbag" 18 | occRole="pass" # "driver" "pass" 19 | ) 20 | 21 | v <- POST(accident.endpoint, body=newdata, encode="json") 22 | cat("Prediction: ") 23 | cat(content(v)[[1]]*100) 24 | cat("\n") 25 | 26 | -------------------------------------------------------------------------------- /model/train-model.R: -------------------------------------------------------------------------------- 1 | library(azuremlsdk) 2 | cat("Completed package load\n") 3 | 4 | library(jsonlite) 5 | AZURE_CREDENTIALS=Sys.getenv("AZURE_CREDENTIALS") 6 | if(nchar(AZURE_CREDENTIALS)==0) stop("No AZURE_CREDENTIALS") 7 | 8 | creds <- fromJSON(AZURE_CREDENTIALS) 9 | if(length(creds)==0) stop("Malformed AZURE_CREDENTIALS") 10 | 11 | TENANT_ID <- creds$tenantId 12 | SP_ID <- creds$clientId 13 | SP_SECRET <- creds$clientSecret 14 | SUBSCRIPTION_ID <- creds$subscriptionId 15 | 16 | workspace.json <- fromJSON("../.cloud/.azure/workspace.json") 17 | WSRESOURCEGROUP <- workspace.json$resource_group 18 | WSNAME <- workspace.json$name 19 | 20 | compute.json <- fromJSON("../.cloud/.azure/compute.json") 21 | CLUSTER_NAME <- compute.json$name 22 | 23 | svc_pr <- service_principal_authentication(tenant_id=TENANT_ID, 24 | service_principal_id=SP_ID, 25 | service_principal_password=SP_SECRET) 26 | 27 | ws <- get_workspace(WSNAME, 28 | SUBSCRIPTION_ID, 29 | WSRESOURCEGROUP, auth=svc_pr) 30 | 31 | cat("Found workspace\n") 32 | 33 | compute_target <- get_compute(ws, cluster_name = CLUSTER_NAME) 34 | if (is.null(compute_target)) { 35 | vm_size <- "STANDARD_D2_V2" 36 | compute_target <- 
create_aml_compute(workspace = ws, 37 | cluster_name = CLUSTER_NAME, 38 | vm_size = vm_size, 39 | min_nodes = 0, 40 | max_nodes = 2) 41 | 42 | wait_for_provisioning_completion(compute_target, show_output = TRUE) 43 | } 44 | 45 | cat("Found cluster\n") 46 | 47 | ds <- get_default_datastore(ws) 48 | target_path <- "accidentdata" 49 | 50 | download_from_datastore(ds, target_path=".", prefix="accidentdata") 51 | 52 | exp <- experiment(ws, "accident") 53 | 54 | cat("Submitting training run\n") 55 | 56 | est <- estimator(source_directory=".", 57 | entry_script = "accident-glm.R", 58 | script_params = list("--data_folder" = ds$path(target_path)), 59 | compute_target = compute_target) 60 | run <- submit_experiment(exp, est) 61 | 62 | wait_for_run_completion(run, show_output = TRUE) 63 | 64 | cat("Training run complete.\n") 65 | 66 | download_files_from_run(run, prefix="outputs/") 67 | -------------------------------------------------------------------------------- /shiny-server.conf: -------------------------------------------------------------------------------- 1 | # Instruct Shiny Server to run applications as the user "shiny" 2 | run_as shiny; 3 | 4 | # Define a server that listens on port 3838 5 | server { 6 | listen 3838; 7 | 8 | # Define the location '/accident' 9 | location /accident { 10 | app_dir /home/azureuser/accident-app; 11 | 12 | # Log all Shiny output to files in this directory 13 | log_dir /var/log/shiny-server; 14 | 15 | # When a user visits the base URL rather than a particular application, 16 | # an index of the applications available in this directory will be shown. 
17 | directory_index on; 18 | } 19 | } -------------------------------------------------------------------------------- /slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revodavid/mlops-r-gha/0fff6b6c29a5b7b4d4df96ff552aff62cc9d9bfa/slides.pdf --------------------------------------------------------------------------------