├── .amlignore ├── .azureml └── config.json ├── .gitignore ├── Code ├── Data_Acquisition_and_Understanding │ ├── Readme.md │ ├── define_dataset.py │ └── ingest_data.py ├── Modeling │ ├── Readme.md │ ├── hypertrain.py │ ├── hypertrain_submit.py │ ├── pipeline_train.py │ ├── score_realtime.py │ ├── train.py │ ├── train_datasets.py │ ├── train_submit.py │ └── train_submit_datasets.py ├── Operationalization │ ├── Readme.md │ ├── dashboards │ │ └── Readme.md │ └── monitoring │ │ ├── Readme.md │ │ └── monitoring_pipeline.py └── Readme.md ├── Docs ├── lab002 │ └── Readme.md └── lab05 │ └── Readme.md ├── LICENSE-CODE.TXT ├── LICENSE.TXT ├── NOTICE.TXT ├── README.md ├── SECURITY.md ├── Sample_Data ├── For_Modeling │ └── modelling.md ├── Processed │ └── processed.md ├── README.md └── Raw │ └── rawData.md ├── conda_dependencies.yml ├── infrastructure ├── README.md ├── arm-templates │ ├── appinsights │ │ ├── parameters.dev.json │ │ ├── parameters.test.json │ │ └── template.json │ ├── containerregistry │ │ ├── parameters.dev.json │ │ ├── parameters.test.json │ │ └── template.json │ ├── keyvault │ │ ├── parameters.dev.json │ │ ├── parameters.test.json │ │ └── template.json │ ├── mlcompute │ │ ├── parameters-vnet.dev.json │ │ ├── parameters-vnet.test.json │ │ ├── parameters.dev.json │ │ ├── parameters.test.json │ │ ├── template-vnet.json │ │ └── template.json │ ├── mlworkspace │ │ ├── parameters.dev.json │ │ ├── parameters.test.json │ │ └── template.json │ └── storage │ │ ├── parameters.dev.json │ │ ├── parameters.test.json │ │ └── template.json ├── build-and-release │ ├── deploy-infra.ps1 │ ├── deploy-infra.template.yml │ └── deploy-infra.yml ├── infra_stages.png ├── runconfigschema.json └── scripts │ ├── create-aks.sh │ ├── create-azmlcompute.sh │ └── create-workspace.sh └── labs ├── 01_setup.md ├── 02_experiments.md ├── 03_managedcompute.md ├── 04_datasets.md ├── 05_hypertune.md ├── 06_pipelines.md └── README.md /.amlignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | azureml-logs 3 | .azureml 4 | .git 5 | outputs 6 | azureml-setup 7 | docs 8 | -------------------------------------------------------------------------------- /.azureml/config.json: -------------------------------------------------------------------------------- 1 | {"Id": null, "Scope": "/subscriptions/cf4e1704-b4bc-4554-bcd7-309394f2ee56/resourceGroups/azuremlworkshoprgp/providers/Microsoft.MachineLearningServices/workspaces/azuremlworkshopws"} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore local folders and config files 2 | aml_config/*.json 3 | Sample_Data 4 | .vscode 5 | outputs 6 | assets 7 | 8 | # ignore python cache 9 | *.pyc 10 | 11 | # ignore faultive folders 12 | Code/Modeling/aml_config 13 | Code/Modeling/assets 14 | Code/Modeling/.amlignore 15 | 16 | # ignore tmp folders 17 | code/Data_Acquisition_and_Understanding/tmp/* 18 | -------------------------------------------------------------------------------- /Code/Data_Acquisition_and_Understanding/Readme.md: -------------------------------------------------------------------------------- 1 | # This folder hosts production-intended data preparation logic -------------------------------------------------------------------------------- /Code/Data_Acquisition_and_Understanding/define_dataset.py: -------------------------------------------------------------------------------- 1 | 
# Defines a tabular dataset on top of an Azure ML datastore 2 | from azureml.core import Workspace, Datastore, Dataset 3 | from azureml.data import DataType 4 | from azureml.core.authentication import AzureCliAuthentication 5 | 6 | # Retrieve a datastore from a ML workspace 7 | ws = Workspace.from_config(auth=AzureCliAuthentication()) 8 | datastore_name = 'workspaceblobstore' 9 | datastore = Datastore.get(ws, datastore_name) 10 | 11 | # Register dataset version for each data split 12 | for data_split in ['train', 'test']: 13 | # Create a TabularDataset from paths in datastore in split folder 14 | # Note that wildcards can be used 15 | datastore_paths = [ 16 | (datastore, '{}/*.csv'.format(data_split)) 17 | ] 18 | 19 | # Create a TabularDataset from paths in datastore 20 | dataset = Dataset.Tabular.from_delimited_files( 21 | path=datastore_paths, 22 | set_column_types={ 23 | 'text': DataType.to_string(), 24 | 'target': DataType.to_string() 25 | }, 26 | header=True 27 | ) 28 | 29 | # Register the defined dataset for later use 30 | dataset.register( 31 | workspace=ws, 32 | name='newsgroups_{}'.format(data_split), 33 | description='newsgroups data' 34 | ) 35 | -------------------------------------------------------------------------------- /Code/Data_Acquisition_and_Understanding/ingest_data.py: -------------------------------------------------------------------------------- 1 | # Pre-processes SKLearn sample data 2 | # Ingest the data into an Azure ML Datastore for training 3 | import pandas as pd 4 | import time 5 | import os 6 | from sklearn.datasets import fetch_20newsgroups 7 | from azureml.core import Workspace, Datastore 8 | from azureml.core.authentication import AzureCliAuthentication 9 | 10 | # Define newsgroup categories to be downloaded to generate sample dataset 11 | # @TODO add additional newsgroups 12 | categories = [ 13 | 'alt.atheism', 14 | 'talk.religion.misc', 15 | 'comp.graphics', 16 | 'sci.space', 17 | ] 18 | 19 | print("Loading 20 newsgroups dataset for categories:") 20 | print(categories if categories else "all") 21 | 22 | for data_split in ['train', 'test']: 23 | # retrieve newsgroup data 24 | newsgroupdata = fetch_20newsgroups( 25 | subset=data_split, 26 | categories=categories, 27 | shuffle=True, 28 | random_state=42 29 | ) 30 | 31 | # construct pandas data frame from loaded sklearn newsgroup data 32 | df = pd.DataFrame({ 33 | 'text': newsgroupdata.data, 34 | 'target': newsgroupdata.target 35 | }) 36 | 37 | print('data loaded') 38 | 39 | # pre-process: 40 | # remove line breaks 41 | # replace target index by newsgroup name 42 | target_names = newsgroupdata.target_names 43 | df.target = df.target.apply(lambda x: target_names[x]) 44 | df.text = df.text.replace('\n', ' ', regex=True) 45 | 46 | print(df.head(5)) 47 | 48 | # write to csv 49 | df.to_csv(os.path.join( 50 | os.path.dirname(os.path.realpath(__file__)), 51 | 'tmp', 52 | data_split, 53 | '{}.csv'.format(int(time.time())) # unique file name 54 | ), index=False, encoding="utf-8", line_terminator='\n') 55 | 56 | 57 | datastore_name = 'workspaceblobstore' 58 | 59 | # get existing ML workspace 60 | workspace = Workspace.from_config(auth=AzureCliAuthentication()) 61 | 62 | # retrieve an existing datastore in the workspace by name 63 | datastore = Datastore.get(workspace, datastore_name) 64 | 65 | # upload files 66 | datastore.upload( 67 | src_dir=os.path.join( 68 | os.path.dirname(os.path.realpath(__file__)), 69 | 'tmp' 70 | ), 71 | target_path=None, 72 | overwrite=True, 73 | show_progress=True 74 | ) 75 | 
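# Note: df.to_csv above assumes the 'tmp/train' and 'tmp/test' folders already
# exist next to this script. If they do not, create them before writing, for
# example with a guard like the following (a sketch, placed before the to_csv call):
#
#     os.makedirs(os.path.join(
#         os.path.dirname(os.path.realpath(__file__)),
#         'tmp', data_split), exist_ok=True)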
-------------------------------------------------------------------------------- /Code/Modeling/Readme.md: -------------------------------------------------------------------------------- 1 | # This folder contains code for modeling and related activities (such as feature engineering, model evaluation etc.) 2 | 3 | You can add detailed description in this markdown related to your specific data science project. 4 | 5 | The following project structure has been provided as an example. 6 | 7 | * modelpackage 8 | * tests e.g. unit tests and integration tests -------------------------------------------------------------------------------- /Code/Modeling/hypertrain.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from optparse import OptionParser 3 | import sys 4 | 5 | from sklearn.datasets import fetch_20newsgroups 6 | from sklearn.feature_extraction.text import TfidfVectorizer 7 | from sklearn.ensemble import RandomForestClassifier 8 | from sklearn import metrics 9 | from sklearn.feature_extraction.text import HashingVectorizer 10 | 11 | from sklearn.externals import joblib 12 | from azureml.core import Run 13 | 14 | # Display progress logs on stdout 15 | logging.basicConfig(level=logging.INFO, 16 | format='%(asctime)s %(levelname)s %(message)s') 17 | 18 | op = OptionParser() 19 | op.add_option("--all_categories", 20 | action="store_true", dest="all_categories", 21 | help="Whether to use all categories or not.") 22 | op.add_option("--use_hashing", 23 | action="store_true", 24 | help="Use a hashing vectorizer.") 25 | op.add_option("--n_features", 26 | action="store", 27 | type=int, 28 | default=2 ** 16, 29 | help="n_features when using the hashing vectorizer.") 30 | op.add_option("--max_depth", 31 | type=int, default=10) 32 | op.add_option("--n_estimators", 33 | type=int, default=100) 34 | op.add_option("--criterion", 35 | type=str, 36 | default='gini') 37 | op.add_option("--min_samples_split", 38 | type=int, 39 | default=2) 40 | 41 | 42 | def is_interactive(): 43 | return not hasattr(sys.modules['__main__'], '__file__') 44 | 45 | 46 | # work-around for Jupyter notebook and IPython console 47 | argv = [] if is_interactive() else sys.argv[1:] 48 | (opts, args) = op.parse_args(argv) 49 | if len(args) > 0: 50 | op.error("this script takes no arguments.") 51 | sys.exit(1) 52 | 53 | if opts.all_categories: 54 | categories = None 55 | else: 56 | categories = [ 57 | 'alt.atheism', 58 | 'talk.religion.misc', 59 | 'comp.graphics', 60 | 'sci.space', 61 | ] 62 | 63 | print("Loading 20 newsgroups dataset for categories:") 64 | print(categories if categories else "all") 65 | 66 | data_train = fetch_20newsgroups(subset='train', categories=categories, 67 | shuffle=True, random_state=42) 68 | 69 | data_test = fetch_20newsgroups(subset='test', categories=categories, 70 | shuffle=True, random_state=42) 71 | print('data loaded') 72 | 73 | # order of labels in `target_names` can be different from `categories` 74 | target_names = data_train.target_names 75 | 76 | 77 | def size_mb(docs): 78 | return sum(len(s.encode('utf-8')) for s in docs) / 1e6 79 | 80 | 81 | # split a training set and a test set 82 | y_train, y_test = data_train.target, data_test.target 83 | 84 | print("Extracting features from the training data using a sparse vectorizer") 85 | if opts.use_hashing: 86 | vectorizer = HashingVectorizer(stop_words='english', alternate_sign=False, 87 | n_features=opts.n_features) 88 | X_train = vectorizer.transform(data_train.data) 89 | else: 90 | vectorizer = 
TfidfVectorizer(sublinear_tf=True, max_df=0.5, 91 | stop_words='english') 92 | X_train = vectorizer.fit_transform(data_train.data) 93 | 94 | print("Extracting features from the test data using the same vectorizer") 95 | X_test = vectorizer.transform(data_test.data) 96 | 97 | 98 | # mapping from integer feature name to original token string 99 | if opts.use_hashing: 100 | feature_names = None 101 | else: 102 | feature_names = vectorizer.get_feature_names() 103 | 104 | 105 | def trim(s): 106 | """Trim string to fit on terminal (assuming 80-column display)""" 107 | return s if len(s) <= 80 else s[:77] + "..." 108 | 109 | 110 | def benchmark(clf, name=""): 111 | """benchmark classifier performance""" 112 | 113 | # train a model 114 | print("\nTraining run with algorithm \n{}".format(clf)) 115 | clf.fit(X_train, y_train) 116 | 117 | # evaluate on test set 118 | pred = clf.predict(X_test) 119 | score = metrics.accuracy_score(y_test, pred) 120 | 121 | # log metrics 122 | run_logger = Run.get_context() 123 | run_logger.log("accuracy", float(score)) 124 | 125 | # save .pkl file 126 | model_name = "model" + ".pkl" 127 | filename = "outputs/" + model_name 128 | joblib.dump(value=clf, filename=filename) 129 | run_logger.upload_file(name=model_name, path_or_stream=filename) 130 | 131 | print("accuracy: %0.3f" % score) 132 | clf_descr = str(clf).split('(')[0] 133 | return clf_descr, score 134 | 135 | 136 | results = [] 137 | 138 | # Select the training hyperparameters. 139 | # Create a dict of hyperparameters from the input flags. 140 | hyperparameters = { 141 | "max_depth": opts.max_depth, 142 | "n_estimators": opts.n_estimators, 143 | "criterion": opts.criterion, 144 | "min_samples_split": opts.min_samples_split 145 | } 146 | 147 | # Select the training hyperparameters. 
148 | max_depth = hyperparameters["max_depth"] 149 | n_estimators = hyperparameters["n_estimators"] 150 | criterion = hyperparameters["criterion"] 151 | min_samples_split = hyperparameters["min_samples_split"] 152 | 153 | 154 | clf = RandomForestClassifier(max_depth=max_depth, 155 | n_estimators=n_estimators, criterion=criterion, 156 | min_samples_split=min_samples_split) 157 | 158 | model = benchmark(clf) 159 | -------------------------------------------------------------------------------- /Code/Modeling/hypertrain_submit.py: -------------------------------------------------------------------------------- 1 | from azureml.train.hyperdrive import ( 2 | RandomParameterSampling, 3 | HyperDriveConfig, PrimaryMetricGoal) 4 | from azureml.core import Workspace, Experiment 5 | from azureml.train.estimator import Estimator 6 | import pandas as pd 7 | import os 8 | from random import choice 9 | from azureml.core.authentication import AzureCliAuthentication 10 | 11 | # load Azure ML workspace 12 | workspace = Workspace.from_config(auth=AzureCliAuthentication()) 13 | 14 | cluster_name = 'hypetuning' 15 | 16 | # Define Run Configuration 17 | estimator = Estimator( 18 | entry_script='hypertrain.py', 19 | source_directory=os.path.dirname(os.path.realpath(__file__)), 20 | compute_target=workspace.compute_targets[cluster_name], 21 | pip_packages=[ 22 | 'numpy==1.15.4', 23 | 'pandas==0.23.4', 24 | 'scikit-learn==0.20.1', 25 | 'scipy==1.0.0', 26 | 'matplotlib==3.0.2', 27 | 'utils==0.9.0' 28 | ] 29 | ) 30 | 31 | # Set parameters for search 32 | param_sampling = RandomParameterSampling({ 33 | "max_depth": choice([100, 50, 20, 10]), 34 | "n_estimators": choice([50, 150, 200, 250]), 35 | "criterion": choice(['gini', 'entropy']), 36 | "min_samples_split": choice([2, 3, 4, 5]) 37 | } 38 | ) 39 | 40 | # Define multi-run configuration 41 | hyperdrive_run_config = HyperDriveConfig( 42 | estimator=estimator, 43 | hyperparameter_sampling=param_sampling, 44 | policy=None, 45 | primary_metric_name="accuracy", 46 | primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 47 | max_total_runs=2, 48 | max_concurrent_runs=None 49 | ) 50 | 51 | # Define the ML experiment 52 | experiment = Experiment(workspace, "newsgroups_train_hypertune") 53 | 54 | hyperdrive_run = experiment.submit(hyperdrive_run_config) 55 | hyperdrive_run.wait_for_completion() 56 | 57 | # Select the best run from all submitted 58 | best_run = hyperdrive_run.get_best_run_by_primary_metric() 59 | best_run_metrics = best_run.get_metrics() 60 | 61 | # Log the best run's performance to the parent run 62 | hyperdrive_run.log("Accuracy", best_run_metrics['accuracy']) 63 | parameter_values = best_run.get_details()['runDefinition']['arguments'] 64 | 65 | # Print best set of parameters found 66 | best_parameters = dict(zip(parameter_values[::2], parameter_values[1::2])) 67 | pd.Series(best_parameters, name='Value').to_frame() 68 | 69 | best_model_parameters = best_parameters.copy() 70 | pd.Series(best_model_parameters, name='Value').to_frame() 71 | print(best_model_parameters) 72 | 73 | # Define a final training run with model's best parameters 74 | model_est = Estimator( 75 | entry_script='hypertrain.py', 76 | source_directory=os.path.dirname(os.path.realpath(__file__)), 77 | script_params=best_model_parameters, 78 | compute_target=workspace.compute_targets[cluster_name], 79 | pip_packages=[ 80 | 'numpy==1.15.4', 81 | 'pandas==0.23.4', 82 | 'scikit-learn==0.20.1', 83 | 'scipy==1.0.0', 84 | 'matplotlib==3.0.2', 85 | 'utils==0.9.0' 86 | ] 87 | ) 88 | 89 | # Submit the 
experiment 90 | model_run = experiment.submit(model_est) 91 | 92 | model_run_status = model_run.wait_for_completion(wait_post_processing=True) 93 | 94 | model = model_run.register_model(model_name='model', 95 | model_path=os.path.join('outputs', 'model.pkl')) 96 | -------------------------------------------------------------------------------- /Code/Modeling/pipeline_train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model Training Pipeline 3 | 4 | Note: ML Pipelines are executed on registered compute resources. 5 | Run configurations hence cannot reference local compute. 6 | """ 7 | import os 8 | from azureml.core import Experiment, Workspace 9 | from azureml.pipeline.core import Pipeline, PipelineData 10 | from azureml.pipeline.steps import PythonScriptStep 11 | from azureml.core import RunConfiguration 12 | from azureml.core.authentication import AzureCliAuthentication 13 | from azureml.data.data_reference import DataReference 14 | 15 | # Define run configuration (compute/environment/data references/..) 16 | run_config_name = 'dsvmcluster' 17 | exp_name = "Training_Pipeline" 18 | curr_dir = os.path.dirname(os.path.realpath(__file__)) 19 | output_dir = 'outputs' 20 | output_dir_local = os.path.join(curr_dir, '../../../', 'outputs') 21 | 22 | # Pipeline parameters 23 | run_experiment = True 24 | register_model = False 25 | publish_pipeline = False 26 | 27 | # load workspace config, load default datastore. 28 | ws = Workspace.from_config(auth=AzureCliAuthentication()) 29 | default_ds = ws.get_default_datastore() 30 | 31 | # load run config 32 | run_config = RunConfiguration.load( 33 | path=os.path.join(curr_dir, '../../../', 'aml_config'), 34 | name=run_config_name 35 | ) 36 | 37 | # define training pipeline with one AMLCompute step 38 | trainStep = PythonScriptStep( 39 | script_name="train.py", 40 | name="Model Training", 41 | arguments=[ 42 | '--data-dir', str(default_ds.as_mount()), 43 | '--output-dir', output_dir 44 | ], 45 | inputs=[ 46 | DataReference( 47 | datastore=default_ds, 48 | mode="mount" 49 | ) 50 | ], 51 | outputs=[ 52 | PipelineData( 53 | name="model", 54 | datastore=default_ds, 55 | output_path_on_compute="training" 56 | ) 57 | ], 58 | compute_target=run_config.target, 59 | runconfig=run_config, 60 | source_directory=os.path.join(curr_dir, '../') 61 | ) 62 | 63 | training_pipeline = Pipeline(workspace=ws, steps=[trainStep]) 64 | training_pipeline.validate() 65 | print("Pipeline validation complete") 66 | 67 | # Submit pipeline run 68 | pipeline_run = Experiment(ws, exp_name).submit(training_pipeline) 69 | pipeline_run.wait_for_completion() 70 | -------------------------------------------------------------------------------- /Code/Modeling/score_realtime.py: -------------------------------------------------------------------------------- 1 | """ 2 | Real Time Scoring Service 3 | @TODO 4 | """ 5 | import json 6 | import time 7 | import numpy as np 8 | from azureml.core.model import Model 9 | from sklearn.externals import joblib 10 | 11 | 12 | def init(): 13 | """ 14 | Load model and other dependencies for inferencing 15 | """ 16 | global model 17 | # Print statement for appinsights custom traces: 18 | print("model initialized" + time.strftime("%H:%M:%S")) 19 | 20 | # note here "sklearn_regression_model.pkl" is the name of the 21 | # model registered under the workspace this call should return 22 | # the path to the model.pkl file on the local disk. 
23 | model_path = Model.get_model_path(model_name='model.pkl') 24 | 25 | # deserialize the model file back into a sklearn model 26 | model = joblib.load(model_path) 27 | 28 | 29 | def run(raw_data): 30 | """ 31 | Score new data against model 32 | """ 33 | try: 34 | data = json.loads(raw_data)['data'] 35 | data = np.array(data) 36 | result = model.predict(data) 37 | 38 | # you can return any datatype as long as it is JSON-serializable 39 | return result.tolist() 40 | except Exception as e: 41 | error = str(e) 42 | print(error + time.strftime("%H:%M:%S")) 43 | return error 44 | -------------------------------------------------------------------------------- /Code/Modeling/train.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | from optparse import OptionParser 4 | import sys 5 | from time import time 6 | 7 | from sklearn.datasets import fetch_20newsgroups 8 | from sklearn.feature_extraction.text import TfidfVectorizer 9 | from sklearn.feature_extraction.text import HashingVectorizer 10 | from sklearn.feature_selection import SelectFromModel 11 | from sklearn.linear_model import RidgeClassifier 12 | from sklearn.pipeline import Pipeline 13 | from sklearn.svm import LinearSVC 14 | from sklearn.linear_model import SGDClassifier 15 | from sklearn.linear_model import Perceptron 16 | from sklearn.linear_model import PassiveAggressiveClassifier 17 | from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB 18 | from sklearn.neighbors import KNeighborsClassifier 19 | from sklearn.neighbors import NearestCentroid 20 | from sklearn.ensemble import RandomForestClassifier 21 | from sklearn import metrics 22 | 23 | # Display progress logs on stdout 24 | logging.basicConfig(level=logging.INFO, 25 | format='%(asctime)s %(levelname)s %(message)s') 26 | 27 | op = OptionParser() 28 | op.add_option("--all_categories", 29 | action="store_true", dest="all_categories", 30 | help="Whether to use all categories or not.") 31 | op.add_option("--use_hashing", 32 | action="store_true", 33 | help="Use a hashing vectorizer.") 34 | op.add_option("--n_features", 35 | action="store", type=int, default=2 ** 16, 36 | help="n_features when using the hashing vectorizer.") 37 | 38 | 39 | def is_interactive(): 40 | return not hasattr(sys.modules['__main__'], '__file__') 41 | 42 | 43 | # work-around for Jupyter notebook and IPython console 44 | argv = [] if is_interactive() else sys.argv[1:] 45 | (opts, args) = op.parse_args(argv) 46 | if len(args) > 0: 47 | op.error("this script takes no arguments.") 48 | sys.exit(1) 49 | 50 | if opts.all_categories: 51 | categories = None 52 | else: 53 | categories = [ 54 | 'alt.atheism', 55 | 'talk.religion.misc', 56 | 'comp.graphics', 57 | 'sci.space', 58 | ] 59 | 60 | 61 | print("Loading 20 newsgroups dataset for categories:") 62 | 63 | data_train = fetch_20newsgroups(subset='train', categories=categories, 64 | shuffle=True, random_state=42) 65 | 66 | data_test = fetch_20newsgroups(subset='test', categories=categories, 67 | shuffle=True, random_state=42) 68 | print('data loaded') 69 | 70 | # order of labels in `target_names` can be different from `categories` 71 | target_names = data_train.target_names 72 | 73 | 74 | def size_mb(docs): 75 | return sum(len(s.encode('utf-8')) for s in docs) / 1e6 76 | 77 | 78 | # split a training set and a test set 79 | y_train, y_test = data_train.target, data_test.target 80 | 81 | # Extracting features from the training data using a sparse vectorizer 82 | if 
opts.use_hashing: 83 | vectorizer = HashingVectorizer(stop_words='english', alternate_sign=False, 84 | n_features=opts.n_features) 85 | X_train = vectorizer.transform(data_train.data) 86 | else: 87 | vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, 88 | stop_words='english') 89 | X_train = vectorizer.fit_transform(data_train.data) 90 | 91 | # Extracting features from the test data using the same vectorizer" 92 | X_test = vectorizer.transform(data_test.data) 93 | 94 | # mapping from integer feature name to original token string 95 | if opts.use_hashing: 96 | feature_names = None 97 | else: 98 | feature_names = vectorizer.get_feature_names() 99 | 100 | if feature_names: 101 | feature_names = np.asarray(feature_names) 102 | 103 | 104 | def trim(s): 105 | """Trim string to fit on terminal (assuming 80-column display)""" 106 | return s if len(s) <= 80 else s[:77] + "..." 107 | 108 | 109 | def benchmark(clf, name=""): 110 | """benchmark classifier performance""" 111 | 112 | # train a model 113 | print("\nTraining run with algorithm \n{}".format(clf)) 114 | clf.fit(X_train, y_train) 115 | 116 | # evaluate on test set 117 | pred = clf.predict(X_test) 118 | score = ? 119 | 120 | clf_descr = str(clf).split('(')[0] 121 | print("? %0.3f" % score) 122 | return clf_descr, score 123 | 124 | 125 | # Run benchmark and collect results with multiple classifiers 126 | for clf, name in ( 127 | (RidgeClassifier(tol=1e-2, solver="sag"), "Ridge Classifier"), 128 | (Perceptron(max_iter=50), "Perceptron"), 129 | (PassiveAggressiveClassifier(max_iter=50), 130 | "Passive-Aggressive"), 131 | (KNeighborsClassifier(n_neighbors=10), "kNN"), 132 | (RandomForestClassifier(), "Random forest")): 133 | # run benchmarking function for each 134 | benchmark(clf, name) 135 | 136 | 137 | # Run with different regularization techniques 138 | for penalty in ["l2", "l1"]: 139 | # Train Liblinear model 140 | name = penalty + "LinearSVC" 141 | benchmark( 142 | clf=LinearSVC( 143 | penalty=penalty, 144 | dual=False, 145 | tol=1e-3 146 | ), 147 | name=penalty + "LinearSVC" 148 | ) 149 | 150 | # Train SGD model 151 | benchmark( 152 | SGDClassifier( 153 | alpha=.0001, 154 | max_iter=50, 155 | penalty=penalty 156 | ), 157 | name=penalty + "SGDClassifier" 158 | ) 159 | 160 | # Train SGD with Elastic Net penalty 161 | benchmark( 162 | SGDClassifier( 163 | alpha=.0001, 164 | max_iter=50, 165 | penalty="elasticnet" 166 | ), 167 | name="Elastic-Net penalty" 168 | ) 169 | 170 | # Train NearestCentroid without threshold 171 | benchmark( 172 | NearestCentroid(), 173 | name="NearestCentroid (aka Rocchio classifier)" 174 | ) 175 | 176 | # Train sparse Naive Bayes classifiers 177 | benchmark( 178 | MultinomialNB(alpha=.01), 179 | name="Naive Bayes MultinomialNB" 180 | ) 181 | 182 | benchmark( 183 | BernoulliNB(alpha=.01), 184 | name="Naive Bayes BernoulliNB" 185 | ) 186 | 187 | benchmark( 188 | ComplementNB(alpha=.1), 189 | name="Naive Bayes ComplementNB" 190 | ) 191 | 192 | # The smaller C, the stronger the regularization. 193 | # The more regularization, the more sparsity. 
194 | benchmark( 195 | Pipeline([ 196 | ('feature_selection', 197 | SelectFromModel( 198 | LinearSVC( 199 | penalty="l1", 200 | dual=False, 201 | tol=1e-3 202 | ) 203 | )), 204 | ('classification', 205 | LinearSVC(penalty="l2")) 206 | ] 207 | ), 208 | name="LinearSVC with L1-based feature selection" 209 | ) -------------------------------------------------------------------------------- /Code/Modeling/train_datasets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from optparse import OptionParser 3 | from time import time 4 | from sklearn.feature_extraction.text import TfidfVectorizer 5 | from sklearn.feature_extraction.text import HashingVectorizer 6 | from sklearn.feature_selection import SelectFromModel 7 | from sklearn.feature_selection import SelectKBest, chi2 8 | from sklearn.linear_model import RidgeClassifier 9 | from sklearn.pipeline import Pipeline 10 | from sklearn.svm import LinearSVC 11 | from sklearn.linear_model import SGDClassifier 12 | from sklearn.linear_model import Perceptron 13 | from sklearn.linear_model import PassiveAggressiveClassifier 14 | from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB 15 | from sklearn.neighbors import KNeighborsClassifier 16 | from sklearn.neighbors import NearestCentroid 17 | from sklearn.ensemble import RandomForestClassifier 18 | from sklearn.utils.extmath import density 19 | from sklearn import metrics 20 | from sklearn.externals import joblib 21 | from azureml.core import Run 22 | 23 | op = OptionParser() 24 | op.add_option("--report", 25 | action="store_true", dest="print_report", 26 | help="Print a detailed classification report.") 27 | op.add_option("--chi2_select", 28 | action="store", type="int", dest="select_chi2", 29 | help="Select some number of features using a chi-squared test") 30 | op.add_option("--confusion_matrix", 31 | action="store_true", dest="print_cm", 32 | help="Print the confusion matrix.") 33 | op.add_option("--top10", 34 | action="store_true", dest="print_top10", 35 | help="Print ten most discriminative terms per class" 36 | " for every classifier.") 37 | op.add_option("--all_categories", 38 | action="store_true", dest="all_categories", 39 | help="Whether to use all categories or not.") 40 | op.add_option("--use_hashing", 41 | action="store_true", 42 | help="Use a hashing vectorizer.") 43 | op.add_option("--n_features", 44 | action="store", type=int, default=2 ** 16, 45 | help="n_features when using the hashing vectorizer.") 46 | op.add_option("--filtered", 47 | action="store_true", 48 | help="Remove newsgroup information that is easily overfit: " 49 | "headers, signatures, and quoting.") 50 | 51 | # Retrieve the run and its context (datasets etc.) 
52 | run = Run.get_context() 53 | 54 | # Load the input datasets from Azure ML 55 | dataset_train = run.input_datasets['train'].to_pandas_dataframe() 56 | dataset_test = run.input_datasets['test'].to_pandas_dataframe() 57 | 58 | # Pre-process df for sklearn 59 | # convert to numpy df 60 | data_train = dataset_train.text.values 61 | data_test = dataset_test.text.values 62 | 63 | # save orginal target names 64 | target_names = data_train.target_names 65 | 66 | # convert label to int 67 | y_train = dataset_train.target.values 68 | y_test = dataset_test.target.values 69 | 70 | # Extracting features from the training data using a sparse vectorizer") 71 | vectorizer = HashingVectorizer( 72 | stop_words='english', 73 | alternate_sign=False, 74 | n_features=op.n_features 75 | ) 76 | 77 | X_train = vectorizer.transform(data_train.data) 78 | 79 | # Extracting features from the test data using the same vectorizer 80 | X_test = vectorizer.transform(data_test.data) 81 | 82 | # mapping from integer feature name to original token string 83 | feature_names = vectorizer.get_feature_names() 84 | 85 | # # Extracting %d best features by a chi-squared test 86 | # ch2 = SelectKBest(chi2, k=op.select_chi2) 87 | # X_train = ch2.fit_transform(X_train, y_train) 88 | # X_test = ch2.transform(X_test) 89 | 90 | # keep selected feature names 91 | # feature_names = [feature_names[i] for i 92 | # in ch2.get_support(indices=True)] 93 | # feature_names = np.asarray(feature_names) 94 | 95 | 96 | def trim(s): 97 | """Trim string to fit on terminal (assuming 80-column display)""" 98 | return s if len(s) <= 80 else s[:77] + "..." 99 | 100 | 101 | def benchmark(clf, name): 102 | print('_' * 80) 103 | print("Training: ") 104 | print(clf) 105 | t0 = time() 106 | clf.fit(X_train, y_train) 107 | train_time = time() - t0 108 | print("train time: %0.3fs" % train_time) 109 | 110 | t0 = time() 111 | pred = clf.predict(X_test) 112 | test_time = time() - t0 113 | print("test time: %0.3fs" % test_time) 114 | score = metrics.accuracy_score(y_test, pred) 115 | 116 | child_run = run.child_run(name=name) 117 | child_run.log("accuracy", float(score)) 118 | model_name = "model" + str(name) + ".pkl" 119 | filename = "outputs/" + model_name 120 | joblib.dump(value=clf, filename=filename) 121 | child_run.upload_file(name=model_name, path_or_stream=filename) 122 | 123 | print("accuracy: %0.3f" % score) 124 | 125 | if hasattr(clf, 'coef_'): 126 | print("dimensionality: %d" % clf.coef_.shape[1]) 127 | print("density: %f" % density(clf.coef_)) 128 | 129 | if op.print_top10 and feature_names is not None: 130 | print("top 10 keywords per class:") 131 | for i, label in enumerate(target_names): 132 | top10 = np.argsort(clf.coef_[i])[-10:] 133 | print(trim("%s: %s" % (label, " ".join(feature_names[top10])))) 134 | print() 135 | 136 | if op.print_report: 137 | print("classification report:") 138 | print(metrics.classification_report(y_test, pred, 139 | target_names=target_names)) 140 | 141 | if op.print_cm: 142 | print("confusion matrix:") 143 | print(metrics.confusion_matrix(y_test, pred)) 144 | 145 | print() 146 | clf_descr = str(clf).split('(')[0] 147 | 148 | child_run.complete() 149 | return clf_descr, score, train_time, test_time 150 | 151 | 152 | results = [] 153 | 154 | for clf, name in ( 155 | (RidgeClassifier(tol=1e-2, solver="sag"), "Ridge Classifier"), 156 | (Perceptron(max_iter=50), "Perceptron"), 157 | (PassiveAggressiveClassifier(max_iter=50), 158 | "Passive-Aggressive"), 159 | (KNeighborsClassifier(n_neighbors=10), "kNN"), 160 | 
(RandomForestClassifier(), "Random forest")): 161 | print('=' * 80) 162 | print(name) 163 | results.append(benchmark(clf, name)) 164 | 165 | for penalty in ["l2", "l1"]: 166 | print('=' * 80) 167 | print("%s penalty" % penalty.upper()) 168 | # Train Liblinear model 169 | name = penalty + "LinearSVC" 170 | results.append(benchmark(LinearSVC(penalty=penalty, dual=False, 171 | tol=1e-3))) 172 | 173 | # Train SGD model 174 | name = penalty + "SGDClassifier" 175 | results.append(benchmark(SGDClassifier(alpha=.0001, max_iter=50, 176 | penalty=penalty))) 177 | 178 | # Train SGD with Elastic Net penalty 179 | print('=' * 80) 180 | print("Elastic-Net penalty") 181 | name = "Elastic-Net penalty" 182 | results.append(benchmark(SGDClassifier(alpha=.0001, max_iter=50, 183 | penalty="elasticnet"))) 184 | 185 | # Train NearestCentroid without threshold 186 | print('=' * 80) 187 | print("NearestCentroid (aka Rocchio classifier)") 188 | name ="NearestCentroid (aka Rocchio classifier)" 189 | results.append(benchmark(NearestCentroid())) 190 | 191 | 192 | # Train sparse Naive Bayes classifiers 193 | print('=' * 80) 194 | print("Naive Bayes") 195 | name = "Naive Bayes MultinomialNB" 196 | results.append(benchmark(MultinomialNB(alpha=.01))) 197 | 198 | name = "Naive Bayes BernoulliNB" 199 | results.append(benchmark(BernoulliNB(alpha=.01))) 200 | 201 | name = "Naive Bayes ComplementNB" 202 | results.append(benchmark(ComplementNB(alpha=.1))) 203 | 204 | print('=' * 80) 205 | print("LinearSVC with L1-based feature selection") 206 | # The smaller C, the stronger the regularization. 207 | # The more regularization, the more sparsity. 208 | name = "LinearSVC with L1-based feature selection" 209 | results.append(benchmark(Pipeline([ 210 | ('feature_selection', SelectFromModel(LinearSVC(penalty="l1", dual=False, 211 | tol=1e-3))), 212 | ('classification', LinearSVC(penalty="l2"))]))) 213 | -------------------------------------------------------------------------------- /Code/Modeling/train_submit.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training submitter 3 | 4 | Facilitates (remote) training execution through the Azure ML service. 
5 | """ 6 | import os 7 | from azureml.core import Workspace, Experiment 8 | from azureml.train.estimator import Estimator 9 | from azureml.core.authentication import AzureCliAuthentication 10 | 11 | # load Azure ML workspace 12 | workspace = Workspace.from_config(auth=AzureCliAuthentication()) 13 | 14 | # Define Run Configuration 15 | est = Estimator( 16 | entry_script='train.py', 17 | source_directory=os.path.dirname(os.path.realpath(__file__)), 18 | compute_target='local', 19 | conda_packages=[ 20 | 'pip==20.0.2' 21 | ], 22 | pip_packages=[ 23 | 'numpy==1.15.4', 24 | 'pandas==0.23.4', 25 | 'scikit-learn==0.20.1', 26 | 'scipy==1.0.0', 27 | 'matplotlib==3.0.2', 28 | 'utils==0.9.0' 29 | ], 30 | use_docker=False 31 | ) 32 | 33 | # Define the ML experiment 34 | experiment = Experiment(workspace, "newsgroups_train") 35 | 36 | # Submit experiment run, if compute is idle, this may take some time') 37 | run = experiment.submit(est) 38 | 39 | # wait for run completion of the run, while showing the logs 40 | run.wait_for_completion(show_output=True) 41 | -------------------------------------------------------------------------------- /Code/Modeling/train_submit_datasets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training submitter 3 | 4 | Facilitates (remote) training execution through the Azure ML service. 5 | """ 6 | import os 7 | from azureml.core import Workspace, Experiment 8 | from azureml.train.estimator import Estimator 9 | from azureml.core.authentication import AzureCliAuthentication 10 | 11 | # load Azure ML workspace 12 | workspace = Workspace.from_config(auth=AzureCliAuthentication()) 13 | 14 | # retrieve datasets used for training 15 | dataset_train = Dataset.get_by_name(workspace, name='newsgroups_train') 16 | dataset_test = Dataset.get_by_name(workspace, name='newsgroups_test') 17 | 18 | # Define Run Configuration 19 | est = Estimator( 20 | entry_script='train.py', 21 | source_directory=os.path.dirname(os.path.realpath(__file__)), 22 | compute_target='local', 23 | conda_packages=[ 24 | 'pip==20.0.2' 25 | ], 26 | pip_packages=[ 27 | 'numpy==1.15.4', 28 | 'pandas==0.23.4', 29 | 'scikit-learn==0.20.1', 30 | 'scipy==1.0.0', 31 | 'matplotlib==3.0.2', 32 | 'utils==0.9.0' 33 | ], 34 | use_docker=False, 35 | inputs=[ 36 | dataset_train.as_named_input('train'), 37 | dataset_train.as_named_input('test') 38 | ], 39 | ) 40 | 41 | # Define the ML experiment 42 | experiment = Experiment(workspace, "newsgroups_train") 43 | 44 | # Submit experiment run, if compute is idle, this may take some time') 45 | run = experiment.submit(est) 46 | 47 | # wait for run completion of the run, while showing the logs 48 | run.wait_for_completion(show_output=True) 49 | -------------------------------------------------------------------------------- /Code/Operationalization/Readme.md: -------------------------------------------------------------------------------- 1 | # This folder contains code for model deployment 2 | 3 | You can add detailed description in this markdown related to your specific data science project. 4 | -------------------------------------------------------------------------------- /Code/Operationalization/dashboards/Readme.md: -------------------------------------------------------------------------------- 1 | # This folder contains dashboards e.g. end-user facing or for reporting purposes # 2 | 3 | Use git-lfs for large binary files such as PowerBI reports. 
-------------------------------------------------------------------------------- /Code/Operationalization/monitoring/Readme.md: -------------------------------------------------------------------------------- 1 | # This folder contains scripts for the monitoring (e.g. drift analysis) of deployed models
2 | -------------------------------------------------------------------------------- /Code/Operationalization/monitoring/monitoring_pipeline.py: -------------------------------------------------------------------------------- 1 | """
2 | Model monitoring pipeline
3 |
4 | Runs monitoring script by schedule (e.g. using Azure ML Pipelines and
5 | Azure Data Factory or as an Azure Function)
6 | @TODO read model training data
7 | @TODO read model collected data
8 | """
9 | # Monitor Data Quality
10 |
11 | # Monitor Model Performance
12 |
13 | # Monitor Business KPIs
14 | -------------------------------------------------------------------------------- /Code/Readme.md: -------------------------------------------------------------------------------- 1 | # Code folder for hosting code for a Data Science Project
2 |
3 | This folder hosts all code for a data science project. It has three sub-folders, belonging to three stages of the Data Science Lifecycle:
4 |
5 | 1. Data_Acquisition_and_Understanding
6 | 2. Modeling
7 | 3. Operationalization
8 | -------------------------------------------------------------------------------- /Docs/lab002/Readme.md: -------------------------------------------------------------------------------- 1 | ## Lab 2: running experiments ##
2 |
3 | # Understand the non-Azure / open source ML model code #
4 | We start by understanding the training script. The training script is open source ML model code from https://scikit-learn.org/stable/auto_examples/text/plot_document_classification_20newsgroups.html. It is an example showing how scikit-learn can be used to classify documents by topic using a bag-of-words approach. The example uses a scipy.sparse matrix to store the features and demonstrates various classifiers that can efficiently handle sparse matrices. The dataset used in this example is the 20 newsgroups dataset; it is automatically downloaded and then cached. The newsgroups dataset contains text documents that are classified into 20 categories.
5 |
6 | 1. Open the train.py document to inspect the code.
7 | The first step in the code is to load the 20 newsgroups dataset. In this example we only use a subset of the categories. Please state the categories we are going to use:
8 |
9 | ...
10 |
11 | The second step is to extract the features from the text. We do this with a sparse vectorizer. We also clean the data a bit. What operation do we apply to the data to clean the text?
12 |
13 | ...
14 |
15 | After we have reshaped our data and made sure the feature names are in the right place, we define the algorithm to fit the model. This step defines the benchmark. We fit the data and make predictions on the test set. To validate our model we need a metric to score it. There are many metrics we can use. Define in the code the metric that you want to use to validate your model and make sure the print statement outputs your metric. (Note: you can define multiple scores if you want. If so, make sure to return these scores.)
16 |
17 | ...
18 |
19 |
20 | The last step is to define the algorithms that we want to fit on our data. In this example we use 15 classification algorithms to fit the data.
We keep track of the metrics of all algorithms, so we can compare their performance and pick the best model. Look at the code and write down the different algorithms that we are going to test.
21 |
22 | ...
23 |
24 | # Run the training locally #
25 | We are now going to run the training script locally. The script will return the different metrics for all algorithms. Inspect the metrics that you specified. Which algorithm performs best?
26 |
27 | ...
28 |
29 | # Run the code via Azure ML #
30 | We are now going to run our code via Azure ML.
31 | We are going to make use of child runs. The experiment will perform a parent run that executes train.py. Within train.py we create child runs. For each of the 15 algorithms we want to create a child run and log the metrics separately. Within the child run we log the performance and the model .pkl file. This way we can easily track and compare our experiments in Azure ML.
32 |
33 | 1. Read the Experiment Tracking documentation
34 |
35 | 2. Read the How to Manage a Run documentation
36 |
37 | 3. Refactor the code to capture run metrics in train.py (a sketch is shown at the end of this lab)
38 |     1. Get the run context
39 |     2. Create a child run
40 |     3. Log the metric in the child run
41 |     4. Upload the .pkl file to the output folder of the child run
42 |     5. Close the child run
43 |
44 | 4. Alter the train_submit.py file
45 |
46 |     1. Load the Azure ML workspace from the config file
47 |     2. Create an estimator to define the run configuration
48 |     3. Define the ML experiment
49 |     4. Submit the experiment
50 |
51 | 5. Go to the portal to inspect the run history
52 |
53 |
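One possible shape for the refactored benchmark function is sketched below. This is illustrative only: it assumes the `X_train`, `X_test`, `y_train` and `y_test` variables already defined in train.py, uses accuracy as an example metric, and mirrors the pattern used in Code/Modeling/train_datasets.py.

    from azureml.core import Run
    from sklearn import metrics
    from sklearn.externals import joblib

    run = Run.get_context()                        # 1. get the run context

    def benchmark(clf, name=""):
        child_run = run.child_run(name=name)       # 2. create a child run
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        score = metrics.accuracy_score(y_test, pred)
        child_run.log("accuracy", float(score))    # 3. log the metric in the child run
        model_name = "model" + name + ".pkl"
        filename = "outputs/" + model_name
        joblib.dump(value=clf, filename=filename)
        child_run.upload_file(                     # 4. upload the .pkl file
            name=model_name, path_or_stream=filename)
        child_run.complete()                       # 5. close the child run
        return str(clf).split('(')[0], score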
-------------------------------------------------------------------------------- /Docs/lab05/Readme.md: -------------------------------------------------------------------------------- 1 | ## Lab 5: hypertune capabilities ##
2 |
3 | # Understand the goal #
4 | In this lab we are going to tune the hyperparameters of a random forest classifier, in order to find the model that best fits our data and gives the best predictive performance. In Azure ML we can use a special run type that is optimized for hyperparameter tuning.
5 |
6 | 1. Read the Hyperparameter tuning documentation
7 |
8 | # Define the hyperparameters #
9 | Before we start creating the hyperparameter run, we need to know and understand the parameters that we can tune for the random forest classifier.
10 |
11 | 2. Search for the sklearn RandomForestClassifier and identify the parameters that we can tune. Write them down below
12 |
13 | ...
14 |
15 | # Alter the hypertrain script #
16 | The hypertrain script is similar to the train script, but instead of running 15 different algorithms, we only run the RandomForestClassifier. As we have seen in the previous step, the RandomForestClassifier has a lot of parameters that we can tune. In this example, we will only tune max_depth, n_estimators, criterion and min_samples_split. (Note: if you want to add more hyperparameters you can do that in the same way as we are adding these parameters.)
17 |
18 | 3. Define the parameters as input arguments in the OptionParser(), define the input type and set the default value to the default provided in the documentation.
19 |
20 |     op.add_option("--max_depth",
21 |                   type=int, default=10)
22 |     op.add_option("--n_estimators",
23 |                   type=int, default=100)
24 |     op.add_option("--criterion",
25 |                   type=str,
26 |                   default='gini')
27 |     op.add_option("--min_samples_split",
28 |                   type=int,
29 |                   default=2)
30 |
31 | 4. Create a dict of hyperparameters from the input flags.
32 |
33 |     hyperparameters = {
34 |         "max_depth": opts.max_depth,
35 |         "n_estimators": opts.n_estimators,
36 |         "criterion": opts.criterion,
37 |         "min_samples_split": opts.min_samples_split
38 |     }
39 |
40 | 5. Select the training hyperparameters as input variables
41 |
42 |     max_depth = hyperparameters["max_depth"]
43 |     n_estimators = hyperparameters["n_estimators"]
44 |     criterion = hyperparameters["criterion"]
45 |     min_samples_split = hyperparameters["min_samples_split"]
46 |
47 | 6. Add the hyperparameters as input options to RandomForestClassifier()
48 |
49 | 7. Add metric logging to the script
50 |
51 | 8. Save the .pkl file
52 |
53 | # Understand differences in run configuration #
54 | The run configuration for hypertuning is slightly different from the standard run configuration. Azure ML has a dedicated package, azureml.train.hyperdrive, for creating a hyperparameter tuning run. From this package, we are going to make use of HyperDriveConfig to create the run configuration.
55 |
56 | 9. Create the estimator. (Note: the estimator for the hypertrain run is the same as for a normal run, but we are now running the script hypertrain.py)
57 |
58 | 10. Define the parameter sampling space and the search algorithm
59 | There are primarily three different ways to perform parameter sampling: Random, Grid and ....
60 | In this example we will make use of RandomParameterSampling.
61 | For every hyperparameter we can tune, we need to specify the search space. This search space can be continuous and defined by a uniform or normal distribution, or can be discrete and defined by a choice function.
62 |
63 | 11. Define the HyperDrive run configuration
64 | Make sure to use the parameter sampling as an input of the config file and to specify the primary metric and its goal.
65 |
66 | # Submit run on AML compute #
67 |
68 | # View results in the portal #
69 |
-------------------------------------------------------------------------------- /LICENSE-CODE.TXT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT)
2 | Copyright (c) Microsoft Corporation. All rights reserved.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
5 | associated documentation files (the "Software"), to deal in the Software without restriction,
6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
8 | subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or substantial
11 | portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
14 | NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
15 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
16 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
17 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/MLOps-TDSP-Template/011f5418bc3be25570a84ff8c58dca94d4b35a45/LICENSE.TXT -------------------------------------------------------------------------------- /NOTICE.TXT: -------------------------------------------------------------------------------- 1 | ##Legal Notices 2 | Microsoft and any contributors grant you a license to the Microsoft documentation and other content 3 | in this repository under the [Creative Commons Attribution 4.0 International Public License](https://creativecommons.org/licenses/by/4.0/legalcode), 4 | see the LICENSE file, and grant you a license to any code in the repository under the [MIT License](https://opensource.org/licenses/MIT), see the 5 | LICENSE-CODE file. 6 | 7 | Microsoft, Windows, Microsoft Azure and/or other Microsoft products and services referenced in the documentation 8 | may be either trademarks or registered trademarks of Microsoft in the United States and/or other countries. 9 | The licenses for this project do not grant you rights to use any Microsoft names, logos, or trademarks. 10 | Microsoft's general trademark guidelines can be found at http://go.microsoft.com/fwlink/?LinkID=254653. 11 | 12 | Privacy information can be found at https://privacy.microsoft.com/ 13 | 14 | Microsoft and any contributors reserve all others rights, whether under their respective copyrights, patents, 15 | or trademarks, whether by implication, estoppel or otherwise. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLOps Quickstart Template # 2 | 3 | This repo provides a quickstarter template as a fork on TDSP (https://github.com/Azure/Azure-TDSP-ProjectTemplate), extending the template with a suggested structure for operationalization using Azure. The current code base includes ARM templates as IaC for resource deployment, template build and release pipelines to enable ML model CI/CD, template code for working with Azure ML. 4 | 5 | ## How to get started ## 6 | 7 | * Clone this repo 8 | * Make sure you have an Azure Subscription set up. 9 | * Make sure you have an Azure DevOps instance set up. 10 | * Import the build and release definitions ('Code'>'Operationalization'>'build_and_release') into Azure DevOps pipelines. 11 | * Update the build and release definitions to use your credentials i.e. Azure subscription. 12 | * Create an initial commit. 13 | * If everything is set up correctly, Azure DevOps will provision your Azure Resources as triggered by the CI. 14 | * Use the Azure CLI ML Extension (`az ml project attach` command) or Azure ML SDK to configure your local workspace to use the created Azure ML workspace. 15 | * Run `Code/Modeling/train_submit` to run your first AzureML experiment on remote compute. 
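
For the workspace-configuration step above, one option with the Azure ML SDK is to write a local `config.json` once. The sketch below is illustrative only; the workspace and resource group names are taken from `.azureml/config.json` in this repository, and the subscription id is a placeholder you must replace.

    from azureml.core import Workspace
    from azureml.core.authentication import AzureCliAuthentication

    ws = Workspace.get(
        name="azuremlworkshopws",
        subscription_id="<your-subscription-id>",
        resource_group="azuremlworkshoprgp",
        auth=AzureCliAuthentication()
    )
    # writes .azureml/config.json, later picked up by Workspace.from_config()
    ws.write_config(path=".azureml")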
16 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /Sample_Data/For_Modeling/modelling.md: -------------------------------------------------------------------------------- 1 | # List of feature sets 2 | | Feature Set Name | Link to the Full Feature Set | Full Feature Set Size (MB) | Link to Report | 3 | | ---:| ---: | ---: | ---: | 4 | | Feature Set 1 | [link](link/to/feature/set1) | 2,000 | [Feature Set 1 Report](link/to/report1)| 5 | | Feature Set 2 | [link](link/to/feature/set2) | 300 | [Feature Set 2 Report](link/to/report2)| 6 | 7 | If the link to the full dataset does not apply, provide some information on how to access the full dataset. 8 | 9 | If the data stays in an Azure file storage, please provide the link to the text file with the information of the file storage that has been checked in to the git repository. -------------------------------------------------------------------------------- /Sample_Data/Processed/processed.md: -------------------------------------------------------------------------------- 1 | ## List of Processed Datasets 2 | 3 | 4 | | Processed Dataset Name | Link to the Full Processed Dataset | Full Processed Dataset Size (MB) | Link to Report | 5 | | ---:| ---: | ---: | ---: | 6 | | Processed Dataset 1 | [link](link/to/processed/dataset1) | 2,000 | [Processed Dataset 1 Report](link/to/report1)| 7 | | Processed Dataset 2 | [link](link/to/processed/dataset2) | 300 | [Processed Dataset 2 Report](link/to/report2)| 8 | 9 | 10 | If the link to the full dataset does not apply, provide some information on how to access the full dataset. 11 | 12 | If the data stays in an Azure file storage, please provide the link to the text file with the information of the file storage that has been checked in to the git repository. -------------------------------------------------------------------------------- /Sample_Data/README.md: -------------------------------------------------------------------------------- 1 | The **Sample_Data** directory in the project git repository is the place to store **SAMPLE** datasets which should be of small size, **NOT** the entire datasets. If your client does not allow you to store even the sample data on the github repository, if possible, store a sample dataset with all confidential fields hashed. If still not allowed, please do not store sample data here. But, please still fill in the table in each sub-directory. 2 | 3 | The small sample datasets can be used to make your data preprocessing, feature engineering, or modeling scripts runnable. It can be helpful to quickly run the scripts that process or model the data, and understand what the scripts are doing. 4 | 5 | In each directory, there is a markdown file, which lists all datasets in each directory. Please provide the link to the full dataset in case one wants to access the full dataset. 
6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /Sample_Data/Raw/rawData.md: -------------------------------------------------------------------------------- 1 | ## List of Raw Datasets 2 | 3 | 4 | | Raw Dataset Name | Link to the Full Dataset | Full Dataset Size (MB) | Link to Report | 5 | | ---:| ---: | ---: | ---: | 6 | | Raw Dataset 1 | [link](link/to/full/dataset1) | 2,000 | [Raw Dataset 1 Report](link/to/report1)| 7 | | Raw Dataset 2 | [link](link/to/full/dataset2) | 300 | [Raw Dataset 2 Report](link/to/report2)| 8 | 9 | If the link to the full dataset does not apply, provide some information on how to access the full dataset. 10 | 11 | If the data resides in Azure file storage, please provide a link to the text file, checked in to the git repository, that describes that file storage. 12 | 13 | -------------------------------------------------------------------------------- /conda_dependencies.yml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | dependencies: 3 | - python=3.6.2 4 | - pip: 5 | - numpy==1.15.4 6 | - pandas==0.23.4 7 | - scikit-learn==0.20.1 8 | - scipy==1.0.0 9 | - matplotlib==3.0.2 10 | - utils==0.9.0 11 | # Required packages for AzureML execution, history, and data preparation. 12 | - azureml-sdk==1.0.85 13 | - azureml-defaults==1.0.85 14 | - azure-cli==2.0.58 15 | # Dev Tools 16 | - setuptools 17 | - flake8 18 | - flake8_formatter_junit_xml 19 | - pytest 20 | -------------------------------------------------------------------------------- /infrastructure/README.md: -------------------------------------------------------------------------------- 1 | # Infrastructure as Code 2 | 3 | This folder contains examples of how to bootstrap your machine learning workflow. 4 | Azure Resource Manager (ARM) templates & Azure ML CLI commands can easily be used to bootstrap and provision workspaces for your data scientists before they begin data preparation & model training. 5 | 6 | * **[ARM-Templates](arm-templates)** contains infrastructure-as-code templates and parameter files for two sample environments (dev + test). The use of ARM templates gives you the most flexibility in customizing your Azure resources. 7 | * **[Scripts](scripts)** contains Azure CLI scripts for resource deployment. The use of CLI commands is the leanest way to deploy resources to Azure. 8 | * **[Build-and-Release](build-and-release)** contains pipeline definitions for Azure DevOps to automate infrastructure roll out. It also includes a PowerShell script that can be used for test deployments of the infrastructure resources. 9 | 10 | ## Automated roll out of infrastructure 11 | 12 | In this section you will learn how to use [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) for the automated deployment of infrastructure. This way of working enables you to incrementally deploy changes to your resources, stage the changes over different environments, and build confidence as your system grows more complex. 13 | 14 | ### Getting started 15 | 16 | Complete the steps below to set up your pipeline for infrastructure roll out. 17 | 18 | * Navigate to [Azure DevOps](http://dev.azure.com/) and create a new organization and project. You can also re-use an existing organization and/or project.
19 | * Create a new [service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml) in Azure DevOps of the Azure Resource Manager connection type. Azure DevOps will authenticate using this connection to make deployments to your Azure subscription. 20 | * In [deploy-infra.yml](build-and-release/deploy-infra.yml) replace `` with the name of the service connection that you created in the previous step. 21 | * Some Azure resources require you to use globally unique names across Azure. This holds, for example, for storage account resources. Adapt the resource names in the ARM parameter files so that they are globally unique. Note that you should also update the parameter files for the ML workspace and ML compute resources once you update the names of the underlying resources. 22 | * Make a test deployment using the provided PowerShell script `deploy-infra.ps1`. 23 | * Set up a new pipeline in Azure DevOps with the option to re-use an existing template. Point to the pipeline definition [deploy-infra.yml](build-and-release/deploy-infra.yml) in your repository. 24 | * Run your pipeline from Azure DevOps. On completion, you should see a result like the one below. 25 | ![An example of a pipeline for Infrastructure roll out](infra_stages.png) 26 | 27 | ### Best practices on customizing the templates for your environment and team 28 | 29 | * Many teams already have existing resources in their Azure tenant, e.g. for Key Vault and Application Insights. These resources can be re-used by Azure Machine Learning. Simply point to these resources in the [Machine Learning Workspace template](arm-templates/mlworkspace/template.json). For ease of modification, we have provided separate templates for each of the resources in this repository. 30 | * In most situations data already resides on existing storage in Azure. The [Azure CLI ML Extension](https://docs.microsoft.com/en-us/azure/machine-learning/reference-azure-machine-learning-cli) allows for a lean way to add storage as a [Datastore](https://docs.microsoft.com/en-us/azure/machine-learning/concept-data) in Azure Machine Learning. The [Azure CLI task](https://docs.microsoft.com/en-us/azure/devops/pipelines/tasks/deploy/azure-cli?view=azure-devops) in Azure DevOps can help you to automate the datastore attachment process as part of the infrastructure roll out (a sketch of the equivalent step from the Python SDK is shown after this list). 31 | * Many teams choose to deploy multiple environments to work with, for example DEV, INT and PROD. In this way infrastructure can be rolled out in a phased way and with more confidence as your system becomes more complex. 32 | * As you roll out additional infrastructure resources, it becomes valuable to stage changes across the different environments. You could consider running a set of integration or component tests before rolling out to PRD. 33 | * It is a sound practice to ensure that roll outs of changes to PRD can only originate from the master branch. [Conditions](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/conditions?view=azure-devops&tabs=yaml) in Azure Pipelines can help you set controls like these. 34 | * One could specify a security group of users whose [approval](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals?view=azure-devops&tabs=check-pass#approvals) is required to make roll outs to specific environments. 35 | * It is important to note that in the MLOps way of working, we make a separation of concerns between the roll out of infrastructure and the roll out of ML artifacts. Hence the two are rolled out at different moments and with different automation pipelines. 36 | * Multiple additional security controls (virtual network rules, role-based access control and custom identities) can be applied to the Azure resources in this repository. Controls can be added directly in the ARM templates. Consult the [documentation](https://docs.microsoft.com/en-us/azure/templates/) on Azure Resource Manager to find the possible modifications for each Azure resource. As an example of such modifications, this repository contains a [template](arm-templates/mlcompute/template-vnet.json) for Azure ML compute that adds an SSH user and virtual network controls to the managed compute virtual machines.
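As a complement to the datastore bullet above, here is a minimal sketch of registering existing blob storage as a datastore from the Azure ML Python SDK instead of the CLI. The datastore name, container, storage account, and key below are hypothetical placeholders, and the snippet assumes a workspace config file (such as `.azureml/config.json`) is present:

```python
# Minimal sketch: register an existing blob container as an Azure ML datastore.
# Assumes azureml-core is installed and a workspace config file is available;
# all names and the account key are placeholders for illustration only.
from azureml.core import Workspace
from azureml.core.datastore import Datastore

ws = Workspace.from_config()

datastore = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="training_data",       # hypothetical datastore name
    container_name="datasets",            # hypothetical container name
    account_name="mlopssadatadev",        # hypothetical storage account
    account_key="<storage-account-key>",  # pass in from a secret store, not source control
)
print(datastore.name, datastore.container_name)
```

Running a step like this as part of the deployment pipeline keeps datastore attachment in step with the rest of the infrastructure roll out.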
-------------------------------------------------------------------------------- /infrastructure/arm-templates/appinsights/parameters.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "appInsightsName": { 6 | "value": "mlops-ain-dev" 7 | }, 8 | "regionId": { 9 | "value": "westeurope" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/appinsights/parameters.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "appInsightsName": { 6 | "value": "mlops-ain-test" 7 | }, 8 | "regionId": { 9 | "value": "westeurope" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/appinsights/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "appInsightsName": { 6 | "type": "string" 7 | }, 8 | "regionId": { 9 | "type": "string" 10 | } 11 | }, 12 | "resources": [ 13 | { 14 | "type": "Microsoft.Insights/components", 15 | "location": "[parameters('regionId')]", 16 | "name": "[parameters('appInsightsName')]", 17 | "apiVersion": "2015-05-01", 18 | "kind": "web", 19 | "properties": { 20 | "Application_Type": "web" 21 | } 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/containerregistry/parameters.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "crname": { 6 | "value": "mlopscrdev" 7 | }, 8 | "location": { 9 | "value": "westeurope" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/containerregistry/parameters.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema":
"https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "crname": { 6 | "value": "mlopscrtest" 7 | }, 8 | "location": { 9 | "value": "westeurope" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/containerregistry/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "crname": { 6 | "type": "string" 7 | }, 8 | "location": { 9 | "type": "string" 10 | } 11 | }, 12 | "variables": {}, 13 | "resources": [ 14 | { 15 | "type": "Microsoft.ContainerRegistry/registries", 16 | "sku": { 17 | "name": "Basic", 18 | "tier": "Basic" 19 | }, 20 | "name": "[parameters('crname')]", 21 | "apiVersion": "2017-10-01", 22 | "location": "[parameters('location')]", 23 | "tags": {}, 24 | "scale": null, 25 | "properties": { 26 | "adminUserEnabled": true 27 | } 28 | } 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /infrastructure/arm-templates/keyvault/parameters.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "kvname": { 6 | "value": "mlops-kv-dev" 7 | }, 8 | "location": { 9 | "value": "westeurope" 10 | }, 11 | "createMode": { 12 | "value": "default" 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/keyvault/parameters.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "kvname": { 6 | "value": "mlops-kv-test" 7 | }, 8 | "location": { 9 | "value": "westeurope" 10 | }, 11 | "createMode": { 12 | "value": "default" 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/keyvault/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "kvname": { 6 | "type": "string" 7 | }, 8 | "location": { 9 | "type": "string" 10 | }, 11 | "createMode": { 12 | "type": "string" 13 | } 14 | }, 15 | "variables": { 16 | }, 17 | "resources": [ 18 | { 19 | "type": "Microsoft.KeyVault/vaults", 20 | "name": "[parameters('kvname')]", 21 | "apiVersion": "2018-02-14", 22 | "location": "[parameters('location')]", 23 | "tags": {}, 24 | "scale": null, 25 | "properties": { 26 | "sku": { 27 | "family": "A", 28 | "name": "standard" 29 | }, 30 | "tenantId": "[subscription().tenantId]", 31 | "createMode": "[parameters('createMode')]", 32 | "enabledForTemplateDeployment": true, 33 | "accessPolicies": [] 34 | } 35 | } 36 | ] 37 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlcompute/parameters-vnet.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "value": "mlops-mls-dev" 7 | }, 8 | "clusterName": { 9 | "value": "cpu-compute" 10 | }, 11 | "vmSize": { 12 | "value": "STANDARD_D3_V2" 13 | }, 14 | "minNodeCount": { 15 | "value": 0 16 | }, 17 | "maxNodeCount": { 18 | "value": 3 19 | }, 20 | "scaleDownTime": { 21 | "value": "PT15M" 22 | }, 23 | "subnetId": { 24 | "value": "/subscriptions/xxxx/resourceGroups/yyyy/providers/Microsoft.Network/virtualNetworks/zzzz/subnets/ssss" 25 | }, 26 | "adminUserName": { 27 | "value": "" 28 | }, 29 | "adminUserPassword": { 30 | "value": "" 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlcompute/parameters-vnet.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "value": "mlops-mls-test" 7 | }, 8 | "clusterName": { 9 | "value": "cpu-compute" 10 | }, 11 | "vmSize": { 12 | "value": "STANDARD_D3_V2" 13 | }, 14 | "minNodeCount": { 15 | "value": 0 16 | }, 17 | "maxNodeCount": { 18 | "value": 3 19 | }, 20 | "scaleDownTime": { 21 | "value": "PT15M" 22 | }, 23 | "subnetId": { 24 | "value": "/subscriptions/xxxx/resourceGroups/yyyy/providers/Microsoft.Network/virtualNetworks/zzzz/subnets/ssss" 25 | }, 26 | "adminUserName": { 27 | "value": "" 28 | }, 29 | "adminUserPassword": { 30 | "value": "" 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlcompute/parameters.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "value": "mlops-mls-dev" 7 | }, 8 | "clusterName": { 9 | "value": "cpu-compute" 10 | }, 11 | "vmSize": { 12 | "value": "STANDARD_D3_V2" 13 | }, 14 | "minNodeCount": { 15 | "value": 0 16 | }, 17 | "maxNodeCount": { 18 | "value": 3 19 | }, 20 | "scaleDownTime": { 21 | "value": "PT15M" 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlcompute/parameters.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "value": "mlops-mls-test" 7 | }, 8 | "clusterName": { 9 | "value": "cpu-compute" 10 | }, 11 | "vmSize": { 12 | "value": "STANDARD_D3_V2" 13 | }, 14 | "minNodeCount": { 15 | "value": 0 16 | }, 17 | "maxNodeCount": { 18 | "value": 3 19 | }, 20 | "scaleDownTime": { 21 | "value": "PT15M" 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlcompute/template-vnet.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "type": "string" 7 | }, 8 | 
"clusterName": { 9 | "type": "string" 10 | }, 11 | "vmSize": { 12 | "type": "string" 13 | }, 14 | "minNodeCount": { 15 | "type": "int" 16 | }, 17 | "maxNodeCount": { 18 | "type": "int" 19 | }, 20 | "scaleDownTime": { 21 | "type": "string" 22 | }, 23 | "subnetId": { 24 | "type": "string" 25 | }, 26 | "adminUserName": { 27 | "type": "string" 28 | }, 29 | "adminUserPassword": { 30 | "type": "string" 31 | } 32 | }, 33 | "variables": {}, 34 | "resources": [ 35 | { 36 | "type": "Microsoft.MachineLearningServices/workspaces/computes", 37 | "name": "[concat(parameters('workspaceName'), '/', parameters('clusterName'))]", 38 | "apiVersion": "2018-11-19", 39 | "location" : "[resourceGroup().location]", 40 | "properties": { 41 | "computeType": "AmlCompute", 42 | "computeLocation" : "[resourceGroup().location]", 43 | "properties": 44 | { 45 | "scaleSettings": 46 | { 47 | "minNodeCount" : "[parameters('minNodeCount')]", 48 | "maxNodeCount" : "[parameters('maxNodeCount')]", 49 | "nodeIdleTimeBeforeScaleDown": "[parameters('scaleDownTime')]" 50 | }, 51 | "vmPriority": "Dedicated", 52 | "vmSize" : "[parameters('vmSize')]", 53 | "userAccountCredentials" : 54 | { 55 | "adminUserName" : "[parameters('adminUserName')]", 56 | "adminUserPassword" : "[parameters('adminUserPassword')]" 57 | }, 58 | "subnet" : 59 | { 60 | "id" : "[parameters('subnetId')]" 61 | } 62 | } 63 | } 64 | } 65 | ] 66 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlcompute/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "type": "string" 7 | }, 8 | "clusterName": { 9 | "type": "string" 10 | }, 11 | "vmSize": { 12 | "type": "string" 13 | }, 14 | "minNodeCount": { 15 | "type": "int" 16 | }, 17 | "maxNodeCount": { 18 | "type": "int" 19 | }, 20 | "scaleDownTime": { 21 | "type": "string" 22 | } 23 | }, 24 | "variables": {}, 25 | "resources": [ 26 | { 27 | "type": "Microsoft.MachineLearningServices/workspaces/computes", 28 | "name": "[concat(parameters('workspaceName'), '/', parameters('clusterName'))]", 29 | "apiVersion": "2018-11-19", 30 | "location" : "[resourceGroup().location]", 31 | "properties": { 32 | "computeType": "AmlCompute", 33 | "computeLocation" : "[resourceGroup().location]", 34 | "properties": 35 | { 36 | "scaleSettings": 37 | { 38 | "minNodeCount" : "[parameters('minNodeCount')]", 39 | "maxNodeCount" : "[parameters('maxNodeCount')]", 40 | "nodeIdleTimeBeforeScaleDown": "[parameters('scaleDownTime')]" 41 | }, 42 | "vmPriority": "Dedicated", 43 | "vmSize" : "[parameters('vmSize')]" 44 | } 45 | } 46 | } 47 | ] 48 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlworkspace/parameters.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "value": "mlops-mls-dev" 7 | }, 8 | "keyVaultName": { 9 | "value": "mlops-kv-dev" 10 | }, 11 | "applicationInsightsName": { 12 | "value": "mlops-ain-dev" 13 | }, 14 | "containerRegistryName": { 15 | "value": "mlopscrdev" 16 | }, 17 | "storageAccountName": { 18 | "value": "mlopssadev" 19 | } 20 | } 21 | } 
-------------------------------------------------------------------------------- /infrastructure/arm-templates/mlworkspace/parameters.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "value": "mlops-mls-test" 7 | }, 8 | "keyVaultName": { 9 | "value": "mlops-kv-test" 10 | }, 11 | "applicationInsightsName": { 12 | "value": "mlops-ain-test" 13 | }, 14 | "containerRegistryName": { 15 | "value": "mlopscrtest" 16 | }, 17 | "storageAccountName": { 18 | "value": "mlopssatest" 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/mlworkspace/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "workspaceName": { 6 | "type": "string" 7 | }, 8 | "keyVaultName": { 9 | "type": "string" 10 | }, 11 | "applicationInsightsName": { 12 | "type": "string" 13 | }, 14 | "containerRegistryName": { 15 | "type": "string" 16 | }, 17 | "storageAccountName": { 18 | "type": "string" 19 | } 20 | }, 21 | "resources": [ 22 | { 23 | "name": "[parameters('workspaceName')]", 24 | "type": "Microsoft.MachineLearningServices/workspaces", 25 | "apiVersion": "2018-11-19", 26 | "location": "[resourceGroup().location]", 27 | "identity": { 28 | "type": "systemAssigned" 29 | }, 30 | "properties": { 31 | "keyVault": "[resourceId('Microsoft.KeyVault/vaults', parameters('keyVaultName'))]", 32 | "applicationInsights": "[resourceId('Microsoft.Insights/components', parameters('applicationInsightsName'))]", 33 | "containerRegistry": "[resourceId('Microsoft.ContainerRegistry/registries', parameters('containerRegistryName'))]", 34 | "storageAccount": "[resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName'))]" 35 | } 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/storage/parameters.dev.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "saname": { 6 | "value": "mlopssadev" 7 | }, 8 | "location": { 9 | "value": "westeurope" 10 | }, 11 | "accountType": { 12 | "value": "Standard_RAGRS" 13 | }, 14 | "kind": { 15 | "value": "StorageV2" 16 | }, 17 | "accessTier": { 18 | "value": "Hot" 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /infrastructure/arm-templates/storage/parameters.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "saname": { 6 | "value": "mlopssatest" 7 | }, 8 | "location": { 9 | "value": "westeurope" 10 | }, 11 | "accountType": { 12 | "value": "Standard_RAGRS" 13 | }, 14 | "kind": { 15 | "value": "StorageV2" 16 | }, 17 | "accessTier": { 18 | "value": "Hot" 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- 
/infrastructure/arm-templates/storage/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "saname": { 6 | "type": "string" 7 | }, 8 | "location": { 9 | "type": "string" 10 | }, 11 | "accountType": { 12 | "type": "string" 13 | }, 14 | "kind": { 15 | "type": "string" 16 | }, 17 | "accessTier": { 18 | "type": "string" 19 | } 20 | }, 21 | "variables": {}, 22 | "resources": [ 23 | { 24 | "type": "Microsoft.Storage/storageAccounts", 25 | "sku": { 26 | "name": "[parameters('accountType')]" 27 | }, 28 | "kind": "[parameters('kind')]", 29 | "name": "[parameters('saname')]", 30 | "apiVersion": "2018-07-01", 31 | "location": "[parameters('location')]", 32 | "properties": { 33 | "accessTier": "[parameters('accessTier')]", 34 | "supportsHttpsTrafficOnly": true 35 | } 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /infrastructure/build-and-release/deploy-infra.ps1: -------------------------------------------------------------------------------- 1 | # Deployment script for machine learning resources 2 | # Run locally to debug changes in the resource configuration 3 | # Use `deploy-infra.yml` for automation of deployments. 4 | 5 | # Prompt users for resource group and location 6 | $resourceGroupName = Read-Host -Prompt "Provide a resource group name" 7 | $location = Read-Host -Prompt "Provide a datacenter location (e.g. westeurope)" 8 | 9 | # Create a Resource Group 10 | New-AzResourceGroup -Name $resourceGroupName -Location $location 11 | 12 | # Deploy Storage Account 13 | New-AzResourceGroupDeployment -ResourceGroupName $resourceGroupName ` 14 | -TemplateFile $PSScriptRoot/../arm-templates/storage/template.json ` 15 | -TemplateParameterFile $PSScriptRoot/../arm-templates/storage/parameters.dev.json 16 | 17 | # Deploy Container Registry 18 | New-AzResourceGroupDeployment -ResourceGroupName $resourceGroupName ` 19 | -TemplateFile $PSScriptRoot/../arm-templates/containerregistry/template.json ` 20 | -TemplateParameterFile $PSScriptRoot/../arm-templates/containerregistry/parameters.dev.json 21 | 22 | # Deploy Application Insights 23 | New-AzResourceGroupDeployment -ResourceGroupName $resourceGroupName ` 24 | -TemplateFile $PSScriptRoot/../arm-templates/appinsights/template.json ` 25 | -TemplateParameterFile $PSScriptRoot/../arm-templates/appinsights/parameters.dev.json 26 | 27 | # Deploy Key Vault 28 | New-AzResourceGroupDeployment -ResourceGroupName $resourceGroupName ` 29 | -TemplateFile $PSScriptRoot/../arm-templates/keyvault/template.json ` 30 | -TemplateParameterFile $PSScriptRoot/../arm-templates/keyvault/parameters.dev.json 31 | 32 | # Deploy Workspace 33 | New-AzResourceGroupDeployment -ResourceGroupName $resourceGroupName ` 34 | -TemplateFile $PSScriptRoot/../arm-templates/mlworkspace/template.json ` 35 | -TemplateParameterFile $PSScriptRoot/../arm-templates/mlworkspace/parameters.dev.json 36 | 37 | # Deploy Compute 38 | New-AzResourceGroupDeployment -ResourceGroupName $resourceGroupName ` 39 | -TemplateFile $PSScriptRoot/../arm-templates/mlcompute/template.json ` 40 | -TemplateParameterFile $PSScriptRoot/../arm-templates/mlcompute/parameters.dev.json 41 | -------------------------------------------------------------------------------- /infrastructure/build-and-release/deploy-infra.template.yml:
-------------------------------------------------------------------------------- 1 | # Azure Pipeline Template for ML Workspace Resources Deployment 2 | parameters: 3 | - name: environment 4 | type: string 5 | - name: serviceConnection 6 | type: string 7 | 8 | jobs: 9 | - deployment: DeployMLResources 10 | displayName: Deploy ML Resources 11 | pool: 12 | vmImage: ubuntu-16.04 13 | environment: ${{ parameters.environment }} 14 | variables: 15 | - name: resourceGroupName 16 | value: mlopsexample-${{ parameters.environment }} 17 | - name: resourceGroupLocation 18 | value: westeurope 19 | strategy: 20 | runOnce: 21 | deploy: 22 | steps: 23 | - download: current 24 | artifact: infratemplates 25 | - script: ls 26 | displayName: 'List dirs' 27 | 28 | - task: AzureResourceGroupDeployment@2 29 | displayName: 'Deploy Storage Account for AML' 30 | inputs: 31 | azureSubscription: ${{ parameters.serviceConnection }} 32 | resourceGroupName: $(resourceGroupName) 33 | location: $(resourceGroupLocation) 34 | csmFile: '$(Pipeline.Workspace)/infratemplates/storage/template.json' 35 | csmParametersFile: '$(Pipeline.Workspace)/infratemplates/storage/parameters.${{ parameters.environment }}.json' 36 | 37 | # Optional - Add a second storage account to host data for machine learning 38 | # - task: AzureResourceGroupDeployment@2 39 | # displayName: 'Deploy Storage Account for Data' 40 | # inputs: 41 | # azureSubscription: ${{ parameters.serviceConnection }} 42 | # resourceGroupName: $(resourceGroupName) 43 | # location: $(resourceGroupLocation) 44 | # csmFile: '$(Pipeline.Workspace)/infratemplates/storage/template.json' 45 | # csmParametersFile: '$(Pipeline.Workspace)/infratemplates/storage/parameters.${{ parameters.environment }}.json' 46 | # overrideParameters: | 47 | # -name "mlopssadata${{ parameters.environment }}" 48 | 49 | - task: AzureResourceGroupDeployment@2 50 | displayName: 'Deploy Container Registry' 51 | inputs: 52 | azureSubscription: ${{ parameters.serviceConnection }} 53 | resourceGroupName: $(resourceGroupName) 54 | location: $(resourceGroupLocation) 55 | csmFile: '$(Pipeline.Workspace)/infratemplates/containerregistry/template.json' 56 | csmParametersFile: '$(Pipeline.Workspace)/infratemplates/containerregistry/parameters.${{ parameters.environment }}.json' 57 | 58 | - task: AzureResourceGroupDeployment@2 59 | displayName: 'Deploy Application Insights' 60 | inputs: 61 | azureSubscription: ${{ parameters.serviceConnection }} 62 | resourceGroupName: $(resourceGroupName) 63 | location: $(resourceGroupLocation) 64 | csmFile: '$(Pipeline.Workspace)/infratemplates/appinsights/template.json' 65 | csmParametersFile: '$(Pipeline.Workspace)/infratemplates/appinsights/parameters.${{ parameters.environment }}.json' 66 | 67 | - task: AzureResourceGroupDeployment@2 68 | displayName: 'Deploy Key Vault' 69 | inputs: 70 | azureSubscription: ${{ parameters.serviceConnection }} 71 | resourceGroupName: $(resourceGroupName) 72 | location: $(resourceGroupLocation) 73 | csmFile: '$(Pipeline.Workspace)/infratemplates/keyvault/template.json' 74 | csmParametersFile: '$(Pipeline.Workspace)/infratemplates/keyvault/parameters.${{ parameters.environment }}.json' 75 | 76 | - task: AzureResourceGroupDeployment@2 77 | displayName: 'Deploy ML Workspace' 78 | inputs: 79 | azureSubscription: ${{ parameters.serviceConnection }} 80 | resourceGroupName: $(resourceGroupName) 81 | location: $(resourceGroupLocation) 82 | csmFile: '$(Pipeline.Workspace)/infratemplates/mlworkspace/template.json' 83 | csmParametersFile: 
'$(Pipeline.Workspace)/infratemplates/mlworkspace/parameters.${{ parameters.environment }}.json' 84 | 85 | # Optional - Add a second ML workspace using the same underlying infrastructure 86 | # - task: AzureResourceGroupDeployment@2 87 | # displayName: 'Deploy ML Workspace' 88 | # inputs: 89 | # azureSubscription: ${{ parameters.serviceConnection }} 90 | # resourceGroupName: $(resourceGroupName) 91 | # location: $(resourceGroupLocation) 92 | # csmFile: '$(Pipeline.Workspace)/infratemplates/mlworkspace/template.json' 93 | # csmParametersFile: '$(Pipeline.Workspace)/infratemplates/mlworkspace/parameters.${{ parameters.environment }}.json' 94 | # csmParametersFile: '$(Pipeline.Workspace)/infratemplates/storage/parameters.${{ parameters.environment }}.json' 95 | # overrideParameters: | 96 | # -name "mlops-mls2-${{ parameters.environment }}" 97 | 98 | - task: AzureResourceGroupDeployment@2 99 | displayName: 'Deploy ML Compute' 100 | inputs: 101 | azureSubscription: ${{ parameters.serviceConnection }} 102 | resourceGroupName: $(resourceGroupName) 103 | location: $(resourceGroupLocation) 104 | csmFile: '$(Pipeline.Workspace)/infratemplates/mlcompute/template.json' 105 | csmParametersFile: '$(Pipeline.Workspace)/infratemplates/mlcompute/parameters.${{ parameters.environment }}.json' 106 | -------------------------------------------------------------------------------- /infrastructure/build-and-release/deploy-infra.yml: -------------------------------------------------------------------------------- 1 | # Azure Pipeline Definition for Infrastructure Deployment 2 | 3 | # Trigger on changes in the infrastructure folder and on the master branch 4 | trigger: 5 | branches: 6 | include: 7 | - master 8 | 9 | paths: 10 | include: 11 | - infrastructure/* 12 | 13 | stages: 14 | - stage: Build 15 | displayName: 'IaC Build' 16 | jobs: 17 | - job: Build 18 | pool: 19 | vmImage: ubuntu-16.04 20 | steps: 21 | - task: CopyFiles@2 22 | displayName: 'Copy ARM templates' 23 | inputs: 24 | sourceFolder: 'infrastructure/arm-templates' 25 | targetFolder: '$(Build.ArtifactStagingDirectory)' 26 | - publish: '$(Build.ArtifactStagingDirectory)' 27 | artifact: infratemplates 28 | 29 | - stage: DEV 30 | displayName: 'DEV Deployment' 31 | jobs: 32 | - template: deploy-infra.template.yml 33 | parameters: 34 | environment: dev 35 | serviceConnection: 36 | 37 | - stage: TEST 38 | # only make deployments to TEST originating from the master branch 39 | condition: and(succeeded(), eq(variables['build.sourceBranch'], 'refs/heads/master')) 40 | displayName: 'TEST Deployment' 41 | jobs: 42 | - template: deploy-infra.template.yml 43 | parameters: 44 | environment: test 45 | serviceConnection: 46 | 47 | -------------------------------------------------------------------------------- /infrastructure/infra_stages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/MLOps-TDSP-Template/011f5418bc3be25570a84ff8c58dca94d4b35a45/infrastructure/infra_stages.png -------------------------------------------------------------------------------- /infrastructure/runconfigschema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "RunConfiguration", 4 | "type": "object", 5 | "required": [ 6 | "script", 7 | "environment" 8 | ], 9 | "properties": { 10 | "script": { 11 | "type": "string", 12 | "description": "The relative path to the python script file. 
The file path is relative to the source_directory passed to submit run.\nExample: train.py", 13 | "minLength": 1 14 | }, 15 | "arguments": { 16 | "type": [ 17 | "array", 18 | "null" 19 | ], 20 | "description": "Command line arguments for the python script file.\nExample: [\"234\"]", 21 | "items": { 22 | "type": "string" 23 | } 24 | }, 25 | "sourceDirectoryDataStore": { 26 | "type": [ 27 | "null", 28 | "string" 29 | ], 30 | "description": "The attribute is used to configure the backing datastore for the project share.\nExample: my-source-store" 31 | }, 32 | "framework": { 33 | "description": "The supported frameworks are Python, PySpark, CNTK, TensorFlow, and PyTorch. Use Tensorflow for AmlCompute clusters, and Python for distributed training jobs.\nRemarks: If framework is set to PySpark, then spark field is required.\nIf framework is set to TensorFlow then tensorflow field is required.\nExample: Python", 34 | "oneOf": [ 35 | { 36 | "$ref": "#/definitions/Framework" 37 | } 38 | ] 39 | }, 40 | "communicator": { 41 | "description": "The supported communicators are None, ParameterServer, OpenMpi, and IntelMpi Keep in mind that OpenMpi requires a custom image with OpenMpi installed.\nUse ParameterServer or OpenMpi for AmlCompute clusters. Use IntelMpi for distributed training jobs.\nRemarks: If communicator is set to Mpi, then mpi field is required.\nExample: None", 42 | "oneOf": [ 43 | { 44 | "type": "null" 45 | }, 46 | { 47 | "$ref": "#/definitions/Communicator" 48 | } 49 | ] 50 | }, 51 | "target": { 52 | "type": [ 53 | "null", 54 | "string" 55 | ], 56 | "description": "Target refers to compute where the job is scheduled for execution. The default target is \"local\" refering to the local machine.\nRemarks: If target is amlcompute then amlCompute field is required.\nExample: amlcompute" 57 | }, 58 | "dataReferences": { 59 | "type": [ 60 | "null", 61 | "object" 62 | ], 63 | "description": "Data reference configuration details. All the data sources are made available to the run during execution based on each configuration.\n", 64 | "additionalProperties": { 65 | "$ref": "#/definitions/DataReferenceConfiguration" 66 | } 67 | }, 68 | "jobName": { 69 | "type": [ 70 | "null", 71 | "string" 72 | ], 73 | "description": "This is primarily intended for notebooks to override the default job name.\nDefaults to ArgumentVector[0] if not specified.\nExample: FindSquaresJob" 74 | }, 75 | "autoPrepareEnvironment": { 76 | "type": [ 77 | "boolean", 78 | "null" 79 | ], 80 | "description": "Defaulted to True, but if set to False the run will fail if no environment was found matching the requirements specified.\nThis can be set to False to fail fast when the environment is not found in the cache.\nExample: True" 81 | }, 82 | "maxRunDurationSeconds": { 83 | "type": [ 84 | "integer", 85 | "null" 86 | ], 87 | "description": "Maximum allowed time for the run. The system will attempt to automatically cancel the run if it took longer than this value.\nMaxRunDurationSeconds=null means infinite duration.\nExample: 84000", 88 | "format": "int64" 89 | }, 90 | "nodeCount": { 91 | "type": [ 92 | "integer", 93 | "null" 94 | ], 95 | "description": "Number of compute nodes to run the job on. 
Only applies to AMLCompute.\nExample: 1", 96 | "format": "int32" 97 | }, 98 | "environment": { 99 | "description": "The environment definition, This field configures the python environment.\nIt can be configured to use an existing Python environment or configured to setup a temp environment for the experiment.\nThe definition is also responsible for setting the required application dependencies.\n", 100 | "oneOf": [ 101 | { 102 | "$ref": "#/definitions/EnvironmentDefinition" 103 | } 104 | ] 105 | }, 106 | "history": { 107 | "description": "This section is used to disable and enable experiment history logging features.\n", 108 | "oneOf": [ 109 | { 110 | "type": "null" 111 | }, 112 | { 113 | "$ref": "#/definitions/HistoryConfiguration" 114 | } 115 | ] 116 | }, 117 | "spark": { 118 | "description": "Spark configuration details. When the platform is set to Pyspark, then the spark configuration is used to set the default sparkconf for the submitted job.\n", 119 | "oneOf": [ 120 | { 121 | "type": "null" 122 | }, 123 | { 124 | "$ref": "#/definitions/SparkConfiguration" 125 | } 126 | ] 127 | }, 128 | "batchAi": { 129 | "oneOf": [ 130 | { 131 | "type": "null" 132 | }, 133 | { 134 | "$ref": "#/definitions/BatchAiConfiguration" 135 | } 136 | ] 137 | }, 138 | "amlCompute": { 139 | "description": "The attribute is used to configure details of the compute target to be created during experiment.\nThe configuration only takes effect when the target is set to \"amlcompute\".\n", 140 | "oneOf": [ 141 | { 142 | "type": "null" 143 | }, 144 | { 145 | "$ref": "#/definitions/AMLComputeConfiguration" 146 | } 147 | ] 148 | }, 149 | "tensorflow": { 150 | "description": "The attribute is used to configure the distributed tensorflow parameters.\nThis attribute takes effect only when the framework is set to TensorFlow, and the communicator to ParameterServer.\nAmlCompute is the only supported compute for this configuration.\n", 151 | "oneOf": [ 152 | { 153 | "type": "null" 154 | }, 155 | { 156 | "$ref": "#/definitions/TensorflowConfiguration" 157 | } 158 | ] 159 | }, 160 | "mpi": { 161 | "description": "The attribute is used to configure the distributed MPI job parameters.\nThis attribute takes effect only when the framework is set to Python, and the communicator to OpenMpi or IntelMpi.\nAmlComppute is the only supported compute type for this configuration.\n", 162 | "oneOf": [ 163 | { 164 | "type": "null" 165 | }, 166 | { 167 | "$ref": "#/definitions/MpiConfiguration" 168 | } 169 | ] 170 | }, 171 | "hdi": { 172 | "description": "This attribute takes effect only when the target is set to an Azure HDI compute.\nThe HDI Configuration is used to set the YARN deployment mode. 
It is defaulted to cluster mode.\n", 173 | "oneOf": [ 174 | { 175 | "type": "null" 176 | }, 177 | { 178 | "$ref": "#/definitions/HdiConfiguration" 179 | } 180 | ] 181 | }, 182 | "containerInstance": { 183 | "oneOf": [ 184 | { 185 | "type": "null" 186 | }, 187 | { 188 | "$ref": "#/definitions/ContainerInstanceConfiguration" 189 | } 190 | ] 191 | }, 192 | "exposedPorts": { 193 | "type": [ 194 | "array", 195 | "null" 196 | ], 197 | "description": "Currently unused.\n", 198 | "items": { 199 | "type": "integer", 200 | "format": "int32" 201 | } 202 | }, 203 | "prepareEnvironment": { 204 | "type": [ 205 | "boolean", 206 | "null" 207 | ] 208 | } 209 | }, 210 | "definitions": { 211 | "Framework": { 212 | "type": "string", 213 | "description": "", 214 | "x-enumNames": [ 215 | "Python", 216 | "PySpark", 217 | "Cntk", 218 | "TensorFlow", 219 | "PyTorch", 220 | "TensorFlowParameterServer", 221 | "PythonMpi", 222 | "PythonIntelMpi", 223 | "PySparkInteractive" 224 | ], 225 | "enum": [ 226 | "Python", 227 | "PySpark", 228 | "Cntk", 229 | "TensorFlow", 230 | "PyTorch", 231 | "TensorFlowParameterServer", 232 | "PythonMpi", 233 | "PythonIntelMpi", 234 | "PySparkInteractive" 235 | ] 236 | }, 237 | "Communicator": { 238 | "type": "string", 239 | "description": "", 240 | "x-enumNames": [ 241 | "None", 242 | "ParameterServer", 243 | "OpenMpi", 244 | "IntelMpi", 245 | "Gloo", 246 | "Mpi" 247 | ], 248 | "enum": [ 249 | "None", 250 | "ParameterServer", 251 | "OpenMpi", 252 | "IntelMpi", 253 | "Gloo", 254 | "Mpi" 255 | ] 256 | }, 257 | "DataReferenceConfiguration": { 258 | "type": "object", 259 | "description": "A class for managing DataReferenceConfiguration.\n", 260 | "properties": { 261 | "dataStoreName": { 262 | "type": [ 263 | "null", 264 | "string" 265 | ], 266 | "description": "The name of the data store.\nExample: myblobstore" 267 | }, 268 | "mode": { 269 | "description": "Operation on the datastore, mount, download, upload.\nExample: Mount", 270 | "oneOf": [ 271 | { 272 | "$ref": "#/definitions/DataStoreMode" 273 | } 274 | ] 275 | }, 276 | "pathOnDataStore": { 277 | "type": [ 278 | "null", 279 | "string" 280 | ], 281 | "description": "Relative path on the datastore.\nExample: /images/validation" 282 | }, 283 | "pathOnCompute": { 284 | "type": [ 285 | "null", 286 | "string" 287 | ], 288 | "description": "The path on the compute target.\n" 289 | }, 290 | "overwrite": { 291 | "type": "boolean", 292 | "description": "Whether to overwrite the data if existing.\nExample: False" 293 | } 294 | }, 295 | "defaultSnippets": [ 296 | { 297 | "label": "Data references configuration template.", 298 | "description": "Data references configuration template.", 299 | "body": { 300 | "dataStoreName": "", 301 | "mode": "", 302 | "overwrite": "" 303 | } 304 | } 305 | ] 306 | }, 307 | "DataStoreMode": { 308 | "type": "string", 309 | "description": "", 310 | "x-enumNames": [ 311 | "Mount", 312 | "Download", 313 | "Upload" 314 | ], 315 | "enum": [ 316 | "Mount", 317 | "Download", 318 | "Upload" 319 | ] 320 | }, 321 | "EnvironmentDefinition": { 322 | "type": "object", 323 | "properties": { 324 | "name": { 325 | "type": [ 326 | "null", 327 | "string" 328 | ], 329 | "description": "The name of the environment.\nRemarks: Read-only from a contract perspective; set with URI fields on the relevant APIs.\nExample: mydevenvironment" 330 | }, 331 | "version": { 332 | "type": [ 333 | "null", 334 | "string" 335 | ], 336 | "description": "The environment version.\nRemarks: Read-only from a contract perspective; set with URI fields on the 
relevant APIs.\nExample: 1" 337 | }, 338 | "python": { 339 | "description": "Settings for a Python environment.\n", 340 | "oneOf": [ 341 | { 342 | "type": "null" 343 | }, 344 | { 345 | "$ref": "#/definitions/PythonSection" 346 | } 347 | ] 348 | }, 349 | "environmentVariables": { 350 | "type": [ 351 | "null", 352 | "object" 353 | ], 354 | "description": "Definition of environment variables to be defined in the environment.\n", 355 | "additionalProperties": { 356 | "type": "string" 357 | } 358 | }, 359 | "docker": { 360 | "description": "The definition of a Docker container.\n", 361 | "oneOf": [ 362 | { 363 | "type": "null" 364 | }, 365 | { 366 | "$ref": "#/definitions/DockerSection" 367 | } 368 | ] 369 | }, 370 | "spark": { 371 | "description": "The configuration for a Spark environment.\n", 372 | "oneOf": [ 373 | { 374 | "type": "null" 375 | }, 376 | { 377 | "$ref": "#/definitions/SparkSection" 378 | } 379 | ] 380 | } 381 | }, 382 | "defaultSnippets": [ 383 | { 384 | "label": "Environment definition configuration template.", 385 | "description": "Environment definition configuration template.", 386 | "body": { 387 | "python": { 388 | "interpreterPath": "python", 389 | "userManagedDependencies": false, 390 | "condaDependencies": { 391 | "dependencies": [ 392 | "python=3.6.2", 393 | { 394 | "pip": [ 395 | "azureml-defaults" 396 | ] 397 | } 398 | ] 399 | } 400 | }, 401 | "docker": { 402 | "baseImage": "mcr.microsoft.com/azureml/base:0.2.2", 403 | "enabled": false, 404 | "baseImageRegistry": { 405 | "address": "", 406 | "username": "", 407 | "password": "" 408 | } 409 | } 410 | } 411 | } 412 | ] 413 | }, 414 | "PythonSection": { 415 | "type": "object", 416 | "properties": { 417 | "interpreterPath": { 418 | "type": [ 419 | "null", 420 | "string" 421 | ], 422 | "description": "The python interpreter path. 
This is only used when user_managed_dependencies=True.\n" 423 | }, 424 | "userManagedDependencies": { 425 | "type": "boolean", 426 | "description": "True means that AzureML reuses an existing python environment; False means that AzureML will create a python environment based on the Conda dependencies specification.\n" 427 | }, 428 | "condaDependencies": { 429 | "description": "Conda dependencies for the run.\nRemarks: Specify conda dependencies in the json format here, or specify 'condaDependenciesFile' field and set its value to the conda file path, like '\\\"condaDependenciesFile\\\": \\\".azureml/conda_dependencies.yml\\\"'\n", 430 | "oneOf": [ 431 | {}, 432 | { 433 | "type": "null" 434 | } 435 | ] 436 | }, 437 | "baseCondaEnvironment": { 438 | "type": [ 439 | "null", 440 | "string" 441 | ] 442 | } 443 | }, 444 | "allOf": [ 445 | { 446 | "anyOf": [ 447 | { 448 | "anyOf": [ 449 | { 450 | "not": { 451 | "required": [ 452 | "userManagedDependencies" 453 | ], 454 | "properties": { 455 | "userManagedDependencies": { 456 | "enum": [ 457 | false 458 | ] 459 | } 460 | } 461 | } 462 | }, 463 | { 464 | "not": { 465 | "properties": { 466 | "condaDependenciesFile": { 467 | "enum": [ 468 | null 469 | ] 470 | } 471 | } 472 | }, 473 | "required": [ 474 | "condaDependenciesFile" 475 | ] 476 | } 477 | ] 478 | }, 479 | { 480 | "anyOf": [ 481 | { 482 | "not": { 483 | "required": [ 484 | "userManagedDependencies" 485 | ], 486 | "properties": { 487 | "userManagedDependencies": { 488 | "enum": [ 489 | false 490 | ] 491 | } 492 | } 493 | } 494 | }, 495 | { 496 | "not": { 497 | "properties": { 498 | "condaDependencies": { 499 | "enum": [ 500 | null 501 | ] 502 | } 503 | } 504 | }, 505 | "required": [ 506 | "condaDependencies" 507 | ] 508 | } 509 | ] 510 | } 511 | ] 512 | }, 513 | { 514 | "anyOf": [ 515 | { 516 | "not": { 517 | "required": [ 518 | "userManagedDependencies" 519 | ], 520 | "properties": { 521 | "userManagedDependencies": { 522 | "enum": [ 523 | true 524 | ] 525 | } 526 | } 527 | } 528 | }, 529 | { 530 | "not": { 531 | "properties": { 532 | "interpreterPath": { 533 | "enum": [ 534 | null 535 | ] 536 | } 537 | } 538 | }, 539 | "required": [ 540 | "interpreterPath" 541 | ] 542 | } 543 | ] 544 | } 545 | ], 546 | "defaultSnippets": [ 547 | { 548 | "label": "Python section configuration template.", 549 | "description": "Python section configuration template.", 550 | "body": { 551 | "interpreterPath": "python", 552 | "userManagedDependencies": false, 553 | "condaDependencies": { 554 | "dependencies": [ 555 | "python=3.6.2", 556 | { 557 | "pip": [ 558 | "azureml-defaults" 559 | ] 560 | } 561 | ] 562 | } 563 | } 564 | } 565 | ] 566 | }, 567 | "DockerSection": { 568 | "type": "object", 569 | "properties": { 570 | "baseImage": { 571 | "type": [ 572 | "null", 573 | "string" 574 | ], 575 | "description": "Base image used for Docker-based runs. 
If base image is not available in docker hub then please specify BaseImageRegistry field.\nExample: ubuntu:latest" 576 | }, 577 | "enabled": { 578 | "type": "boolean", 579 | "description": "Set True to perform this run inside a Docker container.\nExample: True" 580 | }, 581 | "sharedVolumes": { 582 | "type": "boolean", 583 | "description": "Set False if necessary to work around shared volume bugs on Windows.\nExample: True" 584 | }, 585 | "preparation": { 586 | "oneOf": [ 587 | { 588 | "type": "null" 589 | }, 590 | { 591 | "$ref": "#/definitions/Preparation" 592 | } 593 | ] 594 | }, 595 | "gpuSupport": { 596 | "type": "boolean", 597 | "description": "Run with NVidia Docker extension to support GPUs.\nExample: False" 598 | }, 599 | "shmSize": { 600 | "type": [ 601 | "null", 602 | "string" 603 | ], 604 | "description": "The shared memory size setting for NVidia GPUs.\nRemarks: 1GB is NVidia's recommended default shm size. In testing, more was not needed.\nExample: 1g" 605 | }, 606 | "arguments": { 607 | "type": [ 608 | "array", 609 | "null" 610 | ], 611 | "description": "Extra arguments to the Docker run command.\n", 612 | "items": { 613 | "type": "string" 614 | } 615 | }, 616 | "baseImageRegistry": { 617 | "description": "Image registry that contains the base image.\n", 618 | "oneOf": [ 619 | { 620 | "type": "null" 621 | }, 622 | { 623 | "$ref": "#/definitions/ContainerRegistry" 624 | } 625 | ] 626 | } 627 | }, 628 | "allOf": [ 629 | { 630 | "anyOf": [ 631 | { 632 | "properties": { 633 | "enabled": { 634 | "enum": [ 635 | false 636 | ] 637 | } 638 | } 639 | }, 640 | { 641 | "not": { 642 | "required": [ 643 | "enabled" 644 | ] 645 | } 646 | }, 647 | { 648 | "properties": { 649 | "baseImage": { 650 | "enum": [ 651 | null 652 | ] 653 | } 654 | } 655 | }, 656 | { 657 | "not": { 658 | "required": [ 659 | "baseImage" 660 | ] 661 | } 662 | }, 663 | { 664 | "not": { 665 | "properties": { 666 | "baseImageRegistry": { 667 | "enum": [ 668 | null 669 | ] 670 | } 671 | } 672 | }, 673 | "required": [ 674 | "baseImageRegistry" 675 | ] 676 | } 677 | ] 678 | } 679 | ], 680 | "defaultSnippets": [ 681 | { 682 | "label": "Docker section configuration template.", 683 | "description": "Docker section configuration template.", 684 | "body": { 685 | "baseImage": "mcr.microsoft.com/azureml/base:0.2.2", 686 | "enabled": false, 687 | "baseImageRegistry": { 688 | "address": "", 689 | "username": "", 690 | "password": "" 691 | } 692 | } 693 | } 694 | ] 695 | }, 696 | "Preparation": { 697 | "type": "object", 698 | "properties": { 699 | "commandLine": { 700 | "type": [ 701 | "null", 702 | "string" 703 | ] 704 | } 705 | } 706 | }, 707 | "ContainerRegistry": { 708 | "type": "object", 709 | "properties": { 710 | "address": { 711 | "type": [ 712 | "null", 713 | "string" 714 | ], 715 | "description": "DNS name or IP address of a container registry.\n" 716 | }, 717 | "username": { 718 | "type": [ 719 | "null", 720 | "string" 721 | ], 722 | "description": "The username for the container registry.\nRemarks: If username is specified then password is also required.\n" 723 | }, 724 | "password": { 725 | "type": [ 726 | "null", 727 | "string" 728 | ], 729 | "description": "The password for the container registry.\n" 730 | } 731 | }, 732 | "allOf": [ 733 | { 734 | "anyOf": [ 735 | { 736 | "properties": { 737 | "username": { 738 | "enum": [ 739 | null 740 | ] 741 | } 742 | } 743 | }, 744 | { 745 | "not": { 746 | "required": [ 747 | "username" 748 | ] 749 | } 750 | }, 751 | { 752 | "not": { 753 | "properties": { 754 | "password": { 755 
| "enum": [ 756 | null 757 | ] 758 | } 759 | } 760 | }, 761 | "required": [ 762 | "password" 763 | ] 764 | } 765 | ] 766 | } 767 | ], 768 | "defaultSnippets": [ 769 | { 770 | "label": "Container registry configuration template.", 771 | "description": "Container registry configuration template.", 772 | "body": { 773 | "address": "", 774 | "username": "", 775 | "password": "" 776 | } 777 | } 778 | ] 779 | }, 780 | "SparkSection": { 781 | "type": "object", 782 | "properties": { 783 | "repositories": { 784 | "type": [ 785 | "array", 786 | "null" 787 | ], 788 | "description": "The list of spark repositories.\n", 789 | "items": { 790 | "type": "string" 791 | } 792 | }, 793 | "packages": { 794 | "type": [ 795 | "array", 796 | "null" 797 | ], 798 | "description": "The Spark packages to use.\n", 799 | "items": { 800 | "$ref": "#/definitions/SparkMavenPackage" 801 | } 802 | }, 803 | "precachePackages": { 804 | "type": "boolean", 805 | "description": "Whether to preckage the packages.\nExample: True" 806 | } 807 | }, 808 | "defaultSnippets": [ 809 | { 810 | "label": "Spark section configuration template.", 811 | "description": "Spark section configuration template.", 812 | "body": { 813 | "repositories": [ 814 | "https://mmlspark.azureedge.net/maven" 815 | ], 816 | "packages": "", 817 | "precachePackages": true 818 | } 819 | } 820 | ] 821 | }, 822 | "SparkMavenPackage": { 823 | "type": "object", 824 | "properties": { 825 | "group": { 826 | "type": [ 827 | "null", 828 | "string" 829 | ] 830 | }, 831 | "artifact": { 832 | "type": [ 833 | "null", 834 | "string" 835 | ] 836 | }, 837 | "version": { 838 | "type": [ 839 | "null", 840 | "string" 841 | ] 842 | } 843 | }, 844 | "defaultSnippets": [ 845 | { 846 | "label": "Spark maven package configuration template.", 847 | "description": "Spark maven package configuration template.", 848 | "body": { 849 | "group": "com.microsoft.ml.spark", 850 | "artifact": "mmlspark_2.11", 851 | "version": "0.12" 852 | } 853 | } 854 | ] 855 | }, 856 | "HistoryConfiguration": { 857 | "type": "object", 858 | "additionalProperties": { 859 | "oneOf": [ 860 | {}, 861 | { 862 | "type": "null" 863 | } 864 | ] 865 | }, 866 | "properties": { 867 | "outputCollection": { 868 | "type": "boolean", 869 | "description": "Enable history tracking -- this allows status, logs, metrics, and outputs to be collected for a run.\"\nExample: True" 870 | }, 871 | "directoriesToWatch": { 872 | "type": [ 873 | "array", 874 | "null" 875 | ], 876 | "description": "The list of directories to monitor and upload files from.\nExample: [\"logs\", \"outputs\"]", 877 | "default": [ 878 | "logs" 879 | ], 880 | "items": { 881 | "type": "string" 882 | } 883 | } 884 | }, 885 | "defaultSnippets": [ 886 | { 887 | "label": "History configuration template.", 888 | "description": "History configuration template.", 889 | "body": { 890 | "outputCollection": true 891 | } 892 | } 893 | ] 894 | }, 895 | "SparkConfiguration": { 896 | "type": "object", 897 | "properties": { 898 | "configuration": { 899 | "type": [ 900 | "null", 901 | "object" 902 | ], 903 | "description": "The Spark configuration.\n", 904 | "additionalProperties": { 905 | "type": "string" 906 | } 907 | } 908 | }, 909 | "defaultSnippets": [ 910 | { 911 | "label": "Spark configuration template.", 912 | "description": "Spark configuration template.", 913 | "body": { 914 | "configuration": { 915 | "spark.app.name": "Azure ML Experiment", 916 | "spark.yarn.maxAppAttempts": "1" 917 | } 918 | } 919 | } 920 | ] 921 | }, 922 | "BatchAiConfiguration": { 923 | "type": 
"object", 924 | "properties": { 925 | "nodeCount": { 926 | "type": "integer", 927 | "format": "int32" 928 | } 929 | } 930 | }, 931 | "AMLComputeConfiguration": { 932 | "type": "object", 933 | "properties": { 934 | "name": { 935 | "type": [ 936 | "null", 937 | "string" 938 | ], 939 | "description": "Name of the cluster to be created. If not specified, runId will be used as cluster name.\nExample: my8nodeCluster" 940 | }, 941 | "vmSize": { 942 | "type": [ 943 | "null", 944 | "string" 945 | ], 946 | "description": "VM size of the Cluster to be created. Allowed values are Azure vm sizes.\nThe list of vm sizes is available in https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs\nExample: Standard_D2_v2" 947 | }, 948 | "vmPriority": { 949 | "type": [ 950 | "null", 951 | "string" 952 | ], 953 | "description": "VM priority of the Cluster to be created. Allowed values are dedicated and lowpriority.\nExample: dedicated" 954 | }, 955 | "retainCluster": { 956 | "type": "boolean", 957 | "description": "Setting to true will prevent the cluster from being deleted upon completion of the run.\nExample: False" 958 | }, 959 | "clusterMaxNodeCount": { 960 | "type": "integer", 961 | "description": "The maximum number of nodes that the cluster can scale up to.\nMinimum number of nodes will always be set to 0.\nExample: 10", 962 | "format": "int32" 963 | } 964 | }, 965 | "defaultSnippets": [ 966 | { 967 | "label": "AmlCompute configuration template.", 968 | "description": "AmlCompute configuration template.", 969 | "body": { 970 | "name": "", 971 | "retainCluster": false, 972 | "clusterMaxNodeCount": 1 973 | } 974 | } 975 | ] 976 | }, 977 | "TensorflowConfiguration": { 978 | "type": "object", 979 | "properties": { 980 | "workerCount": { 981 | "type": "integer", 982 | "description": "The number of workers.\nExample: 2", 983 | "format": "int32" 984 | }, 985 | "parameterServerCount": { 986 | "type": "integer", 987 | "description": "Number of parameter servers.\nExample: 1", 988 | "format": "int32" 989 | } 990 | }, 991 | "defaultSnippets": [ 992 | { 993 | "label": "Tensorflow configuration template.", 994 | "description": "Tensorflow configuration template.", 995 | "body": { 996 | "workerCount": 1, 997 | "parameterServerCount": 1 998 | } 999 | } 1000 | ] 1001 | }, 1002 | "MpiConfiguration": { 1003 | "type": "object", 1004 | "properties": { 1005 | "processCountPerNode": { 1006 | "type": "integer", 1007 | "description": "When using MPI, the number of processes per node.\nExample: 2", 1008 | "format": "int32" 1009 | } 1010 | }, 1011 | "defaultSnippets": [ 1012 | { 1013 | "label": "Mpi configuration template.", 1014 | "description": "Mpi configuration template.", 1015 | "body": { 1016 | "processCountPerNode": 1 1017 | } 1018 | } 1019 | ] 1020 | }, 1021 | "HdiConfiguration": { 1022 | "type": "object", 1023 | "properties": { 1024 | "yarnDeployMode": { 1025 | "description": "Yarn deploy mode.\n", 1026 | "oneOf": [ 1027 | { 1028 | "$ref": "#/definitions/YarnDeployMode" 1029 | } 1030 | ] 1031 | } 1032 | }, 1033 | "defaultSnippets": [ 1034 | { 1035 | "label": "Hdi configuration template.", 1036 | "description": "Hdi configuration template.", 1037 | "body": { 1038 | "yarnDeployMode": "" 1039 | } 1040 | } 1041 | ] 1042 | }, 1043 | "YarnDeployMode": { 1044 | "type": "string", 1045 | "description": "", 1046 | "x-enumNames": [ 1047 | "None", 1048 | "Client", 1049 | "Cluster" 1050 | ], 1051 | "enum": [ 1052 | "None", 1053 | "Client", 1054 | "Cluster" 1055 | ] 1056 | }, 1057 | 
"ContainerInstanceConfiguration": { 1058 | "type": "object", 1059 | "properties": { 1060 | "region": { 1061 | "type": [ 1062 | "null", 1063 | "string" 1064 | ], 1065 | "description": "Defaults to the region of the workspace.\nExample: eastus2" 1066 | }, 1067 | "cpuCores": { 1068 | "type": "number", 1069 | "description": "Default size corresponds to the largest container supported in all regions.\nDetails: https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas\nExample: 2", 1070 | "format": "double" 1071 | }, 1072 | "memoryGb": { 1073 | "type": "number", 1074 | "description": "The memory available for the container instance.\nExample: 3.5", 1075 | "format": "double" 1076 | } 1077 | }, 1078 | "defaultSnippets": [ 1079 | { 1080 | "label": "Container instance configuration template.", 1081 | "description": "Container instance configuration template.", 1082 | "body": {} 1083 | } 1084 | ] 1085 | } 1086 | }, 1087 | "allOf": [ 1088 | { 1089 | "anyOf": [ 1090 | { 1091 | "not": { 1092 | "required": [ 1093 | "communicator" 1094 | ], 1095 | "properties": { 1096 | "communicator": { 1097 | "enum": [ 1098 | "Mpi" 1099 | ] 1100 | } 1101 | } 1102 | } 1103 | }, 1104 | { 1105 | "not": { 1106 | "properties": { 1107 | "mpi": { 1108 | "enum": [ 1109 | null 1110 | ] 1111 | } 1112 | } 1113 | }, 1114 | "required": [ 1115 | "mpi" 1116 | ] 1117 | } 1118 | ] 1119 | }, 1120 | { 1121 | "anyOf": [ 1122 | { 1123 | "not": { 1124 | "required": [ 1125 | "framework" 1126 | ], 1127 | "properties": { 1128 | "framework": { 1129 | "enum": [ 1130 | "PySpark" 1131 | ] 1132 | } 1133 | } 1134 | } 1135 | }, 1136 | { 1137 | "not": { 1138 | "properties": { 1139 | "spark": { 1140 | "enum": [ 1141 | null 1142 | ] 1143 | } 1144 | } 1145 | }, 1146 | "required": [ 1147 | "spark" 1148 | ] 1149 | } 1150 | ] 1151 | }, 1152 | { 1153 | "anyOf": [ 1154 | { 1155 | "not": { 1156 | "required": [ 1157 | "target" 1158 | ], 1159 | "properties": { 1160 | "target": { 1161 | "enum": [ 1162 | "amlcompute" 1163 | ] 1164 | } 1165 | } 1166 | } 1167 | }, 1168 | { 1169 | "not": { 1170 | "properties": { 1171 | "amlCompute": { 1172 | "enum": [ 1173 | null 1174 | ] 1175 | } 1176 | } 1177 | }, 1178 | "required": [ 1179 | "amlCompute" 1180 | ] 1181 | } 1182 | ] 1183 | }, 1184 | { 1185 | "anyOf": [ 1186 | { 1187 | "not": { 1188 | "required": [ 1189 | "framework" 1190 | ], 1191 | "properties": { 1192 | "framework": { 1193 | "enum": [ 1194 | "TensorFlow" 1195 | ] 1196 | } 1197 | } 1198 | } 1199 | }, 1200 | { 1201 | "not": { 1202 | "properties": { 1203 | "tensorflow": { 1204 | "enum": [ 1205 | null 1206 | ] 1207 | } 1208 | } 1209 | }, 1210 | "required": [ 1211 | "tensorflow" 1212 | ] 1213 | } 1214 | ] 1215 | }, 1216 | { 1217 | "anyOf": [ 1218 | { 1219 | "not": { 1220 | "required": [ 1221 | "target" 1222 | ], 1223 | "properties": { 1224 | "target": { 1225 | "enum": [ 1226 | "containerinstance" 1227 | ] 1228 | } 1229 | } 1230 | } 1231 | }, 1232 | { 1233 | "not": { 1234 | "properties": { 1235 | "containerInstance": { 1236 | "enum": [ 1237 | null 1238 | ] 1239 | } 1240 | } 1241 | }, 1242 | "required": [ 1243 | "containerInstance" 1244 | ] 1245 | } 1246 | ] 1247 | } 1248 | ], 1249 | "defaultSnippets": [ 1250 | { 1251 | "label": "RunConfiguration default template.", 1252 | "description": "RunConfiguration default template.", 1253 | "body": { 1254 | "script": "train.py", 1255 | "arguments": [], 1256 | "framework": "", 1257 | "communicator": "None", 1258 | "target": "local", 1259 | "environment": { 1260 | "python": { 1261 | "interpreterPath": 
"python", 1262 | "userManagedDependencies": false, 1263 | "condaDependencies": { 1264 | "dependencies": [ 1265 | "python=3.6.2", 1266 | { 1267 | "pip": [ 1268 | "azureml-defaults" 1269 | ] 1270 | } 1271 | ] 1272 | } 1273 | }, 1274 | "docker": { 1275 | "baseImage": "mcr.microsoft.com/azureml/base:0.2.2", 1276 | "enabled": false, 1277 | "baseImageRegistry": { 1278 | "address": "", 1279 | "username": "", 1280 | "password": "" 1281 | } 1282 | } 1283 | } 1284 | } 1285 | } 1286 | ] 1287 | } -------------------------------------------------------------------------------- /infrastructure/scripts/create-aks.sh: -------------------------------------------------------------------------------- 1 | az ml computetarget create aks -n myaks 2 | -------------------------------------------------------------------------------- /infrastructure/scripts/create-azmlcompute.sh: -------------------------------------------------------------------------------- 1 | az ml computetarget create amlcompute -n cpu --min-nodes 1 --max-nodes 1 -s STANDARD_D3_V2 2 | -------------------------------------------------------------------------------- /infrastructure/scripts/create-workspace.sh: -------------------------------------------------------------------------------- 1 | az group create -n myresourcegroup -l westus2 2 | az ml workspace create -w myworkspace -g myresourcegroup 3 | -------------------------------------------------------------------------------- /labs/01_setup.md: -------------------------------------------------------------------------------- 1 | # Lab 1: setting up the environment 2 | 3 | In this first lab, we'll set up our working environment. 4 | 5 | ## Requirements 6 | 7 | * Visual Studio Code 8 | Download and Install [Visual Studio Code](https://code.visualstudio.com/) 9 | 10 | * Miniconda 11 | Download and install [Miniconda](https://docs.conda.io/en/latest/miniconda.html) 12 | 13 | * Azure ML SDK 14 | From a command line window, run the following command to install the python client package for Azure ML: `pip install azureml-sdk` 15 | 16 | * Azure CLI 17 | From a command line window, run the following command to install the Azure CLI, used for authentication and management tasks: `pip install azure-cli` 18 | 19 | * A git client to clone the lab content 20 | For example Git SCM - https://git-scm.com/. 21 | 22 | ## Clone the repository 23 | 24 | Clone the following git repository: git clone https://github.com/Azure/MLOps-TDSP-Template 25 | 26 | ## Open the cloned git repository in VS Code or your favorite IDE 27 | 28 | ## Az Login 29 | From a terminal, login to your subscription on Azure using the azure cli. 30 | 31 | * `az login` 32 | 33 | If you have multiple subscriptions, you might want to set the right subscription by using the following command. 34 | 35 | * `az account set -s ` 36 | 37 | ## Deploy an ML workspace and dependent resources 38 | 39 | Execute the script `infrastructure/create_mlworkspace.py` to deploy the ML workspace resource and dependent resources such as a Keyvault instance and a Storage Account. 40 | 41 | ## Browse through the created resources in the portal 42 | 43 | You can now take a look over the created resources via the [Azure Portal](http://portal.azure.com/). 
44 | -------------------------------------------------------------------------------- /labs/02_experiments.md: -------------------------------------------------------------------------------- 1 | # Lab 2: running experiments 2 | 3 | ## Understand the non-Azure / open-source ML model code 4 | Observe print statements 5 | Observe performance metrics 6 | 7 | ## Run the training locally 8 | Inspect the results 9 | 10 | ## Run the code via Azure ML 11 | Observe additional metadata 12 | Observe run history 13 | 14 | ## Read the Experiment Tracking documentation 15 | 16 | ## Refactor the code to capture run metrics 17 | 18 | ## Submit the experiment again 19 | 20 | ## Refactor the code a little further, and then go to the portal to inspect the run history 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /labs/03_managedcompute.md: -------------------------------------------------------------------------------- 1 | # Lab 3: using remote compute 2 | 3 | ## Review compute management in the studio 4 | 5 | ## Create a compute cluster via the studio 6 | 7 | ## Refactor the training script to make use of the newly created compute 8 | 9 | ## Submit a new training run 10 | 11 | ## Observe the logs via the studio 12 | One dependency is missing, so the run on the compute target fails 13 | 14 | ## Fix the code and resubmit 15 | 16 | ## Observe cluster run statuses via the studio 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /labs/04_datasets.md: -------------------------------------------------------------------------------- 1 | # Lab 4: Datasets 2 | 3 | ## Open the studio, browse through the data store management and datasets tabs 4 | Understand the differences 5 | 6 | ## Ingest some data into a data store using the script 7 | 8 | ## Use the portal storage explorer to review the uploaded data 9 | 10 | ## Define a dataset over this data using the script 11 | 12 | ## Inspect the created datasets via the portal 13 | 14 | ## Open a dataset and note the exploration capabilities 15 | 16 | ## Review the train submit script that uses datasets 17 | 18 | ## Review the train script that uses datasets 19 | 20 | ## Submit a training run 21 | 22 | ## Observe from the run metadata which dataset was used for training 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /labs/05_hypertune.md: -------------------------------------------------------------------------------- 1 | # Lab 5: hypertune capabilities 2 | 3 | ## Understand the goal 4 | 5 | ## Walk through the hypertune code 6 | 7 | ## Understand the differences in run configuration 8 | 9 | ## Submit a run on AML compute 10 | 11 | ## View the results in the portal 12 | -------------------------------------------------------------------------------- /labs/06_pipelines.md: -------------------------------------------------------------------------------- 1 | # Lab 6: pipelines 2 | 3 | ## Refactor the hypertune code into an ML pipeline with: 4 | 1) data prep 5 | 2) hypertune 6 | 3) train 7 | 8 | -------------------------------------------------------------------------------- /labs/README.md: -------------------------------------------------------------------------------- 1 | # Folder for hosting all documents for a Data Science Project 2 | 3 | Documents will contain information about the following: 4 | 5 | 1. System architecture 6 | 2. Data dictionaries 7 | 3. Reports related to data understanding and modeling 8 | 4. Project management and planning docs 9 | 5. Information obtained from a business owner or client about the project 10 | 6. Docs and presentations prepared to share information about the project 11 | --------------------------------------------------------------------------------