├── .flake8 ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── MANIFEST.in ├── Makefile ├── README.md ├── documentation └── README_prerequisites.md ├── examples ├── custom_monitors.ipynb ├── mlmonitor-azure.ipynb ├── mlmonitor-sagemaker.ipynb └── mlmonitor-wml.ipynb ├── mlmonitor ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── __init__.py ├── config.py ├── credentials_example.cfg ├── custmonitor │ ├── README.md │ ├── __init__.py │ ├── metrics │ │ ├── __init__.py │ │ ├── use_case_churn.py │ │ └── use_case_gcr.py │ ├── metricsprovider │ │ ├── __init__.py │ │ └── helpers.py │ └── tests │ │ ├── __init__.py │ │ ├── it │ │ ├── __init__.py │ │ └── test_custom_metrcis_provider.py │ │ └── unit │ │ ├── __init__.py │ │ ├── config │ │ ├── .gitkeep │ │ ├── credentials_churn.cfg │ │ └── credentials_gcr.cfg │ │ └── test_get_metrics.py ├── data │ ├── __init__.py │ └── cos.py ├── datasets │ ├── .gitkeep │ ├── churn │ │ ├── .gitkeep │ │ ├── churn.txt │ │ ├── feedback_logging_churn.csv │ │ ├── feedback_logging_no_preds_churn.csv │ │ ├── feedback_logging_preds_churn.csv │ │ ├── train.csv │ │ └── validation.csv │ ├── gcr │ │ ├── .gitkeep │ │ ├── gcr_explicit_payload_logging.csv │ │ ├── gcr_feedback_logging_aws.csv │ │ ├── gcr_feedback_logging_with_pred_aws.csv │ │ ├── gcr_feedback_logging_with_pred_aws2.csv │ │ ├── german_credit_data_biased_training.csv │ │ └── test_feedback_data_gcr.csv │ └── mnist │ │ ├── .gitkeep │ │ ├── mnist_feedback_20_with_pred_aws.csv │ │ ├── t10k-images-idx3-ubyte.gz │ │ ├── t10k-labels-idx1-ubyte.gz │ │ ├── train-images-idx3-ubyte.gz │ │ └── train-labels-idx1-ubyte.gz ├── exceptions.py ├── factsheets_churn │ ├── custom_model_asset_facts_churn.csv │ ├── custom_model_asset_facts_values_churn.csv │ ├── custom_model_entry_facts_churn.csv │ └── custom_model_entry_facts_values_churn.csv ├── figures │ └── .gitkeep ├── models │ ├── .gitkeep │ ├── model_churn │ │ ├── .gitkeep │ │ ├── xgboost-model-bst │ │ └── xgboost-model-sk │ ├── model_gcr │ │ └── 
.gitkeep │ └── model_mnist │ │ └── .gitkeep ├── setup.py ├── src │ ├── __init__.py │ ├── aws │ │ ├── __init__.py │ │ ├── deploy_sagemaker_endpoint.py │ │ ├── deployment.py │ │ ├── score_sagemaker_ep.py │ │ ├── scoring.py │ │ ├── secrets_manager.py │ │ ├── train_sagemaker_job.py │ │ ├── training.py │ │ └── utils.py │ ├── azure │ │ ├── __init__.py │ │ ├── deployment.py │ │ ├── scoring.py │ │ └── train.py │ ├── demos │ │ ├── __init__.py │ │ ├── model_perturbator.py │ │ └── scenario_helpers.py │ ├── factsheets │ │ ├── __init__.py │ │ ├── deployments.py │ │ ├── model_asset_facts.py │ │ ├── model_entry_facts.py │ │ └── utils.py │ ├── model │ │ ├── __init__.py │ │ ├── config.py │ │ ├── config_aws.py │ │ ├── config_azure.py │ │ ├── config_wml.py │ │ ├── use_case.py │ │ ├── use_case_aws.py │ │ ├── use_case_azure.py │ │ └── use_case_wml.py │ ├── utils │ │ ├── __init__.py │ │ ├── file_utils.py │ │ ├── utils.py │ │ └── validation.py │ ├── wml │ │ ├── __init__.py │ │ ├── custom_metrics_provider_code.py │ │ ├── deploy_custom_metrics_provider.py │ │ ├── deploy_wml_endpoint.py │ │ ├── package.py │ │ ├── scoring.py │ │ └── utils.py │ └── wos │ │ ├── __init__.py │ │ ├── alerts.py │ │ ├── cleanup_custom_monitor.py │ │ ├── cleanup_resources.py │ │ ├── collect_alerts.py │ │ ├── configure_custom_monitor.py │ │ ├── configure_drift_monitor.py │ │ ├── configure_explain_monitor.py │ │ ├── configure_fairness_monitor.py │ │ ├── configure_quality_monitor.py │ │ ├── configure_wos_subscription.py │ │ ├── configure_wos_subscription_custom.py │ │ ├── custom_monitors.py │ │ ├── data_mart.py │ │ ├── evaluate.py │ │ ├── integated_system.py │ │ ├── monitors.py │ │ ├── run_feedback_logging.py │ │ ├── run_payload_logging.py │ │ ├── service_provider.py │ │ └── subscription.py ├── tests │ ├── 1-aws-sm-e2e.ipynb │ ├── __init__.py │ ├── aws_model_use_case │ │ ├── __init__.py │ │ ├── config │ │ │ ├── .gitkeep │ │ │ └── credentials_test.cfg │ │ ├── outputs │ │ │ └── .gitkeep │ │ ├── test_aws_model_config.py │ 
│ └── test_aws_resources.py │ ├── get_metrics_testing.py │ └── it │ │ ├── __init__.py │ │ ├── test_aws_onboard_model.py │ │ ├── test_azure_onboard_model.py │ │ └── test_wml_onboard_model.py ├── use_case_churn │ ├── .amlignore │ ├── README.md │ ├── __init__.py │ ├── factsheets.py │ ├── inference.py │ ├── inference_cc_sk.py │ ├── inference_cc_xg_boost.py │ ├── metrics.py │ ├── model_signature.json │ ├── requirements-local.txt │ ├── requirements.txt │ ├── test_inference.py │ ├── test_train.py │ ├── train_cc_xg_boost.py │ ├── utils.py │ └── visualize.py ├── use_case_gcr │ ├── .amlignore │ ├── README.md │ ├── __init__.py │ ├── inference_aws_gcr.py │ ├── inference_azure_gcr.py │ ├── model_perturbation.json │ ├── model_signature.json │ ├── requirements-local.txt │ ├── requirements.txt │ ├── test_inference.py │ ├── test_train.py │ ├── train_gcr.py │ └── utils.py ├── use_case_huggingface │ ├── README.md │ └── __init__.py ├── use_case_mnist_pt │ ├── .amlignore │ ├── README.md │ ├── __init__.py │ ├── model_signature.json │ ├── pt_models.py │ ├── pytorch_inference.py │ ├── pytorch_train.py │ ├── requirements.txt │ ├── test_inference.py │ ├── test_train.py │ ├── torch_utils.py │ └── utils.py ├── use_case_mnist_ptlt │ ├── .amlignore │ ├── README.md │ ├── __init__.py │ ├── model_signature.json │ ├── pt_models.py │ ├── ptlt_inference.py │ ├── ptlt_train.py │ ├── requirements.txt │ ├── test_inference.py │ ├── test_train.py │ ├── torch_utils.py │ └── utils.py ├── use_case_mnist_tf │ ├── .amlignore │ ├── README.md │ ├── __init__.py │ ├── factsheets_helpers.py │ ├── inference.py │ ├── model_signature.json │ ├── requirements-local.txt │ ├── requirements.txt │ ├── test_inference.py │ ├── test_train.py │ ├── tf_cnn_inference.py │ ├── tf_cnn_train.py │ ├── tf_models.py │ ├── tf_train.py │ └── utils.py └── version.meta ├── pictures ├── .gitkeep ├── AWS_pt_model_output.png ├── AWS_pt_training_job.png ├── Architectutre_MLOps_Asset.jpg ├── IAM_resources.png ├── Model_inventory.png ├── 
Model_inventory_dev_state.png ├── New_Model_Entry.png ├── PlatformAssetCatalog.png ├── SageMakerExecutionRole.png ├── aws-sagemaker-mnist-cnn-pytorch-yMD-HM.png ├── churn_xgboost_asset_factsheets.png ├── churn_xgboost_confusion_matrix.png ├── churn_xgboost_feature_importance.png ├── churn_xgboost_loss.png ├── churn_xgboost_metadata_factsheets.png ├── churn_xgboost_probabilities.png ├── cp4d_data_assets.png ├── custom_ml_provider.png ├── custom_ml_provider_evaluated.png ├── custom_monitor_architecture.png ├── custom_monitor_workflow.png ├── custommonitor_wml_custom_provider_deployment.png ├── custommonitor_wos_definition.png ├── external_models_toggle.png ├── gcr_deployed_factsheets.png ├── gcr_train_asset_factsheets.png ├── gcr_train_metadata_factsheets.png ├── inference_endpoint_details.png ├── inference_endpoint_online.png ├── inference_samples_mnist.png ├── mnist_tf_asset_factsheets_train_local.png ├── mnist_tf_asset_factsheets_train_remote.png ├── mnist_tf_deployed_factsheets.png ├── mnist_tf_model_endpoint.png ├── mnist_tf_scoring_inputs.png ├── mnist_tf_train_local_metadata_factsheets.png ├── mnist_tf_train_remote_metadata_factsheets.png ├── mnist_tf_wos_subscription.png ├── model_entry.png ├── modelfacts_user.png ├── new_notebook_cp4d.png ├── outcome1_gcr_endpoint_aws.png ├── outcome2_gcr_model_aws.png ├── outcome3_gcr_wos_monitors_eval.png ├── outcome4_gcr_explain_contrastive.png ├── outcome4_gcr_explain_lime.png ├── outcome5_gcr_lifecycle_operate.png ├── outcome5_gcr_monitoring_facts.png ├── pt_model_asset_FS.png ├── pt_training_facts_manual.png ├── ptlt_training_facts_autolog.png ├── sagemaker_factsheets_big.png ├── secret_manager_keys.png └── track_this_model.png ├── requirements-dev.txt ├── requirements-extra.txt ├── requirements-local.txt ├── requirements.txt ├── setup.py └── version.meta /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203,W503,E731,F401 3 | max-complexity = 18 4 | 
select = B,C,E,F,W,T4,B9 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .DS_Store 132 | .idea/ 133 | /mlruns/ -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.3.0 4 | hooks: 5 | #- id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 22.10.0 10 | hooks: 11 | - id: black 12 | - repo: https://github.com/pycqa/flake8 13 | rev: 5.0.4 14 | hooks: 15 | - id: flake8 16 | args: ["--max-line-length=210", "--ignore=E203,W503,E731","--max-complexity=18"] 17 | - repo: https://github.com/kynan/nbstripout 18 | rev: 0.6.1 19 | hooks: 20 | - id: nbstripout -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10 2 | ARG VERSION 3 | ENV WHL_FILE=mlmonitor-${VERSION}-py3-none-any.whl 4 | ADD ./dist/${WHL_FILE} /tmp/${WHL_FILE} 5 | ADD ./mlmonitor/credentials_example.cfg /app/base/config.json 6 | 7 | RUN apt-get clean && apt-get -y update && \ 8 | apt-get install -yq less && \ 9 | apt-get install -yq vim && \ 10 | apt-get install -yq jq && \ 11 | apt-get install -yq zip && \ 12 | pip install 
--upgrade pip && \ 13 | pip install "/tmp/"${WHL_FILE}"[local,sagemaker,drift]" 14 | 15 | ENTRYPOINT ["/bin/bash"] 16 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include version.meta 2 | include README.md 3 | include requirements.txt 4 | global-include model_signature.json 5 | global-include model_perturbation.json 6 | global-include requirements.txt 7 | global-include *.gitkeep 8 | global-exclude *.py[cod] 9 | global-exclude *.log 10 | global-exclude tests 11 | global-exclude */mlruns/ 12 | include mlmonitor/models/model_churn/xgboost-model-bst 13 | include mlmonitor/models/model_churn/xgboost-model-sk 14 | include mlmonitor/models/model_churn/xgboost-model-bst 15 | include mlmonitor/src/logs 16 | include mlmonitor/custmonitor/README.md 17 | include mlmonitor/version.meta 18 | include mlmonitor/MANIFEST.in 19 | include mlmonitor/credentials_example.cfg 20 | exclude __pycache__ 21 | exclude mlmonitor/credentials.cfg 22 | exclude notebooks 23 | exclude mlops_orchestrator 24 | exclude documentation 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | REPOSITORY=us.icr.io/mlops 2 | NAME=mlmonitor-python 3 | VERSION=1.0.0 4 | CONFIG_FILE?=/app/base/config.json 5 | 6 | .PHONY: clean docker push run 7 | 8 | docker: 9 | echo "${VERSION}" > ./version.meta 10 | rm -rf ./build && rm -rf ./dist && python setup.py bdist_wheel 11 | docker build --platform linux/amd64 --no-cache -t $(REPOSITORY)/$(NAME):$(VERSION) . 
-f Dockerfile --build-arg VERSION=${VERSION} 12 | rm -rf ./build && rm -rf ./dist && rm -rf ./mlmonitor.egg-info 13 | 14 | push: 15 | docker push $(REPOSITORY)/$(NAME):$(VERSION) 16 | 17 | clean: 18 | docker rmi -f $(REPOSITORY)/$(NAME):$(VERSION) 19 | 20 | run: 21 | docker run -it --platform linux/amd64 -v ${CONFIG_FILE}:/app/base/config.json:Z -e MONITOR_CONFIG_FILE=/app/base/config.json $(REPOSITORY)/$(NAME):$(VERSION) 22 | -------------------------------------------------------------------------------- /mlmonitor/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include version.meta 2 | include custmonitor/README.md 3 | -------------------------------------------------------------------------------- /mlmonitor/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | """The ``mlmonitor`` module provides a high-level API for enabling AI Governance for model use cases 3 | running on a variety of Model serving platforms such as Watson Machine Learning , Sagemaker , Azure ML. Each Model is 4 | governed in AI FatcSheets and Monitored in IBM Watson OpenScale. 5 | 6 | Onboarding a new model can be achieved by: 7 | 8 | .. 
code:: python 9 | 10 | from mlmonitor import SageMakerModelUseCase 11 | 12 | model_use_case = SageMakerModelUseCase(source_dir='use_case_churn',catalog_id=catalog_id,model_entry_id=model_entry_id) 13 | model_use_case.train() 14 | model_use_case.deploy() 15 | model_use_case.display_states() 16 | model_use_case.govern() 17 | model_use_case.monitor() 18 | model_use_case.configure_quality_monitor() 19 | model_use_case.configure_fairness_monitor() 20 | model_use_case.configure_explainability_monitor() 21 | model_use_case.configure_drift_monitor() 22 | model_use_case.log_payload() 23 | model_use_case.log_feedback() 24 | model_use_case.evaluate_model() 25 | model_use_case.save_use_case() 26 | 27 | """ 28 | from mlmonitor.src.model.use_case_wml import WMLModelUseCase # noqa: F401 29 | 30 | supported_uc = ["WMLModelUseCase"] 31 | 32 | try: 33 | import sagemaker 34 | 35 | print(f"sagemaker installed at {sagemaker.__path__}") 36 | from mlmonitor.src.model.use_case_aws import SageMakerModelUseCase # noqa: F401 37 | 38 | supported_uc.append("SageMakerModelUseCase") 39 | except ModuleNotFoundError: 40 | print("run pip install mlmonitor[sagemaker] to use SageMakerModelUseCase") 41 | 42 | try: 43 | import azureml 44 | 45 | print(f"azureml installed at {azureml.__path__}") 46 | from mlmonitor.src.model.use_case_azure import AzureModelUseCase # noqa: F401 47 | 48 | supported_uc.append("AzureModelUseCase") 49 | except ModuleNotFoundError: 50 | print("run pip install mlmonitor[azure] to use AzureModelUseCase") 51 | 52 | __all__ = supported_uc 53 | __author__ = "IBM Client Engineering." 
54 | 55 | try: 56 | import pkg_resources 57 | 58 | __version__ = pkg_resources.get_distribution("mlmonitor").version 59 | except Exception: 60 | __version__ = "N/A" 61 | -------------------------------------------------------------------------------- /mlmonitor/credentials_example.cfg: -------------------------------------------------------------------------------- 1 | { 2 | "saas": { 3 | "apikey": "xxxxx", 4 | "wml_url": "https://us-south.ml.cloud.ibm.com", 5 | "wos_url": "https://api.aiopenscale.cloud.ibm.com", 6 | "wos_instance_id": "xxxxxxxxxxxxxxxxx", 7 | "default_space": "xxxxx", 8 | "cos_resource_crn" : "xxxxx", 9 | "cos_endpoint" : "https://s3.us-east.cloud-object-storage.appdomain.cloud", 10 | "bucket_name" : "xxxxx" 11 | }, 12 | "prem": { 13 | "version": "4.x", 14 | "username": "xxxxx", 15 | "apikey": "xxxxx", 16 | "wos_instance_id" : "00000000-0000-0000-0000-0000000000000000", 17 | "default_space": "xxxxx", 18 | "ibm_auth_endpoint": "xxxxx" 19 | }, 20 | "db2":{ 21 | "hostname":"***", 22 | "username":"***", 23 | "password":"***", 24 | "database":"***", 25 | "port":50000, 26 | "ssl":"***", 27 | "sslmode":"***", 28 | "certificate_base64":"***" 29 | }, 30 | "azure": 31 | { 32 | "client_id": "xxxxx", 33 | "client_secret": "xxxxx", 34 | "subscription_id": "xxxxx", 35 | "tenant_id": "xxxxx", 36 | "resource_group":"xxxxx", 37 | "workspace_name":"xxxxx" 38 | }, 39 | "aws" : 40 | { 41 | "access_key": "xxxxx", 42 | "role": "xxxxx", 43 | "secret_key": "xxxxx", 44 | "region_name": "xxxxx" 45 | }, 46 | "mlops_orchestrator" : 47 | { 48 | "username": "xxxxx", 49 | "password": "xxxxx", 50 | "scoring_url": "xxxxx" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /mlmonitor/custmonitor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/custmonitor/__init__.py 
-------------------------------------------------------------------------------- /mlmonitor/custmonitor/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/custmonitor/metrics/__init__.py -------------------------------------------------------------------------------- /mlmonitor/custmonitor/metrics/use_case_churn.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from custmonitor.metricsprovider.helpers import ( 3 | get_feedback_dataset_id, 4 | get_feedback_data, 5 | ) 6 | 7 | 8 | def get_metrics(access_token, data_mart_id, subscription_id, url): 9 | # Add the logic here to compute the metrics. Use the below metric names while creating the custom monitor definition 10 | feedback_dataset_id = get_feedback_dataset_id( 11 | access_token, data_mart_id, subscription_id, url 12 | ) 13 | 14 | if json_data := get_feedback_data( 15 | access_token, data_mart_id, feedback_dataset_id, url 16 | ): 17 | fields = json_data["records"][0]["fields"] 18 | print(fields) 19 | values = json_data["records"][0]["values"] 20 | import pandas as pd 21 | import numpy as np 22 | 23 | feedback_data = pd.DataFrame(values, columns=fields) 24 | 25 | tp = np.sum( 26 | np.logical_and( 27 | feedback_data["_original_prediction"].values == 1, 28 | feedback_data["Churn?_True."].values == 1, 29 | ) 30 | ) 31 | fp = np.sum( 32 | np.logical_and( 33 | feedback_data["_original_prediction"].values == 1, 34 | feedback_data["Churn?_True."].values == 0, 35 | ) 36 | ) 37 | tn = np.sum( 38 | np.logical_and( 39 | feedback_data["_original_prediction"].values == 0, 40 | feedback_data["Churn?_True."].values == 0, 41 | ) 42 | ) 43 | fn = np.sum( 44 | np.logical_and( 45 | feedback_data["_original_prediction"].values == 0, 46 | feedback_data["Churn?_True."].values == 1, 47 | ) 48 | ) 49 | 50 | 
def get_metrics(access_token, data_mart_id, subscription_id, url):
    """Compute custom metrics for the GCR (German Credit Risk) model use case.

    Fetches the feedback dataset attached to the given Watson OpenScale
    subscription and derives confusion-matrix rates plus a gender fairness
    ratio over the current feedback window.

    :param access_token: IAM bearer token used for the OpenScale REST calls
    :param data_mart_id: OpenScale data mart id
    :param subscription_id: OpenScale subscription id of the monitored deployment
    :param url: OpenScale service base URL
    :return: dict of metric name -> value; empty dict when no feedback data exists
    """
    feedback_dataset_id = get_feedback_dataset_id(
        access_token, data_mart_id, subscription_id, url
    )
    json_data = get_feedback_data(access_token, data_mart_id, feedback_dataset_id, url)

    # BUG FIX: the original assigned `metrics` only inside the `if json_data:`
    # branch but returned it unconditionally, raising NameError whenever the
    # feedback table was empty. Default to {} (consistent with the churn
    # use case provider, which returns {} in that situation).
    metrics = {}

    if json_data:
        fields = json_data["records"][0]["fields"]
        print(fields)
        values = json_data["records"][0]["values"]
        import pandas as pd

        feedback_data = pd.DataFrame(values, columns=fields)

        # Confusion-matrix cells: ground truth column `Risk` vs the logged
        # `_original_prediction` column of the feedback records.
        fp = feedback_data.query("Risk == 0 & _original_prediction == 1").shape[0]
        tp = feedback_data.query("Risk == 1 & _original_prediction == 1").shape[0]
        fn = feedback_data.query("Risk == 1 & _original_prediction == 0").shape[0]
        tn = feedback_data.query("Risk == 0 & _original_prediction == 0 ").shape[0]

        female_less40_fav_prediction = len(
            feedback_data.query("Sex == 'female' & Age <= 40 & Risk == 0")
        )
        male_less40_fav_prediction = len(
            feedback_data.query("Sex == 'male' & Age <= 40 & Risk == 0")
        )
        # BUG FIX: guard every ratio against division by zero — a small
        # feedback window can legitimately contain no male favorable
        # predictions, no negatives, or no positives.
        gender_less40_fav_prediction_ratio = (
            female_less40_fav_prediction / male_less40_fav_prediction
            if male_less40_fav_prediction
            else 0
        )

        metrics = {
            "specificity": tn / (tn + fp) if (tn + fp) else 0,  # TNR
            "sensitivity": tp / (tp + fn) if (tp + fn) else 0,  # TPR
            "gender_less40_fav_prediction_ratio": gender_less40_fav_prediction_ratio,
            "region": "us-south",
        }

    return metrics
/mlmonitor/custmonitor/tests/unit/config/credentials_churn.cfg: -------------------------------------------------------------------------------- 1 | { 2 | "saas": { 3 | "apikey": "xxxxx", 4 | "iam_url": "https://iam.bluemix.net/oidc/token", 5 | "wml_url": "https://us-south.ml.cloud.ibm.com", 6 | "default_space": "xxxxx", 7 | "cos_resource_crn" : "xxxxx", 8 | "cos_endpoint" : "https://s3.us-east.cloud-object-storage.appdomain.cloud", 9 | "bucket_name" : "xxxxx", 10 | "WML_INSTANCE_ID": "Machine Learning-xxxxx", 11 | "ibm_auth_endpoint" : "https://iam.bluemix.net/oidc/token" 12 | }, 13 | "prem": { 14 | "wml_instance_id": "openshift", 15 | "version": "4.x", 16 | "username": "xxxxx", 17 | "apikey": "xxxxx", 18 | "wos_service_instance_id" : "00000000-0000-0000-0000-0000000000000000", 19 | "url": "xxxxx", 20 | "default_space": "xxxxx" 21 | }, 22 | "azure": 23 | { 24 | "client_id": "xxxxx", 25 | "client_secret": "xxxxx", 26 | "subscription_id": "xxxxx", 27 | "tenant_id": "xxxxx", 28 | "resource_group":"xxxxx", 29 | "workspace_name":"xxxxx" 30 | }, 31 | "aws" : 32 | { 33 | "access_key": "xxxxx", 34 | "role": "xxxxx", 35 | "secret_key": "xxxxx", 36 | "region_name": "xxxxx" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /mlmonitor/custmonitor/tests/unit/config/credentials_gcr.cfg: -------------------------------------------------------------------------------- 1 | { 2 | "saas": { 3 | "apikey": "xxxxx", 4 | "iam_url": "https://iam.bluemix.net/oidc/token", 5 | "wml_url": "https://us-south.ml.cloud.ibm.com", 6 | "default_space": "xxxxx", 7 | "cos_resource_crn" : "xxxxx", 8 | "cos_endpoint" : "https://s3.us-east.cloud-object-storage.appdomain.cloud", 9 | "bucket_name" : "xxxxx", 10 | "WML_INSTANCE_ID": "Machine Learning-xxxxx", 11 | "ibm_auth_endpoint" : "https://iam.bluemix.net/oidc/token" 12 | }, 13 | "prem": { 14 | "wml_instance_id": "openshift", 15 | "version": "4.x", 16 | "username": "xxxxx", 17 | "apikey": "xxxxx", 18 | 
"wos_service_instance_id" : "00000000-0000-0000-0000-0000000000000000", 19 | "url": "xxxxx", 20 | "default_space": "xxxxx" 21 | }, 22 | "azure": 23 | { 24 | "client_id": "xxxxx", 25 | "client_secret": "xxxxx", 26 | "subscription_id": "xxxxx", 27 | "tenant_id": "xxxxx", 28 | "resource_group":"xxxxx", 29 | "workspace_name":"xxxxx" 30 | }, 31 | "aws" : 32 | { 33 | "access_key": "xxxxx", 34 | "role": "xxxxx", 35 | "secret_key": "xxxxx", 36 | "region_name": "xxxxx" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /mlmonitor/custmonitor/tests/unit/test_get_metrics.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from pathlib import Path 3 | import unittest 4 | from unittest.mock import patch 5 | from unittest import TestCase 6 | from ibm_watson_openscale import APIClient as WOS_APIClient 7 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator 8 | 9 | from custmonitor.metricsprovider.helpers import get_access_token_cloud 10 | 11 | from mlmonitor.src import get_connection_details, WOS_URL 12 | from mlmonitor.src.wos.data_mart import get_datamart_ids 13 | from mlmonitor.src.wos.subscription import get_subscription_id_by_deployment 14 | 15 | 16 | class TestGetMetrics(TestCase): 17 | @patch.dict( 18 | "os.environ", 19 | { 20 | "MONITOR_CONFIG_FILE": str( 21 | Path(__file__).parent / "config" / "credentials_gcr.cfg" 22 | ) 23 | }, 24 | clear=True, 25 | ) 26 | def test_get_metrics_gcr(self): 27 | from custmonitor.metrics.credit_risk import get_metrics 28 | 29 | API_KEY, AUTH_ENDPOINT = get_connection_details() 30 | wos_client = WOS_APIClient(authenticator=IAMAuthenticator(apikey=API_KEY)) 31 | monitored_deployment = "GCR_Deploy_Scikit_demo" 32 | token = get_access_token_cloud(apikey=API_KEY) 33 | 34 | subscription_ids = get_subscription_id_by_deployment( 35 | wos_client=wos_client, deployment_name=monitored_deployment 36 | ) 37 | 
data_marts = get_datamart_ids(wos_client=wos_client) 38 | data_mart_id = data_marts[0] 39 | if len(subscription_ids) == 1: 40 | subscription_id = subscription_ids[0] 41 | else: 42 | raise ValueError( 43 | f"No WOS subscription found for deployment {monitored_deployment}" 44 | ) 45 | res = get_metrics(token, data_mart_id, subscription_id, WOS_URL) 46 | self.assertIsInstance(res, dict) 47 | print(res) 48 | 49 | @patch.dict( 50 | "os.environ", 51 | { 52 | "MONITOR_CONFIG_FILE": str( 53 | Path(__file__).parent / "config" / "credentials_churn.cfg" 54 | ) 55 | }, 56 | clear=True, 57 | ) 58 | def test_get_metrics_churn(self): 59 | from custmonitor.metrics.customer_churn import get_metrics 60 | 61 | API_KEY, AUTH_ENDPOINT = get_connection_details() 62 | wos_client = WOS_APIClient(authenticator=IAMAuthenticator(apikey=API_KEY)) 63 | monitored_deployment = "sm-cc-xgboost-2022-10-13-14-36-11-945" 64 | token = get_access_token_cloud(apikey=API_KEY) 65 | 66 | subscription_ids = get_subscription_id_by_deployment( 67 | wos_client=wos_client, deployment_name=monitored_deployment 68 | ) 69 | data_marts = get_datamart_ids(wos_client=wos_client) 70 | data_mart_id = data_marts[0] 71 | if len(subscription_ids) == 1: 72 | subscription_id = subscription_ids[0] 73 | else: 74 | raise ValueError( 75 | f"No WOS subscription found for deployment {monitored_deployment}" 76 | ) 77 | res = get_metrics(token, data_mart_id, subscription_id, WOS_URL) 78 | self.assertIsInstance(res, dict) 79 | print(res) 80 | 81 | def tearDown(self): 82 | print("tests completed") 83 | 84 | 85 | if __name__ == "__main__": 86 | unittest.main() 87 | -------------------------------------------------------------------------------- /mlmonitor/data/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import ibm_boto3 3 | from botocore.client import Config 4 | from mlmonitor.src import IAM_URL, ENV 5 | from mlmonitor.config import 
def get_item(item_name: str, bucket_name: str = BUCKET_NAME):
    """Return the streaming body of a COS object, or None on failure.

    :param item_name: object key within the bucket
    :param bucket_name: source bucket (defaults to the configured BUCKET_NAME)
    :return: botocore StreamingBody on success; None when the fetch fails
    """
    print(f"Retrieving item from bucket: {bucket_name}, key: {item_name}")
    try:
        return cos_client.get_object(Bucket=bucket_name, Key=item_name)["Body"]
    except ClientError as be:
        print(f"CLIENT ERROR: {be}\n")
    except Exception as e:
        print(f"Unable to retrieve file contents: {e}")


def put_item(item_name: str, item_path: str, bucket_name: str = BUCKET_NAME):
    """Upload local file ``<item_path>/<item_name>`` to COS under key ``item_name``.

    :param item_name: object key to create (also the local file name)
    :param item_path: directory containing the local file
    :param bucket_name: destination bucket (defaults to BUCKET_NAME)
    """
    print(f"Uploading item to bucket: {bucket_name}, key: {item_name}")
    try:
        with open(os.path.join(item_path, item_name), "rb") as file_data:
            cos_resource.Object(bucket_name, item_name).upload_fileobj(
                Fileobj=file_data
            )
    except ClientError as be:
        print(f"CLIENT ERROR: {be}\n")
    except Exception as e:
        print(f"Unable to upload file contents: {e}")


def write_item_resource(
    item_name: str, bucket_name: str = BUCKET_NAME, dest_path: str = "/tmp"
):
    """Download COS object ``item_name`` and write it under ``dest_path``.

    :param item_name: object key to download (also the output file name)
    :param bucket_name: source bucket (defaults to BUCKET_NAME)
    :param dest_path: existing directory to write the file into
    :raises ValueError: when ``dest_path`` is not an existing directory
    """
    if not os.path.isdir(dest_path):
        raise ValueError(f"invalid destination path {dest_path}")
    # ibm_boto3.resource
    obj = cos_resource.Object(bucket_name, item_name).get()
    payload = obj["Body"].read()

    # BUG FIX: the original opened `item_name` relative to the current working
    # directory, silently ignoring the dest_path it had just validated.
    with open(os.path.join(dest_path, item_name), "w+b") as f:
        f.write(payload)
Code_737;Area Code_758;Area Code_766;Area Code_776;Area Code_777;Area Code_778;Area Code_786;Area Code_787;Area Code_788;Area Code_797;Area Code_798;Area Code_806;Area Code_827;Area Code_836;Area Code_847;Area Code_848;Area Code_858;Area Code_866;Area Code_868;Area Code_876;Area Code_877;Area Code_878;Int'l Plan_no;Int'l Plan_yes;VMail Plan_no;VMail Plan_yes;Churn?_True. 2 | 145;0;8.49363287;2;7.162111277;1;2.037652604;250;4.478874132;9;6;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;1 3 | 189;300;11.09851346;2;6.798985985;1;4.132633363;50;5.836759905;7;4;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;1;0;1;1 4 | 59;0;5.434502319;3;8.546318101;6;4.272648455;200;6.371482671;2;4;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;0;0 5 | 190;400;9.715114766;3;7.262588757;0;3.960851726;150;7.067862912;7;4;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;1 6 | 77;0;5.62557254;7;3.167193901;2;2.424363465;200;3.000934503;4;7;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;1;0;1;0;0 7 | 188;0;0.961265022;5;4.654414569;5;1.400617806;350;5.504355043;7;4;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;0;0 8 | 
6;600;10.1703701;1;2.403071536;0;8.362608704;100;5.355288921;5;5;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0 9 | 183;0;4.805686029;4;3.637521952;4;3.612075187;150;3.952331116;4;8;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;1 10 | 183;0;4.805686029;4;3.637521952;4;3.612075187;150;3.952331116;4;8;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;1 11 | 176;600;3.127951401;7;3.47122818;2;6.138825476;50;2.961608843;2;7;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;1 12 | -------------------------------------------------------------------------------- /mlmonitor/datasets/churn/feedback_logging_preds_churn.csv: -------------------------------------------------------------------------------- 1 | Account Length;VMail Message;Day Mins;Day Calls;Eve Mins;Eve Calls;Night Mins;Night Calls;Intl Mins;Intl Calls;CustServ Calls;State_AK;State_AL;State_AR;State_AZ;State_CA;State_CO;State_CT;State_DC;State_DE;State_FL;State_GA;State_HI;State_IA;State_ID;State_IL;State_IN;State_KS;State_KY;State_LA;State_MA;State_MD;State_ME;State_MI;State_MN;State_MO;State_MS;State_MT;State_NC;State_ND;State_NE;State_NH;State_NJ;State_NM;State_NV;State_NY;State_OH;State_OK;State_OR;State_PA;State_RI;State_SC;State_SD;State_TN;State_TX;State_UT;State_VA;State_VT;State_WA;State_WI;State_WV;State_WY;Area Code_657;Area Code_658;Area Code_659;Area Code_676;Area Code_677;Area Code_678;Area Code_686;Area Code_707;Area Code_716;Area Code_727;Area Code_736;Area Code_737;Area Code_758;Area 
Code_766;Area Code_776;Area Code_777;Area Code_778;Area Code_786;Area Code_787;Area Code_788;Area Code_797;Area Code_798;Area Code_806;Area Code_827;Area Code_836;Area Code_847;Area Code_848;Area Code_858;Area Code_866;Area Code_868;Area Code_876;Area Code_877;Area Code_878;Int'l Plan_no;Int'l Plan_yes;VMail Plan_no;VMail Plan_yes;Churn?_True.;_original_probability;_original_prediction 2 | 145;0;8.49363287;2;7.162111277;1;2.037652604;250;4.478874132;9;6;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;1;[0.01,0.99];1 3 | 189;300;11.09851346;2;6.798985985;1;4.132633363;50;5.836759905;7;4;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;1;0;1;1;[0.002,0.998];1 4 | 59;0;5.434502319;3;8.546318101;6;4.272648455;200;6.371482671;2;4;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;0;0;[0.201,0.799];0 5 | 190;400;9.715114766;3;7.262588757;0;3.960851726;150;7.067862912;7;4;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;1;[0.01,0.99];1 6 | 77;0;5.62557254;7;3.167193901;2;2.424363465;200;3.000934503;4;7;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;1;0;1;0;0;[0.07,0.93];0 7 | 188;0;0.961265022;5;4.654414569;5;1.400617806;350;5.504355043;7;4;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;1;0;0;[0.0,1.0];0 8 | 
6;600;10.1703701;1;2.403071536;0;8.362608704;100;5.355288921;5;5;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;[0.229,0.771];0 9 | 183;0;4.805686029;4;3.637521952;4;3.612075187;150;3.952331116;4;8;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;1;[0.034,0.966];1 10 | 183;0;4.805686029;4;3.637521952;4;3.612075187;150;3.952331116;4;8;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;0;1;[0.034,0.966];1 11 | 176;600;3.127951401;7;3.47122818;2;6.138825476;50;2.961608843;2;7;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;1;1;[0.108,0.892];1 12 | -------------------------------------------------------------------------------- /mlmonitor/datasets/gcr/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/datasets/gcr/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/datasets/gcr/gcr_explicit_payload_logging.csv: -------------------------------------------------------------------------------- 1 | "CheckingStatus","LoanDuration","CreditHistory","LoanPurpose","LoanAmount","ExistingSavings","EmploymentDuration","InstallmentPercent","Sex","OthersOnLoan","CurrentResidenceDuration","OwnsProperty","Age","InstallmentPlans","Housing","ExistingCreditsCount","Job","Dependents","Telephone","ForeignWorker" 2 | 
"no_checking",13,"credits_paid_to_date","car_new",1343,"100_to_500","1_to_4",2,"female","none",3,"savings_insurance",46,"none","own",2,"skilled",1,"none","yes" 3 | "no_checking",24,"prior_payments_delayed","furniture",4567,"500_to_1000","1_to_4",4,"male","none",4,"savings_insurance",36,"none","free",2,"management_self-employed",1,"none","yes" 4 | "0_to_200",26,"all_credits_paid_back","car_new",863,"less_100","less_1",2,"female","co-applicant",2,"real_estate",38,"none","own",1,"skilled",1,"none","yes" 5 | "0_to_200",14,"no_credits","car_new",2368,"less_100","1_to_4",3,"female","none",3,"real_estate",29,"none","own",1,"skilled",1,"none","yes" 6 | "0_to_200",4,"no_credits","car_new",250,"less_100","unemployed",2,"female","none",3,"real_estate",23,"none","rent",1,"management_self-employed",1,"none","yes" 7 | "no_checking",17,"credits_paid_to_date","car_new",832,"100_to_500","1_to_4",2,"male","none",2,"real_estate",42,"none","own",1,"skilled",1,"none","yes" 8 | "no_checking",33,"outstanding_credit","appliances",5696,"unknown","greater_7",4,"male","co-applicant",4,"unknown",54,"none","free",2,"skilled",1,"yes","yes" 9 | "0_to_200",13,"prior_payments_delayed","retraining",1375,"100_to_500","4_to_7",3,"male","none",3,"real_estate",37,"none","own",2,"management_self-employed",1,"none","yes" 10 | -------------------------------------------------------------------------------- /mlmonitor/datasets/gcr/gcr_feedback_logging_aws.csv: -------------------------------------------------------------------------------- 1 | ;CheckingStatus;LoanDuration;CreditHistory;LoanPurpose;LoanAmount;ExistingSavings;EmploymentDuration;InstallmentPercent;Sex;OthersOnLoan;CurrentResidenceDuration;OwnsProperty;Age;InstallmentPlans;Housing;ExistingCreditsCount;Job;Dependents;Telephone;ForeignWorker;Risk;transaction_id 2 | 2947;0_to_200;16;credits_paid_to_date;vacation;5118;less_100;4_to_7;4;male;none;4;unknown;31;none;own;1;skilled;1;none;yes;0;68e51f70-7669-4b84-92c0-cff0200f5f8b 3 | 
3139;no_checking;37;prior_payments_delayed;other;7536;unknown;4_to_7;4;female;none;4;unknown;52;none;own;2;skilled;1;none;yes;1;7d8b0ee0-a54a-48ca-acbc-6b9f3cb748c3 4 | 1169;less_0;17;credits_paid_to_date;radio_tv;2533;less_100;less_1;2;female;none;3;savings_insurance;32;none;own;1;skilled;1;none;yes;0;4f5a481f-4458-4510-a034-89ab14cb76a3 5 | 4946;no_checking;20;outstanding_credit;car_used;4376;500_to_1000;1_to_4;3;male;none;4;car_other;46;none;own;1;skilled;1;none;yes;1;5e23346d-6e1f-40b7-88b2-e6840c415d5c 6 | 3558;no_checking;54;outstanding_credit;education;3972;unknown;greater_7;4;female;co-applicant;4;savings_insurance;41;none;own;2;skilled;2;yes;yes;1;f865ae14-dcff-4226-bb2e-0e2ae4dbfd9d 7 | 1854;0_to_200;24;outstanding_credit;appliances;881;100_to_500;1_to_4;2;male;none;3;car_other;36;none;own;2;skilled;1;none;yes;1;c1948c95-89ed-488c-98df-08b9de32b44b 8 | 4436;less_0;4;credits_paid_to_date;car_used;250;less_100;1_to_4;3;female;none;3;savings_insurance;34;none;own;1;skilled;1;none;yes;0;485e17c6-dec9-4e80-9d00-035c75969bfe 9 | 2471;no_checking;26;prior_payments_delayed;business;6202;greater_1000;4_to_7;3;male;none;4;savings_insurance;37;none;own;2;management_self-employed;1;none;yes;1;dc81a756-2d93-4801-a49b-0d0796f5e58b 10 | 4217;less_0;29;outstanding_credit;radio_tv;1751;100_to_500;1_to_4;3;male;none;3;savings_insurance;32;none;own;2;skilled;1;none;yes;0;a7927edc-70c2-4a23-9480-28805b229ca1 11 | 1998;no_checking;33;outstanding_credit;repairs;5717;unknown;greater_7;5;male;co-applicant;5;unknown;54;none;free;2;skilled;2;yes;yes;1;4a454a5e-07d5-4986-8e97-34c2046f44a6 12 | -------------------------------------------------------------------------------- /mlmonitor/datasets/gcr/gcr_feedback_logging_with_pred_aws.csv: -------------------------------------------------------------------------------- 1 | 
CheckingStatus;LoanDuration;CreditHistory;LoanPurpose;LoanAmount;ExistingSavings;EmploymentDuration;InstallmentPercent;Sex;OthersOnLoan;CurrentResidenceDuration;OwnsProperty;Age;InstallmentPlans;Housing;ExistingCreditsCount;Job;Dependents;Telephone;ForeignWorker;Risk;transaction_id;predicted_label;score 2 | no_checking;41;prior_payments_delayed;furniture;5074;greater_1000;1_to_4;4;male;none;4;savings_insurance;40;none;own;2;management_self-employed;1;none;yes;0;55ba90d4-c831-4015-a8cd-23b49ee75807;0;[0.6861177417324611, 0.3138822582675389] 3 | less_0;6;all_credits_paid_back;car_new;250;less_100;1_to_4;1;female;none;2;real_estate;26;stores;rent;1;skilled;1;none;yes;0;33906bc2-ddcf-4ad8-b730-255ca018a5ae;0;[0.9588428054933256, 0.04115719450667443] 4 | less_0;4;credits_paid_to_date;education;251;less_100;4_to_7;3;male;none;2;savings_insurance;34;none;own;2;management_self-employed;1;yes;yes;0;ae440042-0591-4fb7-8697-e6225fcd0a37;0;[0.8994363944826945, 0.10056360551730548] 5 | no_checking;14;outstanding_credit;appliances;1680;100_to_500;greater_7;4;male;none;3;car_other;47;none;own;1;management_self-employed;1;none;yes;0;f3def26f-1dbf-4c83-bbcf-ca9bd2daba90;0;[0.6466004910943104, 0.3533995089056896] 6 | no_checking;38;prior_payments_delayed;other;5588;greater_1000;1_to_4;4;male;none;4;savings_insurance;43;stores;own;2;unskilled;1;yes;yes;1;b5c274f0-d560-4ab2-9f6c-f8d972d13bb0;0;[0.7772365000457244, 0.22276349995427558] 7 | no_checking;24;prior_payments_delayed;radio_tv;7762;greater_1000;less_1;4;male;none;3;savings_insurance;39;none;own;2;management_self-employed;1;none;yes;0;1191f315-d328-41cc-bc80-74c80bb42ab7;0;[0.8483684543164751, 0.15163154568352488] 8 | no_checking;38;prior_payments_delayed;repairs;6353;unknown;greater_7;4;male;co-applicant;3;car_other;31;none;own;2;skilled;1;yes;yes;1;800e5f2f-9f98-4a84-b954-715cb22a123b;1;[0.2209597305028863, 0.7790402694971137] 9 | 
greater_200;20;outstanding_credit;radio_tv;4237;100_to_500;4_to_7;4;female;none;4;car_other;47;none;own;1;skilled;1;none;yes;0;68b79e4f-6571-41e9-935c-cda007e89c54;0;[0.6139399998877615, 0.3860600001122385] 10 | less_0;11;no_credits;car_new;250;100_to_500;less_1;2;male;none;1;real_estate;19;bank;rent;1;unemployed;1;none;yes;0;48310777-dd77-4e4d-96e1-b6c3d0878a1a;0;[0.9880351508898781, 0.011964849110121856] 11 | 0_to_200;14;prior_payments_delayed;car_new;3582;greater_1000;less_1;3;male;none;2;savings_insurance;34;none;own;1;skilled;1;yes;yes;0;c9a4a1b5-fc98-47a3-beec-a411f9e4045f;0;[0.9046153932588816, 0.09538460674111837] 12 | -------------------------------------------------------------------------------- /mlmonitor/datasets/gcr/gcr_feedback_logging_with_pred_aws2.csv: -------------------------------------------------------------------------------- 1 | CheckingStatus;LoanDuration;CreditHistory;LoanPurpose;LoanAmount;ExistingSavings;EmploymentDuration;InstallmentPercent;Sex;OthersOnLoan;CurrentResidenceDuration;OwnsProperty;Age;InstallmentPlans;Housing;ExistingCreditsCount;Job;Dependents;Telephone;ForeignWorker;Risk;predicted_label;score 2 | no_checking;41;prior_payments_delayed;furniture;5074;greater_1000;1_to_4;4;male;none;4;savings_insurance;40;none;own;2;management_self-employed;1;none;yes;0;0;0.6861177417324611 3 | less_0;6;all_credits_paid_back;car_new;250;less_100;1_to_4;1;female;none;2;real_estate;26;stores;rent;1;skilled;1;none;yes;0;0;0.9588428054933256 4 | less_0;4;credits_paid_to_date;education;251;less_100;4_to_7;3;male;none;2;savings_insurance;34;none;own;2;management_self-employed;1;yes;yes;0;0;0.8994363944826945 5 | no_checking;14;outstanding_credit;appliances;1680;100_to_500;greater_7;4;male;none;3;car_other;47;none;own;1;management_self-employed;1;none;yes;0;0;0.6466004910943104 6 | no_checking;38;prior_payments_delayed;other;5588;greater_1000;1_to_4;4;male;none;4;savings_insurance;43;stores;own;2;unskilled;1;yes;yes;1;0;0.7772365000457244 7 | 
no_checking;24;prior_payments_delayed;radio_tv;7762;greater_1000;less_1;4;male;none;3;savings_insurance;39;none;own;2;management_self-employed;1;none;yes;0;0;0.8483684543164751 8 | no_checking;38;prior_payments_delayed;repairs;6353;unknown;greater_7;4;male;co-applicant;3;car_other;31;none;own;2;skilled;1;yes;yes;1;1;0.7790402694971137 9 | greater_200;20;outstanding_credit;radio_tv;4237;100_to_500;4_to_7;4;female;none;4;car_other;47;none;own;1;skilled;1;none;yes;0;0;0.6139399998877615 10 | less_0;11;no_credits;car_new;250;100_to_500;less_1;2;male;none;1;real_estate;19;bank;rent;1;unemployed;1;none;yes;0;0;0.9880351508898781 11 | 0_to_200;14;prior_payments_delayed;car_new;3582;greater_1000;less_1;3;male;none;2;savings_insurance;34;none;own;1;skilled;1;yes;yes;0;0;0.9046153932588816 12 | -------------------------------------------------------------------------------- /mlmonitor/datasets/mnist/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/datasets/mnist/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/datasets/mnist/t10k-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/datasets/mnist/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /mlmonitor/datasets/mnist/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/datasets/mnist/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /mlmonitor/datasets/mnist/train-images-idx3-ubyte.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/datasets/mnist/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /mlmonitor/datasets/mnist/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/datasets/mnist/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /mlmonitor/factsheets_churn/custom_model_asset_facts_churn.csv: -------------------------------------------------------------------------------- 1 | name,type,placeholder,description,is_array,required,hidden,readonly,default_value,label,minimum,maximum,min_length,max_length,is_searchable,Not_needed 2 | ModelPurpose,string,ModelPurpose,ModelPurpose,FALSE,FALSE,FALSE,FALSE,,ModelPurpose,,,0,255,FALSE, 3 | ModelBusinessUse,string,ModelBusinessUse,ModelBusinessUse,FALSE,FALSE,FALSE,FALSE,,ModelBusinessUse,,,0,255,FALSE, 4 | ModelTesting,string,ModelTesting,ModelTesting,FALSE,FALSE,FALSE,FALSE,,ModelTesting,,,0,255,FALSE, 5 | CrossValidation,string,CrossValidation,CrossValidation,FALSE,FALSE,FALSE,FALSE,,CrossValidation,,,0,255,FALSE, 6 | OutOfSampleTesting,string,OutOfSampleTesting,OutOfSampleTesting,FALSE,FALSE,FALSE,FALSE,,OutOfSampleTesting,,,0,255,FALSE, 7 | -------------------------------------------------------------------------------- /mlmonitor/factsheets_churn/custom_model_asset_facts_values_churn.csv: -------------------------------------------------------------------------------- 1 | name;default_value 2 | ModelPurpose;This model predicts the likelihood of a customer to churn in the next 3 months 3 | ModelBusinessUse;This model is used to select the customers for discounts based on the predicted outcome 4 | 
ModelTesting;Conducted in sample, out of sample testing 5 | CrossValidation;5-fold cross validation performed on this dataset s3://sagemaker-sample-files/datasets/tabular/synthetic/churn.txt 6 | OutOfSampleTesting;94% precision and 97% recall on validation dataset 7 | -------------------------------------------------------------------------------- /mlmonitor/factsheets_churn/custom_model_entry_facts_churn.csv: -------------------------------------------------------------------------------- 1 | name,type,description,placeholder,is_array,required,hidden,readonly,default_value,label,minimum,maximum,min_length,max_length,is_searchable 2 | BusinessSponsor,string,BusinessSponsor,BusinessSponsor,FALSE,FALSE,FALSE,FALSE,attr1,BusinessSponsor,,,0,255,TRUE 3 | ModelDeveloper,string,ModelDeveloper,ModelDeveloper,FALSE,FALSE,FALSE,FALSE,,ModelDeveloper,,,0,255,TRUE 4 | ModelValidator,string,ModelValidator,ModelValidator,FALSE,FALSE,FALSE,FALSE,,ModelValidator,,,0,255,TRUE 5 | Risk rating,string,Risk rating,Risk rating,FALSE,FALSE,FALSE,FALSE,,Risk rating,,,0,255,TRUE 6 | Approval date,string,Approval date,Approval date,FALSE,FALSE,FALSE,FALSE,,Approval date,,,0,255,TRUE 7 | MonitoringDesign,string,MonitoringDesign,MonitoringDesign,FALSE,FALSE,FALSE,FALSE,,MonitoringDesign,,,0,255,FALSE 8 | PerformanceMonitoring,string,PerformanceMonitoring,PerformanceMonitoring,FALSE,FALSE,FALSE,FALSE,,PerformanceMonitoring,,,0,255,FALSE 9 | Regulatory Requirements,string,Regulatory Requirements,Regulatory Requirements,FALSE,FALSE,FALSE,FALSE,,Regulatory Requirements,,,0,255,TRUE 10 | ModelArchitecture,string,ModelArchitecture,ModelArchitecture,FALSE,FALSE,FALSE,FALSE,,ModelArchitecture,,,0,255,TRUE 11 | BusinessPurpose,string,BusinessPurpose,BusinessPurpose,FALSE,FALSE,FALSE,FALSE,,Business Purpose(s),,,0,255,TRUE 12 | CitedWorks,string,"If citing academic works, please include those here.","If citing academic works,please include those here.",TRUE,FALSE,FALSE,FALSE,,References: Cited 
Works,,,0,255,FALSE 13 | -------------------------------------------------------------------------------- /mlmonitor/factsheets_churn/custom_model_entry_facts_values_churn.csv: -------------------------------------------------------------------------------- 1 | name;default_value 2 | BusinessSponsor;Kullback–Leibler 3 | ModelDeveloper;Robert E. Schapire 4 | ModelValidator;Leo Breiman 5 | Risk rating;High 6 | Approval date;2022/08/30 7 | MonitoringDesign;Online Model monitor with Watson OpenScale for data drift and model drift. Performance are also evaluated with feedback data for Precision and Recall , FPR,FNR. ground-truth data might not be available in a timely manner. Drift monitoring is preferred 8 | PerformanceMonitoring;hourly evaluations for data drift 9 | Regulatory Requirements;ASR-11-7 for US and E-23 For Canada 10 | ModelArchitecture;xgboost 11 | BusinessPurpose;This model predicts the likelihood of a customer to churn in the next 3 months 12 | CitedWorks;Robert E. Schapire,1999,A Brief Introduction to Boosting|Tianqi Chen, Carlos Guestrin,2016,XGBoost: A Scalable Tree Boosting System 13 | -------------------------------------------------------------------------------- /mlmonitor/figures/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/figures/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/models/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/models/model_churn/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/models/model_churn/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/models/model_churn/xgboost-model-bst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/models/model_churn/xgboost-model-bst -------------------------------------------------------------------------------- /mlmonitor/models/model_churn/xgboost-model-sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/models/model_churn/xgboost-model-sk -------------------------------------------------------------------------------- /mlmonitor/models/model_gcr/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/models/model_gcr/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/models/model_mnist/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/models/model_mnist/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/setup.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from setuptools import setup 3 | import os 4 | 5 | 6 | def version(path: str): 7 | with open(os.path.join(path, "version.meta"), "r") as v: 8 | return v.read().strip() 9 | 10 | 11 | def readme(path: str): 12 | with open(os.path.join(path, "README.md"), "r") as v: 13 | return v.read() 14 | 15 | 16 
| current_directory = os.path.abspath(os.path.dirname(__file__)) 17 | 18 | setup( 19 | name="custmonitor", 20 | version=version(current_directory), 21 | long_description=readme(path=os.path.join(current_directory, "custmonitor")), 22 | description="helpers for Watson OpenScale custom monitors", 23 | author="Jacques-Sylvain Lecointre", 24 | author_email="js.lecointre@ibm.com", 25 | url="https://github.com/IBM/mlmonitor/mlmonitor/custmonitor", 26 | packages=["custmonitor", "custmonitor.metricsprovider", "custmonitor.metrics"], 27 | include_package_data=True, 28 | ) 29 | -------------------------------------------------------------------------------- /mlmonitor/src/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import logging 3 | import os 4 | from os.path import dirname, abspath 5 | from ibm_cloud_sdk_core.authenticators import ( 6 | IAMAuthenticator, 7 | CloudPakForDataAuthenticator, 8 | ) 9 | from mlmonitor.config import ( 10 | get_env, 11 | get_connection_details, 12 | get_wkc_details, 13 | get_aws_credentials, 14 | get_wos_details, 15 | get_cp4d_on_prem_details, 16 | assign_verify_ssl, 17 | ) 18 | 19 | 20 | PROJECT_ROOT = abspath(dirname(dirname(__file__))) 21 | DATA_ROOT = f"{PROJECT_ROOT}/datasets" 22 | MODEL_ROOT = f"{PROJECT_ROOT}/models" 23 | FIGURES_ROOT = f"{PROJECT_ROOT}/figures" 24 | IAM_URL = "https://iam.bluemix.net/oidc/token" 25 | 26 | ENV = get_env() 27 | API_KEY, AUTH_ENDPOINT = get_connection_details(env=ENV, iam_url=IAM_URL) 28 | MODEL_ENTRY_ID, CATALOG_ID = get_wkc_details(env=ENV) 29 | 30 | key, secret, region, ROLE = get_aws_credentials() 31 | aws_credentials = { 32 | "aws_access_key_id": key, 33 | "aws_secret_access_key": secret, 34 | "region_name": region, 35 | } 36 | 37 | WOS_URL, WOS_INSTANCE_ID = get_wos_details(env=ENV) 38 | CP4D_VERSION, USERNAME = get_cp4d_on_prem_details(env=ENV) 39 | 40 | 41 | if ENV == "saas": 42 | authenticator = 
IAMAuthenticator(apikey=API_KEY) 43 | VERIFY_CP4D_SSL = assign_verify_ssl(default_value=True) 44 | elif ENV == "prem": 45 | authenticator = CloudPakForDataAuthenticator( 46 | url=AUTH_ENDPOINT, 47 | username=USERNAME, 48 | apikey=API_KEY, 49 | disable_ssl_verification=False, 50 | ) 51 | VERIFY_CP4D_SSL = assign_verify_ssl(default_value=False) 52 | 53 | else: 54 | raise ValueError( 55 | f"ENV set to '{ENV}'.Value should be set to 'saas' (IBM Cloud) or 'prem' (On premise cluster)" 56 | ) 57 | 58 | logging.basicConfig( 59 | level=logging.INFO, 60 | format="%(asctime)s - %(name)s - %(funcName)s -%(levelname)s - %(message)s", 61 | ) 62 | logger = logging.getLogger(__name__) 63 | logger.setLevel(int(os.getenv("LOG_LEVEL", logging.INFO))) 64 | logger.debug(f"ENV set to {ENV}") 65 | -------------------------------------------------------------------------------- /mlmonitor/src/aws/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from sagemaker.sklearn.estimator import SKLearnModel # , SKLearn 3 | from sagemaker.xgboost import XGBoostModel # , XGBoost 4 | from sagemaker.tensorflow import TensorFlowModel, TensorFlow 5 | from sagemaker.pytorch import PyTorchModel, PyTorch 6 | from sagemaker.estimator import Estimator 7 | 8 | from sagemaker.deserializers import CSVDeserializer 9 | from sagemaker.serializers import CSVSerializer 10 | from sagemaker.deserializers import JSONDeserializer 11 | from sagemaker.serializers import JSONSerializer 12 | 13 | # https://docs.aws.amazon.com/sagemaker/latest/dg/pre-built-containers-frameworks-deep-learning.html 14 | # https://github.com/aws/sagemaker-python-sdk#installing-the-sagemaker-python-sdk 15 | # https://github.com/aws/deep-learning-containers/blob/master/available_images.md 16 | sagemaker_models = { 17 | "sklearn": SKLearnModel, 18 | "xgboost": XGBoostModel, 19 | "tensorflow": TensorFlowModel, 20 | "pytorch": PyTorchModel, 21 | } 22 | 
sagemaker_estimators = {
    "sklearn": Estimator,
    "xgboost": Estimator,
    # https://sagemaker.readthedocs.io/en/v2.8.0/frameworks/tensorflow/sagemaker.tensorflow.html#tensorflow-estimator
    "tensorflow": TensorFlow,
    "pytorch": PyTorch,
}
sagemaker_serializers = {"json": JSONSerializer, "csv": CSVSerializer}
sagemaker_deserializers = {"json": JSONDeserializer, "csv": CSVDeserializer}

SUPPORTED_SAGEMAKER_ESTIMATORS = list(sagemaker_estimators.keys())

# --- mlmonitor/src/aws/deployment.py ---
# SPDX-License-Identifier: Apache-2.0
import os
from sagemaker import Session as sm_Session
from sagemaker import image_uris
from mlmonitor.src import PROJECT_ROOT, ROLE
from typing import Dict, Optional
from boto3.session import Session as boto_Session


def generate_base_deployment_params(
    trained_model_data: str,
    source_dir: str,
    framework: str,
    framework_version: str,
    py_version: str,
    script: str,
    instance: str,
) -> Dict:
    """
    Generate the base parameters for deploying an AWS Sagemaker endpoint.

    :param trained_model_data: S3 location of the trained model artifact
    :param source_dir: directory (relative to PROJECT_ROOT) containing the model scripts
    :param framework: ML framework, one of pytorch, sklearn, tensorflow, xgboost
    :param framework_version: framework version used to resolve the inference image
    :param py_version: python version (not used for tensorflow images)
    :param script: inference entry point script name
    :param instance: instance type used to resolve the inference image
    :return: dictionary of parameters passed to the Sagemaker model constructor
    """
    assert framework in {"pytorch", "sklearn", "tensorflow", "xgboost"}

    # resolve the AWS-managed inference container image for this framework/version
    container = image_uris.retrieve(
        framework=framework,
        region=os.environ.get("AWS_DEFAULT_REGION", "ca-central-1"),
        version=framework_version,
        image_scope="inference",
        instance_type=instance,
    )
    print(container)

    deployment_model_params = {
        "model_data": trained_model_data.strip(),
        "source_dir": f"{PROJECT_ROOT}/{source_dir}",
        "image_uri": container,
        "role": ROLE,
        "entry_point": script,
        "framework_version": framework_version,
        # 10 debug 20 info 30 warning 40 error
        # BUGFIX: the previous code later reassigned this to the tuple (20,)
        # for tensorflow/pytorch because of a stray trailing comma; the int
        # value set here is the intended log level for every framework.
        "container_log_level": 20,
    }

    # tensorflow inference images do not take a py_version argument
    if framework != "tensorflow":
        deployment_model_params["py_version"] = py_version

    return deployment_model_params


def is_deployed(
    deployment_name: str, sagemaker_client: Optional[boto_Session.client] = None
) -> bool:
    """
    Check whether a Sagemaker endpoint with the given name exists.

    :param deployment_name: endpoint name to look for
    :param sagemaker_client: optional pre-instantiated Sagemaker boto client
    :return: True when exactly one endpoint named deployment_name is listed
    """
    if not sagemaker_client:
        session = boto_Session()
        sagemaker_client = session.client("sagemaker")

    # NOTE(review): list_endpoints is capped at MaxResults=100; endpoints beyond
    # the first page would not be found -- confirm this limit is acceptable.
    filtered_deployments = [
        resource
        for resource in sagemaker_client.list_endpoints(MaxResults=100).get("Endpoints")
        if resource.get("EndpointName") == deployment_name
    ]
    return len(filtered_deployments) == 1


def describe_ep(deployment_name: str, **aws_credentials) -> Dict:
    """
    Return the Sagemaker endpoint description.

    :param deployment_name: endpoint name
    :param aws_credentials: aws_access_key_id, aws_secret_access_key, region_name
    :return: dictionary with EndpointArn CreationTime LastModifiedTime EndpointStatus
    """
    session = boto_Session(**aws_credentials)
    sagemaker_session = sm_Session(session)
    return sagemaker_session.sagemaker_client.describe_endpoint(
        EndpointName=deployment_name
    )

# --- mlmonitor/src/aws/score_sagemaker_ep.py ---
# SPDX-License-Identifier: Apache-2.0
from mlmonitor.src.aws.scoring import get_wos_response, _score_unstructured
from mlmonitor.src.model.config import ModelConfig


def score_sagemaker_endpoint(
    model_config: ModelConfig,
    deployment_name: str,
    aws_credentials: dict,
    inference_samples: int = 2,
) -> dict:
    """
    Send a scoring request to the deployed Sagemaker endpoint <deployment_name>.

    :param model_config: model configuration (data type, feature columns, ...)
    :param deployment_name: EndpointName of the AWS online inference endpoint
    :param aws_credentials: dict with keys aws_access_key_id,
        aws_secret_access_key and region_name
    :param inference_samples: number of inference samples to send
    :return: predictions in Watson OpenScale feedback-logging format, e.g.
        {'fields': ['_original_prediction', '_original_probability'],
         'values': [[1, 0.984], [1, 0.997]]}
    :raises ValueError: when model_config.data_type is neither structured nor
        unstructured_image
    """
    # NOTE(review): relies on the private ModelConfig._get_data helper --
    # consider exposing a public accessor.
    test_data = model_config._get_data(
        dataset_type="test", num_samples=inference_samples
    )

    if model_config.data_type == "structured":
        frame = test_data.loc[:, model_config.feature_columns]
        return get_wos_response(
            df=frame,
            aws_access_key_id=aws_credentials.get("aws_access_key_id"),
            aws_secret_access_key=aws_credentials.get("aws_secret_access_key"),
            region_name=aws_credentials.get("region_name"),
            endpoint_name=deployment_name,
            prediction_field=model_config.prediction_field,
            probability_field=model_config.probability_fields[0],
        )

    if model_config.data_type == "unstructured_image":
        samples, labels = test_data
        result = _score_unstructured(
            payload=samples, endpoint_name=deployment_name, **aws_credentials
        )
        first_prediction = result.get("predictions")[0]
        return {
            "fields": first_prediction.get("fields"),
            "values": first_prediction.get("values"),
        }

    raise ValueError(
        "supported data_type are structured or unstructured_image (must be passed in model signature)"
    )

# --- mlmonitor/src/azure/__init__.py ---
# SPDX-License-Identifier: Apache-2.0
from datetime import datetime
from typing import Optional
import logging
from mlmonitor.config import get_azure_credentials
from mlmonitor.src import logger

try:
    from azureml.core.authentication import ServicePrincipalAuthentication
    from azureml.core import Workspace
except ModuleNotFoundError:
    logger.warning("run pip install mlmonitor[azure] to use AzureModelUseCase")

SUPPORTED_AZURE_COMPUTE = {"aci", "aks"}


def get_workspace_age(
    workspace: Workspace, log: Optional[logging.Logger] = None
) -> str:
    """
    Return the creation date of an Azure ML workspace and optionally log its age.

    :param workspace: Workspace object
    :param log: optional logger used to report the workspace age
    :return: creation timestamp string of the workspace
    """
    # fetch the details once (the previous code called get_details() twice)
    creation_date = workspace.get_details().get("creationTime")
    # [:-2] trims the last two characters of the timestamp before parsing --
    # presumably a trailing offset artifact of the API format; TODO confirm
    ws_age = datetime.now() - datetime.strptime(
        creation_date[:-2], "%Y-%m-%dT%H:%M:%S.%f"
    )
    if log:
        log.info(
            f"AZ ML workspace {workspace.name} in resource group {workspace.resource_group} fetched created {ws_age.days} "
            f"days {ws_age.seconds // 3600} hours {ws_age.seconds % 3600 // 60} "
            f"minutes {ws_age.seconds % 3600 % 60} seconds ago."
        )
    return creation_date


(
    az_ws_name,
    az_rg,
    az_sub_id,
    az_tenant_id,
    az_sp_id,
    az_sp_secret,
) = get_azure_credentials()

# Best-effort workspace instantiation at import time; AZ_WORKSPACE is None
# when Azure credentials/SDK are unavailable.
try:
    logger.debug(f"Instantiate AZURE ML Workspace {az_ws_name}")

    AZ_SP_AUTH = ServicePrincipalAuthentication(
        tenant_id=az_tenant_id,
        service_principal_id=az_sp_id,
        service_principal_password=az_sp_secret,
    )

    AZ_WORKSPACE = Workspace.get(
        name=az_ws_name,
        subscription_id=az_sub_id,
        resource_group=az_rg,
        auth=AZ_SP_AUTH,
    )

    get_workspace_age(workspace=AZ_WORKSPACE, log=logger)

except Exception as e:
    AZ_WORKSPACE = None
    logger.warning(f"AZ_WORKSPACE {az_ws_name} instantiation failed : {e}")

# --- mlmonitor/src/factsheets/deployments.py ---
# SPDX-License-Identifier: Apache-2.0
import sagemaker
from boto3.session import Session
from typing import Optional


def add_aws_deployment_details(
    apikey: str,
    session: Session,
    endpoint_name: str,
    catalog_id: str = None,
    model_entry_id: str = None,
    inference_entrypoint: str = None,
    source_dir: str = None,
    description: str = "",
    framework_version: str = "unknown",
    framework: str = "unknown",
    env: str = "saas",
    cp4d_username: Optional[str] = None,
    cp4d_url: Optional[str] = None,
):
    """
    Register a deployed Sagemaker endpoint as an external model in IBM AI Factsheets.

    Describes the endpoint, its endpoint config and model in Sagemaker, logs
    the collected facts to a new Factsheets run, saves the external model asset
    and tracks it in the given model use case.

    :param apikey: IBM Cloud (saas) or CP4D (prem) API key
    :param session: boto3 session used to reach Sagemaker
    :param endpoint_name: Sagemaker endpoint name (also used as experiment name)
    :param catalog_id: Factsheets catalog identifier
    :param model_entry_id: Factsheets model use case identifier
    :param inference_entrypoint: inference script logged as a fact
    :param source_dir: model scripts directory logged as a fact
    :param description: description of the saved external model asset
    :param framework_version: framework version logged as a fact
    :param framework: framework name logged as a fact
    :param env: 'saas' (IBM Cloud) or 'prem' (Cloud Pak for Data)
    :param cp4d_username: CP4D username (prem only)
    :param cp4d_url: CP4D service url (prem only)
    :raises ValueError: when env is neither 'saas' nor 'prem'
    """
    from ibm_aigov_facts_client import (
        AIGovFactsClient,
        DeploymentDetails,
        CloudPakforDataConfig,
    )
    from ibm_aigov_facts_client.utils.client_errors import ClientError

    sagemaker_session = sagemaker.Session(session)
    endpoint_data = sagemaker_session.sagemaker_client.describe_endpoint(
        EndpointName=endpoint_name
    )
    endpoint_config_data = sagemaker_session.sagemaker_client.describe_endpoint_config(
        EndpointConfigName=endpoint_data.get("EndpointConfigName")
    )
    model_data = sagemaker_session.sagemaker_client.describe_model(
        ModelName=endpoint_config_data.get("ProductionVariants")[0].get("ModelName")
    )

    if env == "prem":
        facts_client = AIGovFactsClient(
            cloud_pak_for_data_configs=CloudPakforDataConfig(
                service_url=cp4d_url,
                username=cp4d_username,
                api_key=apikey,
            ),
            experiment_name=endpoint_name,
            set_as_current_experiment=True,
            external_model=True,
            enable_autolog=False,
        )
    elif env == "saas":
        facts_client = AIGovFactsClient(
            api_key=apikey,
            experiment_name=endpoint_name,
            set_as_current_experiment=True,
            external_model=True,
            enable_autolog=False,
        )
    else:
        # BUGFIX: previously an unsupported env fell through and facts_client
        # was referenced while unbound (NameError); fail fast instead.
        raise ValueError(
            f"env set to '{env}'. Value should be 'saas' (IBM Cloud) or 'prem' (On premise cluster)"
        )

    facts_client.manual_log.start_trace()
    # get Run ID
    run_id = facts_client.runs.get_current_run_id()
    print(f"Current RunID {run_id}")
    facts_client.manual_log.log_params({"EndpointName": endpoint_name})
    facts_client.manual_log.log_params(
        {
            "EndpointName": endpoint_data.get("EndpointName"),
            "EndpointArn": endpoint_data.get("EndpointArn"),
            "EndpointConfigName": endpoint_data.get("EndpointConfigName"),
            "CreationTime": endpoint_data.get("CreationTime").strftime(
                "%m/%d/%Y, %H:%M:%S"
            ),
            "EndpointStatus": endpoint_data.get("EndpointStatus"),
            "ModelName": model_data.get("ModelName"),
            "PrimaryContainer": model_data.get("PrimaryContainer").get("Image"),
            "ModelDataUrl": model_data.get("PrimaryContainer").get("ModelDataUrl"),
            "entry_point": inference_entrypoint,
            "source_dir": source_dir,
            "framework_version": framework_version,
            "framework": framework,
        }
    )

    deployment = DeploymentDetails(
        identifier=endpoint_name,
        name=endpoint_name,
        deployment_type="online",
        scoring_endpoint=endpoint_name,
    )

    facts_client.export_facts.export_payload_manual(run_id)

    fs_model = facts_client.external_model_facts.save_external_model_asset(
        model_identifier=model_data.get("ModelName"),
        name=model_data.get("ModelName"),
        description=description,
        deployment_details=deployment,
        catalog_id=catalog_id,
    )

    muc_utilities = facts_client.assets.get_ai_usecase(
        ai_usecase_id=model_entry_id,
        catalog_id=catalog_id,
    )
    try:
        fs_model.track(
            usecase=muc_utilities,
            approach=muc_utilities.get_approaches()[0],
            version_number="minor",  # "0.1.0"
        )
    except ClientError as e:  # noqa F841 error is already being tracked
        pass

# --- mlmonitor/src/factsheets/model_asset_facts.py ---
# SPDX-License-Identifier: Apache-2.0
import os
import pandas as pd

from mlmonitor.src import USERNAME, AUTH_ENDPOINT
from mlmonitor.src.utils.utils import parse_args
from mlmonitor.src import PROJECT_ROOT, API_KEY, MODEL_ENTRY_ID, CATALOG_ID
from mlmonitor.src.factsheets.utils import FactsheetHelpers


def populate_model_facts(
    model_id: str,
    apikey: str = API_KEY,
    model_entry_id: str = MODEL_ENTRY_ID,
    catalog_id: str = CATALOG_ID,
    user_case: str = "churn",
):
    """
    Create a model asset schema and populate default values using the WKC API.

    :param model_id: model asset ID
    :param apikey: IBM Cloud API key used for authentication
    :param model_entry_id: AI Factsheets model use case identifier
    :param catalog_id: AI Factsheets catalog identifier
    :param user_case: use case name selecting the factsheets_<user_case> assets.
        BUGFIX: this was previously read from a module-level global that only
        existed when the file ran as a script, so importing and calling this
        function raised NameError; the defaulted parameter is backward compatible.
    """
    modelfacts_user = os.path.join(
        f"{PROJECT_ROOT}/factsheets_{user_case}/",
        f"custom_model_asset_facts_{user_case}.csv",
    )

    df_modelfacts_values = pd.read_csv(
        f"{PROJECT_ROOT}/factsheets_{user_case}/custom_model_asset_facts_values_{user_case}.csv",
        sep=";",
    ).loc[:, ["name", "default_value"]]

    fs_helper = FactsheetHelpers(
        api_key=apikey,
        container_type="catalog",
        container_id=catalog_id,
        model_entry_id=model_entry_id,
        username=USERNAME,
        cpd_url=AUTH_ENDPOINT,
    )

    print(f"Applying model facts : {modelfacts_user} ")
    fs_helper.define_custom_model_facts(modelfacts_user)

    fs_helper.set_custom_fact(
        fact_ids=df_modelfacts_values["name"].to_list(),
        fact_values=df_modelfacts_values["default_value"].to_list(),
        model_id=model_id,
        op="add",
    )


if __name__ == "__main__":

    args = parse_args()
    user_case = "churn"
    ibm_apikey = API_KEY
    populate_model_facts(
        model_entry_id=args.model_entry_id,
        catalog_id=args.catalog_id,
        model_id=args.model_id,
        apikey=ibm_apikey,
        user_case=user_case,
    )

# --- mlmonitor/src/factsheets/model_entry_facts.py ---
# SPDX-License-Identifier: Apache-2.0
import os
import json
import pandas as pd

from mlmonitor.src import USERNAME, AUTH_ENDPOINT, API_KEY
from mlmonitor.src import PROJECT_ROOT, VERIFY_CP4D_SSL
from mlmonitor.src.factsheets.utils import FactsheetHelpers


def populate_model_entry_facts(
    model_entry_id: str,
    catalog_id: str,
    use_case: str,
    apikey: str = API_KEY,
):
    """
    Create a model use case schema and populate default values using the WKC API.

    Reads the custom fact definitions and default values shipped under
    factsheets_<use_case>/, applies them to the model use case and writes the
    resulting use case content to model_entry_details.json.

    :param model_entry_id: AI Factsheets model use case identifier
    :param catalog_id: AI Factsheets catalog identifier
    :param use_case: use case name selecting the factsheets_<use_case> assets
    :param apikey: IBM Cloud API key used for authentication
    :return: dictionary with model use case details
    """
    modelentry_user = os.path.join(
        f"{PROJECT_ROOT}/factsheets_{use_case}/",
        f"custom_model_entry_facts_{use_case}.csv",
    )

    modelentry_user_df = pd.read_csv(modelentry_user)
    df_modelentryfacts_values = pd.read_csv(
        f"{PROJECT_ROOT}/factsheets_{use_case}"
        f"/custom_model_entry_facts_values_{use_case}.csv",
        sep=";",
    ).loc[:, ["name", "default_value"]]

    fs_helper = FactsheetHelpers(
        api_key=apikey,
        container_type="catalog",
        container_id=catalog_id,
        model_entry_id=model_entry_id,
        username=USERNAME,
        cpd_url=AUTH_ENDPOINT,
    )

    # join type/is_array metadata onto the default values by fact name
    df_modelentryfacts = df_modelentryfacts_values.merge(
        modelentry_user_df.loc[:, ["name", "type", "is_array"]], on="name", how="left"
    )
    fs_helper.define_custom_model_entry_facts(modelentry_user)

    fs_helper.set_model_entry_fact(
        fact_ids=df_modelentryfacts["name"].to_list(),
        fact_values=df_modelentryfacts["default_value"].to_list(),
        is_array=df_modelentryfacts["is_array"].to_list(),
        op="add",
        verify=VERIFY_CP4D_SSL,
    )

    model_entry_details = fs_helper.get_model_entry_content(verify=VERIFY_CP4D_SSL)
    with open(
        os.path.join(
            f"{PROJECT_ROOT}/factsheets_{use_case}", "model_entry_details.json"
        ),
        "w",
    ) as f:
        json.dump(model_entry_details, f, indent=4)

    return model_entry_details

# --- mlmonitor/src/utils/file_utils.py ---
# SPDX-License-Identifier: Apache-2.0
import json
import tempfile
import tarfile
import gzip
import os
import re


def load_json_from_file(file_path):
    """
    Load a JSON file from the specified path and return its content.

    :param file_path: path of the JSON file to load
    :return: the parsed object (typically a dictionary)
    """
    with open(file_path) as json_file:
        return json.load(json_file)


def make_model_tgzfile(output_filename: str, source_dir: str, filename: str) -> str:
    """
    Create a reproducible gzipped tar file containing <filename> from <source_dir>.

    The gzip stream is written with mtime=0 and an empty embedded name so two
    runs on identical input produce byte-identical archives (a plain
    tarfile "w:gz" would embed the current time).

    :param output_filename: name of the .tgz file to create (a relative name is
        created inside source_dir because the function chdirs there)
    :param source_dir: directory containing the file to archive
    :param filename: name of the file to add to the archive
    :return: path of the created archive joined with source_dir
    """
    # BUGFIX: tempfile.mktemp() is deprecated and race-prone; mkstemp creates
    # the temporary file atomically.
    fd, temp_tar_path = tempfile.mkstemp()
    os.close(fd)
    current_dir = os.path.abspath(os.getcwd())
    assert os.path.exists(source_dir)
    os.chdir(source_dir)
    try:
        with tarfile.open(temp_tar_path, "w") as tar:
            tar.add(filename)

        # BUGFIX: the output file handle was previously opened inline and never
        # explicitly closed; context managers close both files deterministically.
        with open(output_filename, "wb") as out_file, gzip.GzipFile(
            filename="", fileobj=out_file, mode="wb", mtime=0
        ) as gzipped_tar, open(temp_tar_path, "rb") as raw_tar:
            gzipped_tar.write(raw_tar.read())
    finally:
        os.remove(temp_tar_path)
        os.chdir(current_dir)
    return os.path.join(source_dir, output_filename)


def version(path: str):
    """Return the stripped content of <path>/version.meta."""
    with open(os.path.join(path, "version.meta"), "r") as v:
        return v.read().strip()


def readme(path: str):
    """Return the content of <path>/README.md."""
    with open(os.path.join(path, "README.md"), "r") as v:
        return v.read()


def _readfile(file_path: str, file_name):
    """Return the lines of <file_path>/<file_name> that start with a word character."""
    with open(os.path.join(file_path, file_name), "r") as v:
        lines = v.readlines()
    return list(filter(lambda x: re.match(r"^\w+", x), lines))


def requirements(path: str):
    """Return the requirement lines (no comments/blanks) of <path>/requirements.txt."""
    return _readfile(file_path=path, file_name="requirements.txt")

# --- mlmonitor/src/utils/validation.py ---
# SPDX-License-Identifier: Apache-2.0
import os
import pandas as pd
import re

from mlmonitor.src import logger

# Regex: 32 hex characters separated by - following xxxxxxxx-xxxx-4xxx-xxxx-xxxxxxxxxxxx
_UUID4_REGEX = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-4[0-9a-fA-F]{3}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)


def is_directory(name: str) -> bool:
    """Return True when <name> is an existing directory."""
    return os.path.isdir(name)


def is_file(name: str) -> bool:
    """Return True when <name> is an existing regular file."""
    return os.path.isfile(name)


def is_csv(path: str) -> bool:
    """
    Ensure <path> corresponds to a valid, readable csv file.

    :param path: candidate file path
    :return: True when the path exists, ends in .csv and parses with pandas
    """
    if not is_file(path):
        return False
    if not path.endswith(".csv"):
        return False
    try:
        pd.read_csv(path, engine="python")
    except pd.errors.ParserError as e:
        logger.error(f"pandas.errors.ParserError {e}")
        return False
    return True


def exists(name):
    """Return True when <name> exists on the filesystem."""
    return os.path.exists(name)


def validate_uuid4(value: str) -> bool:
    """Return True when <value> is a valid UUID version 4 string."""
    return bool(_UUID4_REGEX.match(value))


def validate_hdf_file(path: str) -> bool:
    """Return True when <path> is an existing absolute path ending in .h5."""
    # BUGFIX: the extension dot is now escaped; the previous pattern ".h5$"
    # also accepted names such as "model_xh5".
    extension_match = bool(re.compile(r"^/.*/.*\.h5$").match(path))
    return extension_match and exists(path)

# --- mlmonitor/src/wml/__init__.py ---
# SPDX-License-Identifier: Apache-2.0
from ibm_watson_machine_learning import APIClient as WML_APIClient

from mlmonitor.src import API_KEY, ENV, logger
from mlmonitor.config import get_wml_details

WML_INSTANCE_ID, WML_URL, WML_SPACE_ID = get_wml_details(env=ENV)
WML_CREDENTIALS = {"url": WML_URL}

SUPPORTED_WML_RUNTIMES = {
    "runtime-22.1-py3.10",
    "runtime-22.2-py3.10",
    "spark-mllib_3.3",
    "runtime-23.1-py3.10",
    "tensorflow_rt22.1-py3.9",
    "tensorflow_rt22.1-py3.10",
}

# Cloud Pak for Data (prem) authenticates with username + apikey + version;
# SaaS only needs the apikey.
if ENV == "prem":
    from mlmonitor.src import CP4D_VERSION, USERNAME

    WML_CREDENTIALS["username"] = USERNAME
    WML_CREDENTIALS["apikey"] = API_KEY
    WML_CREDENTIALS["instance_id"] = WML_INSTANCE_ID
    WML_CREDENTIALS["version"] = CP4D_VERSION
elif ENV == "saas":
    WML_CREDENTIALS["apikey"] = API_KEY

# Best-effort client instantiation at import time; wml_client stays None when
# credentials are missing or the service is unreachable.
try:
    logger.debug("Instantiate WML Client")
    wml_client = WML_APIClient(wml_credentials=WML_CREDENTIALS)
    wml_client.set.default_space(WML_SPACE_ID)
except Exception as e:
    wml_client = None
    logger.warning(f"Error to instantiate WML Client : {e}")

# --- mlmonitor/src/wml/custom_metrics_provider_code.py ---
# SPDX-License-Identifier: Apache-2.0
| from mlmonitor.src import API_KEY, WOS_URL 3 | 4 | 5 | def custom_metrics_provider_gcr(parms={"url": WOS_URL, "apikey": API_KEY}): 6 | url = parms.get("url") 7 | apikey = parms.get("apikey") 8 | from custmonitor.metricsprovider.helpers import publish 9 | 10 | # Add your code to compute the custom metrics here. 11 | # Based on use case , you can inlude get_metrics from custmonitor.metrics. 12 | # - credit_risk from custmonitor.metrics.credit_risk import get_metrics 13 | # - churn from custmonitor.metrics.customer_churn import get_metrics 14 | # or other use case 15 | from custmonitor.metrics.use_case_gcr import get_metrics 16 | 17 | def publish_to_monitor(input_data): 18 | response_payload = publish( 19 | input_data=input_data, url=url, apikey=apikey, get_metrics_fn=get_metrics 20 | ) 21 | return response_payload 22 | 23 | return publish_to_monitor 24 | 25 | 26 | def custom_metrics_provider_churn(parms={"url": WOS_URL, "apikey": API_KEY}): 27 | url = parms.get("url") 28 | apikey = parms.get("apikey") 29 | from custmonitor.metricsprovider.helpers import publish 30 | 31 | # Add your code to compute the custom metrics here. 32 | # Based on use case , you can include get_metrics from custmonitor.metrics. 
33 | # - credit_risk from custmonitor.metrics.credit_risk import get_metrics 34 | # - churn from custmonitor.metrics.customer_churn import get_metrics 35 | # or other use case 36 | from custmonitor.metrics.use_case_churn import get_metrics 37 | 38 | def publish_to_monitor(input_data): 39 | response_payload = publish( 40 | input_data=input_data, url=url, apikey=apikey, get_metrics_fn=get_metrics 41 | ) 42 | return response_payload 43 | 44 | return publish_to_monitor 45 | -------------------------------------------------------------------------------- /mlmonitor/src/wml/deploy_custom_metrics_provider.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import ibm_watson_machine_learning 3 | import json 4 | from typing import Callable 5 | 6 | from mlmonitor.src import logger 7 | from mlmonitor.src.wml import wml_client, WML_SPACE_ID 8 | 9 | 10 | def deploy_custom_metrics_provider( 11 | deployment_name: str, 12 | python_function_name: str, 13 | function_code: Callable, 14 | wml_client: ibm_watson_machine_learning.APIClient = wml_client, 15 | wml_space_id: str = WML_SPACE_ID, 16 | runtime: str = "runtime-23.1-py3.10", 17 | hardware_specifications: str = "S", 18 | ) -> str: 19 | """ 20 | deploys a custom metrics provider to the WML instance. 
21 | 22 | :param deployment_name:str: deployment Name 23 | :param python_function_name:str: Name of python function asset to associate with this deployment 24 | :param function_code:Callable: Pass the python function code 25 | :param wml_client:ibm_watson_machine_learning.APIClient=wml_client: Watson Machine learning API Client 26 | :param wml_space_id:str=WML_SPACE_ID: Watson Machine learning deployment space id 27 | :param runtime:str="runtime-22.1-py3.9": WML runtime of the deployment 28 | :param hardware_specifications:str=S: hardware type used to run the python function deployment 29 | :return: The scoring url of the deployed function 30 | """ 31 | assert hardware_specifications in {"XL", "L", "M", "S", "XS", "XXS"} 32 | wml_client.set.default_space(wml_space_id) 33 | 34 | # Create the function meta properties. 35 | software_spec_id = wml_client.software_specifications.get_id_by_name(runtime) 36 | logger.info(software_spec_id) 37 | function_meta_props = { 38 | wml_client.repository.FunctionMetaNames.NAME: python_function_name, 39 | wml_client.deployments.ConfigurationMetaNames.TAGS: ["mlmonitor"], 40 | wml_client.repository.FunctionMetaNames.SOFTWARE_SPEC_ID: software_spec_id, 41 | } 42 | 43 | # Store the Python function. 44 | function_artifact = wml_client.repository.store_function( 45 | meta_props=function_meta_props, function=function_code 46 | ) 47 | function_uid = wml_client.repository.get_function_id(function_artifact) 48 | logger.info( 49 | f"Function {python_function_name} created with Function UID = {function_uid}" 50 | ) 51 | function_details = wml_client.repository.get_details(function_uid) 52 | logger.info( 53 | f"Function {python_function_name} Details:\n{json.dumps(function_details, indent=4)}" 54 | ) 55 | 56 | # Deploy the Python function. 57 | hardware_spec_id = wml_client.hardware_specifications.get_id_by_name( 58 | hardware_specifications 59 | ) 60 | 61 | # Create deployment metadata for the Python function. 
62 | deploy_meta = { 63 | wml_client.deployments.ConfigurationMetaNames.NAME: deployment_name, 64 | wml_client.deployments.ConfigurationMetaNames.TAGS: ["mlmonitor"], 65 | wml_client.deployments.ConfigurationMetaNames.ONLINE: {}, 66 | wml_client.deployments.ConfigurationMetaNames.HARDWARE_SPEC: { 67 | "id": hardware_spec_id 68 | }, 69 | } 70 | # Create a deployment. 71 | deployment_details = wml_client.deployments.create( 72 | function_uid, meta_props=deploy_meta 73 | ) 74 | # Get the scoring URL. 75 | created_at = deployment_details["metadata"]["created_at"] 76 | find_string_pos = created_at.find("T") 77 | scoring_url = wml_client.deployments.get_scoring_href(deployment_details) 78 | if find_string_pos != -1: 79 | current_date = created_at[:find_string_pos] 80 | scoring_url = f"{scoring_url}?version={current_date}" 81 | 82 | return scoring_url 83 | -------------------------------------------------------------------------------- /mlmonitor/src/wml/utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from ibm_watson_machine_learning import APIClient 3 | from typing import Optional 4 | 5 | 6 | def get_deployment_uid_by_name( 7 | wml_client: APIClient, deployment_name: str, key: Optional[str] = "id" 8 | ) -> Optional[str]: 9 | """ 10 | returns the WML deployment uid for a given deployment name. 11 | searching through all deployments and returning the first one that matches the deployment_name. 12 | If no matching deployments are found, it will return None. 
13 | 14 | :param wml_client:APIClient: Watson Machine Learning API Client 15 | :param deployment_name:str: deployment name in WML 16 | :return: The deployment_uid for a given deployment_name 17 | :param key:Optional[str]="id" specific key to return from deployment resource details, return all resource if None 18 | """ 19 | all_deployment = wml_client.deployments.get_details() 20 | deployment_uids = [ 21 | deploy.get("metadata").get(key) if key else deploy 22 | for deploy in all_deployment.get("resources") 23 | if deploy.get("entity").get("name") == deployment_name 24 | ] 25 | 26 | if len(deployment_uids) == 1: 27 | return deployment_uids[0] 28 | elif len(deployment_uids) == 0: 29 | return None 30 | else: 31 | raise ValueError( 32 | f"{len(deployment_uids)} deployments found for name {deployment_name} expecting 1 or 0" 33 | ) 34 | 35 | 36 | def get_model_uid_by_name( 37 | wml_client: APIClient, model_name: str, key: Optional[str] = "id" 38 | ) -> Optional[str]: 39 | """ 40 | returns the WML model uid for a given model name. 41 | searching through all models and returning the first one that matches the model_name. 42 | If no matching deployments are found, it will return None. 
43 | 44 | :param wml_client:APIClient: Watson Machine Learning API Client 45 | :param model_name:str: model name in WML 46 | :param key:Optional[str]="id" specific key to return from model resource details, return all resource if None 47 | :return: The model_uid for a given model_name 48 | """ 49 | all_models = wml_client.repository.get_model_details() 50 | model_uids = [ 51 | model.get("metadata").get(key) if key else model 52 | for model in all_models.get("resources") 53 | if model.get("metadata").get("name") == model_name 54 | ] 55 | 56 | if len(model_uids) == 1: 57 | return model_uids[0] 58 | elif len(model_uids) == 0: 59 | return None 60 | else: 61 | raise ValueError( 62 | f"{len(model_uids)} models found for name {model_name} expecting 1 or 0" 63 | ) 64 | 65 | 66 | def get_function_uid_by_name( 67 | wml_client: APIClient, function_name: str, key: Optional[str] = "id" 68 | ) -> Optional[str]: 69 | """ 70 | returns the unique identifier for a function with the given name. 71 | If no function is found, it returns None. 72 | If multiple functions are found, it raises an error. 
def get_function_uid_by_name(
    wml_client: APIClient, function_name: str, key: Optional[str] = "id"
) -> Optional[str]:
    """
    Return the unique identifier of the WML function with the given name.

    Returns None when no function matches and raises when the name is
    ambiguous (more than one match).

    :param wml_client:APIClient: Watson Machine Learning API Client
    :param function_name:str: Specify the name of the function to be retrieved
    :param key:Optional[str]="id" specific key to return from function resource details, return all resource if None
    :return: The unique id of the function with the given name
    :raises ValueError: when more than one function carries the same name
    """
    resources = wml_client.repository.get_function_details().get("resources")

    # First narrow down to the matching resources, then project the wanted key.
    hits = [fn for fn in resources if fn.get("metadata").get("name") == function_name]

    if len(hits) > 1:
        raise ValueError(
            f"{len(hits)} functions found for name {function_name} expecting 1 or 0"
        )
    if not hits:
        return None
    return hits[0].get("metadata").get(key) if key else hits[0]
def collect_alerts(
    deployment_name: str, monitor_types: tuple, output_model_data: str = PROJECT_ROOT
) -> None:
    """Collect and log current metrics and metric counts for the existing WOS
    monitors attached to the deployed Sagemaker Endpoint with name
    `deployment_name`.

    Parameters
    ----------
    deployment_name : str
        Endpoint Name in Sagemaker whose OpenScale subscription is inspected.
        When empty, the endpoint recorded in model_data.json under
        `output_model_data` is used instead.
    monitor_types : tuple
        type(s) of monitor(s) to inspect. Supported monitors are
        ['quality','fairness','drift','mrm']
    output_model_data : str
        directory containing the model_data.json file with model details
        (only read when deployment_name is not provided)
    Returns
    -------
    None
    """
    if not deployment_name:
        deployment_info = read_model_data(model_dir=output_model_data)
        deployment_name = deployment_info.get("model_endpoint")
    else:
        deployment_info = build_aws_model_data(
            wos_client=wos_client, deployment_name=deployment_name
        )

    subscription_id = deployment_info.get("subscription_id")

    logger.debug(
        f"\nDeployment Name {deployment_name}\n" f"subscription_id {subscription_id}\n"
    )

    existing_monitors = get_exising_monitors(
        wos_client=wos_client, subscription_id=subscription_id
    )
    logger.debug(f"existing monitors\n{json.dumps(existing_monitors,indent=4)}")

    # +/- 2 day window around "now"; hoisted out of the loop since it is
    # identical for every monitor type.
    current_time = datetime.now()
    window_start = current_time - timedelta(days=2)
    window_end = current_time + timedelta(days=2)

    for monitor_type in monitor_types:
        if monitor_id := existing_monitors.get(monitor_type):
            # use the configured module logger, not the root `logging` logger,
            # for consistency with the rest of mlmonitor.src.wos
            logger.debug(f"Evaluate {monitor_type} Monitor ID {monitor_id}")

            metrics_count = wos_client.monitor_instances.get_metrics_count(
                monitor_instance_id=monitor_id,
                start=window_start,
                end=window_end,
            )

            metrics = wos_client.monitor_instances.get_metrics(
                monitor_instance_id=monitor_id,
                start=window_start,
                end=window_end,
            )

            # previously a bare print(); route through the logger instead
            logger.info(f"metrics\n{json.dumps(metrics.result.to_dict(), indent=4)}")

            logger.debug(
                f"metrics_count \n{json.dumps(metrics_count.result.to_dict(),indent=4)}"
            )
if __name__ == "__main__":

    args = parse_args()

    # model_signature.py inside the selected use-case package declares the
    # custom monitor (provider name / display name) that must be torn down.
    model_signature = importlib.import_module(
        f"mlmonitor.{args.source_dir}.model_signature"
    )
    custom_monitor = getattr(model_signature, "custom_monitor")

    wml_client.set.default_space(WML_SPACE_ID)

    # Model for which the Custom monitor is attached (via subscription)
    monitored_model = args.deployment_name

    subscription_ids = get_subscription_id_by_deployment(
        wos_client=wos_client, deployment_name=monitored_model
    )

    data_marts = get_datamart_ids(wos_client=wos_client)
    # NOTE(review): assumes at least one datamart is configured — raises
    # IndexError (not a friendly message) otherwise; confirm intended.
    data_mart_id = data_marts[0]

    if len(subscription_ids) == 1:
        subscription_id = subscription_ids[0]
    else:
        raise ValueError(f"No WOS subscription found for deployment {monitored_model}")

    # CUSTOM MONITOR SPECIFIC NAMES
    provider_name = custom_monitor.get("provider_name")
    # Name Displayed in WOS UI
    custom_monitor_name = custom_monitor.get("custom_monitor_name")

    cleanup_custom_monitor(
        wos_client=wos_client,
        provider_name=provider_name,
        custom_monitor_name=custom_monitor_name,
        subscription_id=subscription_id,
        data_mart_id=data_mart_id,
    )
def configure_explain(
    model_config: ModelConfig,
    deployment_name: str,
    keep_wos_monitor: bool = True,
    data_mart_id: "Optional[str]" = None,
) -> "Optional[str]":
    """
    Configure the Watson OpenScale Explainability monitor for a deployment.

    - Finds the existing WOS subscription for a given deployment, if it exists.
    - Finds the existing Explainability Monitor instance for a given deployment, if it exists.
    - Re-Create or Create Explainability Monitor instance for WOS subscription found
    - parameters specified in model_config.explain_monitor_parameters will be used

    :param model_config: ModelConfig: Configuration parameters for the explain monitor
    :param deployment_name:str: Identify the deployment name
    :param keep_wos_monitor:bool=True: Delete the monitor instance if it already exists and value set to False
    :param data_mart_id:str=None: Specify the datamart to be used will be fetched if not specified
    :return: Explainability monitor instance id (str) created or retrieved;
        None when the explain monitor is not enabled in the configuration
    """

    subscription_ids = get_subscription_id_by_deployment(
        deployment_name=deployment_name, wos_client=wos_client
    )

    if len(subscription_ids) != 1:
        raise ValueError(
            f"{deployment_name} should have exactly one subscription ID => {len(subscription_ids)} found"
        )

    if not data_mart_id:
        data_marts = get_datamart_ids(wos_client=wos_client)

        if len(data_marts) != 1:
            raise ValueError(f"Please Specify datamart to use among {data_marts}")

        data_mart_id = data_marts[0]

    subscription_id = subscription_ids[0]

    if model_config.explain_monitor_enabled:

        # Find Monitors in place for a given SUBSCRIPTION_ID
        explain_monitor_instance_id = get_monitor_id_by_subscription(
            wos_client=wos_client,
            subscription_id=subscription_id,
            monitor_type="explainability",
        )

        # keep_wos_monitor=False => drop the current instance so it is recreated
        if not keep_wos_monitor and explain_monitor_instance_id:
            wos_client.monitor_instances.delete(
                monitor_instance_id=explain_monitor_instance_id, background_mode=False
            )
            explain_monitor_instance_id = None

        if not explain_monitor_instance_id:

            explain_monitor_details = wos_client.monitor_instances.create(
                data_mart_id=data_mart_id,
                background_mode=False,
                monitor_definition_id=wos_client.monitor_definitions.MONITORS.EXPLAINABILITY.ID,
                target=Target(
                    target_type=TargetTypes.SUBSCRIPTION, target_id=subscription_id
                ),
                parameters={"enabled": model_config.explain_monitor_enabled},
            ).result

            explain_monitor_instance_id = explain_monitor_details.metadata.id

            logger.debug(f"explain Monitor ID Created [{explain_monitor_details}]")
        else:
            logger.warning(
                f"Explainability Monitor {explain_monitor_instance_id} Already exists"
            )
        return explain_monitor_instance_id

    else:
        logger.warning("Explain Monitor not Enabled in Configuration")
def configure_fairness(
    model_config: ModelConfig,
    deployment_name: str,
    keep_wos_monitor: bool = True,
    data_mart_id: "Optional[str]" = None,
) -> "Optional[str]":
    """
    Configure the Watson OpenScale Fairness monitor for a deployment.

    - Finds the existing WOS subscription for a given deployment, if it exists.
    - Finds the existing Fairness Monitor instance for a given deployment, if it exists.
    - Re-Create or Create Fairness Monitor instance for WOS subscription found
    - parameters specified in model_config.fairness_monitor_parameters will be used

    :param model_config: ModelConfig: Configuration parameters for the fairness monitor
    :param deployment_name:str: Identify the deployment name
    :param keep_wos_monitor:bool=True: Delete the monitor instance if it already exists and value set to False
    :param data_mart_id:str=None: Specify the datamart to be used will be fetched if not specified
    :return: Fairness monitor instance id (str) created or retrieved;
        None when the fairness monitor is not enabled in the configuration
    """

    subscription_ids = get_subscription_id_by_deployment(
        wos_client=wos_client, deployment_name=deployment_name
    )

    if len(subscription_ids) != 1:
        raise ValueError(
            f"{deployment_name} should have exactly one subscription ID => {len(subscription_ids)} found"
        )

    if not data_mart_id:
        data_marts = get_datamart_ids(wos_client=wos_client)

        if len(data_marts) != 1:
            raise ValueError(f"Please Specify datamart to use among {data_marts}")

        data_mart_id = data_marts[0]

    subscription_id = subscription_ids[0]

    if model_config.fairness_monitor_enabled:

        # Find Monitors in place for a given SUBSCRIPTION_ID
        fairness_monitor_instance_id = get_monitor_id_by_subscription(
            wos_client=wos_client,
            subscription_id=subscription_id,
            monitor_type="fairness",
        )

        # keep_wos_monitor=False => drop the current instance so it is recreated
        if not keep_wos_monitor and fairness_monitor_instance_id:
            wos_client.monitor_instances.delete(
                monitor_instance_id=fairness_monitor_instance_id, background_mode=False
            )
            fairness_monitor_instance_id = None

        if not fairness_monitor_instance_id:
            parameters = model_config.fairness_monitor_parameters

            fairness_monitor_details = wos_client.monitor_instances.create(
                data_mart_id=data_mart_id,
                background_mode=False,
                monitor_definition_id=wos_client.monitor_definitions.MONITORS.FAIRNESS.ID,
                target=Target(
                    target_type=TargetTypes.SUBSCRIPTION, target_id=subscription_id
                ),
                parameters=parameters,
            ).result

            fairness_monitor_instance_id = fairness_monitor_details.metadata.id

            logger.debug(
                f"Fairness Monitor ID Created [{fairness_monitor_instance_id}]"
            )
        else:
            logger.warning(
                f"Fairness Monitor {fairness_monitor_instance_id} Already exists"
            )

        return fairness_monitor_instance_id

    else:
        logger.warning("Fairness Monitor not Enabled in Configuration")
def configure_quality(
    model_config: ModelConfig,
    deployment_name: str,
    keep_wos_monitor: bool = True,
    data_mart_id: Optional[str] = None,
) -> Optional[str]:
    """
    Configure the Watson OpenScale Quality monitor for a deployment.

    - Finds the existing WOS subscription for a given deployment, if it exists.
    - Finds the existing Quality Monitor instance for a given deployment, if it exists.
    - Re-Create or Create Quality Monitor instance for WOS subscription found
    - parameters specified in model_config.quality_monitor_parameters will be used

    :param model_config: ModelConfig: Configuration parameters for the quality monitor
    :param deployment_name:str: Identify the deployment name
    :param keep_wos_monitor:bool=True: Delete the monitor instance if it already exists and value set to False
    :param data_mart_id:str=None: Specify the datamart to be used will be fetched if not specified
    :return: Quality monitor instance id created or retrieved; None when the
        quality monitor is not enabled in the configuration
    :raises ValueError: when the deployment does not have exactly one
        subscription, no datamart can be selected, or the subscription has
        no feedback dataset
    """

    subscription_ids = get_subscription_id_by_deployment(
        wos_client=wos_client, deployment_name=deployment_name
    )

    if len(subscription_ids) != 1:
        raise ValueError(
            f"{deployment_name} should have exactly one subscription ID => {len(subscription_ids)} found"
        )
    subscription_id = subscription_ids[0]

    if not data_mart_id:
        data_marts = get_datamart_ids(wos_client=wos_client)

        if len(data_marts) != 1:
            raise ValueError(f"Please Specify datamart to use among {data_marts}")

        data_mart_id = data_marts[0]

    if not model_config.quality_monitor_enabled:
        logger.warning("Quality Monitor not Enabled in Configuration")
        return None

    # Find Monitors in place for a given SUBSCRIPTION_ID
    quality_monitor_instance_id = get_monitor_id_by_subscription(
        wos_client=wos_client,
        subscription_id=subscription_id,
        monitor_type="quality",
    )

    # keep_wos_monitor=False => drop the current instance so it is recreated
    if not keep_wos_monitor and quality_monitor_instance_id:
        wos_client.monitor_instances.delete(
            monitor_instance_id=quality_monitor_instance_id, background_mode=False
        )
        quality_monitor_instance_id = None

    if not quality_monitor_instance_id:
        parameters = model_config.quality_monitor_parameters

        quality_monitor_details = wos_client.monitor_instances.create(
            data_mart_id=data_mart_id,
            background_mode=False,
            monitor_definition_id=wos_client.monitor_definitions.MONITORS.QUALITY.ID,
            target=Target(
                target_type=TargetTypes.SUBSCRIPTION, target_id=subscription_id
            ),
            parameters=parameters,
        ).result

        quality_monitor_instance_id = quality_monitor_details.metadata.id

        logger.debug(f"Quality Monitor ID Created [{quality_monitor_details}]")
    else:
        logger.warning(
            f"Quality Monitor {quality_monitor_instance_id} Already exists"
        )

    # Datasets FEEDBACK_DATASET backing the quality evaluations.
    feedback_data_sets = (
        wos_client.data_sets.list(
            type=DataSetTypes.FEEDBACK,
            target_target_id=subscription_id,
            target_target_type=TargetTypes.SUBSCRIPTION,
        )
        .result.data_sets
    )
    # Bug fix: the original indexed `[0]` before asserting, so an empty list
    # raised IndexError and the assert never fired; asserts are also stripped
    # under `python -O`. Validate explicitly before indexing.
    if not feedback_data_sets:
        raise ValueError(
            f"No feedback dataset found for subscription {subscription_id}"
        )
    feedback_data_set_id = feedback_data_sets[0].metadata.id

    logger.info(f"Feedback data set id {feedback_data_set_id}")
    wos_client.data_sets.print_records_schema(data_set_id=feedback_data_set_id)
    return quality_monitor_instance_id
def create_datamart(
    wos_client: APIClient, schema_name: str = None, db_credentials: dict = None
):
    """
    Return the id of the WOS data mart to use, creating one when none exists.

    :param wos_client: Watson OpenScale API client
    :param schema_name: database schema for an external data mart; required
        when db_credentials is provided
    :param db_credentials: PostgreSQL connection details (keys: hostname,
        username, password, database, port, sslmode, certificate_base64);
        when None an internal (WOS-managed) data mart is created
    :return: data mart id — the first existing one when any is already
        configured, otherwise the id of the newly created data mart
    """
    data_marts = get_datamart_ids(wos_client=wos_client)
    if len(data_marts) == 0:
        if db_credentials is not None:
            if schema_name is None:
                raise ValueError("Please specify the SCHEMA_NAME and rerun the cell")

            print("Setting up external datamart")
            # External data mart backed by a customer-managed PostgreSQL
            # database; connection is forced over SSL.
            added_data_mart_result = wos_client.data_marts.add(
                background_mode=False,
                name="WOS Data Mart",
                description="Data Mart created by WOS tutorial notebook",
                database_configuration=DatabaseConfigurationRequest(
                    database_type=DatabaseType.POSTGRESQL,
                    credentials=PrimaryStorageCredentialsLong(
                        hostname=db_credentials["hostname"],
                        username=db_credentials["username"],
                        password=db_credentials["password"],
                        db=db_credentials["database"],
                        port=db_credentials["port"],
                        ssl=True,
                        sslmode=db_credentials["sslmode"],
                        certificate_base64=db_credentials["certificate_base64"],
                    ),
                    location=LocationSchemaName(schema_name=schema_name),
                ),
            ).result

        else:
            print("Setting up internal Data Mart")
            # Internal data mart: storage is provisioned and managed by WOS.
            added_data_mart_result = wos_client.data_marts.add(
                background_mode=False,
                name="WOS Data Mart",
                description="Data Mart created in PyCharm client",
                internal_database=True,
            ).result

        return added_data_mart_result.metadata.id

    else:
        # Reuse: at most one data mart is expected, so take the first.
        print(
            f"found {len(data_marts)} data mart : {data_marts} , Using existing Data Mart id {data_marts[0]}"
        )
        return data_marts[0]
def evaluate_monitor(deployment_name: str, monitor_types: tuple) -> None:
    """Trigger a new evaluation of the existing WOS monitors for
    the deployed Sagemaker Endpoint with name `deployment_name`.

    Parameters
    ----------
    deployment_name : str
        this indicates the Endpoint Name in Sagemaker for which an OpenScale
        subscription was created
    monitor_types : tuple
        type(s) of monitor(s) to evaluate. Supported monitors are
        ['quality','fairness','drift','mrm']

    Raises
    ------
    ValueError
        when the deployment does not have exactly one subscription or a
        requested monitor type is unknown

    Returns
    -------
    None
    """

    subscription_ids = get_subscription_id_by_deployment(
        wos_client=wos_client, deployment_name=deployment_name
    )

    if len(subscription_ids) != 1:
        raise ValueError(
            f"{deployment_name} should have exactly one subscription ID => {len(subscription_ids)} found"
        )

    subscription_id = subscription_ids[0]

    logger.debug(
        f"\nDeployment Name {deployment_name}\n" f"subscription_id {subscription_id}\n"
    )

    existing_monitors = get_exising_monitors(
        wos_client=wos_client, subscription_id=subscription_id
    )
    logger.debug(f"existing monitors\n{json.dumps(existing_monitors, indent=4)}")

    current_time = datetime.now()
    known_monitor_types = set(existing_monitors) | {
        "quality",
        "fairness",
        "explainability",
        "drift",
        "mrm",
    }
    for monitor_type in monitor_types:
        # `assert` would be silently stripped under `python -O`;
        # validate the requested type explicitly instead.
        if monitor_type not in known_monitor_types:
            raise ValueError(
                f"unsupported monitor type {monitor_type}, expected one of {sorted(known_monitor_types)}"
            )
        # mrm and explainability monitors cannot be run on demand here
        if (monitor_id := existing_monitors.get(monitor_type)) and monitor_type not in [
            "mrm",
            "explainability",
        ]:
            logger.info(f"Evaluate {monitor_type} Monitor ID {monitor_id}")
            evaluate(
                wos_client=wos_client,
                monitor_instance_id=monitor_id,
                monitor_type=monitor_type,
            )

            # report metric counts over a +/- 2 day window around now
            metrics_count = wos_client.monitor_instances.get_metrics_count(
                monitor_instance_id=monitor_id,
                start=(current_time - timedelta(days=2)),
                end=(current_time + timedelta(days=2)),
            )
            logger.debug(
                f"metrics_count \n{json.dumps(metrics_count.result.to_dict(), indent=4)}"
            )
def create_integrated_system_credentials_cloud(url: str, api_key: str):
    """Build the bearer-token credential payload for a custom metrics
    provider running on IBM Cloud (IAM apikey grant).

    :param url: IAM token generation endpoint
    :param api_key: IBM Cloud API key exchanged for the bearer token
    :return: credentials dict accepted by IntegratedSystems.add
    """
    # form-encoded IAM apikey grant body
    token_request_body = (
        "grant_type=urn:ibm:params:oauth:grant-type:apikey"
        f"&response_type=cloud_iam&apikey={api_key}"
    )
    token_info = {
        "url": url,  # update the token generation here
        "headers": {
            "Content-type": "application/x-www-form-urlencoded"
        },  # update the headers here
        "payload": token_request_body,  # update the payload here
        "method": "POST",  # update the http method here
    }
    return {"auth_type": "bearer", "token_info": token_info}
def create_custom_metric_definitions(
    custom_metrics_names: tuple, custom_metrics_thresholds: tuple
):
    """Build the WOS metric and tag request objects for a custom monitor
    definition.

    :param custom_metrics_names: metric names, paired positionally with thresholds
    :param custom_metrics_thresholds: lower-limit threshold per metric
    :return: (metrics, tags) — MonitorMetricRequest list and MonitorTagRequest list
    """
    # Update the tag values if you want to fetch the metrics by tags
    TAGS = ["region"]
    TAG_DESCRIPTION = ["customer geographical region"]

    metrics = []
    for metric_name, lower_limit in zip(
        custom_metrics_names, custom_metrics_thresholds
    ):
        # each metric alerts when its value drops below the lower limit
        threshold = MetricThreshold(
            type=MetricThresholdTypes.LOWER_LIMIT, default=lower_limit
        )
        metrics.append(MonitorMetricRequest(name=metric_name, thresholds=[threshold]))

    # Comment the below tags code if there are no tags to be created
    tags = [MonitorTagRequest(name=TAGS[0], description=TAG_DESCRIPTION[0])]

    return metrics, tags
def get_monitor_id_by_subscription(
    wos_client: APIClient, subscription_id: str, monitor_type: str = "quality"
):
    """Return the monitor instance id of the given type attached to a
    subscription, or None when that monitor is not configured.

    :param wos_client: Watson OpenScale API client
    :param subscription_id: WOS subscription to inspect
    :param monitor_type: monitor definition id, e.g. "quality" or "fairness"
    :raises ValueError: for an unknown monitor type, or when more than one
        instance of that type targets the subscription
    """
    supported_monitors = [
        definition.metadata.id
        for definition in wos_client.monitor_definitions.list().result.monitor_definitions
    ]
    if monitor_type not in supported_monitors:
        raise ValueError(
            f"monitor_type must be in {supported_monitors} => {monitor_type} passed "
        )

    matches = []
    for instance in wos_client.monitor_instances.list().result.monitor_instances:
        target = instance.entity.target
        if (
            target.target_type == "subscription"
            and target.target_id == subscription_id
            and instance.entity.monitor_definition_id == monitor_type
        ):
            matches.append(instance.metadata.id)

    if not matches:
        return None
    if len(matches) == 1:
        return matches[0]
    raise ValueError(
        f"Number of {monitor_type} monitors found for subscription_id {subscription_id} !=1 => {len(matches)} "
    )
/mlmonitor/tests/aws_model_use_case/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/tests/aws_model_use_case/__init__.py -------------------------------------------------------------------------------- /mlmonitor/tests/aws_model_use_case/config/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/tests/aws_model_use_case/config/.gitkeep -------------------------------------------------------------------------------- /mlmonitor/tests/aws_model_use_case/config/credentials_test.cfg: -------------------------------------------------------------------------------- 1 | { 2 | "saas": { 3 | "apikey": "xxxxx", 4 | "iam_url": "https://iam.bluemix.net/oidc/token", 5 | "wml_url": "https://us-south.ml.cloud.ibm.com", 6 | "default_space": "xxxxx", 7 | "cos_resource_crn" : "xxxxx", 8 | "cos_endpoint" : "https://s3.us-east.cloud-object-storage.appdomain.cloud", 9 | "bucket_name" : "xxxxx", 10 | "WML_INSTANCE_ID": "Machine Learning-xxxxx", 11 | "ibm_auth_endpoint" : "https://iam.bluemix.net/oidc/token" 12 | }, 13 | "aws" : 14 | { 15 | "access_key": "xxxxx", 16 | "role": "xxxxx", 17 | "secret_key": "xxxxx", 18 | "region_name": "xxxxx" 19 | }, 20 | "mlops_orchestrator" : 21 | { 22 | "username": "xxxxx", 23 | "password": "xxxxx", 24 | "scoring_url": "xxxxx" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /mlmonitor/tests/aws_model_use_case/outputs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/tests/aws_model_use_case/outputs/.gitkeep -------------------------------------------------------------------------------- 
# SPDX-License-Identifier: Apache-2.0
import unittest
from unittest import TestCase
from os import path
import os

from mlmonitor.src.model.config_aws import (
    SageMakerModelConfig,
    # SageMakerModelConfigEncoder,
)
from mlmonitor.src.model.use_case_aws import (
    SageMakerModelUseCase,
    # SageMakerModelUseCaseEncoder,
)

# File names used by the save/load round-trip tests.
# Bug fix: these were previously joined as os.path.join(base, "/name") -
# a component starting with "/" makes os.path.join discard `base`, so every
# save/exists/remove call targeted the filesystem root ("/...json") instead
# of the ./outputs directory.
CONFIG_FILE = "SageMakerModelConfig.json"
USE_CASE_FILE = "SageMakerModelUseCase.json"


class TestAWS_ModelConfig_ModelUseCase(TestCase):
    """Round-trip tests for SageMakerModelConfig / SageMakerModelUseCase."""

    def setUp(self):
        # all artifacts produced by these tests land in ./outputs
        self.output_path = "./outputs"
        if not os.path.exists(self.output_path):
            os.mkdir(self.output_path)

    def test_load_model_config(self):
        # train instance type is read from each use case's model signature
        mdl_cfg = SageMakerModelConfig("use_case_churn")
        self.assertEqual(mdl_cfg.train_instance, "ml.m4.xlarge")

        mdl_cfg = SageMakerModelConfig("use_case_gcr")
        self.assertEqual(mdl_cfg.train_instance, "ml.m4.xlarge")

        mdl_cfg = SageMakerModelConfig("use_case_mnist_tf")
        self.assertEqual(mdl_cfg.train_instance, "ml.c4.xlarge")

        mdl_cfg = SageMakerModelConfig("use_case_mnist_pt")
        self.assertEqual(mdl_cfg.train_instance, "ml.c4.xlarge")

    def test_load_model_use_case(self):
        CATALOG_ID = "11111111-1111-4111-1111-111111111111"
        MODEL_ENTRY_ID = "11111111-1111-4111-1111-111111111111"

        mdl_uc = SageMakerModelUseCase(
            "use_case_churn", catalog_id=CATALOG_ID, model_entry_id=MODEL_ENTRY_ID
        )
        self.assertEqual(mdl_uc.ibm_key_name, "IBM_API_KEY_MLOPS")
        self.assertEqual(mdl_uc.catalog_id, CATALOG_ID)
        self.assertEqual(mdl_uc.model_entry_id, MODEL_ENTRY_ID)

    def test_save_model_config(self):
        mdl_cfg = SageMakerModelConfig("use_case_churn")
        mdl_cfg.save_config(file=os.path.join(self.output_path, CONFIG_FILE))

        self.assertTrue(path.exists(os.path.join(self.output_path, CONFIG_FILE)))

    def test_save_model_use_case(self):
        CATALOG_ID = "11111111-1111-4111-1111-111111111111"
        MODEL_ENTRY_ID = "11111111-1111-4111-1111-111111111111"

        mdl_uc = SageMakerModelUseCase(
            "use_case_churn", catalog_id=CATALOG_ID, model_entry_id=MODEL_ENTRY_ID
        )
        mdl_uc.is_trained = True
        # NOTE(review): save_use_case() is called without an explicit target;
        # this assumes its default output location is ./outputs - confirm
        # against SageMakerModelUseCase, or pass file= if supported.
        mdl_uc.save_use_case()

        self.assertTrue(path.exists(os.path.join(self.output_path, USE_CASE_FILE)))

        mdl_uc_new = SageMakerModelUseCase(
            source_dir="use_case_churn",
            file=os.path.join(self.output_path, USE_CASE_FILE),
        )
        self.assertTrue(mdl_uc_new.is_trained)

    def test_load_model_config_from_file(self):
        mdl_cfg = SageMakerModelConfig("use_case_churn")
        mdl_cfg.train_instance = "ml.m4.xs"
        mdl_cfg.save_config(file=os.path.join(self.output_path, CONFIG_FILE))

        mdl_cfg_new = SageMakerModelConfig(
            source_dir="use_case_churn",
            file=os.path.join(self.output_path, CONFIG_FILE),
        )

        self.assertEqual(mdl_cfg_new.train_instance, "ml.m4.xs")

    def tearDown(self):
        print("Model Config Cleaned up")
        # remove any artifact the tests may have written
        for artifact in (CONFIG_FILE, USE_CASE_FILE):
            artifact_path = os.path.join(self.output_path, artifact)
            if path.exists(artifact_path):
                os.remove(artifact_path)


if __name__ == "__main__":
    unittest.main()
4 | 5 | import json 6 | 7 | 8 | class TestAWSResources(TestCase): 9 | def test_aws_secret_manager(self): 10 | from mlmonitor.src import key, secret, region, API_KEY 11 | from mlmonitor.use_case_churn.utils import _get_secret 12 | 13 | secret_name = "IBM_KEYS" 14 | API_KEY_NAME = "IBM_API_KEY_MLOPS" 15 | 16 | aws_secrets = _get_secret( 17 | secret_name=secret_name, 18 | aws_access_key_id=key, 19 | aws_secret_access_key=secret, 20 | region_name=region, 21 | ) 22 | 23 | API_KEY_SECRET_MANAGER = json.loads(aws_secrets).get(API_KEY_NAME) 24 | 25 | self.assertEqual(API_KEY, API_KEY_SECRET_MANAGER) 26 | 27 | def tearDown(self): 28 | print("AWS Resources tests completed") 29 | 30 | 31 | if __name__ == "__main__": 32 | unittest.main() 33 | -------------------------------------------------------------------------------- /mlmonitor/tests/it/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/tests/it/__init__.py -------------------------------------------------------------------------------- /mlmonitor/use_case_churn/.amlignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | model_signature.json 5 | README.md 6 | __pycache__ 7 | logs 8 | README.md 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /mlmonitor/use_case_churn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/use_case_churn/__init__.py -------------------------------------------------------------------------------- /mlmonitor/use_case_churn/inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import os 3 | import pickle as 
pkl 4 | 5 | import numpy as np 6 | import sagemaker_xgboost_container.encoder as xgb_encoders 7 | 8 | 9 | def model_fn(model_dir): 10 | """ 11 | Deserialize and return fitted model. 12 | """ 13 | model_file = "xgboost-model" 14 | booster = pkl.load(open(os.path.join(model_dir, model_file), "rb")) 15 | return booster 16 | 17 | 18 | def input_fn(request_body, request_content_type): 19 | """ 20 | The SageMaker XGBoost model server receives the request data body and the content type, 21 | and invokes the `input_fn`. 22 | 23 | Return a DMatrix (an object that can be passed to predict_fn). 24 | """ 25 | if request_content_type == "text/libsvm": 26 | return xgb_encoders.libsvm_to_dmatrix(request_body) 27 | else: 28 | raise ValueError(f"Content type {request_content_type} is not supported.") 29 | 30 | 31 | def predict_fn(input_data, model): 32 | """ 33 | SageMaker XGBoost model server invokes `predict_fn` on the return value of `input_fn`. 34 | 35 | Return a two-dimensional NumPy array where the first columns are predictions 36 | and the remaining columns are the feature contributions (SHAP values) for that prediction. 37 | """ 38 | prediction = model.predict(input_data) 39 | feature_contribs = model.predict( 40 | input_data, pred_contribs=True, validate_features=False 41 | ) 42 | output = np.hstack((prediction[:, np.newaxis], feature_contribs)) 43 | return output 44 | 45 | 46 | def output_fn(predictions, content_type): 47 | """ 48 | After invoking predict_fn, the model server invokes `output_fn`. 
def input_fn(request_body, request_content_type):
    """
    The SageMaker XGBoost model server receives the request data body and the
    content type, and invokes the `input_fn`.

    Only "text/csv" is supported: the body is parsed as comma-separated rows
    of floats, one observation per line.

    :param request_body: raw request payload
    :param request_content_type: MIME type of the payload
    :return: xgb.DMatrix ready to be passed to predict_fn
    :raises ValueError: for any content type other than "text/csv"
    """
    log.info(f"Content type {request_content_type} received.")
    # Bug fix: the "text/libsvm" branch previously *returned* a ValueError
    # instance instead of raising it, so the exception object propagated into
    # predict_fn. Every unsupported content type now raises consistently.
    if request_content_type != "text/csv":
        raise ValueError(f"Content type {request_content_type} is not supported.")
    inputs = np.array(
        [[float(y) for y in row.split(",")] for row in request_body.split("\n")]
    )
    return xgb.DMatrix(inputs)
def make_model_tgzfile(output_filename: str, source_dir: str):
    """Package the xgboost-* model artifacts of `source_dir` into a
    reproducible .tar.gz archive at `output_filename`.

    The tar stream is gzipped manually (rather than tarfile mode "w:gz") so
    the gzip header mtime can be pinned to 0, making the archive byte-stable
    across runs.

    :param output_filename: path of the .tar.gz file to create
    :param source_dir: directory containing the model files to archive
    """
    # Resolve both paths before chdir - the original used them relative to a
    # changed working directory, which broke for relative arguments.
    source_dir = os.path.abspath(source_dir)
    output_filename = os.path.abspath(output_filename)
    # tempfile.mkstemp replaces the insecure, deprecated tempfile.mktemp:
    # the temp file is created atomically instead of only reserving a name.
    fd, temp_tar_path = tempfile.mkstemp()
    os.close(fd)
    current_dir = os.path.abspath(os.getcwd())
    assert os.path.exists(source_dir)
    # chdir so tar members are archived with bare (relative) names
    os.chdir(source_dir)
    try:
        with tarfile.open(temp_tar_path, "w") as tar:
            for filename in os.listdir(source_dir):
                if filename.startswith("xgboost"):
                    tar.add(filename)

        # Bug fix: the output file object was previously opened inline and
        # never closed (GzipFile does not close its fileobj); both handles
        # are now managed by `with`.
        with open(output_filename, "wb") as out_f, open(
            temp_tar_path, "rb"
        ) as raw_tar:
            with gzip.GzipFile(
                filename="", fileobj=out_f, mode="wb", mtime=0
            ) as gzipped_tar:
                gzipped_tar.write(raw_tar.read())
    finally:
        os.remove(temp_tar_path)
        os.chdir(current_dir)
-------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/.amlignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | model_signature.json 5 | README.md 6 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/use_case_gcr/__init__.py -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/inference_aws_gcr.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import joblib 3 | import os 4 | import pandas as pd 5 | import logging 6 | import json 7 | from io import StringIO 8 | from utils import read_columns 9 | 10 | try: 11 | from sagemaker_inference import decoder 12 | 13 | except ModuleNotFoundError as e: 14 | print(f"running locally : {e}") 15 | 16 | logging.basicConfig(level=logging.INFO) 17 | log = logging.getLogger(__name__) 18 | 19 | 20 | def model_fn(model_dir): 21 | """Deserialized and return fitted model 22 | 23 | Note that this should have the same name as the serialized model in the main method 24 | """ 25 | model = joblib.load(os.path.join(model_dir, "model.joblib")) 26 | return model 27 | 28 | 29 | def predict_fn(input_data, model): 30 | log.info("Called predict_fn ") 31 | log.info(input_data) 32 | COLUMNS = read_columns() 33 | df = pd.DataFrame(input_data, columns=COLUMNS) 34 | pred = model.predict(df) 35 | scores = model.predict_proba(df) 36 | records = [ 37 | { 38 | "predicted_label": int(pred), 39 | "score": prob[pred], 40 | } 41 | for pred, prob in zip(pred, scores) 42 | ] 43 | log.info("predictions") 44 | log.info(records) 45 | return records 46 | 47 | 48 | def 
input_fn(request_body, request_content_type): 49 | """An input_fn that loads a csv""" 50 | log.info("Called input_fn " + request_content_type) 51 | log.info(request_body) 52 | if request_content_type == "text/csv": 53 | COLUMNS = read_columns() 54 | data = StringIO(request_body) 55 | df = pd.read_csv(data, sep=",", header=None, names=COLUMNS) 56 | log.info("returning input for prediction") 57 | return df.to_numpy() 58 | elif request_content_type == "application/json": 59 | jsondata = json.loads(request_body) 60 | arr = [] 61 | for jsonitem in jsondata["instances"]: 62 | log.info(jsonitem["features"]) 63 | arr.append(jsonitem["features"]) 64 | return arr 65 | else: 66 | # Handle other content-types here or raise an Exception 67 | # if the content type is not supported. 68 | np_array = decoder.decode(request_body, request_content_type) 69 | return np_array 70 | 71 | 72 | def output_fn(prediction, content_type): 73 | log.info(f"output_fn:\n{prediction}") 74 | return {"predictions": prediction} 75 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/inference_azure_gcr.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import json 3 | import pandas as pd 4 | import joblib 5 | import os 6 | 7 | 8 | def init(): 9 | # TODO pick model name from Container environment variables set to "model.joblib" 10 | global model 11 | print(f"loading model form {os.getenv('AZUREML_MODEL_DIR')}") 12 | model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "model.joblib") 13 | print(model_path) 14 | model = joblib.load(model_path) 15 | 16 | 17 | def run(input_data): 18 | print("start inference") 19 | try: 20 | if type(input_data) is str: 21 | dict_data = json.loads(input_data) 22 | print(f"input data (str):\n{dict_data}") 23 | else: 24 | dict_data = input_data 25 | print(f"input data (json):\n{dict_data}") 26 | 27 | data = 
pd.DataFrame.from_dict(dict_data["input"]) 28 | print(data) 29 | predictions = model.predict(data) 30 | print(predictions) 31 | scores = model.predict_proba(data).tolist() 32 | records = [ 33 | {"Scored Labels": int(pred), "Scored Probabilities": prob} 34 | for pred, prob in zip(predictions, scores) 35 | ] 36 | result = {"output": records} 37 | print(f"output:data:\n{result}") 38 | 39 | return result 40 | except Exception as e: 41 | result = str(e) 42 | # return error message back to the client 43 | print(f"output:error:\n{result}") 44 | return {"error": result} 45 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/model_perturbation.json: -------------------------------------------------------------------------------- 1 | { 2 | "drift": { 3 | "single_column_1": { 4 | "total_records": 100, 5 | "ratios": [0.01, 0.05, 0.1, 0.2, 0.3], 6 | "target_column": "LoanAmount", 7 | "perturbation_fn": "x + 15000" 8 | }, 9 | "single_column_2": { 10 | "total_records": 100, 11 | "ratios": [0.1, 0.2, 0.4, 0.8, 1.0], 12 | "target_column": "LoanAmount", 13 | "perturbation_fn": "x + 15000" 14 | }, 15 | "double_column_1": { 16 | "total_records": 100, 17 | "ratios": [0.1, 0.2, 0.3, 0.6], 18 | "source_column": "LoanPurpose", 19 | "source_cond": "car_used", 20 | "target_column": "LoanAmount", 21 | "perturbation_fn": "x + np.mean(x)*100" 22 | } 23 | }, 24 | "fairness_perturbations": { 25 | "scenario1_args": { 26 | "col": "col", 27 | "total_records": 100, 28 | "ratio": "scenario", 29 | "operation": 10000 30 | }, 31 | "scenario2_args": { 32 | "col": "col", 33 | "total_records": 100, 34 | "ratio": "scenario", 35 | "operation": 10000 36 | } 37 | }, 38 | "quality_perturbations": { 39 | "scenario1_args": { 40 | "col": "col", 41 | "total_records": 100, 42 | "ratio": "scenario", 43 | "operation": 10000 44 | }, 45 | "scenario2_args": { 46 | "col": "col", 47 | "total_records": 100, 48 | "ratio": "scenario", 49 | "operation": 10000 50 | } 51 | } 
52 | } 53 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/requirements-local.txt: -------------------------------------------------------------------------------- 1 | python-dotenv 2 | sagemaker 3 | boto3 4 | scikit-learn==1.0.2 5 | pygit2 6 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython==8.12.0 2 | ibm-aigov-facts-client==1.0.73 3 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/test_inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import os 3 | import pandas as pd 4 | import json 5 | from os.path import dirname 6 | 7 | from use_case_gcr.inference_aws_gcr import ( 8 | model_fn, 9 | input_fn, 10 | predict_fn, 11 | output_fn, 12 | ) 13 | from use_case_gcr.utils import read_columns 14 | 15 | if __name__ == "__main__": 16 | 17 | COLUMNS = read_columns() 18 | PROJECT_ROOT = dirname(dirname(__file__)) 19 | DATA_ROOT = os.path.join(PROJECT_ROOT, "datasets") 20 | MODEL_ROOT = os.path.join(PROJECT_ROOT, "models") 21 | 22 | scoring_df = pd.read_csv( 23 | os.path.join(DATA_ROOT, "gcr_explicit_payload_logging.csv") 24 | ) 25 | 26 | scoring_payload = { 27 | "instances": [{"features": x} for x in scoring_df[COLUMNS].values.tolist()] 28 | } 29 | model = model_fn(f"{MODEL_ROOT}/model_gcr") 30 | print(scoring_payload) 31 | input_payload = input_fn(json.dumps(scoring_payload), "application/json") 32 | outputs = predict_fn(input_payload, model) 33 | preds = output_fn(outputs, "application/json") 34 | print(preds) 35 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/test_train.py: 
-------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import json 3 | import logging 4 | import os 5 | import shutil 6 | import argparse 7 | 8 | from use_case_gcr.train_gcr import ( 9 | init_logger, 10 | init_external_fs_client, 11 | train, 12 | save_fs_model, 13 | fetch_dataset, 14 | ) 15 | from dotenv import load_dotenv 16 | 17 | PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) 18 | GCR_DIR = os.path.dirname(os.path.abspath(__file__)) 19 | 20 | 21 | class Env: 22 | def __init__(self): 23 | # simulate container env 24 | os.environ["SM_MODEL_DIR"] = f"{PROJECT_ROOT}/models/model_gcr" 25 | os.environ["SM_CHANNEL_TRAIN"] = f"{PROJECT_ROOT}/datasets/gcr" 26 | os.environ["SM_CHANNEL_TESTING"] = f"{PROJECT_ROOT}/datasets/gcr" 27 | 28 | 29 | if __name__ == "__main__": 30 | load_dotenv() 31 | Env() 32 | 33 | for dir in ["./mlruns", "./logs"]: 34 | if os.path.isdir(dir): 35 | shutil.rmtree(dir) 36 | 37 | parser = argparse.ArgumentParser() 38 | # fmt: off 39 | # CP4D specific arguments 40 | parser.add_argument("--catalog-id", type=str) # used by train_sagemaker_job,train_az_ml_job 41 | parser.add_argument("--model-entry-id", type=str) # used by train_sagemaker_job,train_az_ml_job 42 | parser.add_argument("--ibm-key-name", type=str, default="IBM_API_KEY_MLOPS") # used by train_sagemaker_job,train_az_ml_job 43 | parser.add_argument("--cp4d-env", type=str, default=os.getenv("ENV", "saas"), choices=["saas", "prem"], ) # used by train_sagemaker_job,train_az_ml_job 44 | parser.add_argument("--cp4d-username", type=str, default=None) # used by train_sagemaker_job,train_az_ml_job 45 | parser.add_argument("--cp4d-url", type=str, default=None) # used by train_sagemaker_job,train_az_ml_job 46 | parser.add_argument("--model-name", type=str, default="gcr-model") 47 | parser.add_argument("--grc-model-name", type=str, default=None) 48 | # Training Job specific arguments (Sagemaker,Azure,WML) default SageMaker 
envar or Azure expected values 49 | parser.add_argument("--model-dir", type=str, default=os.getenv("SM_MODEL_DIR", "./outputs")) 50 | parser.add_argument("--output-data-dir", type=str, default=os.getenv("SM_OUTPUT_DATA_DIR", "./outputs")) 51 | 52 | parser.add_argument("--train", type=str, default=os.getenv("SM_CHANNEL_TRAIN")) 53 | parser.add_argument("--test", type=str, default=os.getenv("SM_CHANNEL_TEST")) 54 | parser.add_argument("--validation", type=str, default=os.getenv("SM_CHANNEL_VALIDATION")) 55 | 56 | parser.add_argument("--region-name", type=str, default="ca-central-1") 57 | # fmt: on 58 | 59 | log_level = int(os.getenv("LOG_LEVEL", logging.INFO)) 60 | args = parser.parse_args() 61 | parameters = vars(args) 62 | logger = init_logger(level=log_level) 63 | ( 64 | facts_client, 65 | props, 66 | EXPERIMENT_NAME, 67 | EXPERIMENT_ID, 68 | tags, 69 | params, 70 | ) = init_external_fs_client(logger=logger, **parameters) 71 | logger.debug(f"test_train parameters:\n{json.dumps(parameters,indent=4)}") 72 | 73 | logger.info(f"train {os.listdir(parameters.get('train'))}") 74 | train_data = fetch_dataset(data_path=parameters.get("train")) 75 | target_label_name = "Risk" 76 | features = [feat for feat in train_data.columns if feat != target_label_name] 77 | 78 | train(model_dir=parameters.get("model_dir"), logger=logger, train_data=train_data) 79 | 80 | save_fs_model( 81 | logger=logger, 82 | catalog_id=parameters.get("catalog_id"), 83 | model_entry_id=parameters.get("model_entry_id"), 84 | facts_client=facts_client, 85 | experiment_id=EXPERIMENT_ID, 86 | experiment_name=EXPERIMENT_NAME, 87 | tags=tags, 88 | inputs=None, 89 | outputs=None, 90 | tdataref=None, 91 | params=params, 92 | grc_model_name=parameters.get("grc_model_name"), 93 | ) 94 | -------------------------------------------------------------------------------- /mlmonitor/use_case_gcr/utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: 
# SPDX-License-Identifier: Apache-2.0
import base64
from botocore.exceptions import ClientError
import boto3
import json
import os

# ClientError codes documented for the 'GetSecretValue' API - see
# https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
_KNOWN_SECRET_ERRORS = frozenset(
    {
        "DecryptionFailureException",  # KMS key could not decrypt the secret
        "InternalServiceErrorException",  # server-side error
        "InvalidParameterException",
        "InvalidRequestException",
        "ResourceNotFoundException",
        "AccessDeniedException",
    }
)


def _get_secret(
    secret_name: str,
    aws_access_key_id: str = None,
    aws_secret_access_key: str = None,
    region_name: str = "ca-central-1",
):
    """
    Retrieve a secret value from AWS Secrets Manager.

    :param secret_name: name of the secret in AWS Secrets Manager
    :param aws_access_key_id: AWS access key id (defaults to ambient credentials)
    :param aws_secret_access_key: AWS secret access key
    :param region_name: AWS region hosting the secret
    :return: the secret as a string when stored as text (``SecretString``),
        otherwise the base64-decoded binary payload
    :raises botocore.exceptions.ClientError: every 'GetSecretValue' failure is
        rethrown; unexpected error codes are printed first for diagnosis
    """
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name="secretsmanager",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        region_name=region_name,
    )

    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except ClientError as e:
        # Bug fix: the previous if/elif chain re-raised only the known error
        # codes and merely *printed* anything else, after which the function
        # silently returned None - contradicting its own "we rethrow the
        # exception by default" comment. All failures are now rethrown;
        # unknown codes are printed first to aid diagnosis.
        if e.response["Error"]["Code"] not in _KNOWN_SECRET_ERRORS:
            print(e.response)
        raise

    if "SecretString" in get_secret_value_response:
        return get_secret_value_response["SecretString"]
    return base64.b64decode(get_secret_value_response["SecretBinary"])


def read_columns():
    """
    Return the feature column names declared in model_signature.json.

    Reads /opt/ml/code/model_signature.json when running inside the SageMaker
    container, otherwise the copy shipped next to this module.

    :return: list of feature column names from signature.feature_columns
    """
    filepath = (
        "/opt/ml/code/model_signature.json"
        if os.path.exists("/opt/ml/code/model_signature.json")
        else os.path.join(os.path.dirname(__file__), "model_signature.json")
    )
    with open(filepath) as json_file:
        signature = json.load(json_file)

    return signature.get("signature").get("feature_columns")
/mlmonitor/use_case_huggingface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/use_case_huggingface/__init__.py -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_pt/.amlignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | model_signature.json 5 | README.md 6 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_pt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/use_case_mnist_pt/__init__.py -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_pt/model_signature.json: -------------------------------------------------------------------------------- 1 | { 2 | "__class__": "SageMakerModelConfig", 3 | "signature": { 4 | "feature_columns": null, 5 | "class_label": null, 6 | "prediction_field": "prediction", 7 | "probability_fields": ["probability"], 8 | "categorical_columns": null, 9 | "problem_type": "multiclass", 10 | "data_type": "unstructured_image", 11 | "description": "Hand written digit detection CNN model using Pytorch framework" 12 | }, 13 | "datasets": { 14 | "data_dir": "mnist", 15 | "training_data": "mnist", 16 | "validation_data": "mnist", 17 | "test_data": "mnist", 18 | "fetch_data_function": "generate_data" 19 | }, 20 | "aws_runtime": { 21 | "train_script": "pytorch_train.py", 22 | "inference_script": "pytorch_inference.py", 23 | "train_framework": "pytorch", 24 | "train_framework_version": "1.9.0", 25 | "train_py_version": "py38", 26 | "inference_framework": "pytorch", 27 | "inference_framework_version": "1.9.0", 28 | 
# Based on https://github.com/pytorch/examples/blob/master/mnist/main.py
class ConvNet(nn.Module):
    """Small two-conv-layer CNN for 28x28 single-channel digit images.

    Produces per-class log-probabilities (log-softmax) over 10 classes.
    """

    def __init__(self):
        super().__init__()
        # NOTE: attribute names are part of the checkpoint format
        # (state_dict keys) and must not be renamed.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-softmax class scores for a (N, 1, 28, 28) batch."""
        # conv -> 2x2 max-pool -> relu, twice (dropout2d on the second conv)
        out = F.relu(F.max_pool2d(self.conv1(x), 2))
        out = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(out)), 2))
        # flatten: 20 channels x 4 x 4 spatial = 320 features
        out = out.view(-1, 320)
        out = F.dropout(F.relu(self.fc1(out)), training=self.training)
        return F.log_softmax(self.fc2(out), dim=1)
        # return F.softmax(x, dim=1)
["prediction", "probability"] 57 | values = list(zip([int(x) for x in classes], probabilities)) 58 | output = {"fields": fields, "values": values} 59 | return json.dumps({"predictions": [output]}) 60 | 61 | 62 | # defining model and loading weights to it. 63 | def model_fn(model_dir): 64 | model = ConvNet() 65 | with open(os.path.join(model_dir, "model.pth"), "rb") as f: 66 | model.load_state_dict(torch.load(f)) 67 | model.to(device).eval() 68 | return model 69 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_pt/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | ibm-aigov-facts-client==1.0.73 3 | ibm_watson_machine_learning 4 | ibm_watson_openscale 5 | pygit2 6 | torchinfo 7 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_pt/test_inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import argparse 3 | import numpy as np 4 | import random 5 | import matplotlib.pyplot as plt 6 | from use_case_mnist_pt.pytorch_inference import ( 7 | model_fn, 8 | input_fn, 9 | predict_fn, 10 | output_fn, 11 | ) 12 | from utils import mnist_to_numpy 13 | import json 14 | 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument( 18 | "--inference-samples", 19 | type=int, 20 | default=2, 21 | metavar="NSAMPLES", 22 | help="Number of samples to be sent for inference", 23 | ) 24 | 25 | 26 | if __name__ == "__main__": 27 | args = parser.parse_args() 28 | print(f"Running Inference for Pytorch Model {args}") 29 | data_dir = "/tmp/data" 30 | 31 | X, Y = mnist_to_numpy(data_dir, train=False) 32 | 33 | # randomly sample 16 images to inspect 34 | mask = random.sample(range(X.shape[0]), args.inference_samples) 35 | samples = X[mask] 36 | labels = Y[mask] 37 | # plot the images 38 | fig, axs = plt.subplots(nrows=1, 
ncols=args.inference_samples, figsize=(16, 1)) 39 | 40 | for i, ax in enumerate(axs): 41 | ax.imshow(samples[i]) 42 | plt.show() 43 | print(samples.shape) 44 | samples = np.expand_dims(samples, axis=1) 45 | 46 | print(samples.shape) 47 | 48 | inputs = {"input_data": [{"values": samples.tolist()}]} 49 | 50 | model = model_fn("../models") 51 | print(samples.shape, samples.dtype) 52 | print(json.dumps(inputs)) 53 | input_tensors = input_fn(json.dumps(inputs), "application/json") 54 | print(input_tensors.shape) 55 | outputs = predict_fn(input_tensors, model) 56 | print(outputs.shape) 57 | preds = output_fn(outputs, "application/json") 58 | print(preds) 59 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_pt/test_train.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import os 3 | import shutil 4 | from utils import parse_args 5 | from use_case_mnist_pt.pytorch_train import train 6 | from dotenv import load_dotenv 7 | from use_case_mnist_tf.utils import download_from_s3 8 | 9 | PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) 10 | 11 | 12 | class Env: 13 | def __init__(self): 14 | # simulate container env 15 | os.environ["SM_MODEL_DIR"] = os.path.join(PROJECT_ROOT, "models", "model_mnist") 16 | os.environ["SM_CHANNEL_TRAINING"] = f"{PROJECT_ROOT}/datasets/mnist" 17 | os.environ["SM_CHANNEL_TESTING"] = f"{PROJECT_ROOT}/datasets/mnist" 18 | os.environ["SM_HOSTS"] = '["algo-1"]' 19 | os.environ["SM_CURRENT_HOST"] = "algo-1" 20 | os.environ["SM_NUM_GPUS"] = "0" 21 | 22 | 23 | if __name__ == "__main__": 24 | 25 | load_dotenv() 26 | Env() 27 | 28 | download_from_s3(f"{PROJECT_ROOT}/datasets/mnist", True) 29 | download_from_s3(f"{PROJECT_ROOT}/datasets/mnist", False) 30 | 31 | args = parse_args() 32 | # Clean up MLFLOW and lighting_logs 33 | for dir in ["./mlruns", "./lighting_logs", "./logs"]: 34 | if os.path.isdir(dir): 35 | 
shutil.rmtree(dir) 36 | 37 | args = vars(args) 38 | print(args) 39 | train(args) 40 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_pt/torch_utils.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import numpy as np 3 | import gzip 4 | import os 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | def normalize(x, axis): 10 | eps = np.finfo(float).eps 11 | mean = np.mean(x, axis=axis, keepdims=True) 12 | # avoid division by zero 13 | std = np.std(x, axis=axis, keepdims=True) + eps 14 | return (x - mean) / std 15 | 16 | 17 | def convert_to_tensor(data_dir, images_file, labels_file): 18 | """Byte string to torch tensor""" 19 | with gzip.open(os.path.join(data_dir, images_file), "rb") as f: 20 | images = ( 21 | np.frombuffer(f.read(), np.uint8, offset=16) 22 | .reshape(-1, 28, 28) 23 | .astype(np.float32) 24 | ) 25 | 26 | with gzip.open(os.path.join(data_dir, labels_file), "rb") as f: 27 | labels = np.frombuffer(f.read(), np.uint8, offset=8).astype(np.int64) 28 | 29 | # normalize the images 30 | images = normalize(images, axis=(1, 2)) 31 | 32 | # add channel dimension (depth-major) 33 | images = np.expand_dims(images, axis=1) 34 | 35 | # to torch tensor 36 | images = torch.tensor(images, dtype=torch.float32) 37 | labels = torch.tensor(labels, dtype=torch.int64) 38 | return images, labels 39 | 40 | 41 | class MNIST(Dataset): 42 | def __init__(self, data_dir, train=True): 43 | 44 | if train: 45 | images_file = "train-images-idx3-ubyte.gz" 46 | labels_file = "train-labels-idx1-ubyte.gz" 47 | else: 48 | images_file = "t10k-images-idx3-ubyte.gz" 49 | labels_file = "t10k-labels-idx1-ubyte.gz" 50 | 51 | self.images, self.labels = convert_to_tensor(data_dir, images_file, labels_file) 52 | 53 | def __len__(self): 54 | return len(self.labels) 55 | 56 | def __getitem__(self, idx): 57 | return self.images[idx], 
self.labels[idx] 58 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/.amlignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | model_signature.json 5 | README.md 6 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/README.md: -------------------------------------------------------------------------------- 1 | ## How to train,deploy in AWS and monitor in WOS Pytorch Lightning model on MNIST dataset 2 | 3 | ### 1 CNN Pytorch Lighting ⚡️ model 4 | 5 | #### 2.2.1 Train locally 6 | 7 | In order to benefit from the Autolog feature of in Factsheets client , we have created the same CNN model but trained with a Pytorch Lightning Trainer. 8 | 9 | You can invoke [test_train.py](./use_case_ptlt/test_train.py) for Pytorch Lightning model training as follow with the right parameters to trigger [ptlt_train.py](./ptlt_train.py) for this model. 10 | 11 | ```shell 12 | # python ./use_case_ptlt/test_train.py --model cnn --epochs 10 13 | 2022/05/20 14:00:22 INFO : Experiment successfully created with ID 1 and name aws-sagemaker-mnist-cnn-pt-lt-20220520-2000 14 | 2022/05/20 14:00:22 INFO : Autolog enabled Successfully 15 | Current Experiment aws-sagemaker-mnist-cnn-pt-lt-20220520-2000 ID 1 16 | INFO:ibm_aigov_facts_client.store.autolog.autolog_utils:Autolog enabled Successfully 17 | rank_zero_warn(Epoch 0: 92%|█████████▏| 860/939 [00:12<00:01, 68.93it/s, loss=1.58, v_num=59] 18 | Validation: 0it [00:00, ?it/s] 19 | Validation DataLoader 0: 0%| | 0/79 [00:00Tip : you can monitor your training job in AWS Cloud Watch 46 | 47 | #### 2.2.2 Review Training Facts 48 | 49 | 1. Make sure that all training facts are properly collected - Auto Log should have captured addtionnal facts 50 | 51 | ![auto log training ptlt](../../pictures/ptlt_training_facts_autolog.png) 52 | 53 | 2. 
Add this model to the model inventory 54 | 55 | ​ *View all catalogs > Platform Asset Catalog > aws-sagemaker-mnist-cnn-pytorch-Ymd-HM >Asset > Track this model* 56 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/use_case_mnist_ptlt/__init__.py -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/model_signature.json: -------------------------------------------------------------------------------- 1 | { 2 | "__class__": "SageMakerModelConfig", 3 | "signature": { 4 | "feature_columns": null, 5 | "class_label": null, 6 | "prediction_field": "prediction", 7 | "probability_fields": ["probability"], 8 | "categorical_columns": null, 9 | "problem_type": "multiclass", 10 | "data_type": "unstructured_image", 11 | "description": "Hand written digit detection CNN model using Pytorch Lightning framework" 12 | }, 13 | "datasets": { 14 | "training_data": "mnist", 15 | "validation_data": "mnist", 16 | "test_data": "mnist", 17 | "fetch_data_function": "generate_data" 18 | }, 19 | "aws_runtime": { 20 | "train_script": "ptlt_train.py", 21 | "inference_script": "ptlt_inference.py", 22 | "train_framework": "pytorch", 23 | "train_framework_version": "1.9.0", 24 | "train_py_version": "py38", 25 | "inference_framework": "pytorch", 26 | "inference_framework_version": "1.9.0", 27 | "inference_py_version": "py38", 28 | "train_instance": "ml.c4.xlarge", 29 | "inference_instance": "ml.c4.xlarge", 30 | "prefix": "DEMO-mnist-mlmonitor", 31 | "job_name": "sm-mnist-pytorch", 32 | "serializer": "json", 33 | "deserializer": "json" 34 | }, 35 | "wml_runtime": {}, 36 | "hyperparameters": { 37 | "batch-size": 128, 38 | "epochs": 12, 39 | "learning-rate": 1e-3, 40 | "log-interval": 100 41 | 
}, 42 | "quality_monitor": { 43 | "enabled": true, 44 | "parameters": {"min_feedback_data_size": 10}, 45 | "thresholds": [ 46 | {"metric_id": "accuracy", "type": "lower_limit", "lower_limit": 0.80} 47 | ] 48 | }, 49 | "fairness_monitor": { 50 | "enabled": false 51 | }, 52 | "drift_monitor": { 53 | "enabled": false 54 | }, 55 | "explain_monitor": { 56 | "enabled": false 57 | }, 58 | "mrm_monitor": { 59 | "enabled": false 60 | }, 61 | "custom_monitor": { 62 | "enabled": false 63 | }, 64 | "source_dir": "use_case_mnist_ptlt" 65 | } 66 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/ptlt_inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import json 3 | import logging 4 | import os 5 | 6 | import torch 7 | 8 | from use_case_mnist_ptlt.pt_models import PytorchLightning_CNN_MNIST 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | log = logging.getLogger(__name__) 12 | 13 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 14 | 15 | 16 | # defining model and loading weights to it. 
17 | def model_fn(model_dir): 18 | checkpoint = torch.load(os.path.join(model_dir, "model_checkpoint.ckpt")) 19 | hparams = checkpoint["hyper_parameters"] 20 | ptlt_model = PytorchLightning_CNN_MNIST(arguments=hparams.get("arguments")) 21 | model = ptlt_model.model 22 | model.to(device).eval() 23 | return model 24 | 25 | 26 | # data preprocessing 27 | def input_fn(request_body, request_content_type): 28 | assert request_content_type == "application/json" 29 | data = json.loads(request_body).get("input_data")[0].get("values") 30 | data = torch.tensor(data, dtype=torch.float32, device=device) 31 | 32 | if data.shape[3] == 1: # 1 channel at the end of tensor 33 | log.info(f"input_fn {data.shape} channel last permuting to channel first") 34 | data = data.permute(0, 3, 1, 2) 35 | else: 36 | log.info( 37 | f"input_fn {data.shape} channel first sent (already in expected format)" 38 | ) 39 | 40 | log.info(f"input_fn data shape sent to model {data.shape}") 41 | return data 42 | 43 | 44 | # inference 45 | def predict_fn(input_object, model): 46 | with torch.no_grad(): 47 | prediction = model(input_object) 48 | # prediction = model.forward(input_object) 49 | return prediction 50 | 51 | 52 | # postprocess 53 | def output_fn(predictions, content_type): 54 | assert content_type == "application/json" 55 | res = predictions.cpu().numpy().tolist() 56 | return json.dumps(res) 57 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | ibm-aigov-facts-client==1.0.73 3 | ibm_watson_machine_learning 4 | ibm_watson_openscale 5 | pytorch-lightning==1.6.1 6 | pygit2 7 | torchinfo 8 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/test_inference.py: -------------------------------------------------------------------------------- 1 | # 
SPDX-License-Identifier: Apache-2.0 2 | import argparse 3 | import json 4 | import numpy as np 5 | import random 6 | import matplotlib.pyplot as plt 7 | 8 | from utils import mnist_to_numpy 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--model-type", 13 | type=str, 14 | default="pt-lt", 15 | choices=["cnn", "fc", "pytorch", "tf-cnn"], 16 | metavar="MDLTYPE", 17 | help="type of model to run inference", 18 | ) 19 | parser.add_argument( 20 | "--inference-samples", 21 | type=int, 22 | default=2, 23 | metavar="NSAMPLES", 24 | help="Number of samples to be sent for inference", 25 | ) 26 | 27 | if __name__ == "__main__": 28 | args = parser.parse_args() 29 | print(f"Running Inference for Pytorch Model {args}") 30 | data_dir = "/tmp/data" 31 | model_type = args.model_type 32 | print(f"model type {model_type}") 33 | 34 | X, Y = mnist_to_numpy(data_dir, train=False) 35 | 36 | # randomly sample 16 images to inspect 37 | mask = random.sample(range(X.shape[0]), args.inference_samples) 38 | samples = X[mask] 39 | labels = Y[mask] 40 | # plot the images 41 | fig, axs = plt.subplots(nrows=1, ncols=args.inference_samples, figsize=(16, 1)) 42 | 43 | for i, ax in enumerate(axs): 44 | ax.imshow(samples[i]) 45 | plt.show() 46 | 47 | if model_type not in ["cnn", "fc"]: 48 | raise ValueError("model type should be set to cnn or fc") 49 | 50 | from use_case_mnist_ptlt.ptlt_inference import ( 51 | model_fn, 52 | input_fn, 53 | predict_fn, 54 | output_fn, 55 | ) 56 | 57 | samples = np.expand_dims(samples, axis=1) 58 | inputs = {"input_data": [{"values": samples.tolist()}]} 59 | model = model_fn("../models") 60 | print(samples.shape, samples.dtype) 61 | print(json.dumps(inputs)) 62 | input_tensors = input_fn(json.dumps(inputs), "application/json") 63 | print(input_tensors.shape) 64 | outputs = predict_fn(input_tensors, model) 65 | print(outputs.shape) 66 | preds = output_fn(outputs, "application/json") 67 | print(preds) 68 | 69 | predictions = np.argmax( 70 | 
np.array(json.loads(preds), dtype=np.float32), axis=1 71 | ).tolist() 72 | print("Predicted digits: ", predictions) 73 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/test_train.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import os 3 | import shutil 4 | 5 | from ptlt_train import train 6 | from utils import parse_args 7 | from use_case_mnist_ptlt.utils import download_from_s3, download_npy_from_s3 8 | from dotenv import load_dotenv 9 | 10 | dirname = os.path.dirname(os.path.abspath(__file__)) 11 | PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__)) 12 | 13 | 14 | class Env: 15 | def __init__(self): 16 | # simulate container env 17 | os.environ["SM_MODEL_DIR"] = f"{PROJECT_ROOT}/models/model_mnist" 18 | os.environ["SM_CHANNEL_TRAINING"] = f"{PROJECT_ROOT}/datasets/mnist" 19 | os.environ["SM_CHANNEL_TESTING"] = f"{PROJECT_ROOT}/datasets/mnist" 20 | os.environ["SM_HOSTS"] = '["algo-1"]' 21 | os.environ["SM_CURRENT_HOST"] = "algo-1" 22 | os.environ["SM_NUM_GPUS"] = "0" 23 | print(os.environ["AWS_SECRET_ACCESS_KEY"]) 24 | 25 | 26 | if __name__ == "__main__": 27 | 28 | load_dotenv() 29 | Env() 30 | download_from_s3(f"{PROJECT_ROOT}/datasets/mnist", True) 31 | download_from_s3(f"{PROJECT_ROOT}/datasets/mnist", False) 32 | 33 | download_npy_from_s3(f"{PROJECT_ROOT}/datasets/mnist", True) 34 | download_npy_from_s3(f"{PROJECT_ROOT}/datasets/mnist", False) 35 | args = parse_args() 36 | # Clean up MLFLOW and lighting_logs 37 | for dir in ["./mlruns", "./lighting_logs", "./logs"]: 38 | if os.path.isdir(dir): 39 | shutil.rmtree(dir) 40 | 41 | args = vars(args) 42 | print(args) 43 | train(args) 44 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_ptlt/torch_utils.py: -------------------------------------------------------------------------------- 1 | # 
SPDX-License-Identifier: Apache-2.0 2 | import numpy as np 3 | import gzip 4 | import os 5 | import torch 6 | from torch.utils.data import Dataset 7 | 8 | 9 | def normalize(x, axis): 10 | eps = np.finfo(float).eps 11 | mean = np.mean(x, axis=axis, keepdims=True) 12 | # avoid division by zero 13 | std = np.std(x, axis=axis, keepdims=True) + eps 14 | return (x - mean) / std 15 | 16 | 17 | def convert_to_tensor(data_dir, images_file, labels_file): 18 | """Byte string to torch tensor""" 19 | with gzip.open(os.path.join(data_dir, images_file), "rb") as f: 20 | images = ( 21 | np.frombuffer(f.read(), np.uint8, offset=16) 22 | .reshape(-1, 28, 28) 23 | .astype(np.float32) 24 | ) 25 | 26 | with gzip.open(os.path.join(data_dir, labels_file), "rb") as f: 27 | labels = np.frombuffer(f.read(), np.uint8, offset=8).astype(np.int64) 28 | 29 | # normalize the images 30 | images = normalize(images, axis=(1, 2)) 31 | 32 | # add channel dimension (depth-major) 33 | images = np.expand_dims(images, axis=1) 34 | 35 | # to torch tensor 36 | images = torch.tensor(images, dtype=torch.float32) 37 | labels = torch.tensor(labels, dtype=torch.int64) 38 | return images, labels 39 | 40 | 41 | class MNIST(Dataset): 42 | def __init__(self, data_dir, train=True): 43 | 44 | if train: 45 | images_file = "train-images-idx3-ubyte.gz" 46 | labels_file = "train-labels-idx1-ubyte.gz" 47 | else: 48 | images_file = "t10k-images-idx3-ubyte.gz" 49 | labels_file = "t10k-labels-idx1-ubyte.gz" 50 | 51 | self.images, self.labels = convert_to_tensor(data_dir, images_file, labels_file) 52 | 53 | def __len__(self): 54 | return len(self.labels) 55 | 56 | def __getitem__(self, idx): 57 | return self.images[idx], self.labels[idx] 58 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/.amlignore: -------------------------------------------------------------------------------- 1 | # Environments 2 | .env 3 | .venv 4 | model_signature.json 5 | README.md 6 | 
-------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/mlmonitor/use_case_mnist_tf/__init__.py -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import json 3 | from collections import namedtuple 4 | import numpy as np 5 | import requests 6 | import logging 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | log = logging.getLogger(__name__) 10 | 11 | Context = namedtuple( 12 | "Context", 13 | "model_name, model_version, method, rest_uri, grpc_uri, " 14 | "custom_attributes, request_content_type, accept_header", 15 | ) 16 | 17 | 18 | def handler(data, context): 19 | """Handle request. 
20 | Args: 21 | data (obj): the request data 22 | context (Context): an object containing request and configuration details 23 | Returns: 24 | (bytes, string): data to return to client, (optional) response content type 25 | """ 26 | processed_input = _process_input(data, context) 27 | response = requests.post(context.rest_uri, data=processed_input) 28 | return _process_output(response, context) 29 | 30 | 31 | def _process_input(data, context): 32 | decoded_data = data.read().decode("utf-8") 33 | log.info( 34 | f"_process_input request_content_type {context.request_content_type} decoded_data:\n{decoded_data} " 35 | ) 36 | 37 | ####################################################################################### 38 | # Handle request format from Watson Openscale feedback Data (application/json) # 39 | ####################################################################################### 40 | # data = [json.loads(f"[{x.split(';')[0]}]") for x in decoded_data.split('\n')] 41 | 42 | if context.request_content_type == "application/json": 43 | try: 44 | data = json.loads(decoded_data).get("input_data")[0].get("values") 45 | 46 | return json.dumps({"instances": data}) 47 | except Exception as e: 48 | raise ValueError( 49 | f'Exception _process_input json "{e}"' 50 | f"Input format {type(decoded_data)}" 51 | f"{decoded_data}" 52 | ) from e 53 | 54 | elif context.request_content_type == "text/csv": 55 | 56 | try: 57 | data = [json.loads(f"{x.split(';')[0]}") for x in decoded_data.split("\n")] 58 | return json.dumps({"instances": data}) 59 | except Exception as e: 60 | raise ValueError( 61 | f'Exception _process_input csv "{e}"' 62 | f"Input format {type(decoded_data)}" 63 | f"{decoded_data}" 64 | ) from e 65 | 66 | raise ValueError( 67 | '{{"error": "unsupported content type {}"}}'.format( 68 | context.request_content_type or "unknown" 69 | ) 70 | ) 71 | 72 | 73 | def _process_output(data, context): 74 | if data.status_code != 200: 75 | raise ValueError( 76 | f"ValueError 
in _process_output : {data.content.decode('utf-8')}" 77 | ) 78 | 79 | d = data.content.decode("utf-8") 80 | prediction = json.loads(d).get("predictions") 81 | classes = np.argmax(np.array(prediction), axis=1).tolist() 82 | values = list(zip([int(x) for x in classes], prediction)) 83 | fields = ["prediction", "probability"] 84 | output = {"fields": fields, "values": values} 85 | response = {"predictions": [output]} 86 | ##################################################################### 87 | # Handle response format required by Watson Openscale Evaluate # 88 | ##################################################################### 89 | # values = list(zip([int(x) for x in classes], prediction)) 90 | # fields = ['_original_prediction', '_original_probability'] 91 | # fields = ['prediction', 'probability'] 92 | # output = {'fields': fields, 'values': values } 93 | # response = {'predictions': [output]} 94 | 95 | if context.request_content_type == "application/json": 96 | response_content_type = "application/json" 97 | log.info( 98 | f"_process_output request_content_type 'application/json' response:\n{json.dumps(response)} " 99 | ) 100 | return json.dumps(response), response_content_type 101 | 102 | elif context.request_content_type == "text/csv": 103 | response_content_type = "application/json" 104 | log.info( 105 | f"_process_output request_content_type 'text/csv' response:\n{json.dumps(response)} " 106 | ) 107 | return json.dumps(response), response_content_type 108 | 109 | raise ValueError( 110 | '{{"error": "unsupported content type {}"}}'.format( 111 | context.request_content_type or "unknown" 112 | ) 113 | ) 114 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/model_signature.json: -------------------------------------------------------------------------------- 1 | { 2 | "__class__": "SageMakerModelConfig", 3 | "signature": { 4 | "feature_columns": null, 5 | "class_label": null, 6 | 
"prediction_field": "prediction", 7 | "probability_fields": ["probability"], 8 | "categorical_columns": null, 9 | "problem_type": "multiclass", 10 | "data_type": "unstructured_image", 11 | "description": "Hand written digit detection CNN model using TF Keras framework" 12 | }, 13 | "datasets": { 14 | "data_dir": "mnist", 15 | "training_data": "mnist", 16 | "validation_data": "mnist", 17 | "test_data": "mnist", 18 | "fetch_data_module": "utils", 19 | "fetch_data_function": "generate_data" 20 | }, 21 | "aws_runtime": { 22 | "train_script": "tf_cnn_train.py", 23 | "inference_script": "tf_cnn_inference.py", 24 | "train_framework": "tensorflow", 25 | "train_framework_version": "2.8.0", 26 | "train_py_version": "py39", 27 | "inference_framework": "tensorflow", 28 | "inference_framework_version": "2.8.0", 29 | "inference_py_version": "py39", 30 | "train_instance": "ml.c4.xlarge", 31 | "inference_instance": "ml.c4.xlarge", 32 | "prefix": "DEMO-mnist-mlmonitor", 33 | "job_name": "sm-mnist-cnn-tf-keras", 34 | "platform": "aws", 35 | "serializer": "json", 36 | "deserializer": "json" 37 | }, 38 | "wml_runtime": { 39 | "train_module": "tf_cnn_train", 40 | "train_method": "train_wml", 41 | "inference_instance": "runtime-23.1-py3.10", 42 | "inference_framework": "tensorflow", 43 | "inference_framework_version": "2.9", 44 | "inference_script": "tf_cnn_inference.py" 45 | }, 46 | "hyperparameters": { 47 | "batch-size": 128, 48 | "epochs": 5 49 | }, 50 | "quality_monitor": { 51 | "enabled": true, 52 | "parameters": { 53 | "min_feedback_data_size": 10 54 | }, 55 | "thresholds": [{ 56 | "metric_id": "accuracy", 57 | "type": "lower_limit", 58 | "lower_limit": 0.80 59 | }] 60 | }, 61 | "fairness_monitor": { 62 | "enabled": false 63 | }, 64 | "drift_monitor": { 65 | "enabled": false 66 | }, 67 | "explain_monitor": { 68 | "enabled": true 69 | }, 70 | "mrm_monitor": { 71 | "enabled": false 72 | }, 73 | "custom_monitor": { 74 | "enabled": false 75 | }, 76 | "source_dir": "use_case_mnist_tf" 
77 | } 78 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/requirements-local.txt: -------------------------------------------------------------------------------- 1 | protobuf==3.20.2 2 | tensorflow==2.12.0 3 | pygit2 4 | python-dotenv 5 | botocore 6 | boto3 7 | matplotlib 8 | sagemaker 9 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | ibm_watson_machine_learning 3 | ibm_watson_openscale 4 | ibm-aigov-facts-client==1.0.73 5 | pygit2 6 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/test_inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import argparse 3 | import json 4 | import random 5 | import matplotlib.pyplot as plt 6 | 7 | from use_case_mnist_tf.tf_cnn_inference_bkp import ( 8 | model_fn, 9 | input_fn, 10 | predict_fn, 11 | output_fn, 12 | ) 13 | 14 | from dotenv import load_dotenv 15 | from utils import mnist_to_numpy 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument( 19 | "--model_type", 20 | type=str, 21 | default="pt-lt", 22 | choices=["cnn", "fc", "pytorch", "tf-cnn"], 23 | metavar="MDLTYPE", 24 | help="type of model to run inference", 25 | ) 26 | parser.add_argument( 27 | "--inference-samples", 28 | type=int, 29 | default=2, 30 | metavar="NSAMPLES", 31 | help="Number of samples to be sent for inference", 32 | ) 33 | 34 | if __name__ == "__main__": 35 | load_dotenv() 36 | args = parser.parse_args() 37 | print(f"Running Inference for Tensorflow Keras Model {args}") 38 | data_dir = "/tmp/data" 39 | model_type = args.model_type 40 | print(f"model type {model_type}") 41 | 42 | X, Y = mnist_to_numpy(data_dir, train=False) 43 | 44 | # randomly sample 
16 images to inspect 45 | mask = random.sample(range(X.shape[0]), args.inference_samples) 46 | samples = X[mask] 47 | labels = Y[mask] 48 | # plot the images 49 | fig, axs = plt.subplots(nrows=1, ncols=args.inference_samples, figsize=(16, 1)) 50 | 51 | for i, ax in enumerate(axs): 52 | ax.imshow(samples[i]) 53 | plt.show() 54 | 55 | samples = samples.reshape(samples.shape[0], 28, 28, 1) 56 | inputs = {"input_data": [{"values": samples.tolist()}]} 57 | 58 | model = model_fn("../models/model_mnist/") 59 | print(samples.shape, samples.dtype) 60 | print(json.dumps(inputs)) 61 | input_tensors = input_fn(json.dumps(inputs), "application/json") 62 | print(input_tensors.shape) 63 | outputs = predict_fn(input_tensors, model) 64 | print(outputs.shape) 65 | preds = output_fn(outputs, "application/json") 66 | print(preds) 67 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/test_train.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import os 3 | import shutil 4 | from os.path import dirname 5 | from dotenv import load_dotenv 6 | 7 | # from tf_train import train_model as train_job # Fully connected model 8 | from tf_cnn_train import train_job 9 | 10 | # from tf_train import _parse_args # Fully connected model 11 | from tf_cnn_train import _parse_args 12 | 13 | from use_case_mnist_tf.utils import download_from_s3 14 | 15 | PROJECT_ROOT = dirname(dirname(__file__)) 16 | 17 | 18 | class Env: 19 | def __init__(self): 20 | # simulate container env 21 | os.environ["SM_MODEL_DIR"] = os.path.join(PROJECT_ROOT, "models", "model_mnist") 22 | os.environ["SM_CHANNEL_TRAINING"] = f"{PROJECT_ROOT}/datasets/mnist" 23 | os.environ["SM_CHANNEL_TESTING"] = f"{PROJECT_ROOT}/datasets/mnist" 24 | os.environ["SM_HOSTS"] = '["algo-1"]' 25 | os.environ["SM_CURRENT_HOST"] = "algo-1" 26 | os.environ["SM_NUM_GPUS"] = "0" 27 | 28 | 29 | if __name__ == "__main__": 
30 | 31 | download_from_s3(f"{PROJECT_ROOT}/datasets/mnist", True) 32 | download_from_s3(f"{PROJECT_ROOT}/datasets/mnist", False) 33 | 34 | load_dotenv() 35 | Env() 36 | 37 | # Clean up MLFLOW and lighting_logs 38 | for dir in ["./mlruns", "./lighting_logs", "./logs"]: 39 | if os.path.isdir(dir): 40 | shutil.rmtree(dir) 41 | 42 | args, unknown = _parse_args() 43 | 44 | args = vars(args) 45 | print(args) 46 | train_job(args) 47 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/tf_cnn_inference.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import json 3 | from collections import namedtuple 4 | import numpy as np 5 | import requests 6 | import logging 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | log = logging.getLogger(__name__) 10 | 11 | Context = namedtuple( 12 | "Context", 13 | "model_name, model_version, method, rest_uri, grpc_uri, " 14 | "custom_attributes, request_content_type, accept_header", 15 | ) 16 | 17 | 18 | def handler(data, context): 19 | """Handle request. 
20 | Args: 21 | data (obj): the request data 22 | context (Context): an object containing request and configuration details 23 | Returns: 24 | (bytes, string): data to return to client, (optional) response content type 25 | """ 26 | processed_input = _process_input(data, context) 27 | response = requests.post(context.rest_uri, data=processed_input) 28 | return _process_output(response, context) 29 | 30 | 31 | def _process_input(data, context): 32 | decoded_data = data.read().decode("utf-8") 33 | log.info( 34 | f"_process_input request_content_type {context.request_content_type} decoded_data:\n{decoded_data} " 35 | ) 36 | 37 | ####################################################################################### 38 | # Handle request format from Watson Openscale feedback Data (application/json) # 39 | ####################################################################################### 40 | # data = [json.loads(f"[{x.split(';')[0]}]") for x in decoded_data.split('\n')] 41 | 42 | if context.request_content_type == "application/json": 43 | try: 44 | data = json.loads(decoded_data).get("input_data")[0].get("values") 45 | 46 | return json.dumps({"instances": data}) 47 | except Exception as e: 48 | raise ValueError( 49 | f'Exception _process_input json "{e}"' 50 | f"Input format {type(decoded_data)}" 51 | f"{decoded_data}" 52 | ) from e 53 | 54 | elif context.request_content_type == "text/csv": 55 | 56 | try: 57 | data = [json.loads(f"{x.split(';')[0]}") for x in decoded_data.split("\n")] 58 | return json.dumps({"instances": data}) 59 | except Exception as e: 60 | raise ValueError( 61 | f'Exception _process_input csv "{e}"' 62 | f"Input format {type(decoded_data)}" 63 | f"{decoded_data}" 64 | ) from e 65 | 66 | raise ValueError( 67 | '{{"error": "unsupported content type {}"}}'.format( 68 | context.request_content_type or "unknown" 69 | ) 70 | ) 71 | 72 | 73 | def _process_output(data, context): 74 | if data.status_code != 200: 75 | raise ValueError( 76 | f"ValueError 
in _process_output : {data.content.decode('utf-8')}" 77 | ) 78 | 79 | d = data.content.decode("utf-8") 80 | prediction = json.loads(d).get("predictions") 81 | classes = np.argmax(np.array(prediction), axis=1).tolist() 82 | values = list(zip([int(x) for x in classes], prediction)) 83 | fields = ["prediction", "probability"] 84 | output = {"fields": fields, "values": values} 85 | response = {"predictions": [output]} 86 | ##################################################################### 87 | # Handle response format required by Watson Openscale Evaluate # 88 | ##################################################################### 89 | # values = list(zip([int(x) for x in classes], prediction)) 90 | # fields = ['_original_prediction', '_original_probability'] 91 | # fields = ['prediction', 'probability'] 92 | # output = {'fields': fields, 'values': values } 93 | # response = {'predictions': [output]} 94 | 95 | if context.request_content_type == "application/json": 96 | response_content_type = "application/json" 97 | log.info( 98 | f"_process_output request_content_type 'application/json' response:\n{json.dumps(response)} " 99 | ) 100 | return json.dumps(response), response_content_type 101 | 102 | elif context.request_content_type == "text/csv": 103 | response_content_type = "application/json" 104 | log.info( 105 | f"_process_output request_content_type 'text/csv' response:\n{json.dumps(response)} " 106 | ) 107 | return json.dumps(response), response_content_type 108 | 109 | raise ValueError( 110 | '{{"error": "unsupported content type {}"}}'.format( 111 | context.request_content_type or "unknown" 112 | ) 113 | ) 114 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/tf_models.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from tensorflow.keras.models import Sequential 3 | from tensorflow.keras.layers import Dense, Dropout, 
Flatten 4 | from tensorflow.keras.layers import Conv2D, MaxPooling2D 5 | 6 | 7 | def base_model(input_shape, num_classes=10) -> Sequential: 8 | model = Sequential() 9 | model.add( 10 | Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape) 11 | ) 12 | model.add(Conv2D(64, (3, 3), activation="relu")) 13 | model.add(MaxPooling2D(pool_size=(2, 2))) 14 | model.add(Dropout(0.25)) 15 | model.add(Flatten()) 16 | model.add(Dense(128, activation="relu")) 17 | model.add(Dropout(0.25)) 18 | model.add(Dense(num_classes, activation="softmax")) 19 | return model 20 | -------------------------------------------------------------------------------- /mlmonitor/use_case_mnist_tf/tf_train.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | import argparse 3 | import json 4 | import os 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from tensorflow.keras import callbacks 9 | 10 | 11 | def train_model(args): 12 | """Generate a simple model""" 13 | 14 | x_train, y_train = _load_training_data(args.get("train")) 15 | x_test, y_test = _load_testing_data(args.get("test")) 16 | 17 | print(x_train.shape) 18 | 19 | model = tf.keras.models.Sequential( 20 | [ 21 | tf.keras.layers.Flatten(), 22 | tf.keras.layers.Dense(1024, activation=tf.nn.relu), 23 | tf.keras.layers.Dropout(0.4), 24 | tf.keras.layers.Dense(10, activation=tf.nn.softmax), 25 | ] 26 | ) 27 | 28 | earlystopping = callbacks.EarlyStopping( 29 | monitor="val_loss", mode="min", patience=5, restore_best_weights=True 30 | ) 31 | 32 | model.compile( 33 | optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"] 34 | ) 35 | model.fit( 36 | x_train, 37 | y_train, 38 | validation_data=(x_test, y_test), 39 | epochs=args.get("epochs"), 40 | batch_size=args.get("batch_size"), 41 | callbacks=[earlystopping], 42 | ) 43 | model.evaluate(x_test, y_test) 44 | 45 | if args.get("current_host") == args.get("hosts")[0]: 46 | 
# save model to an S3 directory with version number '00000001' in Tensorflow SavedModel Format 47 | # To export the model as h5 format use model.save('my_model.h5') 48 | model.save(os.path.join(args.get("model_dir"), "000000001")) 49 | 50 | return model 51 | 52 | 53 | def _load_training_data(base_dir): 54 | """Load MNIST training data""" 55 | x_train = np.load(os.path.join(base_dir, "train_data.npy")) 56 | y_train = np.load(os.path.join(base_dir, "train_labels.npy")) 57 | return x_train, y_train 58 | 59 | 60 | def _load_testing_data(base_dir): 61 | """Load MNIST testing data""" 62 | x_test = np.load(os.path.join(base_dir, "eval_data.npy")) 63 | y_test = np.load(os.path.join(base_dir, "eval_labels.npy")) 64 | return x_test, y_test 65 | 66 | 67 | def _parse_args(): 68 | parser = argparse.ArgumentParser() 69 | # Data, model, and output directories 70 | # model_dir is always passed in from SageMaker. By default this is a S3 path under the default bucket. 71 | # fmt: off 72 | # Training Job specific arguments (Sagemaker,Azure,WML) default SageMaker envar or Azure expected values 73 | parser.add_argument("--model-dir", type=str, default=os.getenv("SM_MODEL_DIR", "./outputs")) 74 | parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAINING")) 75 | parser.add_argument("--test", type=str, default=os.environ["SM_CHANNEL_TESTING"]) 76 | parser.add_argument("--hosts", type=list, default=json.loads(os.getenv("SM_HOSTS", '["algo-1"]'))) 77 | parser.add_argument("--current-host", type=str, default=os.getenv("SM_CURRENT_HOST", "algo-1")) 78 | parser.add_argument("--region-name", type=str, default="ca-central-1") 79 | 80 | # Model specific hyperparameters 81 | parser.add_argument("--batch-size", type=int, default=128, metavar="N", help="input batch size for training (default: 64)") 82 | parser.add_argument("--epochs", type=int, default=3, metavar="N", help="number of epochs to train (default: 1)") 83 | # fmt: on 84 | 85 | return parser.parse_known_args() 
86 | 87 | 88 | if __name__ == "__main__": 89 | args, unknown = _parse_args() 90 | args = vars(args) 91 | mnist_classifier = train_model(args) 92 | -------------------------------------------------------------------------------- /mlmonitor/version.meta: -------------------------------------------------------------------------------- 1 | 0.5 2 | -------------------------------------------------------------------------------- /pictures/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/.gitkeep -------------------------------------------------------------------------------- /pictures/AWS_pt_model_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/AWS_pt_model_output.png -------------------------------------------------------------------------------- /pictures/AWS_pt_training_job.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/AWS_pt_training_job.png -------------------------------------------------------------------------------- /pictures/Architectutre_MLOps_Asset.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/Architectutre_MLOps_Asset.jpg -------------------------------------------------------------------------------- /pictures/IAM_resources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/IAM_resources.png -------------------------------------------------------------------------------- 
/pictures/Model_inventory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/Model_inventory.png -------------------------------------------------------------------------------- /pictures/Model_inventory_dev_state.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/Model_inventory_dev_state.png -------------------------------------------------------------------------------- /pictures/New_Model_Entry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/New_Model_Entry.png -------------------------------------------------------------------------------- /pictures/PlatformAssetCatalog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/PlatformAssetCatalog.png -------------------------------------------------------------------------------- /pictures/SageMakerExecutionRole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/SageMakerExecutionRole.png -------------------------------------------------------------------------------- /pictures/aws-sagemaker-mnist-cnn-pytorch-yMD-HM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/aws-sagemaker-mnist-cnn-pytorch-yMD-HM.png -------------------------------------------------------------------------------- 
/pictures/churn_xgboost_asset_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/churn_xgboost_asset_factsheets.png -------------------------------------------------------------------------------- /pictures/churn_xgboost_confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/churn_xgboost_confusion_matrix.png -------------------------------------------------------------------------------- /pictures/churn_xgboost_feature_importance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/churn_xgboost_feature_importance.png -------------------------------------------------------------------------------- /pictures/churn_xgboost_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/churn_xgboost_loss.png -------------------------------------------------------------------------------- /pictures/churn_xgboost_metadata_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/churn_xgboost_metadata_factsheets.png -------------------------------------------------------------------------------- /pictures/churn_xgboost_probabilities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/churn_xgboost_probabilities.png 
-------------------------------------------------------------------------------- /pictures/cp4d_data_assets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/cp4d_data_assets.png -------------------------------------------------------------------------------- /pictures/custom_ml_provider.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/custom_ml_provider.png -------------------------------------------------------------------------------- /pictures/custom_ml_provider_evaluated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/custom_ml_provider_evaluated.png -------------------------------------------------------------------------------- /pictures/custom_monitor_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/custom_monitor_architecture.png -------------------------------------------------------------------------------- /pictures/custom_monitor_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/custom_monitor_workflow.png -------------------------------------------------------------------------------- /pictures/custommonitor_wml_custom_provider_deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/custommonitor_wml_custom_provider_deployment.png 
-------------------------------------------------------------------------------- /pictures/custommonitor_wos_definition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/custommonitor_wos_definition.png -------------------------------------------------------------------------------- /pictures/external_models_toggle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/external_models_toggle.png -------------------------------------------------------------------------------- /pictures/gcr_deployed_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/gcr_deployed_factsheets.png -------------------------------------------------------------------------------- /pictures/gcr_train_asset_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/gcr_train_asset_factsheets.png -------------------------------------------------------------------------------- /pictures/gcr_train_metadata_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/gcr_train_metadata_factsheets.png -------------------------------------------------------------------------------- /pictures/inference_endpoint_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/inference_endpoint_details.png 
-------------------------------------------------------------------------------- /pictures/inference_endpoint_online.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/inference_endpoint_online.png -------------------------------------------------------------------------------- /pictures/inference_samples_mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/inference_samples_mnist.png -------------------------------------------------------------------------------- /pictures/mnist_tf_asset_factsheets_train_local.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_asset_factsheets_train_local.png -------------------------------------------------------------------------------- /pictures/mnist_tf_asset_factsheets_train_remote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_asset_factsheets_train_remote.png -------------------------------------------------------------------------------- /pictures/mnist_tf_deployed_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_deployed_factsheets.png -------------------------------------------------------------------------------- /pictures/mnist_tf_model_endpoint.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_model_endpoint.png -------------------------------------------------------------------------------- /pictures/mnist_tf_scoring_inputs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_scoring_inputs.png -------------------------------------------------------------------------------- /pictures/mnist_tf_train_local_metadata_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_train_local_metadata_factsheets.png -------------------------------------------------------------------------------- /pictures/mnist_tf_train_remote_metadata_factsheets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_train_remote_metadata_factsheets.png -------------------------------------------------------------------------------- /pictures/mnist_tf_wos_subscription.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/mnist_tf_wos_subscription.png -------------------------------------------------------------------------------- /pictures/model_entry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/model_entry.png -------------------------------------------------------------------------------- /pictures/modelfacts_user.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/modelfacts_user.png -------------------------------------------------------------------------------- /pictures/new_notebook_cp4d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/new_notebook_cp4d.png -------------------------------------------------------------------------------- /pictures/outcome1_gcr_endpoint_aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/outcome1_gcr_endpoint_aws.png -------------------------------------------------------------------------------- /pictures/outcome2_gcr_model_aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/outcome2_gcr_model_aws.png -------------------------------------------------------------------------------- /pictures/outcome3_gcr_wos_monitors_eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/outcome3_gcr_wos_monitors_eval.png -------------------------------------------------------------------------------- /pictures/outcome4_gcr_explain_contrastive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/outcome4_gcr_explain_contrastive.png -------------------------------------------------------------------------------- /pictures/outcome4_gcr_explain_lime.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/outcome4_gcr_explain_lime.png -------------------------------------------------------------------------------- /pictures/outcome5_gcr_lifecycle_operate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/outcome5_gcr_lifecycle_operate.png -------------------------------------------------------------------------------- /pictures/outcome5_gcr_monitoring_facts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/outcome5_gcr_monitoring_facts.png -------------------------------------------------------------------------------- /pictures/pt_model_asset_FS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/pt_model_asset_FS.png -------------------------------------------------------------------------------- /pictures/pt_training_facts_manual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/pt_training_facts_manual.png -------------------------------------------------------------------------------- /pictures/ptlt_training_facts_autolog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/ptlt_training_facts_autolog.png -------------------------------------------------------------------------------- /pictures/sagemaker_factsheets_big.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/sagemaker_factsheets_big.png -------------------------------------------------------------------------------- /pictures/secret_manager_keys.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/secret_manager_keys.png -------------------------------------------------------------------------------- /pictures/track_this_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/mlmonitor/96a5b766f1177e69409219c0dea164776e94ad47/pictures/track_this_model.png -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | PyYAML 2 | wheel 3 | jupyterlab 4 | pre-commit 5 | twine 6 | -------------------------------------------------------------------------------- /requirements-extra.txt: -------------------------------------------------------------------------------- 1 | azureml-sdk==1.56.0 2 | sagemaker==2.103.0 3 | ibm-wos-utils==4.7.0.14 4 | -------------------------------------------------------------------------------- /requirements-local.txt: -------------------------------------------------------------------------------- 1 | pip 2 | boto3==1.34.94 3 | scikit-learn==1.1.1 4 | joblib==1.1.1 5 | numpy==1.23.1 6 | matplotlib 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ibm_watson_openscale==3.0.40 2 | ibm-watson-machine-learning==1.0.360 3 | ipython==8.14.0 4 | ibm-aigov-facts-client==1.0.80 5 | pygit2 6 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | from typing import Optional, List 3 | from setuptools import setup 4 | import os 5 | import re 6 | 7 | 8 | def version(path: str): 9 | with open(os.path.join(path, "version.meta"), "r") as v: 10 | return v.read().strip() 11 | 12 | 13 | def readme(path: str): 14 | with open(os.path.join(path, "README.md"), "r") as v: 15 | return v.read() 16 | 17 | 18 | def _readfile(file_path: str, file_name): 19 | with open(os.path.join(file_path, file_name), "r") as v: 20 | lines = v.readlines() 21 | return list(filter(lambda x: re.match(r"^\w+", x), lines)) 22 | 23 | 24 | def requirements(path: str, postfix: Optional[str] = None) -> List[str]: 25 | req = f"requirements-{postfix}.txt" if postfix else "requirements.txt" 26 | return _readfile(file_path=path, file_name=req) 27 | 28 | 29 | current_directory = os.path.abspath(os.path.dirname(__file__)) 30 | MINIMUM_PYTHON_VERSION = "3.10.4" 31 | DATA_FILES = ["*.csv", "*.gzip", "*.gz", "*.txt", "*.npy"] 32 | 33 | setup( 34 | name="mlmonitor", 35 | version=version(current_directory), 36 | author="Jacques-Sylvain Lecointre", 37 | description="Orchestration of model use cases", 38 | author_email="js.lecointre@ibm.com", 39 | url="https://github.com/IBM/mlmonitor/mlmonitor", 40 | packages=[ 41 | "mlmonitor", 42 | "mlmonitor.use_case_churn", 43 | "mlmonitor.use_case_gcr", 44 | "mlmonitor.use_case_mnist_tf", 45 | "mlmonitor.custmonitor", 46 | "mlmonitor.custmonitor.metrics", 47 | "mlmonitor.custmonitor.metricsprovider", 48 | "mlmonitor.data", 49 | "mlmonitor.figures", 50 | "mlmonitor.models", 51 | "mlmonitor.datasets", 52 | "mlmonitor.datasets.mnist", 53 | "mlmonitor.datasets.gcr", 54 | "mlmonitor.datasets.churn", 55 | "mlmonitor.src", 56 | "mlmonitor.src.utils", 57 | "mlmonitor.src.demos", 58 | "mlmonitor.src.model", 59 | 
"mlmonitor.src.azure", 60 | "mlmonitor.src.wml", 61 | "mlmonitor.src.aws", 62 | "mlmonitor.src.wos", 63 | "mlmonitor.src.factsheets", 64 | ], 65 | package_data={ 66 | "mlmonitor.datasets": DATA_FILES, 67 | "mlmonitor.datasets.mnist": DATA_FILES, 68 | "mlmonitor.datasets.gcr": DATA_FILES, 69 | "mlmonitor.datasets.churn": DATA_FILES, 70 | }, 71 | long_description=readme(path=os.path.join(current_directory, "mlmonitor")), 72 | long_description_content_type="text/markdown", 73 | install_requires=requirements(path=current_directory), 74 | extras_require={ 75 | "local": requirements(path=current_directory, postfix="local"), 76 | "dev": requirements(path=current_directory, postfix="dev"), 77 | "azure": ["azureml-sdk==1.56.0"], 78 | "sagemaker": ["sagemaker==2.206.0"], 79 | "drift": ["ibm-wos-utils==4.7.0.14"], 80 | }, 81 | python_requires=f">={MINIMUM_PYTHON_VERSION}", 82 | include_package_data=True, 83 | ) 84 | -------------------------------------------------------------------------------- /version.meta: -------------------------------------------------------------------------------- 1 | 1.0.25 2 | --------------------------------------------------------------------------------