├── CHANGELOG.md
├── LICENSE.txt
├── README.md
├── README_bulk.md
├── README_copy.md
├── README_export_format.md
├── README_governance.md
├── README_limitations.md
├── README_options.md
├── README_single.md
├── README_tools.md
├── databricks_notebooks
├── README.md
├── _README.py
├── bulk
│ ├── Check_Model_Versions_Runs.py
│ ├── Common.py
│ ├── Export_All.py
│ ├── Export_Experiments.py
│ ├── Export_Registered_Models.py
│ ├── Import_Experiments.py
│ ├── Import_Registered_Models.py
│ └── _README.py
├── copy
│ ├── Common.py
│ ├── Copy_Model_Version.py
│ ├── Copy_Run.py
│ ├── Create_Model_Version.py
│ ├── MLflow_Copy_Model_Version.py
│ ├── _README.py
│ ├── experimental
│ │ ├── Common.py
│ │ └── Copy_Model_Version.py
│ └── tests
│ │ └── Test_Copy_Model_Version.py
├── scripts
│ ├── Common.py
│ ├── Console_Scripts.py
│ └── _README.py
├── single
│ ├── Common.py
│ ├── Export_Experiment.py
│ ├── Export_Model_Version.py
│ ├── Export_Registered_Model.py
│ ├── Export_Run.py
│ ├── Import_Experiment.py
│ ├── Import_Model_Version.py
│ ├── Import_Registered_Model.py
│ ├── Import_Run.py
│ └── _README.py
└── tools
│ ├── Common.py
│ ├── Get_Model_Signature.py
│ ├── List_Model_Versions_Without_Signature.py
│ ├── Set_Model_Signature.py
│ └── _README.py
├── diagrams
├── Copy_Model_Version_NonUC.png
├── Copy_Model_Version_UC.png
└── architecture.png
├── mlflow_export_import
├── __init__.py
├── bulk
│ ├── __init__.py
│ ├── bulk_utils.py
│ ├── experiments_merge_utils.py
│ ├── export_all.py
│ ├── export_experiments.py
│ ├── export_models.py
│ ├── import_experiments.py
│ ├── import_models.py
│ ├── model_utils.py
│ └── rename_utils.py
├── client
│ ├── __init__.py
│ ├── client_utils.py
│ ├── databricks_cli_utils.py
│ ├── databricks_utils.py
│ ├── http_client.py
│ ├── mlflow_auth_utils.py
│ └── user_agent_header.py
├── common
│ ├── __init__.py
│ ├── click_options.py
│ ├── default_logging_config.py
│ ├── dump_utils.py
│ ├── filesystem.py
│ ├── find_artifacts.py
│ ├── io_utils.py
│ ├── iterators.py
│ ├── logging_utils.py
│ ├── mlflow_utils.py
│ ├── model_utils.py
│ ├── pkg_version.py
│ ├── source_tags.py
│ ├── timestamp_utils.py
│ ├── uc_permissions_utils.py
│ ├── utils.py
│ └── ws_permissions_utils.py
├── copy
│ ├── __init__.py
│ ├── click_options.py
│ ├── copy_model_version.py
│ ├── copy_run.py
│ └── copy_utils.py
├── experiment
│ ├── __init__.py
│ ├── export_experiment.py
│ ├── import_experiment.py
│ ├── nested_runs_utils.py
│ └── oss_nested_runs_utils.py
├── model
│ ├── __init__.py
│ ├── export_model.py
│ └── import_model.py
├── model_version
│ ├── __init__.py
│ ├── click_options.py
│ ├── export_model_version.py
│ └── import_model_version.py
├── notebook
│ ├── __init__.py
│ └── download_notebook.py
├── run
│ ├── __init__.py
│ ├── export_run.py
│ ├── import_run.py
│ ├── run_data_importer.py
│ └── run_utils.py
├── tools
│ ├── __init__.py
│ ├── click_options.py
│ ├── experimental
│ │ ├── README.md
│ │ ├── filter_one_model.py
│ │ ├── rewrite_export.py
│ │ └── samples
│ │ │ └── custom_export_rewriters.py
│ ├── get_model_signature.py
│ ├── list_model_versions_without_signatures.py
│ ├── list_registered_models.py
│ ├── set_model_signature.py
│ ├── signature_utils.py
│ └── tools_utils.py
├── version.py
└── workflow_api
│ ├── README.md
│ ├── __init__.py
│ ├── log_utils.py
│ ├── run_submit.py
│ ├── utils.py
│ └── workflow_api_client.py
├── samples
├── databricks
│ ├── bulk
│ │ ├── experiments
│ │ │ ├── 1280664374380606
│ │ │ │ ├── 253000ee70914831850defc593ba4740
│ │ │ │ │ └── run.json
│ │ │ │ └── experiment.json
│ │ │ ├── 9195e233f19e49379b16c5f2d2b0c05f
│ │ │ │ ├── a17f0abf5d46464d899f0ffcebbdb7a8
│ │ │ │ │ └── run.json
│ │ │ │ └── experiment.json
│ │ │ └── experiments.json
│ │ └── models
│ │ │ ├── experiments
│ │ │ ├── 1280664374380606
│ │ │ │ ├── 851de1f466304650a77c949f5d386d9f
│ │ │ │ │ └── run.json
│ │ │ │ └── experiment.json
│ │ │ ├── 9195e233f19e49379b16c5f2d2b0c05f
│ │ │ │ ├── a17f0abf5d46464d899f0ffcebbdb7a8
│ │ │ │ │ └── run.json
│ │ │ │ └── experiment.json
│ │ │ └── experiments.json
│ │ │ ├── manifest.json
│ │ │ └── models
│ │ │ ├── Keras_MNIST
│ │ │ └── model.json
│ │ │ ├── Sklearn_WineQuality
│ │ │ └── model.json
│ │ │ └── models.json
│ └── single
│ │ ├── experiments
│ │ ├── notebook_experiments
│ │ │ ├── repo_notebook
│ │ │ │ ├── 02aeef6d8cbf449ab50c8e715e320085
│ │ │ │ │ └── run.json
│ │ │ │ └── experiment.json
│ │ │ └── workspace_notebook
│ │ │ │ ├── experiment.json
│ │ │ │ └── f7816bc76f254f22ab25549a7c2c9b06
│ │ │ │ └── run.json
│ │ └── workspace_experiments
│ │ │ ├── automl_workspace_notebook
│ │ │ └── 5e1e2c44039a40afafc760b837a4daab
│ │ │ │ ├── artifacts
│ │ │ │ ├── estimator.html
│ │ │ │ └── model
│ │ │ │ │ ├── MLmodel
│ │ │ │ │ ├── conda.yaml
│ │ │ │ │ ├── input_example.json
│ │ │ │ │ ├── python_env.yaml
│ │ │ │ │ └── requirements.txt
│ │ │ │ └── run.json
│ │ │ ├── job_repo_notebook
│ │ │ └── experiment.json
│ │ │ ├── repo_notebook
│ │ │ ├── bad4988ed1184aad953fd14efee72fa2
│ │ │ │ └── run.json
│ │ │ └── experiment.json
│ │ │ ├── workspace_notebook
│ │ │ ├── 253000ee70914831850defc593ba4740
│ │ │ │ └── run.json
│ │ │ └── experiment.json
│ │ │ └── workspace_notebook_src_tags
│ │ │ ├── 68850173104649149678090f75d36d0a
│ │ │ └── run.json
│ │ │ └── experiment.json
│ │ ├── models
│ │ ├── basic
│ │ │ └── model.json
│ │ ├── deleted_runs
│ │ │ └── model.json
│ │ └── src_tags
│ │ │ └── model.json
│ │ └── versions
│ │ └── sklearn_wine
│ │ ├── experiment.json
│ │ ├── model.json
│ │ ├── run
│ │ ├── artifacts
│ │ │ └── model
│ │ │ │ ├── MLmodel
│ │ │ │ ├── conda.yaml
│ │ │ │ ├── input_example.json
│ │ │ │ ├── metadata
│ │ │ │ ├── MLmodel
│ │ │ │ ├── conda.yaml
│ │ │ │ ├── python_env.yaml
│ │ │ │ └── requirements.txt
│ │ │ │ ├── model.pkl
│ │ │ │ ├── python_env.yaml
│ │ │ │ └── requirements.txt
│ │ └── run.json
│ │ └── version.json
└── oss_mlflow
│ ├── bulk
│ ├── experiments
│ │ ├── 1
│ │ │ ├── d057cae15f27465988e72c6212e1f226
│ │ │ │ └── run.json
│ │ │ └── experiment.json
│ │ ├── 2
│ │ │ ├── 5397ae67ee0c49139bf64834b4d27fab
│ │ │ │ └── run.json
│ │ │ ├── 8a6af43e756f433da7a90fd6b4e49c3a
│ │ │ │ └── run.json
│ │ │ └── experiment.json
│ │ └── experiments.json
│ └── models
│ │ ├── experiments
│ │ ├── 1
│ │ │ ├── d057cae15f27465988e72c6212e1f226
│ │ │ │ └── run.json
│ │ │ └── experiment.json
│ │ ├── 2
│ │ │ ├── 5397ae67ee0c49139bf64834b4d27fab
│ │ │ │ └── run.json
│ │ │ ├── 8a6af43e756f433da7a90fd6b4e49c3a
│ │ │ │ └── run.json
│ │ │ └── experiment.json
│ │ └── experiments.json
│ │ ├── manifest.json
│ │ └── models
│ │ ├── models.json
│ │ ├── sklearn_iris
│ │ └── model.json
│ │ └── sklearn_wine
│ │ └── model.json
│ └── single
│ ├── experiments
│ ├── basic
│ │ ├── eb66c160957d4a28b11d3f1b968df9cd
│ │ │ └── run.json
│ │ └── experiment.json
│ └── src_tags
│ │ ├── 4b0ce88fd34e45fc8ca08876127299ce
│ │ └── run.json
│ │ └── experiment.json
│ └── models
│ ├── basic
│ └── model.json
│ └── src_tags
│ └── model.json
├── setup.py
└── tests
├── README.md
├── __init__.py
├── compare_utils.py
├── core.py
├── data
├── iris_score.csv
└── iris_train.csv
├── databricks
├── README.md
├── __init__.py
├── _test_model_version.py
├── _test_registered_model.py
├── compare_utils.py
├── config.yaml.template
├── includes.py
├── init_tests.py
├── local_utils.py
├── run_tests.sh
├── test_copy_model_version.py
├── test_copy_run.py
├── test_experiments.py
├── test_model_version.py
├── test_registered_model.py
├── uc
│ ├── run_tests.sh
│ ├── test_copy_model_version.py
│ ├── test_model_version.py
│ └── test_registered_model.py
└── unity_catalog_client.py
├── databricks_notebooks
├── README.md
├── __init__.py
├── config.yaml.template
├── databricks_tester.py
├── experiment
│ └── Iris_Train.py
├── init_tests.py
├── run_tests.sh
├── samples
│ ├── failed
│ │ ├── run_tests_junit.xml
│ │ └── run_tests_report.html
│ ├── run_tests_junit.xml
│ └── run_tests_report.html
└── test_basic.py
├── open_source
├── README.md
├── __init__.py
├── init_tests.py
├── kill_server.sh
├── oss_utils_test.py
├── run_tests.sh
├── samples
│ ├── run_tests_junit.xml
│ └── run_tests_report.html
├── test_bulk_all.py
├── test_bulk_experiments.py
├── test_bulk_experiments_export_param.py
├── test_bulk_experiments_merge_utils.py
├── test_bulk_models.py
├── test_copy_model_version.py
├── test_copy_run.py
├── test_exceptions.py
├── test_experiments.py
├── test_find_run_model_names.py
├── test_iterators.py
├── test_model_signature.py
├── test_model_version.py
├── test_models.py
├── test_models_archive_vesions.py
├── test_renames.py
├── test_run_data_limits.py
├── test_runs.py
├── test_uc_permissions.py
└── test_ws_permissions.py
├── sklearn_utils.py
└── utils_test.py
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 | ## 1.2.0 (2023-02-16)
4 |
5 | The 1.2.0 version of MLflow Export Import is a major milestone release.
6 |
 7 | This release contains an important breaking change from the 1.x API, as well as additional major features and improvements.
8 |
9 | Features:
10 |
11 | - New streamlined export format for MLflow objects (experiments, runs and registered models)
12 | - Exporting artifacts of a specific version of a model
13 | - Import source system fields and tags
14 | - More Databricks notebook examples: Export_All and Export_Models notebooks
15 | - Added download notebook CLI utility
16 | - Plenty of bug fixes
17 |
18 | Breaking Changes:
19 |
20 | - [Core] The JSON export file format has been overhauled and made consistent across different MLflow objects.
21 |   Export files written by earlier 1.x releases cannot be read by this release.
22 |
23 | Documentation updates:
24 | - Major updates to README files
25 | - Aligned sample JSON files with new format
26 |
--------------------------------------------------------------------------------
/README_options.md:
--------------------------------------------------------------------------------
1 |
2 | # Options
3 |
4 | ## Common options
5 |
6 | `notebook-formats` - If exporting a Databricks run, the run's notebook revision can be saved in the specified formats (comma-delimited argument). Each format is saved in the notebooks folder of the run's artifact root directory as `notebook.{format}`. Supported formats are SOURCE, HTML, JUPYTER and DBC. See Databricks [Export Format](https://docs.databricks.com/dev-tools/api/latest/workspace.html#notebookexportformat) documentation.
7 |
 8 | `use-src-user-id` - Set the destination user field to the source user field. Only valid for open source MLflow.
 9 | When importing into Databricks, the source user field is ignored since it is automatically picked up from your Databricks access token.
10 | There is no MLflow API endpoint to explicitly set the user field for any object such as a Run or Experiment.
11 | 
12 | `import-source-tags` - Import source information for a registered model and its versions as tags in the destination object.
13 | See the section below. A usage example appears at the end of this page.
14 | 
15 | ## MLflow Export Import Source Tags
16 | 
17 | For ML governance purposes, original source run information is saved under the `mlflow_export_import` tag prefix in the destination MLflow object.
18 | 
19 | For details see [README_governance.md](README_governance.md).
20 | 
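21 | ## Example
22 | 
23 | A minimal sketch using the equivalent Python API (parameter names mirror the CLI options; the run ID and paths are placeholders):
24 | 
25 | ```python
26 | from mlflow_export_import.run.export_run import export_run
27 | 
28 | # Export a Databricks run; its notebook revision is saved as
29 | # notebook.{format} under the "notebooks" folder of the run's artifacts.
30 | export_run(
31 |     run_id = "<run-id>",
32 |     output_dir = "/tmp/export/run",
33 |     notebook_formats = ["SOURCE", "DBC"]
34 | )
35 | ```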
--------------------------------------------------------------------------------
/databricks_notebooks/bulk/Check_Model_Versions_Runs.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Check Model Versions Runs
3 | # MAGIC
 4 | # MAGIC Check whether the runs of model versions have been deleted.
 5 | # MAGIC * Soft delete - the run is marked as `deleted` (tombstoned) but still exists in the database for 30 days
 6 | # MAGIC * Hard delete - the run has been physically deleted
7 | # MAGIC
8 | # MAGIC Widget:
9 | # MAGIC * `1. Models`
10 | # MAGIC * `2. Export latest versions`
11 | # MAGIC * `yes`: get only latest versions per stage
12 | # MAGIC * `no`: get all versions for all stages
13 | # MAGIC * `3. Bail`
14 |
15 | # COMMAND ----------
16 |
17 | # MAGIC %run ./Common
18 |
19 | # COMMAND ----------
20 |
21 | dbutils.widgets.text("1. Models", "")
22 | models = dbutils.widgets.get("1. Models")
23 |
24 | dbutils.widgets.dropdown("2. Export latest versions","yes",["yes","no"])
25 | export_latest_versions = dbutils.widgets.get("2. Export latest versions") == "yes"
26 |
27 | dbutils.widgets.text("3. Bail", "")
28 | bail = dbutils.widgets.get("3. Bail")
29 | bail = None if bail=="" else int(bail)
30 |
31 | print("models:", models)
32 | print("export_latest_versions:", export_latest_versions)
33 | print("bail:", bail)
34 |
35 | # COMMAND ----------
36 |
37 | assert_widget(models, "1. Models")
38 |
39 | # COMMAND ----------
40 |
41 | from mlflow_export_import.bulk.check_model_version_runs import mk_pandas_df
42 |
43 | pdf = mk_pandas_df(
44 | models,
45 | export_latest_versions=export_latest_versions,
46 | bail=bail
47 | )
48 | df = spark.createDataFrame(pdf)
49 | display(df)
50 |
51 | # COMMAND ----------
52 |
53 | df.count()
54 |
55 | # COMMAND ----------
56 |
57 |
58 |
--------------------------------------------------------------------------------
/databricks_notebooks/bulk/Common.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %pip install -U mlflow-skinny
 3 | # MAGIC %pip install -U git+https://github.com/mlflow/mlflow-export-import/#egg=mlflow-export-import
4 | # MAGIC dbutils.library.restartPython()
5 |
6 | # COMMAND ----------
7 |
8 | import mlflow
9 | mlflow_client = mlflow.MlflowClient()
10 | print("MLflow version",mlflow.__version__)
11 |
12 | # COMMAND ----------
13 |
14 | def assert_widget(value, name):
15 | if len(value.rstrip())==0:
16 | raise Exception(f"ERROR: '{name}' widget is required")
17 |
18 | # COMMAND ----------
19 |
20 | def get_notebook_formats(num):
21 | widget_name = f"{num}. Notebook formats"
22 | all_notebook_formats = [ "SOURCE", "DBC", "HTML", "JUPYTER" ]
23 | dbutils.widgets.multiselect(widget_name, all_notebook_formats[0], all_notebook_formats)
24 | notebook_formats = dbutils.widgets.get(widget_name)
25 | notebook_formats = notebook_formats.split(",")
26 | if "" in notebook_formats: notebook_formats.remove("")
27 | return notebook_formats
28 |
--------------------------------------------------------------------------------
/databricks_notebooks/bulk/Export_Experiments.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Export Experiments
3 | # MAGIC
4 | # MAGIC Export multiple experiments and all their runs.
5 | # MAGIC
6 | # MAGIC Widgets
 7 | # MAGIC * `1. Experiments` - comma-delimited list of experiment IDs or names; `all` exports all experiments. Alternatively, a filename (ending in `.txt`) containing experiment names/IDs.
8 | # MAGIC * `2. Output directory` - shared directory between source and destination workspaces.
9 | # MAGIC * `3. Run start date` - Export runs after this UTC date (inclusive). Example: `2023-04-05`.
10 | # MAGIC * `4. Export permissions` - export Databricks permissions.
11 | # MAGIC * `5. Export deleted runs`
12 | # MAGIC * `6. Notebook formats`
13 | # MAGIC * `7. Use threads`
14 |
15 | # COMMAND ----------
16 |
17 | # MAGIC %run ./Common
18 |
19 | # COMMAND ----------
20 |
21 | dbutils.widgets.text("1. Experiments", "")
22 | experiments = dbutils.widgets.get("1. Experiments")
23 |
24 | dbutils.widgets.text("2. Output directory", "")
25 | output_dir = dbutils.widgets.get("2. Output directory")
26 | output_dir = output_dir.replace("dbfs:","/dbfs")
27 |
28 | dbutils.widgets.text("3. Run start date", "")
29 | run_start_date = dbutils.widgets.get("3. Run start date")
30 |
31 | dbutils.widgets.dropdown("4. Export permissions","no",["yes","no"])
32 | export_permissions = dbutils.widgets.get("4. Export permissions") == "yes"
33 |
34 | dbutils.widgets.dropdown("5. Export deleted runs","no",["yes","no"])
35 | export_deleted_runs = dbutils.widgets.get("5. Export deleted runs") == "yes"
36 |
37 | notebook_formats = get_notebook_formats(6)
38 |
39 | dbutils.widgets.dropdown("7. Use threads","False",["True","False"])
40 | use_threads = dbutils.widgets.get("7. Use threads") == "True"
41 |
42 | if run_start_date=="": run_start_date = None
43 |
44 | print("experiments:", experiments)
45 | print("output_dir:", output_dir)
46 | print("run_start_date:", run_start_date)
47 | print("export_permissions:", export_permissions)
48 | print("export_deleted_runs:", export_deleted_runs)
49 | print("notebook_formats:", notebook_formats)
50 | print("use_threads:", use_threads)
51 |
52 | # COMMAND ----------
53 |
54 | assert_widget(experiments, "1. Experiments")
55 | assert_widget(output_dir, "2. Output directory")
56 |
57 | # COMMAND ----------
58 |
59 | from mlflow_export_import.bulk.export_experiments import export_experiments
60 |
61 | export_experiments(
62 | experiments = experiments,
63 | output_dir = output_dir,
64 | run_start_time = run_start_date,
65 | export_permissions = export_permissions,
66 | export_deleted_runs = export_deleted_runs,
67 | notebook_formats = notebook_formats,
68 | use_threads = use_threads
69 | )
70 |
71 | # COMMAND ----------
72 |
73 | # MAGIC %md ### Display exported files
74 |
75 | # COMMAND ----------
76 |
77 | import os
78 | output_dir = output_dir.replace("dbfs:", "/dbfs")
79 | os.environ['OUTPUT_DIR'] = output_dir
80 | output_dir
81 |
82 | # COMMAND ----------
83 |
84 | # MAGIC %sh
85 | # MAGIC echo "OUTPUT_DIR: $OUTPUT_DIR" ; echo
86 | # MAGIC ls $OUTPUT_DIR
87 |
88 | # COMMAND ----------
89 |
90 | # MAGIC %sh cat $OUTPUT_DIR/experiments.json
91 |
92 | # COMMAND ----------
93 |
94 | # MAGIC %sh ls -lR $OUTPUT_DIR
95 |
--------------------------------------------------------------------------------
/databricks_notebooks/bulk/Import_Experiments.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Import Experiments
3 | # MAGIC
4 | # MAGIC Widgets
5 | # MAGIC * `1. Input directory` - directory of exported experiments.
 6 | # MAGIC * `2. Experiment rename file` - optional file containing experiment rename mappings.
7 | # MAGIC * `3. Import permissions`
8 | # MAGIC * `4. Import source tags`
9 | # MAGIC * `5. Use threads` - use multi-threaded import.
10 | # MAGIC
11 | # MAGIC See https://github.com/mlflow/mlflow-export-import/blob/master/README_bulk.md#Import-experiments.
12 |
13 | # COMMAND ----------
14 |
15 | # MAGIC %run ./Common
16 |
17 | # COMMAND ----------
18 |
19 | dbutils.widgets.text("1. Input directory", "")
20 | input_dir = dbutils.widgets.get("1. Input directory")
21 | input_dir = input_dir.replace("dbfs:","/dbfs")
22 |
23 | dbutils.widgets.text("2. Experiment rename file","")
24 | val = dbutils.widgets.get("2. Experiment rename file")
25 | experiment_rename_file = val or None
26 |
27 | dbutils.widgets.dropdown("3. Import permissions","no",["yes","no"])
28 | import_permissions = dbutils.widgets.get("3. Import permissions") == "yes"
29 |
30 | dbutils.widgets.dropdown("4. Import source tags","no",["yes","no"])
31 | import_source_tags = dbutils.widgets.get("4. Import source tags") == "yes"
32 |
33 | dbutils.widgets.dropdown("5. Use threads","no",["yes","no"])
34 | use_threads = dbutils.widgets.get("5. Use threads") == "yes"
35 |
36 | print("input_dir:", input_dir)
37 | print("experiment_rename_file:", experiment_rename_file)
38 | print("import_permissions:", import_permissions)
39 | print("import_source_tags:", import_source_tags)
40 | print("use_threads:", use_threads)
41 |
42 | # COMMAND ----------
43 |
44 | assert_widget(input_dir, "1. Input directory")
45 |
46 | # COMMAND ----------
47 |
48 | from mlflow_export_import.bulk.import_experiments import import_experiments
49 |
50 | import_experiments(
51 | input_dir = input_dir,
52 | import_source_tags = import_source_tags,
53 | experiment_renames = experiment_rename_file,
54 | import_permissions = import_permissions,
55 | use_threads = use_threads
56 | )
57 |
--------------------------------------------------------------------------------
/databricks_notebooks/bulk/Import_Registered_Models.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Import Registered Models
3 | # MAGIC
4 | # MAGIC Widgets
5 | # MAGIC * `1. Input directory` - directory of exported models.
 6 | # MAGIC * `2. Delete model` - delete the current contents of the model before importing
 7 | # MAGIC * `3. Model rename file` - optional file containing model rename mappings.
 8 | # MAGIC * `4. Experiment rename file` - optional file containing experiment rename mappings.
9 | # MAGIC * `5. Import permissions`
10 | # MAGIC * `6. Import source tags`
11 | # MAGIC * `7. Use threads` - use multi-threaded import.
12 | # MAGIC
13 | # MAGIC See https://github.com/mlflow/mlflow-export-import/blob/master/README_bulk.md#Import-registered-models
14 |
15 | # COMMAND ----------
16 |
17 | # MAGIC %run ./Common
18 |
19 | # COMMAND ----------
20 |
21 | dbutils.widgets.text("1. Input directory", "")
22 | input_dir = dbutils.widgets.get("1. Input directory")
23 | input_dir = input_dir.replace("dbfs:","/dbfs")
24 |
25 | dbutils.widgets.dropdown("2. Delete model","no",["yes","no"])
26 | delete_model = dbutils.widgets.get("2. Delete model") == "yes"
27 |
28 | dbutils.widgets.text("3. Model rename file","")
29 | val = dbutils.widgets.get("3. Model rename file")
30 | model_rename_file = val or None
31 |
32 | dbutils.widgets.text("4. Experiment rename file","")
33 | val = dbutils.widgets.get("4. Experiment rename file")
34 | experiment_rename_file = val or None
35 |
36 | dbutils.widgets.dropdown("5. Import permissions","no",["yes","no"])
37 | import_permissions = dbutils.widgets.get("5. Import permissions") == "yes"
38 |
39 | dbutils.widgets.dropdown("6. Import source tags","no",["yes","no"])
40 | import_source_tags = dbutils.widgets.get("6. Import source tags") == "yes"
41 |
42 | dbutils.widgets.dropdown("6. Use threads","no",["yes","no"])
43 | use_threads = dbutils.widgets.get("6. Use threads") == "yes"
44 |
45 | print("input_dir:", input_dir)
46 | print("delete_model:", delete_model)
47 | print("model_rename_file: ", model_rename_file)
48 | print("experiment_rename_file:", experiment_rename_file)
49 | print("import_permissions:", import_permissions)
50 | print("import_source_tags:", import_source_tags)
51 | print("use_threads:", use_threads)
52 |
53 | # COMMAND ----------
54 |
55 | assert_widget(input_dir, "1. Input directory")
56 |
57 | # COMMAND ----------
58 |
59 | from mlflow_export_import.bulk.import_models import import_models
60 |
61 | import_models(
62 | input_dir = input_dir,
63 | delete_model = delete_model,
64 | model_renames = model_rename_file,
65 | experiment_renames = experiment_rename_file,
66 | import_permissions = import_permissions,
67 | import_source_tags = import_source_tags,
68 | use_threads = use_threads
69 | )
70 |
--------------------------------------------------------------------------------
/databricks_notebooks/bulk/_README.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## MLflow Export Import bulk notebooks
3 | # MAGIC
4 | # MAGIC * Experiments
5 | # MAGIC * [Export_Experiments]($Export_Experiments)
6 | # MAGIC * [Import_Experiments]($Import_Experiments)
7 | # MAGIC * Models
8 | # MAGIC * [Export_Registered_Models]($Export_Registered_Models) - Exports models and dependent objects (runs and experiments)
9 | # MAGIC * [Import_Registered_Models]($Import_Registered_Models) - Imports models and dependent objects (runs and experiments)
10 | # MAGIC * All - entire workspace of MLflow objects
11 | # MAGIC * [Export_All]($Export_All)
12 | # MAGIC   * Import All - Use [Import_Registered_Models]($Import_Registered_Models) to import all objects
13 | # MAGIC * [Common]($Common) - Helper functions
14 | # MAGIC
15 | # MAGIC See: https://github.com/mlflow/mlflow-export-import/blob/master/README_bulk.md.
16 | # MAGIC
17 | # MAGIC Last update: 2023-10-28
18 |
--------------------------------------------------------------------------------
/databricks_notebooks/copy/Copy_Run.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Copy Run
3 | # MAGIC
4 | # MAGIC ##### Overview
5 | # MAGIC
 6 | # MAGIC Copy an MLflow run to either the current or another workspace.
7 | # MAGIC
8 | # MAGIC ##### Widgets
9 | # MAGIC
10 | # MAGIC * `1. Source run ID` - Source run ID.
11 | # MAGIC * `2. Destination experiment` - Destination experiment name for the run.
12 | # MAGIC * `3. Destination workspace` - Destination workspace; defaults to the current workspace.
13 |
14 | # COMMAND ----------
15 |
16 | # MAGIC %md #### Setup
17 |
18 | # COMMAND ----------
19 |
20 | # MAGIC %run ./Common
21 |
22 | # COMMAND ----------
23 |
24 | dbutils.widgets.text("1. Source run ID", "")
25 | src_run_id = dbutils.widgets.get("1. Source run ID")
26 |
27 | dbutils.widgets.text("2. Destination experiment", "")
28 | dst_experiment_name = dbutils.widgets.get("2. Destination experiment")
29 |
30 | dbutils.widgets.text("3. Destination workspace", "databricks")
31 | dst_run_workspace = dbutils.widgets.get("3. Destination workspace")
32 | dst_run_workspace = dst_run_workspace or "databricks"
33 |
34 | print("src_run_id:", src_run_id)
35 | print("dst_experiment_name:", dst_experiment_name)
36 | print("dst_run_workspace:", dst_run_workspace)
37 |
38 | # COMMAND ----------
39 |
40 | assert_widget(src_run_id, "1. Source run ID")
41 | assert_widget(dst_experiment_name, "2. Destination experiment")
42 |
43 | # COMMAND ----------
44 |
45 | # MAGIC %md #### Copy Run
46 |
47 | # COMMAND ----------
48 |
49 | from mlflow_export_import.copy.copy_run import copy
50 |
51 | dst_run = copy(src_run_id, dst_experiment_name, "databricks", dst_run_workspace)
52 |
53 | # COMMAND ----------
54 |
55 | dst_run
56 |
57 | # COMMAND ----------
58 |
59 | if dst_run_workspace == "databricks":
60 | display_run_uri(dst_run.info.run_id)
61 | else:
62 | print(f"Cannot display run '{dst_run.info.run_id}' since it is in a remove workspace.")
63 |
--------------------------------------------------------------------------------
/databricks_notebooks/copy/MLflow_Copy_Model_Version.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## MLflow_Copy_Model_Version
3 | # MAGIC
4 | # MAGIC Uses the standard `MlflowClient.copy_model_version()` method.
5 | # MAGIC
6 | # MAGIC ##### Widgets
7 | # MAGIC
8 | # MAGIC * `1. Source Model URI` - Source model URI (must be `models:` scheme)
9 | # MAGIC * `2. Destination Model` - Destination model name.
10 | # MAGIC
11 | # MAGIC #### Documentation
12 | # MAGIC * [MlflowClient.copy_model_version](https://mlflow.org/docs/latest/python_api/mlflow.client.html#mlflow.client.MlflowClient.copy_model_version)
13 |
14 | # COMMAND ----------
15 |
16 | # MAGIC %pip install -Uq mlflow-skinny
17 | # MAGIC dbutils.library.restartPython()
18 |
19 | # COMMAND ----------
20 |
21 | import mlflow
22 | print("mlflow.version:", mlflow.__version__)
23 | print("mlflow.get_registry_uri:", mlflow.get_registry_uri())
24 |
25 | # COMMAND ----------
26 |
27 | dbutils.widgets.text("1. Source Model URI", "")
28 | src_model_uri = dbutils.widgets.get("1. Source Model URI")
29 |
30 | dbutils.widgets.text("2. Destination Model", "")
31 | dst_model_name = dbutils.widgets.get("2. Destination Model")
32 |
33 | print("src_model_uri: ", src_model_uri)
34 | print("dst_model_name: ", dst_model_name)
35 |
36 | # COMMAND ----------
37 |
38 | if "." in src_model_uri:
39 | mlflow.set_registry_uri("databricks-uc")
40 | else:
41 | mlflow.set_registry_uri("databricks")
42 | client = mlflow.MlflowClient()
43 | print("client._registry_uri:", client._registry_uri)
44 |
45 | # COMMAND ----------
46 |
47 | dst_vr = client.copy_model_version(src_model_uri, dst_model_name)
48 |
49 | # COMMAND ----------
50 |
51 | dst_vr
52 |
--------------------------------------------------------------------------------
/databricks_notebooks/copy/_README.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## MLflow Export Import - Copy Notebooks
3 | # MAGIC
 4 | # MAGIC Copy an MLflow object to either the current or another workspace and/or model registry.
5 | # MAGIC
6 | # MAGIC ##### Core Notebooks
 7 | # MAGIC * [Copy_Model_Version]($Copy_Model_Version) - Copy an MLflow model version (deep copy).
8 | # MAGIC * [Test_Copy_Model_Version]($tests/Test_Copy_Model_Version)
9 | # MAGIC * [Copy_Run]($Copy_Run) - Copy an MLflow run.
10 | # MAGIC * [Common]($Common) - Common utilities.
11 | # MAGIC
12 | # MAGIC ##### MLflow Copy Model Version
13 | # MAGIC * [MLflow_Copy_Model_Version]($MLflow_Copy_Model_Version) - notebook
14 | # MAGIC * Uses the standard [MlflowClient.copy_model_version](https://mlflow.org/docs/latest/python_api/mlflow.client.html#mlflow.client.MlflowClient.copy_model_version) method.
15 | # MAGIC * Does not use mlflow-export-import.
16 | # MAGIC
17 | # MAGIC ##### Create Model Version
18 | # MAGIC * [Create_Model_Version]($Create_Model_Version) - notebook
19 | # MAGIC   * Creates a model version from an MLflow model "source" URI in the current or in another model registry (see the example cell at the end of this notebook).
20 | # MAGIC * Does not use mlflow-export-import.
21 | # MAGIC * Supported sources:
22 | # MAGIC * MLflow Registry: `models:/my_catalog.my_schema.my_model/1`
23 | # MAGIC * MLflow Run: `runs:/319a3eec9fb444d4a70996091b31a940/model`
24 | # MAGIC * Volume: `/Volumes/andre_catalog/volumes/mlflow_export_import/single/sklearn_wine_best/run/artifacts/model`
25 | # MAGIC * DBFS: `/dbfs/home/andre@databricks.com/mlflow_export_import/single/sklearn_wine_best/model`
26 | # MAGIC * Local: `/root/sample_model`
27 | # MAGIC   * Cloud: `s3://my-bucket/mlflow-models/sklearn-wine_best`
28 | # MAGIC
29 | # MAGIC ##### Last updated: _2024-07-07_
30 |
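31 | # COMMAND ----------
32 | 
33 | # MAGIC %md A minimal sketch of creating a model version from a "source" URI with the standard MLflow client. It assumes the destination registered model already exists; the model name and run ID below are placeholders, not values from this repo.
34 | 
35 | # COMMAND ----------
36 | 
37 | import mlflow
38 | 
39 | client = mlflow.MlflowClient()
40 | # "source" may be any of the supported URI schemes listed above.
41 | vr = client.create_model_version(
42 |     name = "my_model",                                        # hypothetical destination model
43 |     source = "runs:/319a3eec9fb444d4a70996091b31a940/model",  # hypothetical source model URI
44 |     run_id = "319a3eec9fb444d4a70996091b31a940"               # optional: links lineage to the source run
45 | )
46 | print("Created version:", vr.version)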
--------------------------------------------------------------------------------
/databricks_notebooks/copy/experimental/Common.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # Common - copy model version
3 |
4 | # COMMAND ----------
5 |
6 | # MAGIC %pip install /dbfs/home/andre.mesarovic@databricks.com/lib/wheels/mlflow_export_import-1.2.0-py3-none-any.whl
7 |
8 | # MAGIC
9 |
10 | # COMMAND ----------
11 |
12 | import mlflow
13 | print("mlflow.version:", mlflow.__version__)
14 |
15 | # COMMAND ----------
16 |
17 | from mlflow_export_import.common.dump_utils import obj_to_dict, dict_to_json, dump_obj_as_json
18 |
19 | # COMMAND ----------
20 |
21 | def assert_widget(value, name):
22 | if len(value.rstrip())==0:
23 | raise Exception(f"ERROR: '{name}' widget is required")
24 |
25 | # COMMAND ----------
26 |
27 | from mlflow.utils import databricks_utils
28 | mlflow_client = mlflow.MlflowClient()
29 |
30 | _host_name = databricks_utils.get_browser_hostname()
31 | print("host_name:", _host_name)
32 |
33 | def display_registered_model_version_uri(model_name, version):
34 | if _host_name:
35 | if "." in model_name: # is unity catalog model
36 | model_name = model_name.replace(".","/")
37 | uri = f"https://{_host_name}/explore/data/models/{model_name}/version/{version}"
38 | else:
39 | uri = f"https://{_host_name}/#mlflow/models/{model_name}/versions/{version}"
40 | displayHTML("""Registered Model Version URI: {}""".format(uri,uri))
41 |
42 | def display_run_uri(run_id):
43 | if _host_name:
44 | run = mlflow_client.get_run(run_id)
45 | uri = f"https://{_host_name}/#mlflow/experiments/{run.info.experiment_id}/runs/{run_id}"
46 | displayHTML("""Run URI: {}""".format(uri,uri))
47 |
48 | # COMMAND ----------
49 |
50 | def copy_model_version(
51 | src_model_name,
52 | src_model_version,
53 | dst_model_name,
54 | dst_experiment_name,
55 | src_run_workspace = "databricks",
56 | copy_lineage_tags = False,
57 | verbose = False
58 | ):
59 | from mlflow_export_import.common.model_utils import is_unity_catalog_model
60 | from mlflow_export_import.copy.copy_model_version import copy
61 |
62 | def mk_registry_uri(model_name):
63 | return "databricks-uc" if is_unity_catalog_model(model_name) else "databricks"
64 |
65 | if src_run_workspace in [ "databricks", "databricks-uc"]:
66 | src_registry_uri = mk_registry_uri(src_model_name)
67 | elif is_unity_catalog_model(src_model_name):
68 | src_registry_uri = "databricks-uc"
69 | else:
70 | src_registry_uri = src_run_workspace
71 |
72 | dst_registry_uri = mk_registry_uri(dst_model_name)
73 |
74 | return copy(
75 | src_model_name,
76 | src_model_version,
77 | dst_model_name,
78 | dst_experiment_name,
79 | src_tracking_uri = src_run_workspace,
80 | dst_tracking_uri = "databricks",
81 | src_registry_uri = src_registry_uri,
82 | dst_registry_uri = dst_registry_uri,
83 | copy_lineage_tags = copy_lineage_tags,
84 | verbose = verbose
85 | )
86 |
--------------------------------------------------------------------------------
/databricks_notebooks/scripts/Common.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
 2 | # Create a standard .databrickscfg in a custom location and point $DATABRICKS_CONFIG_FILE at it
3 |
4 | def create_databrick_config_file(secrets_scope, secrets_key, databricks_config_file=None):
5 | """ Create a .databrickscfg file so you can work in shell mode with Python scripts. """
6 | context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
7 | token = dbutils.secrets.get(scope=secrets_scope, key=secrets_key)
8 | host_name = context.tags().get("browserHostName").get()
9 | user = context.tags().get("user").get()
10 |
11 | import os
12 | if not databricks_config_file:
13 | databricks_config_file = os.path.join("/tmp", f".databrickscfg-{user}")
14 | print(f"DATABRICKS_CONFIG_FILE: {databricks_config_file}")
15 | os.environ["DATABRICKS_CONFIG_FILE"] = databricks_config_file
16 | dbutils.fs.put(f"file:///{databricks_config_file}",f"[DEFAULT]\nhost=https://{host_name}\ntoken = "+token,overwrite=True)
17 |
18 | # COMMAND ----------
19 |
20 | def assert_widget(value, name):
21 | if len(value.rstrip())==0:
22 | raise Exception(f"ERROR: '{name}' widget is required")
23 |
24 | # COMMAND ----------
25 |
26 | assert_widget(secrets_scope, "1. Secrets scope")
27 | assert_widget(secrets_token_key, "2. Secrets PAT key")
28 |
29 | # COMMAND ----------
30 |
31 | create_databrick_config_file(secrets_scope, secrets_token_key)
32 |
33 | # COMMAND ----------
34 |
35 | # MAGIC %pip install git+https:///github.com/mlflow/mlflow-export-import/#egg=mlflow-export-import
36 |
37 | # COMMAND ----------
38 |
39 | # MAGIC %sh mlflow --version
40 |
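41 | # COMMAND ----------
42 | 
43 | # Example (illustrative): the generated .databrickscfg contains
44 | #   [DEFAULT]
45 | #   host = https://<your-workspace-host>
46 | #   token = <your-PAT>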
--------------------------------------------------------------------------------
/databricks_notebooks/scripts/Console_Scripts.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Console Scripts - Single
3 | # MAGIC
 4 | # MAGIC * Use this notebook as a starting-point template for executing console scripts; an example invocation appears in the last cell.
5 | # MAGIC * See [github.com/mlflow/mlflow-export-import/blob/master/README_single.md](https://github.com/mlflow/mlflow-export-import/blob/master/README_single.md).
6 | # MAGIC * You'll first need to specify a [Databricks secret](https://docs.databricks.com/security/secrets/secrets.html) to your [PAT](https://docs.databricks.com/administration-guide/access-control/tokens.html) (personal access token) to execute CLI commands.
7 |
8 | # COMMAND ----------
9 |
10 | # MAGIC %md ### Setup
11 |
12 | # COMMAND ----------
13 |
14 | dbutils.widgets.text("1. Secrets scope", "")
15 | secrets_scope = dbutils.widgets.get("1. Secrets scope")
16 | dbutils.widgets.text("2. Secrets PAT key", "")
17 | secrets_token_key = dbutils.widgets.get("2. Secrets PAT key")
18 | secrets_scope, secrets_token_key
19 |
20 | # COMMAND ----------
21 |
22 | # MAGIC %run ./Common
23 |
24 | # COMMAND ----------
25 |
26 | # MAGIC %sh
27 | # MAGIC echo "DATABRICKS_CONFIG_FILE: $DATABRICKS_CONFIG_FILE"
28 | # MAGIC cat $DATABRICKS_CONFIG_FILE
29 |
30 | # COMMAND ----------
31 |
32 | # MAGIC %md ### Single notebooks
33 |
34 | # COMMAND ----------
35 |
36 | # MAGIC %md #### Experiment
37 |
38 | # COMMAND ----------
39 |
40 | # MAGIC %sh export-experiment --help
41 |
42 | # COMMAND ----------
43 |
44 | # MAGIC %sh import-experiment --help
45 |
46 | # COMMAND ----------
47 |
48 | # MAGIC %md #### export-model
49 |
50 | # COMMAND ----------
51 |
52 | # MAGIC %sh export-model --help
53 |
54 | # COMMAND ----------
55 |
56 | # MAGIC %sh import-model --help
57 |
58 | # COMMAND ----------
59 |
60 | # MAGIC %md #### export-run
61 |
62 | # COMMAND ----------
63 |
64 | # MAGIC %sh export-run --help
65 |
66 | # COMMAND ----------
67 |
68 | # MAGIC %sh import-run --help
69 |
70 | # COMMAND ----------
71 |
72 | # MAGIC %md ### Bulk notebooks
73 |
74 | # COMMAND ----------
75 |
76 | # MAGIC %md #### Experiments
77 |
78 | # COMMAND ----------
79 |
80 | # MAGIC %sh export-experiments --help
81 |
82 | # COMMAND ----------
83 |
84 | # MAGIC %sh import-experiments --help
85 |
86 | # COMMAND ----------
87 |
88 | # MAGIC %md #### Models
89 |
90 | # COMMAND ----------
91 |
92 | # MAGIC %sh export-models --help
93 |
94 | # COMMAND ----------
95 |
96 | # MAGIC %sh import-models --help
97 |
98 | # COMMAND ----------
99 |
100 | # MAGIC %md #### All
101 |
102 | # COMMAND ----------
103 |
104 | # MAGIC %sh export-all --help
105 |
106 | # COMMAND ----------
107 |
108 | # MAGIC %sh import-all --help
109 |
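110 | # COMMAND ----------
111 | 
112 | # MAGIC %md For example, a hypothetical single-experiment export (replace the experiment name and output directory with your own values):
113 | 
114 | # COMMAND ----------
115 | 
116 | # MAGIC %sh export-experiment --experiment My_Experiment --output-dir /dbfs/tmp/export/my_experiment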
--------------------------------------------------------------------------------
/databricks_notebooks/scripts/_README.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## MLflow Export Import - Console Script Notebooks
3 | # MAGIC
4 | # MAGIC Console Script notebooks - command-line scripts that use the shell (%sh)
5 | # MAGIC
6 | # MAGIC * [Console_Scripts]($Console_Scripts)
7 | # MAGIC * [Common]($Common)
8 | # MAGIC
9 | # MAGIC You'll need to specify a [Databricks secret](https://docs.databricks.com/security/secrets/secrets.html) to your [PAT](https://docs.databricks.com/administration-guide/access-control/tokens.html) (personal access token) in the notebook to execute CLI commands.
10 | # MAGIC
11 | # MAGIC Last updated: 2023-03-26
12 |
--------------------------------------------------------------------------------
/databricks_notebooks/single/Export_Model_Version.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ### Export Model Version
3 | # MAGIC
4 | # MAGIC ##### Overview
5 | # MAGIC * Export a model version and its run.
6 | # MAGIC
7 | # MAGIC ##### Widgets
 8 | # MAGIC * `1. Model name` - Registered model name.
 9 | # MAGIC * `2. Model version` - Model version.
10 | # MAGIC * `3. Output directory` - Output directory.
11 | # MAGIC * `4. Export version MLflow model` - Export a model version's "cached" registry MLflow model (clone of run's MLflow model).
12 | # MAGIC * `5. Notebook formats` - Notebook formats to export.
13 |
14 | # COMMAND ----------
15 |
16 | # MAGIC %md ### Include setup
17 |
18 | # COMMAND ----------
19 |
20 | # MAGIC %run ./Common
21 |
22 | # COMMAND ----------
23 |
24 | # MAGIC %md ### Widget setup
25 |
26 | # COMMAND ----------
27 |
28 | dbutils.widgets.text("1. Model name", "")
29 | model_name = dbutils.widgets.get("1. Model name")
30 |
31 | dbutils.widgets.text("2. Model version", "")
32 | version = dbutils.widgets.get("2. Model version")
33 |
34 | dbutils.widgets.text("3. Output directory", "")
35 | output_dir = dbutils.widgets.get("3. Output directory")
36 |
37 | dbutils.widgets.dropdown("4. Export version MLflow model","no",["yes","no"])
38 | export_version_model = dbutils.widgets.get("4. Export version MLflow model") == "yes"
39 |
40 | notebook_formats = get_notebook_formats(5) # widget "5. Notebook formats"
41 |
42 | print("model_name:", model_name)
43 | print("version:", version)
44 | print("output_dir:", output_dir)
45 | print("export_version_model:", export_version_model)
46 | print("notebook_formats:", notebook_formats)
47 |
48 | # COMMAND ----------
49 |
50 | assert_widget(model_name, "1. Model name")
51 | assert_widget(model_name, "2. Model version")
52 | assert_widget(output_dir, "3. Output directory")
53 |
54 | # COMMAND ----------
55 |
56 | # MAGIC %md ### Turn on Unity Catalog mode if necessary
57 |
58 | # COMMAND ----------
59 |
60 | activate_unity_catalog(model_name)
61 |
62 | # COMMAND ----------
63 |
64 | # MAGIC %md ### Display model UI link
65 |
66 | # COMMAND ----------
67 |
68 | display_registered_model_uri(model_name)
69 |
70 | # COMMAND ----------
71 |
72 | # MAGIC %md ### Export the model version
73 |
74 | # COMMAND ----------
75 |
76 | from mlflow_export_import.model_version.export_model_version import export_model_version
77 |
78 | export_model_version(
79 | model_name = model_name,
80 | version = version,
81 | output_dir = output_dir,
82 | export_version_model = export_version_model,
83 | notebook_formats = notebook_formats
84 | )
85 |
86 | # COMMAND ----------
87 |
88 | # MAGIC %md ### Display exported files
89 |
90 | # COMMAND ----------
91 |
92 | import os
93 | output_dir = mk_local_path(output_dir)
94 | os.environ['OUTPUT_DIR'] = output_dir
95 |
96 | # COMMAND ----------
97 |
98 | # MAGIC %sh echo $OUTPUT_DIR
99 |
100 | # COMMAND ----------
101 |
102 | # MAGIC %sh ls -l $OUTPUT_DIR
103 |
104 | # COMMAND ----------
105 |
106 | # MAGIC %sh
107 | # MAGIC cat $OUTPUT_DIR/model_version.json
108 |
--------------------------------------------------------------------------------
/databricks_notebooks/single/Export_Run.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ### Export Run
3 | # MAGIC
4 | # MAGIC ##### Overview
5 | # MAGIC * Exports a run and its artifacts to a folder.
 6 | # MAGIC * Output file `run.json` contains the run metadata needed to rehydrate the run.
7 | # MAGIC * Notebooks are also exported in several formats.
8 | # MAGIC
9 | # MAGIC #### Output folder
10 | # MAGIC ```
11 | # MAGIC +-artifacts/
12 | # MAGIC | +-sklearn-model/
13 | # MAGIC | | +-model.pkl
14 | # MAGIC | | +-conda.yaml
15 | # MAGIC | |
16 | # MAGIC +-run.json
17 | # MAGIC ```
18 | # MAGIC
19 | # MAGIC ##### Widgets
20 | # MAGIC * `1. Run ID`
21 | # MAGIC * `2. Output base directory` - Base output directory of the exported run.
22 | # MAGIC * `3. Notebook formats` - Standard Databricks notebook formats such as SOURCE, HTML, JUPYTER, DBC.
23 |
24 | # COMMAND ----------
25 |
26 | # MAGIC %run ./Common
27 |
28 | # COMMAND ----------
29 |
30 | # MAGIC %md ### Setup
31 |
32 | # COMMAND ----------
33 |
34 | dbutils.widgets.text("1. Run ID", "")
35 | run_id = dbutils.widgets.get("1. Run ID")
36 |
37 | dbutils.widgets.text("2. Output base directory", "")
38 | output_dir = dbutils.widgets.get("2. Output base directory")
39 | output_dir += f"/{run_id}"
40 |
41 | notebook_formats = get_notebook_formats(3)
42 |
43 | print("run_id:", run_id)
44 | print("output_dir:", output_dir)
45 | print("notebook_formats:", notebook_formats)
46 |
47 | # COMMAND ----------
48 |
49 | assert_widget(run_id, "1. Run ID")
50 | assert_widget(output_dir, "2. Output base directory")
51 |
52 | # COMMAND ----------
53 |
54 | # MAGIC %md ### Display run UI link
55 |
56 | # COMMAND ----------
57 |
58 | display_run_uri(run_id)
59 |
60 | # COMMAND ----------
61 |
62 | # MAGIC %md ### Export the run
63 |
64 | # COMMAND ----------
65 |
66 | from mlflow_export_import.run.export_run import export_run
67 |
68 | export_run(
69 | run_id = run_id,
70 | output_dir = output_dir,
71 | notebook_formats = notebook_formats
72 | )
73 |
74 | # COMMAND ----------
75 |
76 | # MAGIC %md ### Display exported run files
77 |
78 | # COMMAND ----------
79 |
80 | import os
81 | output_dir = output_dir.replace("dbfs:","/dbfs")
82 | os.environ['OUTPUT_DIR'] = output_dir
83 |
84 | # COMMAND ----------
85 |
86 | # MAGIC %sh echo $OUTPUT_DIR
87 |
88 | # COMMAND ----------
89 |
90 | # MAGIC %sh ls -l $OUTPUT_DIR
91 |
92 | # COMMAND ----------
93 |
94 | # MAGIC %sh cat $OUTPUT_DIR/run.json
95 |
96 | # COMMAND ----------
97 |
98 | # MAGIC %sh ls -lR $OUTPUT_DIR/artifacts
99 |
--------------------------------------------------------------------------------
/databricks_notebooks/single/Import_Experiment.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ### Import Experiment
3 | # MAGIC
4 | # MAGIC **Widgets**
 5 | # MAGIC * `1. Destination experiment name` - will create the experiment if it doesn't exist.
 6 | # MAGIC * `2. Input directory` - Input directory containing an exported experiment.
7 | # MAGIC * `3. Import permissions`
8 | # MAGIC * `4. Import source tags`
9 |
10 | # COMMAND ----------
11 |
12 | # MAGIC %md ### Include setup
13 |
14 | # COMMAND ----------
15 |
16 | # MAGIC %run ./Common
17 |
18 | # COMMAND ----------
19 |
20 | # MAGIC %md ### Widget setup
21 |
22 | # COMMAND ----------
23 |
24 |
25 | dbutils.widgets.text("1. Destination experiment name", "")
26 | experiment_name = dbutils.widgets.get("1. Destination experiment name")
27 |
28 | dbutils.widgets.text("2. Input directory", "")
29 | input_dir = dbutils.widgets.get("2. Input directory")
30 |
31 | dbutils.widgets.dropdown("3. Import permissions","no",["yes","no"])
32 | import_permissions = dbutils.widgets.get("3. Import permissions") == "yes"
33 |
34 | dbutils.widgets.dropdown("4. Import source tags","no",["yes","no"])
35 | import_source_tags = dbutils.widgets.get("4. Import source tags") == "yes"
36 |
37 | print("input_dir:", input_dir)
38 | print("experiment_name:", experiment_name)
39 | print("import_permissions:", import_permissions)
40 | print("import_source_tags:", import_source_tags)
41 |
42 | # COMMAND ----------
43 |
44 | assert_widget(experiment_name, "1. Destination experiment name")
45 | assert_widget(input_dir, "2. Input directory")
46 |
47 | # COMMAND ----------
48 |
49 | # MAGIC %md ### Import experiment
50 |
51 | # COMMAND ----------
52 |
53 | from mlflow_export_import.experiment.import_experiment import import_experiment
54 |
55 | import_experiment(
56 | experiment_name = experiment_name,
57 | input_dir = input_dir,
58 | import_permissions = import_permissions,
59 | import_source_tags = import_source_tags
60 | )
61 |
62 | # COMMAND ----------
63 |
64 | # MAGIC %md ### Display experiment UI link
65 |
66 | # COMMAND ----------
67 |
68 | display_experiment_info(experiment_name)
69 |
--------------------------------------------------------------------------------
/databricks_notebooks/single/Import_Run.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Import Run
3 | # MAGIC
4 | # MAGIC Import run from the folder that was created by the [Export_Run]($Export_Run) notebook.
5 | # MAGIC
6 | # MAGIC #### Widgets
7 | # MAGIC * `1. Destination experiment name` - Import run into this experiment. Will create if it doesn't exist.
8 | # MAGIC * `2. Input directory` - Input directory containing an exported run.
9 | # MAGIC * `3. Import source tags`
10 |
11 | # COMMAND ----------
12 |
13 | # MAGIC %md ### Include setup
14 |
15 | # COMMAND ----------
16 |
17 | # MAGIC %run ./Common
18 |
19 | # COMMAND ----------
20 |
21 | # MAGIC %md ### Widget setup
22 |
23 | # COMMAND ----------
24 |
25 | dbutils.widgets.text("1. Destination experiment name", "")
26 | experiment_name = dbutils.widgets.get("1. Destination experiment name")
27 |
28 | dbutils.widgets.text("2. Input directory", "")
29 | input_dir = dbutils.widgets.get("2. Input directory")
30 |
31 | dbutils.widgets.dropdown("3. Import source tags","no",["yes","no"])
32 | import_source_tags = dbutils.widgets.get("3. Import source tags") == "yes"
33 |
34 | print("input_dir:", input_dir)
35 | print("experiment_name:", experiment_name)
36 | print("import_source_tags:", import_source_tags)
37 |
38 | # COMMAND ----------
39 |
40 | assert_widget(experiment_name, "1. Destination experiment name")
41 | assert_widget(input_dir, "2. Input base directory")
42 |
43 | # COMMAND ----------
44 |
45 | # MAGIC %md ### Import Run
46 |
47 | # COMMAND ----------
48 |
49 | from mlflow_export_import.run.import_run import import_run
50 |
51 | run, _ = import_run(
52 | experiment_name = experiment_name,
53 | input_dir = input_dir,
54 | import_source_tags = import_source_tags
55 | )
56 | print("Run ID:", run.info.run_id)
57 |
58 | # COMMAND ----------
59 |
60 | # MAGIC %md ### Display run UI link
61 |
62 | # COMMAND ----------
63 |
64 | display_run_uri(run.info.run_id)
65 |
66 | # COMMAND ----------
67 |
68 | # MAGIC %md ### Check imported source tags
69 |
70 | # COMMAND ----------
71 |
72 | if import_source_tags:
73 | import pandas as pd
74 | run = mlflow_client.get_run(run.info.run_id)
75 | data = [ (k, v) for k,v in run.data.tags.items() if k.startswith("mlflow_exim") ]
76 | df = pd.DataFrame(data, columns = ["Key","Value"])
77 | display(df)
78 |
--------------------------------------------------------------------------------
/databricks_notebooks/single/_README.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## MLflow Export Import - Single Notebooks
3 | # MAGIC
4 | # MAGIC Export and import one MLflow object.
5 | # MAGIC
6 | # MAGIC **Notebooks**
7 | # MAGIC * Run
8 | # MAGIC * [Export_Run]($./Export_Run) - export a run in its entirety (run.info, run.data and artifacts).
9 | # MAGIC * [Import_Run]($./Import_Run)
10 | # MAGIC * Experiment
11 | # MAGIC * [Export_Experiment]($./Export_Experiment) - export an experiment and its runs (run.info, run.data and artifacts).
12 | # MAGIC * [Import_Experiment]($./Import_Experiment)
13 | # MAGIC * Registered Model
14 | # MAGIC * [Export_Registered_Model]($./Export_Registered_Model) - export a registered model, its versions and their runs.
15 | # MAGIC * [Import_Registered_Model]($./Import_Registered_Model)
16 | # MAGIC * Model Version
17 | # MAGIC * [Export_Model_Version]($./Export_Model_Version) - export a model version and its run.
18 | # MAGIC * [Import_Model_Version]($./Import_Model_Version) - import a model version.
19 | # MAGIC * [Common]($./Common) - helper utility methods.
20 | # MAGIC
21 | # MAGIC **More information**
22 | # MAGIC
23 | # MAGIC * [Console_Scripts]($../scripts/Console_Scripts) to execute scripts from the shell (%sh).
24 | # MAGIC * [github.com/mlflow/mlflow-export-import/blob/master/README_single.md](https://github.com/mlflow/mlflow-export-import/blob/master/README_single.md).
25 |
26 | # COMMAND ----------
27 |
28 | # MAGIC %md ##### Last updated: _2024-07-21_
29 |
--------------------------------------------------------------------------------
/databricks_notebooks/tools/Common.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %pip install -U mlflow-skinny
 3 | # MAGIC %pip install -U git+https://github.com/mlflow/mlflow-export-import/#egg=mlflow-export-import
4 | # MAGIC dbutils.library.restartPython()
5 |
6 | # COMMAND ----------
7 |
8 | import json
9 | def dump_json(dct,title=""):
10 | print(json.dumps(dct, indent=2))
11 | if title:
12 | print(f"{title}:")
13 |
14 | # COMMAND ----------
15 |
16 | def is_unity_catalog_model(model_name):
17 | return "." in model_name
18 |
19 | def split_model_uri(model_uri):
20 | toks = model_uri.split("/")
21 | return toks[1], toks[2]
22 |
23 | # COMMAND ----------
24 |
25 | import mlflow
26 |
27 | def set_registry_uri(model_name):
28 | if model_name.startswith("models:/"):
29 | model_name = split_model_uri(model_name)[0]
30 | if is_unity_catalog_model(model_name):
31 | mlflow.set_registry_uri("databricks-uc")
32 | else:
33 | mlflow.set_registry_uri("databricks")
34 | print("mlflow.registry_uri:", mlflow.get_registry_uri())
35 |
36 | # COMMAND ----------
37 |
38 | def to_json_signature(signature):
39 | def _normalize(lst):
40 | import json
41 | return json.loads(lst) if lst else lst
42 | return { k:_normalize(v) for k,v in signature.items()}
43 |
44 | # COMMAND ----------
45 |
46 | def assert_widget(value, name):
47 | if len(value.rstrip())==0:
48 | raise RuntimeError(f"ERROR: '{name}' widget is required")
49 |
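50 | # COMMAND ----------
51 | 
52 | # Example (illustrative): set_registry_uri("models:/my_catalog.my_schema.my_model/1")
53 | # selects the "databricks-uc" registry because the model name contains dots;
54 | # set_registry_uri("models:/Sklearn_Wine_best/1") selects "databricks".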
--------------------------------------------------------------------------------
/databricks_notebooks/tools/Get_Model_Signature.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ### Get Model Signature
3 | # MAGIC
4 | # MAGIC Get the signature of an MLflow model.
5 | # MAGIC
6 | # MAGIC ##### MLflow models can live in a variety of places. Sample MLflow model URIs:
7 | # MAGIC * `models:/andre_catalog.ml_models2.sklearn_wine_best/15`
8 | # MAGIC * `models:/Sklearn_Wine_best/1`
9 | # MAGIC * `runs:/030075d9727945259c7d283e47fee4a9/model`
10 | # MAGIC * `/Volumes/andre_catalog/volumes/mlflow_export_import/single/sklearn_wine_best/run/artifacts/model`
11 | # MAGIC * `/dbfs/home/first.last@databricks.com/mlflow_export_import/single/sklearn_wine_best/model`
12 | # MAGIC * `s3://my-bucket/mlflow-models/sklearn_wine_best`
13 | # MAGIC
14 | # MAGIC ##### The signature is located in the MLmodel artifact of the MLflow model.
15 | # MAGIC * For a run, you can view the signature in the "Artifacts" tab of the run UI page.
16 | # MAGIC * For a model version, the signature can only be viewed (in the UI) via its source run.
17 | # MAGIC * To get the actual signature of the deployed model, use the API method `mlflow.models.get_model_info()` (see the final cell of this notebook).
18 | # MAGIC
19 | # MAGIC ##### Documentation:
20 | # MAGIC * [mlflow.models.ModelSignature](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.ModelSignature)
21 | # MAGIC * [mlflow.models.get_model_info](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.get_model_info)
22 | # MAGIC
23 | # MAGIC ##### Github:
24 | # MAGIC * https://github.com/mlflow/mlflow-export-import/blob/master/mlflow_export_import/tools/signature_utils.py
25 |
26 | # COMMAND ----------
27 |
28 | # MAGIC %md #### Setup
29 |
30 | # COMMAND ----------
31 |
32 | # MAGIC %run ./Common
33 |
34 | # COMMAND ----------
35 |
36 | dbutils.widgets.text("Model URI", "")
37 | model_uri = dbutils.widgets.get("Model URI")
38 | print("model_uri:", model_uri)
39 |
40 | # COMMAND ----------
41 |
42 | assert_widget(model_uri, "Model URI")
43 | set_registry_uri(model_uri)
44 |
45 | # COMMAND ----------
46 |
47 | # MAGIC %md #### Get `model_info.signature`
48 |
49 | # COMMAND ----------
50 |
51 | from mlflow_export_import.tools.signature_utils import get_model_signature
52 | signature = get_model_signature(model_uri)
53 | signature
54 |
55 | # COMMAND ----------
56 |
57 | if signature:
58 | dump_json(signature)
59 | else:
60 | print(f"Model '{model_uri}' does not have a signature")
61 | dbutils.notebook.exit(None)
62 |
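63 | # COMMAND ----------
64 | 
65 | # MAGIC %md As a cross-check, a sketch using vanilla MLflow (not mlflow-export-import) to read the same signature via `mlflow.models.get_model_info()`:
66 | 
67 | # COMMAND ----------
68 | 
69 | from mlflow.models import get_model_info
70 | 
71 | model_info = get_model_info(model_uri)   # model_uri comes from the widget above
72 | print(model_info.signature)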
--------------------------------------------------------------------------------
/databricks_notebooks/tools/List_Model_Versions_Without_Signature.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ### List Model Versions Without Signature
3 | # MAGIC
 4 | # MAGIC List Workspace Model Registry model versions that don't have a signature.
5 | # MAGIC
6 | # MAGIC #### Widgets
7 | # MAGIC * `1. Filter` - Filter is for [search_registered_models()](https://mlflow.org/docs/latest/python_api/mlflow.client.html#mlflow.client.MlflowClient.search_registered_models) such as `name like 'Sklearn_Wine%'`
8 | # MAGIC * `2. Output file` - save output as CSV file
9 |
10 | # COMMAND ----------
11 |
12 | # MAGIC %run ./Common
13 |
14 | # COMMAND ----------
15 |
16 | import mlflow
17 | mlflow_client = mlflow.MlflowClient()
18 | mlflow.set_registry_uri("databricks")
19 | print("mlflow.version:", mlflow.__version__)
20 |
21 | # COMMAND ----------
22 |
23 | dbutils.widgets.text("1. Filter","name like 'Sklearn_Wine%'")
24 | filter = dbutils.widgets.get("1. Filter")
25 | filter = filter or None
26 |
27 | dbutils.widgets.text("2. Output file","")
28 | output_file = dbutils.widgets.get("2. Output file")
29 |
30 | print("filter:", filter)
31 | print("output_file:", output_file)
32 |
33 | # COMMAND ----------
34 |
35 | from mlflow_export_import.tools.list_model_versions_without_signatures import as_pandas_df
36 |
37 | df = as_pandas_df(filter)
38 | display(df)
39 |
40 | # COMMAND ----------
41 |
42 | if output_file:
43 | with open(output_file, "w", encoding="utf-8") as f:
44 | df.to_csv(f, index=False)
45 |
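46 | # COMMAND ----------
47 |
48 | # MAGIC %md ##### Optional: per-model counts of the versions listed above (a sketch; assumes the `model` column produced by `as_pandas_df`).
49 |
50 | # COMMAND ----------
51 |
52 | if not df.empty:
53 |     # count how many signature-less versions each registered model has
54 |     display(df.groupby("model", as_index=False).size())
55 |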
--------------------------------------------------------------------------------
/databricks_notebooks/tools/_README.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## MLflow Export Import - Tools
3 | # MAGIC
4 | # MAGIC ##### Model Signature Tools
5 | # MAGIC * [Get_Model_Signature]($Get_Model_Signature) - Get the model signature for a model URI.
6 | # MAGIC * [Set_Model_Signature]($Set_Model_Signature) - Set the model signature for a run's MLflow model.
7 | # MAGIC * [List_Model_Versions_Without_Signature]($List_Model_Versions_Without_Signature) - List Workspace Model Registry model versions that don't have a signature.
8 | # MAGIC * [Common]($Common)
9 | # MAGIC
10 | # MAGIC ##### Documentation:
11 | # MAGIC * [mlflow.models.ModelSignature](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.ModelSignature)
12 | # MAGIC * [mlflow.models.get_model_info](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.get_model_info)
13 | # MAGIC
14 | # MAGIC ##### Github
15 | # MAGIC * https://github.com/mlflow/mlflow-export-import/blob/master/mlflow_export_import/tools/signature_utils.py
16 | # MAGIC
17 | # MAGIC ##### Last updated: _2024-07-03_
18 |
--------------------------------------------------------------------------------
/diagrams/Copy_Model_Version_NonUC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/diagrams/Copy_Model_Version_NonUC.png
--------------------------------------------------------------------------------
/diagrams/Copy_Model_Version_UC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/diagrams/Copy_Model_Version_UC.png
--------------------------------------------------------------------------------
/diagrams/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/diagrams/architecture.png
--------------------------------------------------------------------------------
/mlflow_export_import/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import mlflow
4 | from mlflow_export_import import version
5 |
6 | __version__ = version.__version__
7 |
8 | # monkey patch mlflow.tracking.MlflowClient to return tracking URI in __repr__
9 |
10 | def add_repr_to_MlflowClient():
11 | def custom_repr(self):
12 | try:
13 | msg = { "tracking_uri": self.tracking_uri, "registry_uri": self._registry_uri }
14 | except AttributeError as e:
15 | msg = { "error": str(e) }
16 | return json.dumps(msg)
17 | mlflow.client.MlflowClient.__repr__ = custom_repr
18 |
19 |
20 | add_repr_to_MlflowClient()
21 |
--------------------------------------------------------------------------------
/mlflow_export_import/bulk/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/bulk/__init__.py
--------------------------------------------------------------------------------
/mlflow_export_import/bulk/bulk_utils.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.common.iterators import SearchRegisteredModelsIterator
2 | from mlflow_export_import.common.iterators import SearchExperimentsIterator
3 |
4 |
5 | def _get_list(names, func_list):
6 | """
7 | Returns a list of entities specified by the 'names' filter.
8 |     :param names: Filter for the desired entities. Can be: "all", a comma-delimited string of names, a list of names, or a string with a trailing wildcard "*".
9 | :param func_list: Function that lists the entities primary keys - for experiments it is experiment_id, for registered models it is model name.
10 | :return: List of entities.
11 | """
12 | if isinstance(names, str):
13 | if names == "all":
14 | return func_list()
15 | elif names.endswith("*"):
16 | prefix = names[:-1]
17 | return [ x for x in func_list() if x.startswith(prefix) ]
18 | else:
19 | return names.split(",")
20 | else:
21 | return names
22 |
23 |
24 | def get_experiment_ids(mlflow_client, experiment_ids):
25 | def list_entities():
26 | return [ exp.experiment_id for exp in SearchExperimentsIterator(mlflow_client) ]
27 | return _get_list(experiment_ids, list_entities)
28 |
29 |
30 | def get_model_names(mlflow_client, model_names):
31 | def list_entities():
32 | return [ model.name for model in SearchRegisteredModelsIterator(mlflow_client) ]
33 | return _get_list(model_names, list_entities)
34 |
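35 | # Example (a sketch; the model names are hypothetical):
36 | #   get_model_names(client, "all")              -> all registered model names
37 | #   get_model_names(client, "Sklearn_*")        -> names starting with 'Sklearn_'
38 | #   get_model_names(client, "model_a,model_b")  -> ["model_a", "model_b"]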
--------------------------------------------------------------------------------
/mlflow_export_import/bulk/experiments_merge_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Merge existing experiments.json generated by export_models with
3 | new export_experiments when both are called by export_all
4 | """
5 |
6 | def merge_info(info1, info2):
7 | info = info1.copy()
8 |
9 | # export_file_version 2 change
10 | if "status" in info:
11 | status = info["status"]
12 | status2 = info2["status"]
13 | else:
14 | status, status2 = info, info2
15 |
16 | status["duration"] += status2["duration"]
17 | status["duration"] = round(status["duration"],1)
18 | status["experiments"] += status2["experiments"]
19 | status["total_runs"] += status2["total_runs"]
20 | status["ok_runs"] += status2["ok_runs"]
21 | status["failed_runs"] += status2["failed_runs"]
22 |
23 | return info
24 |
25 |
26 | def merge_mlflow(mlflow1, mlflow2):
27 | return { "experiments": mlflow1["experiments"] + mlflow2["experiments"] }
28 |
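29 | # Example (a sketch with hypothetical version-2 'status' dicts):
30 | #   merge_info({"status": {"duration": 1.0, "experiments": 1, "total_runs": 2, "ok_runs": 2, "failed_runs": 0}},
31 | #              {"status": {"duration": 2.5, "experiments": 1, "total_runs": 3, "ok_runs": 2, "failed_runs": 1}})
32 | #   yields a status of {"duration": 3.5, "experiments": 2, "total_runs": 5, "ok_runs": 4, "failed_runs": 1}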
--------------------------------------------------------------------------------
/mlflow_export_import/bulk/model_utils.py:
--------------------------------------------------------------------------------
1 | import mlflow
2 |
3 | from mlflow_export_import.common import utils
4 | from mlflow_export_import.bulk import bulk_utils
5 | from mlflow_export_import.common.iterators import SearchModelVersionsIterator
6 |
7 | _logger = utils.getLogger(__name__)
8 |
9 |
10 | def get_experiments_runs_of_models(client, model_names, show_experiments=False, show_runs=False):
11 |     """ Get experiments and runs to export. """
12 | model_names = bulk_utils.get_model_names(client, model_names)
13 | _logger.info(f"{len(model_names)} Models:")
14 | for model_name in model_names:
15 | _logger.info(f" {model_name}")
16 | exps_and_runs = {}
17 | for model_name in model_names:
18 | versions = SearchModelVersionsIterator(client, filter=f"name='{model_name}'")
19 | for vr in versions:
20 | try:
21 | run = client.get_run(vr.run_id)
22 | exps_and_runs.setdefault(run.info.experiment_id,[]).append(run.info.run_id)
23 | except mlflow.exceptions.MlflowException as e:
24 | if e.error_code == "RESOURCE_DOES_NOT_EXIST":
25 | _logger.warning(f"run '{vr.run_id}' of version {vr.version} of model '{model_name}' does not exist")
26 | else:
27 | _logger.warning(f"run '{vr.run_id}' of version {vr.version} of model '{model_name}': Error.code: {e.error_code}. Error.message: {e.message}")
28 | if show_experiments:
29 | show_experiments_runs_of_models(exps_and_runs, show_runs)
30 | return exps_and_runs
31 |
32 |
33 | def show_experiments_runs_of_models(exps_and_runs, show_runs=False):
34 | _logger.info("Experiments for models:")
35 | for k,v in exps_and_runs.items():
36 | _logger.info(f" Experiment: {k}")
37 | for x in v:
38 | if show_runs: _logger.info(f" {x}")
39 |
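40 | # Example (a sketch): if the versions of model 'Sklearn_Wine' point to runs in experiment '123',
41 | # get_experiments_runs_of_models(client, ["Sklearn_Wine"]) returns {"123": ["<run_id_1>", "<run_id_2>"]}.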
--------------------------------------------------------------------------------
/mlflow_export_import/bulk/rename_utils.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.common import MlflowExportImportException
2 | from mlflow_export_import.common import utils
3 | from mlflow_export_import.common.filesystem import mk_local_path
4 |
5 | _logger = utils.getLogger(__name__)
6 |
7 |
8 | def read_rename_file(path):
9 | with open(mk_local_path(path), "r", encoding="utf-8") as f:
10 | dct = {}
11 | for line in f:
12 | toks = line.rstrip().split(",")
13 | dct[toks[0]] = toks[1]
14 | return dct
15 |
16 |
17 | def rename(name, replacements, object_name="object"):
18 | if not replacements:
19 | return name
20 | for k,v in replacements.items():
21 | if k != "" and name.startswith(k):
22 | new_name = name.replace(k,v)
23 | _logger.info(f"Renaming {object_name} '{name}' to '{new_name}'")
24 | return new_name
25 | return name
26 |
27 |
28 | def get_renames(filename_or_dict):
29 | if filename_or_dict is None:
30 | return None
31 | if isinstance(filename_or_dict,str):
32 | return read_rename_file(filename_or_dict)
33 | elif isinstance(filename_or_dict, dict):
34 | return filename_or_dict
35 | else:
36 | raise MlflowExportImportException(f"Unknown name replacement type '{type(filename_or_dict)}'", http_status_code=400)
37 |
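38 | # Example (a sketch; a rename file holds 'old_prefix,new_prefix' lines):
39 | #   rename("/Users/old@example.com/exp", {"/Users/old@example.com": "/Users/new@example.com"})
40 | #     -> "/Users/new@example.com/exp"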
--------------------------------------------------------------------------------
/mlflow_export_import/client/__init__.py:
--------------------------------------------------------------------------------
1 | USER_AGENT = "mlflow-export-import/1.0.0"
2 |
--------------------------------------------------------------------------------
/mlflow_export_import/client/client_utils.py:
--------------------------------------------------------------------------------
1 | import mlflow
2 | from . http_client import HttpClient, MlflowHttpClient, DatabricksHttpClient
3 |
4 |
5 | def create_http_client(mlflow_client, model_name=None):
6 | """
7 | Create MLflow HTTP client from MlflowClient.
8 | If model_name is a Unity Catalog (UC) model, the returned client is UC-enabled.
9 | """
10 | from mlflow_export_import.common import model_utils
11 | creds = mlflow_client._tracking_client.store.get_host_creds()
12 | if model_name and model_utils.is_unity_catalog_model(model_name):
13 | return HttpClient("api/2.0/mlflow/unity-catalog", creds.host, creds.token)
14 | else:
15 | return MlflowHttpClient(creds.host, creds.token)
16 |
17 |
18 | def create_dbx_client(mlflow_client):
19 | """
20 | Create Databricks HTTP client from MlflowClient.
21 | """
22 | creds = mlflow_client._tracking_client.store.get_host_creds()
23 | return DatabricksHttpClient(creds.host, creds.token)
24 |
25 |
26 | def create_mlflow_client():
27 | """
28 | Create MLflowClient. If MLFLOW_TRACKING_URI is UC, then set MlflowClient.tracking_uri to the non-UC variant.
29 | """
30 | registry_uri = mlflow.get_registry_uri()
31 | if registry_uri:
32 | tracking_uri = mlflow.get_tracking_uri()
33 | nonuc_tracking_uri = tracking_uri.replace("databricks-uc","databricks") # NOTE: legacy
34 | return mlflow.MlflowClient(nonuc_tracking_uri, registry_uri)
35 | else:
36 | return mlflow.MlflowClient()
37 |
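38 | # Example usage (a sketch; 'catalog.schema.model' is a hypothetical UC model name):
39 | #   client = mlflow.MlflowClient()
40 | #   uc_http_client = create_http_client(client, "catalog.schema.model")  # UC endpoint
41 | #   ws_http_client = create_http_client(client)                          # plain MLflow API endpoint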
--------------------------------------------------------------------------------
/mlflow_export_import/client/databricks_cli_utils.py:
--------------------------------------------------------------------------------
1 | from databricks_cli.configure import provider
2 | from mlflow.utils.databricks_utils import is_in_databricks_runtime
3 |
4 |
5 | def get_host_token_for_profile(profile=None):
6 | """
7 | :param profile: Databricks profile as in ~/.databrickscfg or None for the default profile
8 | :return: tuple of (host, token) from the ~/.databrickscfg profile
9 | """
10 | if profile:
11 | cfg = provider.get_config_for_profile(profile)
12 | if not cfg.host and is_in_databricks_runtime():
13 | cfg = provider.get_config()
14 | else:
15 | cfg = provider.get_config()
16 | return (cfg.host, cfg.token)
17 |
18 |
19 | if __name__ == "__main__":
20 | import sys
21 | profile = sys.argv[1] if len(sys.argv) > 1 else None
22 |     print("profile:", profile)
23 |     host_token = get_host_token_for_profile(profile)
24 |     print("host and token:", host_token)
25 |
--------------------------------------------------------------------------------
/mlflow_export_import/client/databricks_utils.py:
--------------------------------------------------------------------------------
1 | from databricks_cli.sdk.api_client import ApiClient
2 | from mlflow_export_import.client import mlflow_auth_utils
3 |
4 |
5 | def get_api_client():
6 | (host, token) = mlflow_auth_utils.get_mlflow_host_token()
7 | return ApiClient(None, None, host, token)
8 |
--------------------------------------------------------------------------------
/mlflow_export_import/client/mlflow_auth_utils.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.client import databricks_cli_utils
2 | from mlflow_export_import.common import MlflowExportImportException
3 | from mlflow_export_import.common import utils
4 |
5 | _logger = utils.getLogger(__name__)
6 |
7 |
8 | def get_mlflow_host():
9 | """ Returns the MLflow tracking URI (host) """
10 | return get_mlflow_host_token()[0]
11 |
12 |
13 | def get_mlflow_host_token():
14 | """
15 | Returns the MLflow tracking URI (host) and Databricks personal access token (PAT).
16 | For Databricks, expects the MLflow tracking URI in the form of 'databricks' or 'databricks://MY_PROFILE'.
17 | """
18 |
19 | import mlflow
20 | uri = mlflow.tracking.get_tracking_uri()
21 | if uri:
22 | if not uri.startswith("databricks"):
23 | if not uri.startswith("http"):
24 | _raise_exception(uri)
25 | else:
26 | return (uri, None)
27 | else:
28 | _raise_exception(uri)
29 |
30 | try:
31 | toks = uri.split("//")
32 |         profile = toks[1] if len(toks) > 1 else None
33 | return databricks_cli_utils.get_host_token_for_profile(profile)
34 | # databricks_cli.utils.InvalidConfigurationError
35 | # requests.exceptions.InvalidSchema(f"No connection adapters were found for {url!r}")
36 | except Exception as e:
37 | _logger.warning(e)
38 | return (None, None)
39 |
40 |
41 | def _raise_exception(uri):
42 | raise MlflowExportImportException(
43 |         f"MLflow tracking URI (MLFLOW_TRACKING_URI environment variable) must be a 'databricks' or HTTP(S) URI: '{uri}'.",
44 | http_status_code=401)
45 |
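46 | # Example (a sketch): with MLFLOW_TRACKING_URI='databricks://MY_PROFILE',
47 | # get_mlflow_host_token() returns the (host, token) of the 'MY_PROFILE' entry
48 | # in ~/.databrickscfg; with an 'http(s)://...' URI it returns (uri, None).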
--------------------------------------------------------------------------------
/mlflow_export_import/client/user_agent_header.py:
--------------------------------------------------------------------------------
1 | """
2 | Set HTTP User-Agent header as 'mlflow-export-import/1.X.X' for MLflow client.
3 | """
4 |
5 | from mlflow.tracking.request_header.abstract_request_header_provider import RequestHeaderProvider
6 | from mlflow_export_import.client import USER_AGENT
7 |
8 | class MlflowExportImportRequestHeaderProvider(RequestHeaderProvider):
9 | def in_context(self):
10 | return True
11 | def request_headers(self):
12 | return { "User-Agent": USER_AGENT }
13 |
--------------------------------------------------------------------------------
/mlflow_export_import/common/__init__.py:
--------------------------------------------------------------------------------
1 | from mlflow.exceptions import MlflowException
2 | import json
3 |
4 | class MlflowExportImportException(Exception):
5 | DEFAULT_HTTP_STATUS_CODE = -1
6 |
7 | def __init__(self, ex, message=None, http_status_code=DEFAULT_HTTP_STATUS_CODE, **kwargs):
8 |         self.message = str(ex) # defaults to the source exception's message; overridden below if 'message' is given
9 |         self.src_message = None # the source exception's message, kept when 'message' overrides it
10 |         self.src_exception = None # the source exception, if one was given
11 | self.http_status_code = http_status_code
12 | custom_kwargs = {}
13 | if issubclass(ex.__class__,Exception):
14 | self.src_exception = ex
15 | if issubclass(ex.__class__,MlflowException):
16 | self.http_status_code = ex.get_http_status_code()
17 | custom_kwargs = { "mlflow_error_code": ex.error_code }
18 | if message:
19 | self.message = message
20 | self.src_message = str(ex)
21 |
22 | self.kwargs = { "message": self.message, "http_status_code": self.http_status_code }
23 | self.kwargs = {**self.kwargs, **kwargs, **custom_kwargs}
24 | if self.src_message:
25 | self.kwargs["src_message"] = self.src_message
26 |
27 | def _add(self, dct, k, v):
28 | if v: dct[k] = v
29 |
30 | def __str__(self):
31 | return json.dumps(self.kwargs)
32 |
--------------------------------------------------------------------------------
/mlflow_export_import/common/default_logging_config.py:
--------------------------------------------------------------------------------
1 | config = {
2 | "version": 1,
3 | "formatters": {
4 | "simple": {
5 | "format": "%(asctime)s - %(levelname)s - %(message)s",
6 | "datefmt": "%d-%b-%y %H:%M:%S"
7 | }
8 | },
9 | "handlers": {
10 | "console": {
11 | "class": "logging.StreamHandler",
12 | "level": "DEBUG",
13 | "formatter": "simple",
14 | "stream": "ext://sys.stdout"
15 | },
16 | "file": {
17 | "class": "logging.FileHandler",
18 | "filename": "/tmp/mlflow-export-import.log",
19 | "level": "INFO",
20 | "formatter": "simple"
21 | }
22 | },
23 | "loggers": {
24 | "sampleLogger": {
25 | "level": "DEBUG",
26 | "handlers": [
27 | "console"
28 | ],
29 | "propagate": False
30 | }
31 | },
32 | "root": {
33 | "level": "DEBUG",
34 | "handlers": [
35 | "console",
36 | "file"
37 | ]
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/mlflow_export_import/common/dump_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
4 | def dump_mlflow_client(client, msg=""):
5 | import mlflow
6 | print(f"MlflowClient {msg}:")
7 | print(" client.tracking_uri: ", client.tracking_uri)
8 | print(" client._registry_uri:", client._registry_uri)
9 | creds = client._tracking_client.store.get_host_creds()
10 | dump_obj(creds, "Credentials", " ")
11 | print(" mlflow fluent:")
12 | print(" mlflow.tracking_uri: ", mlflow.get_tracking_uri())
13 | print(" mlflow.registry_uri: ", mlflow.get_registry_uri())
14 |
15 |
16 | def dump_obj(obj, title=None, indent=""):
17 | if isinstance(obj, dict) or isinstance(obj, list):
18 | #dump_dict(obj, title)
19 | dump_as_json(obj, title)
20 | return
21 | if obj:
22 | title = title if title else type(obj).__name__
23 | print(f"{indent}{title}")
24 | for k,v in obj.__dict__.items():
25 | print(f"{indent} {k}: {v}")
26 | else:
27 | title = title if title else "Object"
28 | title = f"{title}: None"
29 | print(f"{indent}{title}")
30 |
31 |
32 | def dump_dict(dct, title=None):
33 | if title:
34 | print(f"{title}:")
35 | for k,v in dct.items():
36 | print(f" {k}: {v}")
37 |
38 |
39 | def dump_obj_as_json(obj, title=None):
40 | title = title if title else type(obj).__name__
41 | print(title)
42 | dump_as_json(obj_to_dict(obj))
43 |
44 |
45 | def dump_as_json(dct, title=None, sort_keys=None, indent=2):
46 | if title:
47 | print(f"{title}:")
48 | print(dict_to_json(dct, sort_keys, indent))
49 |
50 |
51 | def dict_to_json(dct, sort_keys=None, indent=2):
52 | return json.dumps(dct, sort_keys=sort_keys, indent=indent)
53 |
54 |
55 | def obj_to_dict(obj):
56 | return obj.__dict__
57 |
--------------------------------------------------------------------------------
/mlflow_export_import/common/filesystem.py:
--------------------------------------------------------------------------------
1 | """
2 | Filesystem utilities - local or Databricks
3 | """
4 |
5 | import os
6 | import shutil
7 |
8 |
9 | def mk_dbfs_path(path):
10 | return path.replace("/dbfs","dbfs:")
11 |
12 |
13 | def mk_local_path(path):
14 | return path.replace("dbfs:","/dbfs")
15 |
16 |
17 | def exists(path):
18 |     return os.path.exists(mk_local_path(path))
19 |
20 |
21 | class DatabricksFileSystem():
22 | def __init__(self):
23 | import IPython
24 | self.dbutils = IPython.get_ipython().user_ns["dbutils"]
25 |
26 | def ls(self, path):
27 | return self.dbutils.fs.ls(mk_dbfs_path(path))
28 |
29 | def cp(self, src, dst, recursive=False):
30 | self.dbutils.fs.cp(mk_dbfs_path(src), mk_dbfs_path(dst), recursive)
31 |
32 | def rm(self, path, recurse=False):
33 | self.dbutils.fs.rm(mk_dbfs_path(path), recurse)
34 |
35 | def mkdirs(self, path):
36 | self.dbutils.fs.mkdirs(mk_dbfs_path(path))
37 |
38 | def write(self, path, content):
39 | self.dbutils.fs.put(mk_dbfs_path(path), content, True)
40 |
41 |
42 | class LocalFileSystem():
43 | def __init__(self):
44 | pass
45 |
46 | def cp(self, src, dst, recurse=False):
47 | shutil.copytree(mk_local_path(src), mk_local_path(dst))
48 |
49 | def rm(self, path, recurse=False):
50 | shutil.rmtree(mk_local_path(path))
51 |
52 | def mkdirs(self, path):
53 | os.makedirs(mk_local_path(path),exist_ok=True)
54 |
55 | def write(self, path, content):
56 | with open(mk_local_path(path), "w", encoding="utf-8") as f:
57 | f.write(content)
58 |
59 |
60 | def get_filesystem(dir):
61 | """ Return the filesystem object matching the directory path. """
62 | return DatabricksFileSystem() if dir.startswith("dbfs:") else LocalFileSystem()
63 |
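64 | # Example (a sketch):
65 | #   get_filesystem("dbfs:/tmp/export")  -> DatabricksFileSystem
66 | #   get_filesystem("/tmp/export")       -> LocalFileSystem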
--------------------------------------------------------------------------------
/mlflow_export_import/common/find_artifacts.py:
--------------------------------------------------------------------------------
1 | """
2 | Find artifacts of a run that match a name.
3 | """
4 |
5 | import sys
6 | import os
7 | import click
8 | import mlflow
9 |
10 |
11 | def find_run_model_names(mlflow_client, run_id):
12 | """
13 | Return a list of model artifact directory paths of an MLflow run.
14 | Looks for any directory with an 'MLmodel' file and returns its directory.
15 | """
16 | matches = find_artifacts(mlflow_client, run_id, "", "MLmodel")
17 | return [ m.replace("/MLmodel","").replace("MLmodel","") for m in matches ]
18 |
19 |
20 | def find_artifacts(mlflow_client, run_id, path, target, max_level=sys.maxsize):
21 | return _find_artifacts(mlflow_client, run_id, path, target, max_level, 0, [])
22 |
23 |
24 | def _find_artifacts(mlflow_client, run_id, path, target, max_level, level, matches):
25 | if level+1 > max_level:
26 | return matches
27 | artifacts = mlflow_client.list_artifacts(run_id, path)
28 | for art in artifacts:
29 | filename = os.path.basename(art.path)
30 | if filename == target:
31 | matches.append(art.path)
32 |         # NOTE: as of MLflow 2.11.x a new 'metadata' directory appears containing duplicate MLmodel and related files.
33 | if art.is_dir and filename != "metadata":
34 | _find_artifacts(mlflow_client, run_id, art.path, target, max_level, level+1, matches)
35 | return matches
36 |
37 |
38 | @click.command()
39 | @click.option("--run-id", help="Run ID.", required=True, type=str)
40 | @click.option("--path", help="Relative artifact path.", default="", type=str, show_default=True)
41 | @click.option("--target", help="Target filename to search for.", required=True, type=str)
42 | @click.option("--max-level", help="Number of artifact levels to recurse.", default=sys.maxsize, type=int, show_default=True)
43 |
44 | def main(run_id, path, target, max_level):
45 | print("Options:")
46 | for k,v in locals().items():
47 | print(f" {k}: {v}")
48 | client = mlflow.client.MlflowClient()
49 | matches = find_artifacts(client, run_id, path, target, max_level)
50 | print("Matches:")
51 | for m in matches:
52 | print(" ",m)
53 |
54 |
55 | if __name__ == "__main__":
56 | main()
57 |
--------------------------------------------------------------------------------
/mlflow_export_import/common/logging_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import yaml
3 | import logging.config
4 |
5 | _have_loaded_logging_config = False
6 |
7 | def get_logger(name):
8 | global _have_loaded_logging_config
9 | if _have_loaded_logging_config:
10 | return logging.getLogger(name)
11 |
12 | config_path = os.environ.get("MLFLOW_EXPORT_IMPORT_LOG_CONFIG_FILE", None)
13 | output_path = os.environ.get("MLFLOW_EXPORT_IMPORT_LOG_OUTPUT_FILE", None)
14 | log_format = os.environ.get("MLFLOW_EXPORT_IMPORT_LOG_FORMAT", None)
15 | #print(f"logging_utils.get_logger: config_path: {config_path}")
16 | #print(f"logging_utils.get_logger: output_path: {output_path}")
17 | #print(f"logging_utils.get_logger: log_format: {log_format}")
18 |
19 | if config_path:
20 | if not config_path.endswith(".yaml"):
21 | _load_default_log_config(output_path, log_format)
22 | logging.warning(f"Logging config file '{config_path}' must be .yaml file.")
23 | elif not os.path.exists(config_path):
24 | _load_default_log_config(output_path, log_format)
25 | logging.warning(f"Logging config file '{config_path}' does not exist.")
26 | else:
27 | with open(config_path, "r", encoding="utf-8") as f:
28 | cfg = yaml.safe_load(f.read())
29 | logging.config.dictConfig(cfg)
30 | logging.info(f"Reading log config file '{config_path}'")
31 | else:
32 | _load_default_log_config(output_path, log_format)
33 |
34 | _have_loaded_logging_config = True
35 | return logging.getLogger(name)
36 |
37 |
38 | def _load_default_log_config(output_path=None, log_format=None):
39 | cfg = _create_default_log_config(output_path, log_format)
40 | logging.config.dictConfig(cfg)
41 | msg = f"with output log file '{output_path}'" if output_path else "without output log file"
42 | logging.info(f"Using default logging config {msg}")
43 |
44 |
45 | def _create_default_log_config(output_path=None, log_format=None):
46 | from mlflow_export_import.common.default_logging_config import config
47 | cfg = config.copy()
48 | if log_format:
49 | cfg["formatters"]["simple"]["format"] = log_format
50 |
51 | if output_path:
52 | file_handler = cfg["handlers"]["file"]
53 | file_handler["filename"] = output_path
54 | else:
55 | handlers = cfg["root"]["handlers"]
56 | handlers.remove("file")
57 |
58 | return cfg
59 |
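60 | # Example (a sketch): with MLFLOW_EXPORT_IMPORT_LOG_OUTPUT_FILE='/tmp/my.log' set,
61 | # get_logger(__name__) applies the default config and logs to both the console and
62 | # /tmp/my.log; if unset, the 'file' handler is removed from the root logger.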
--------------------------------------------------------------------------------
/mlflow_export_import/common/pkg_version.py:
--------------------------------------------------------------------------------
1 | from importlib.metadata import version, PackageNotFoundError
2 |
3 | pkg = "mlflow_export_import"
4 |
5 | def get_version():
6 | try:
7 | return version(pkg)
8 | except PackageNotFoundError:
9 | return ""
10 |
--------------------------------------------------------------------------------
/mlflow_export_import/common/source_tags.py:
--------------------------------------------------------------------------------
1 |
2 | class ExportFields:
3 | """ Top-level fields for JSON export format. """
4 | SYSTEM = "system"
5 | INFO = "info"
6 | MLFLOW = "mlflow"
7 |
8 |
9 | class ExportTags:
10 | """ Source export tags prefixes. """
11 | PREFIX_ROOT = "mlflow_exim"
12 | PREFIX_FIELD = f"{PREFIX_ROOT}.field"
13 | PREFIX_RUN_INFO = f"{PREFIX_ROOT}.run_info"
14 | PREFIX_MLFLOW_TAG = f"{PREFIX_ROOT}.mlflow_tag"
15 |
16 |
17 | def fmt_timestamps(tag, dct, tags):
18 | from mlflow_export_import.common import timestamp_utils
19 | ts = dct[tag]
20 | tags[f"{ExportTags.PREFIX_FIELD}.{tag}"] = str(ts)
21 | tags[f"{ExportTags.PREFIX_FIELD}._{tag}"] = timestamp_utils.fmt_ts_millis(ts, True)
22 |
23 |
24 | def set_source_tags_for_field(dct, tags):
25 |     """
26 | Add an object's fields as source tags.
27 | """
28 | for k,v in dct.items():
29 | if k != "tags":
30 | tags[f"{ExportTags.PREFIX_FIELD}.{k}"] = str(v)
31 |
32 |
33 | def mk_source_tags_mlflow_tag(tags):
34 |     """
35 |     Create 'mlflow_exim.mlflow_tag' source tags from 'mlflow.*' tags.
36 | """
37 | prefix = "mlflow."
38 | return { f"{ExportTags.PREFIX_MLFLOW_TAG}.{k.replace(prefix,'')}":str(v) for k,v in tags.items() if k.startswith(prefix) }
39 |
40 |
41 | def mk_source_tags(tags, dst_prefix):
42 |     """
43 |     Create source tags by prefixing each tag key with 'dst_prefix'.
44 | """
45 | return { f"{dst_prefix}.{k}":str(v) for k,v in tags.items() }
46 |
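47 | # Example (a sketch): mk_source_tags_mlflow_tag({"mlflow.user": "first.last"})
48 | #   -> {"mlflow_exim.mlflow_tag.user": "first.last"}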
--------------------------------------------------------------------------------
/mlflow_export_import/common/timestamp_utils.py:
--------------------------------------------------------------------------------
1 | import time
2 | from datetime import datetime
3 |
4 |
5 | TS_FORMAT = "%Y-%m-%d %H:%M:%S"
6 | ts_now_seconds = round(time.time())
7 | ts_now_fmt_utc = time.strftime(TS_FORMAT, time.gmtime(ts_now_seconds))
8 | ts_now_fmt_local = time.strftime(TS_FORMAT, time.localtime(ts_now_seconds))
9 |
10 | _default_as_utc = True
11 |
12 |
13 | def fmt_ts_millis(millis, as_utc=_default_as_utc):
14 | """ Convert epoch milliseconds to string format """
15 | if not millis:
16 | return None
17 | return fmt_ts_seconds(round(millis/1000), as_utc)
18 |
19 |
20 | def fmt_ts_seconds(seconds, as_utc=_default_as_utc):
21 | """ Convert epoch seconds to string format """
22 | if not seconds:
23 | return None
24 | if as_utc:
25 | ts = time.gmtime(seconds)
26 | else:
27 | ts = time.localtime(seconds)
28 | return time.strftime(TS_FORMAT, ts)
29 |
30 |
31 | def utc_str_to_millis(sdt):
32 | """ Convert UTC string to epoch milliseconds. """
33 | return utc_str_to_seconds(sdt) * 1000
34 |
35 |
36 | def utc_str_to_seconds(sdt):
37 | """ Convert UTC string to epoch seconds. """
38 | dt = datetime.fromisoformat(sdt)
39 | seconds = (dt - datetime(1970, 1, 1)).total_seconds()
40 | return seconds
41 |
42 |
43 | def adjust_timestamps(dct, keys):
44 | """
45 | Add human readable keys for millisecond timestamps.
46 | """
47 | keys = set(keys)
48 | for key in keys:
49 | if key in dct:
50 | dct[f"_{key}"] = fmt_ts_millis(dct[key])
51 |
52 |
53 | def format_seconds(seconds):
54 | """
55 |     Format a duration in seconds as minutes/seconds, e.g. '6m 40s' or '40s'.
56 | """
57 | minutes, seconds = divmod(seconds, 60)
58 | minutes = round(minutes)
59 | if minutes:
60 | seconds = round(seconds)
61 | return f"{minutes}m {seconds}s"
62 | else:
63 | prec = 2 if seconds < .1 else 1
64 | seconds = round(seconds,prec)
65 | return f"{seconds}s"
66 |
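67 | # Examples (a sketch):
68 | #   fmt_ts_millis(1700000000000)             -> '2023-11-14 22:13:20' (UTC)
69 | #   utc_str_to_millis('2023-11-14 22:13:20') -> 1700000000000.0
70 | #   format_seconds(400)                      -> '6m 40s'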
--------------------------------------------------------------------------------
/mlflow_export_import/copy/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/mlflow_export_import/copy/click_options.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | def opt_src_model(function):
4 | function = click.option("--src-model",
5 | help="Source registered model.",
6 | type=str,
7 | required=True
8 | )(function)
9 | return function
10 |
11 | def opt_dst_model(function):
12 | function = click.option("--dst-model",
13 | help="Destination registered model.",
14 | type=str,
15 | required=True
16 | )(function)
17 | return function
18 |
19 | def opt_src_version(function):
20 | function = click.option("--src-version",
21 | help="Source model version.",
22 | type=str,
23 | required=True
24 | )(function)
25 | return function
26 |
27 | def opt_src_mlflow_uri(function):
28 | function = click.option("--src-mlflow-uri",
29 | help="Source MLflow tracking server URI.",
30 | type=str,
31 | required=False
32 | )(function)
33 | return function
34 |
35 | def opt_dst_mlflow_uri(function):
36 | function = click.option("--dst-mlflow-uri",
37 | help="Destination MLflow tracking server URI.",
38 | type=str,
39 | required=False
40 | )(function)
41 | return function
42 |
43 | def opt_src_registry_uri(function):
44 | function = click.option("--src-registry-uri",
45 | help="Source MLflow registry URI.",
46 | type=str,
47 | required=True
48 | )(function)
49 | return function
50 |
51 | def opt_dst_registry_uri(function):
52 | function = click.option("--dst-registry-uri",
53 | help="Destination MLflow registry URI.",
54 | type=str,
55 | required=True
56 | )(function)
57 | return function
58 |
59 | def opt_dst_experiment_name(function):
60 | function = click.option("--dst-experiment-name",
61 |         help="Destination experiment name. If specified, the source version's run is copied to a new run in this experiment. Otherwise the new version uses the source version's run.",
62 | type=str,
63 | required=False
64 | )(function)
65 | return function
66 |
67 | def opt_copy_permissions(function):
68 | function = click.option("--copy-permissions",
69 | help="Copy model permissions (only if target model does not exist).",
70 | type=bool,
71 | default=False,
72 | show_default=True
73 | )(function)
74 | return function
75 |
76 | def opt_copy_stages_and_aliases(function):
77 | function = click.option("--copy-stages-and-aliases",
78 | help="Import stages and aliases.",
79 | type=bool,
80 | default=False,
81 | show_default=True
82 | )(function)
83 | return function
84 |
85 | def opt_copy_lineage_tags(function):
86 | function = click.option("--copy-lineage-tags",
87 | help="Add source lineage info to destination version as tags starting with 'mlflow_exim'.",
88 | type=bool,
89 | default=False,
90 | show_default=True
91 | )(function)
92 | return function
93 |
--------------------------------------------------------------------------------
/mlflow_export_import/copy/copy_run.py:
--------------------------------------------------------------------------------
1 | import click
2 | import tempfile
3 | import mlflow
4 |
5 | from mlflow_export_import.run.export_run import export_run
6 | from mlflow_export_import.run.import_run import import_run
7 | from mlflow_export_import.common import utils
8 | from mlflow_export_import.common.click_options import opt_run_id, opt_experiment_name
9 | from . import copy_utils
10 | from . click_options import opt_src_mlflow_uri, opt_dst_mlflow_uri
11 |
12 | _logger = utils.getLogger(__name__)
13 |
14 |
15 | def copy(
16 | src_run_id,
17 | dst_experiment_name,
18 | src_mlflow_uri = None,
19 | dst_mlflow_uri = None
20 | ):
21 | """
22 | Copies a run to another tracking server (workspace).
23 |
24 | :param src_run_id: Source run ID.
25 | :param dst_experiment_name: Destination experiment name.
26 |     :param src_mlflow_uri: Source tracking server (workspace) URI.
27 |     :param dst_mlflow_uri: Destination tracking server (workspace) URI.
28 |
29 | :return: Destination Run object.
30 | """
31 |
32 | return _copy(src_run_id, dst_experiment_name,
33 | copy_utils.mk_client(src_mlflow_uri),
34 | copy_utils.mk_client(dst_mlflow_uri)
35 | )
36 |
37 |
38 | def _copy(src_run_id, dst_experiment_name, src_client=None, dst_client=None):
39 | src_client = src_client or mlflow.MlflowClient()
40 | dst_client = dst_client or mlflow.MlflowClient()
41 | with tempfile.TemporaryDirectory() as download_dir:
42 | export_run(
43 | src_run_id,
44 | download_dir,
45 | notebook_formats = [ "SOURCE" ],
46 | mlflow_client = src_client
47 | )
48 | dst_run, _ = import_run(
49 | download_dir,
50 | dst_experiment_name,
51 | mlflow_client = dst_client
52 | )
53 | return dst_run
54 |
55 |
56 | @click.command()
57 | @opt_run_id
58 | @opt_experiment_name
59 | @opt_src_mlflow_uri
60 | @opt_dst_mlflow_uri
61 | def main(run_id, experiment_name, src_mlflow_uri, dst_mlflow_uri):
62 | print("Options:")
63 | for k,v in locals().items():
64 | print(f" {k}: {v}")
65 | copy(run_id, experiment_name, src_mlflow_uri, dst_mlflow_uri)
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
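71 | # Example (a sketch; the profile names and IDs are hypothetical):
72 | #   run = copy("73ab168e5775409fa3595157a415bb62", "/Users/me@example.com/target-exp",
73 | #              src_mlflow_uri="databricks://SRC_PROFILE", dst_mlflow_uri="databricks://DST_PROFILE")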
--------------------------------------------------------------------------------
/mlflow_export_import/copy/copy_utils.py:
--------------------------------------------------------------------------------
1 | import mlflow
2 | from mlflow.exceptions import MlflowException
3 |
4 |
5 | def get_model_name(artifact_path):
6 | """
7 | Return 'my-model' from '/foo/artifacts/my-model'
8 | """
9 | idx = artifact_path.find("artifacts")
10 | idx += len("artifacts") + 1
11 | return artifact_path[idx:]
12 |
13 |
14 | def create_registered_model(client, model_name):
15 | """
16 |     Create the registered model if it does not exist. Return True if it already existed, False if it was created.
17 | """
18 | try:
19 | client.create_registered_model(model_name)
20 | return False
21 |     except MlflowException as e: # NOTE: for non-UC models this is a RestException
22 | if e.error_code != "RESOURCE_ALREADY_EXISTS":
23 | raise
24 | return True
25 |
26 |
27 | def create_experiment(client, experiment_name):
28 | try:
29 | return client.create_experiment(experiment_name)
30 | except MlflowException as e:
31 | if e.error_code != "RESOURCE_ALREADY_EXISTS":
32 | raise
33 | experiment = client.get_experiment_by_name(experiment_name)
34 | return experiment.experiment_id
35 |
36 |
37 | def add_tag(src_tags, dst_tags, key, prefix):
38 | val = src_tags.get(key)
39 | if val is not None:
40 | dst_tags[f"{prefix}.{key}"] = val
41 |
42 |
43 | def obj_to_dict(obj):
44 | if isinstance(obj, mlflow.entities.model_registry.model_version.ModelVersion):
45 | dct = adjust_model_version(obj.__dict__)
46 | else:
47 | dct = obj.__dict__
48 | return dct
49 |
50 |
51 | def adjust_model_version(vr):
52 | dct = {}
53 | for k,v in vr.items():
54 | if k == "_aliases": # type - google._upb._message.RepeatedScalarContainer
55 | dct[k] = [ str(x) for x in v ]
56 | else:
57 | dct[k] = v
58 | return dct
59 |
60 |
61 | def mk_client(tracking_uri, registry_uri=None):
62 | if not tracking_uri and not registry_uri:
63 | return mlflow.MlflowClient()
64 | else:
65 | tracking_uri = tracking_uri.replace("databricks-uc", "databricks")
66 | return mlflow.MlflowClient(tracking_uri, registry_uri)
67 |
--------------------------------------------------------------------------------
/mlflow_export_import/experiment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/experiment/__init__.py
--------------------------------------------------------------------------------
/mlflow_export_import/experiment/nested_runs_utils.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.common import utils
2 | from mlflow_export_import.common.iterators import SearchRunsIterator
3 |
4 | _logger = utils.getLogger(__name__)
5 |
6 |
7 | def get_nested_runs(client, runs):
8 | """
9 |     Return the given runs plus their nested run descendants.
10 | """
11 | if utils.calling_databricks():
12 | return get_nested_runs_by_rootRunId(client, runs)
13 | else:
14 | from . import oss_nested_runs_utils
15 | return runs + oss_nested_runs_utils.get_nested_runs(client, runs)
16 |
17 |
18 | def get_nested_runs_by_rootRunId(client, runs):
19 | """
20 |     Return list of nested run descendants (includes the root run).
21 |     Uses the 'mlflow.rootRunId' tag, which Databricks MLflow adds to child runs (OSS MLflow does not).
22 | """
23 |     descendant_runs = []
24 | for run in runs:
25 | filter = f"tags.mlflow.rootRunId = '{run.info.run_id}'"
26 | _descendant_runs = list(SearchRunsIterator(client, run.info.experiment_id, filter=filter))
27 | if _descendant_runs:
28 | descendant_runs += _descendant_runs
29 | else:
30 | descendant_runs.append(run)
31 | return descendant_runs
32 |
--------------------------------------------------------------------------------
/mlflow_export_import/experiment/oss_nested_runs_utils.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.common.iterators import SearchRunsIterator
2 |
3 |
4 | def get_nested_runs(client, runs, parent_runs=None):
5 | nested_runs = []
6 | for run in runs:
7 | nested_runs += _get_nested_runs_for_run(client, run, parent_runs)
8 | return nested_runs
9 |
10 | def get_nested_runs_for_experiment(client, experiment_id):
11 |     filter = "tags.mlflow.parentRunId like '%'"
12 | return list(SearchRunsIterator(client, experiment_id, filter=filter))
13 |
14 |
15 | def _get_nested_runs_for_run(client, run, parent_runs=None):
16 | nested_runs = _build_nested_runs(client, run.info.experiment_id, parent_runs)
17 | run_ids = _get_run_ids(run.info.run_id, nested_runs)
18 | return [ client.get_run(run_id) for run_id in run_ids ]
19 |
20 | def _get_run_ids(root_id, nested_runs):
21 | nested_run_ids = nested_runs.get(root_id)
22 | if not nested_run_ids:
23 | return set()
24 | all_nested_run_ids = nested_run_ids
25 | for run_id in nested_run_ids:
26 | _nested_run_ids = _get_run_ids(run_id, nested_runs)
27 | if _nested_run_ids:
28 | all_nested_run_ids += _nested_run_ids
29 | return set(all_nested_run_ids)
30 |
31 | def _build_nested_runs(client, experiment_id, parent_runs=None):
32 | """
33 | Flat dict of all descendant run IDs and their child runs
34 | dict: run_id: list of run_id's child runs (per mlflow.parentRunId tag)
35 | """
36 | if not parent_runs:
37 | parent_runs = get_nested_runs_for_experiment(client, experiment_id)
38 | dct = { run.info.run_id:run.data.tags["mlflow.parentRunId"] for run in parent_runs }
39 | nested_runs = {}
40 | for run_id,parent_id in dct.items():
41 | nested_runs.setdefault(parent_id, []).append(run_id)
42 | return nested_runs
43 |
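44 | # Example (a sketch): runs B and C tagged with parent A, plus run D tagged with parent B,
45 | # build to {"A": ["B", "C"], "B": ["D"]}; _get_run_ids("A", ...) then yields {"B", "C", "D"}.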
--------------------------------------------------------------------------------
/mlflow_export_import/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/model/__init__.py
--------------------------------------------------------------------------------
/mlflow_export_import/model_version/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/model_version/__init__.py
--------------------------------------------------------------------------------
/mlflow_export_import/model_version/click_options.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 |
4 | # == Export model version
5 |
6 | def opt_version(function):
7 | function = click.option("--version",
8 | help="Registered model version.",
9 | type=str,
10 | required=True
11 | )(function)
12 | return function
13 |
14 |
15 | def opt_vrm_export_version_model(function):
16 | function = click.option("--vrm-export-version-model",
17 | help="Export the MLflow model (from model registry) of a model version.",
18 | type=bool,
19 | default=False
20 | )(function)
21 | return function
22 |
23 | def opt_vrm_model_artifact_path(function):
24 | function = click.option("--vrm-model-artifact-path",
25 | help="Destination artifact path of the Mlflow model of a model version.",
26 | type=str,
27 | required=False
28 | )(function)
29 | return function
30 |
31 | def opt_skip_download_run_artifacts(function):
32 | function = click.option("--skip-download-run-artifacts",
33 | help="Skip downloading run artifacts (for fine-tuned LLM models)",
34 | type=bool,
35 | default=False
36 | )(function)
37 | return function
38 |
39 |
40 | # == Import model version
41 |
42 | def opt_create_model(function):
43 | function = click.option("--create-model",
44 | help="Create registered model before creating model version.",
45 | type=bool,
46 | default=False,
47 | show_default=True
48 | )(function)
49 | return function
50 |
51 | def opt_experiment_name(function):
52 | function = click.option("--experiment-name",
53 | help="Destination experiment name for the version's run.",
54 | type=str,
55 | required=True
56 | )(function)
57 | return function
58 |
59 | def opt_import_stages_and_aliases(function):
60 | function = click.option("--import-stages-and-aliases",
61 | help="Import stages and aliases.",
62 | type=bool,
63 | default=False,
64 | show_default=True
65 | )(function)
66 | return function
67 |
68 | def opt_import_metadata(function):
69 | function = click.option("--import-metadata",
70 | help="Import registered model and experiment metadata (description and tags).",
71 | type=bool,
72 | default=False,
73 | show_default=True
74 | )(function)
75 | return function
76 |
--------------------------------------------------------------------------------
/mlflow_export_import/notebook/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/notebook/__init__.py
--------------------------------------------------------------------------------
/mlflow_export_import/notebook/download_notebook.py:
--------------------------------------------------------------------------------
1 | """
2 | Downloads a Databricks notebook with optional revision.
3 | """
4 |
5 | import os
6 | import json
7 | import click
8 |
9 | from mlflow_export_import.common.click_options import opt_output_dir
10 | from mlflow_export_import.common import utils, io_utils
11 | from mlflow_export_import.common import MlflowExportImportException
12 | from mlflow_export_import.client.http_client import DatabricksHttpClient
13 |
14 | _logger = utils.getLogger(__name__)
15 |
16 |
17 | def download_notebook(output_dir, notebook_workspace_path, revision_id, notebook_formats, dbx_client):
18 |     notebook_dir = output_dir
19 | os.makedirs(notebook_dir, exist_ok=True)
20 | for format in notebook_formats:
21 | _download_notebook(notebook_workspace_path, notebook_dir, format, format.lower(), revision_id, dbx_client)
22 |
23 |
24 | def _download_notebook(notebook_workspace_path, output_dir, format, extension, revision_id, dbx_client):
25 | params = {
26 | "path": notebook_workspace_path,
27 | "direct_download": True,
28 | "format": format
29 | }
30 | if revision_id:
31 |         params["revision"] = { "revision_timestamp": revision_id } # NOTE: not publicly documented
32 | notebook_name = os.path.basename(notebook_workspace_path)
33 | try:
34 | rsp = dbx_client._get("workspace/export", json.dumps(params))
35 | notebook_path = os.path.join(output_dir, f"{notebook_name}.{extension}")
36 | io_utils.write_file(notebook_path, rsp.content)
37 | except MlflowExportImportException as e:
38 | _logger.warning(f"Cannot download notebook '{notebook_workspace_path}'. {e}")
39 |
40 |
41 | @click.command()
42 | @opt_output_dir
43 | @click.option("--notebook",
44 | help="Notebook path.",
45 | type=str,
46 | required=True
47 | )
48 | @click.option("--revision",
49 |     help="Notebook revision. If not specified, the latest revision is downloaded.",
50 | type=str,
51 | required=False
52 | )
53 | @click.option("--notebook-formats",
54 |     help="Databricks notebook formats. Values are SOURCE, HTML, JUPYTER or DBC (comma-separated).",
55 | type=str,
56 | default="SOURCE",
57 | show_default=True
58 | )
59 | def main(output_dir, notebook, revision, notebook_formats):
60 | _logger.info("Options:")
61 | for k,v in locals().items():
62 | _logger.info(f" {k}: {v}")
63 | dbx_client = DatabricksHttpClient()
64 | notebook_formats = utils.string_to_list(notebook_formats)
65 | download_notebook(output_dir, notebook, revision, notebook_formats, dbx_client)
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
--------------------------------------------------------------------------------
/mlflow_export_import/run/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/mlflow_export_import/run/run_utils.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import tempfile
4 | from mlflow_export_import.common import mlflow_utils, io_utils
5 | from mlflow_export_import.common.find_artifacts import find_run_model_names
6 |
7 | def get_model_name(artifact_path):
8 | idx = artifact_path.find("artifacts")
9 | idx += len("artifacts") + 1
10 | return artifact_path[idx:]
11 |
12 |
13 | def update_mlmodel_run_id(mlflow_client, run_id):
14 | """
15 |     :param mlflow_client: MlflowClient instance.
16 |     :param run_id: Run whose MLmodel files should be updated.
17 |     Workaround to fix the run_id in the destination MLmodel file.
18 |     Since an MLflow run does not keep track of its models, there is no method to list all of a run's model artifacts.
19 |     This workaround recursively searches the run's root artifact directory for MLmodel files and assumes each file's
20 |     directory represents a path to a model.
21 | """
22 | mlmodel_paths = find_run_model_names(mlflow_client, run_id)
23 | for model_path in mlmodel_paths:
24 | download_uri = f"runs:/{run_id}/{model_path}/MLmodel"
25 | local_path = mlflow_utils.download_artifacts(mlflow_client, download_uri)
26 | mlmodel = io_utils.read_file(local_path, "yaml")
27 | mlmodel["run_id"] = run_id
28 | with tempfile.TemporaryDirectory() as dir:
29 | output_path = os.path.join(dir, "MLmodel")
30 | io_utils.write_file(output_path, mlmodel, "yaml")
31 | if model_path == "MLmodel":
32 | model_path = ""
33 | mlflow_client.log_artifact(run_id, output_path, model_path)
34 |
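35 | # Example (a sketch): if a run's artifact tree contains 'model/MLmodel' and 'onnx-model/MLmodel',
36 | # find_run_model_names() returns ['model', 'onnx-model'] and each MLmodel file is rewritten
37 | # with the destination run_id via log_artifact().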
--------------------------------------------------------------------------------
/mlflow_export_import/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/tools/__init__.py
--------------------------------------------------------------------------------
/mlflow_export_import/tools/click_options.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | def opt_input_file(function):
4 | function = click.option("--input-file",
5 | help="Input file.",
6 | type=str,
7 | required=True
8 | )(function)
9 | return function
10 |
11 | def opt_output_file(function):
12 | function = click.option("--output-file",
13 | help="Output file.",
14 | type=str,
15 | required=False
16 | )(function)
17 | return function
18 |
19 | def opt_model_uri(function):
20 | function = click.option("--model-uri",
21 | help="Model URI such as 'models:/my_model/3' or 'runs:/73ab168e5775409fa3595157a415bb62/my_model'.",
22 | type=str,
23 | required=True
24 | )(function)
25 | return function
26 |
27 | def opt_filter(function):
28 | function = click.option("--filter",
29 | help="For OSS MLflow this is a filter for search_model_versions(), for Databricks it is for search_registered_models() due to Databricks MLflow search limitations.",
30 | type=str,
31 | required=False
32 | )(function)
33 | return function
34 |
35 | def opt_use_get_model_info(function):
36 | function = click.option("--use-get-model-info",
37 | help="Use mlflow.models.get_model_info() which apparently downloads *all* artifacts (quite slow for large models) instead of just downloading 'MLmodel' using mlflow.artifacts.download_artifacts().",
38 | type=bool,
39 | default=False,
40 | show_default=True
41 | )(function)
42 | return function
43 |
--------------------------------------------------------------------------------
/mlflow_export_import/tools/experimental/samples/custom_export_rewriters.py:
--------------------------------------------------------------------------------
1 | """
2 | Sample post-processing rewriters for models and experiments:
3 | 1. for registered model truncate versions to one
4 | 2. for experiment truncate runs to one
5 | """
6 |
7 | import os
8 | from mlflow_export_import.common import io_utils
9 |
10 |
11 | def rewrite_model(model_dct, models_dir):
12 | """ processes model.json """
13 | versions = model_dct["mlflow"]["registered_model"]["versions"]
14 | print(f" Original versions: {len(versions)}")
15 | versions = versions[:1]
16 | print(f" New versions: {len(versions)}")
17 | model_dct["mlflow"]["registered_model"]["versions"] = versions
18 |
19 |
20 | def rewrite_experiment(experiment_dct, experiment_dir):
21 | """ processes experiment.json """
22 | def fmt_run(run_dct):
23 | from mlflow_export_import.common.timestamp_utils import fmt_ts_millis
24 | info = run_dct["info"]
25 | return f'run_id: {info["run_id"]} start_time: {info["start_time"]} {fmt_ts_millis(info["start_time"])}'
26 | runs = experiment_dct["mlflow"]["runs"]
27 | print(f" Original runs: {len(runs)}")
28 |
29 | # do some custom processing such as returning the latest run
30 | latest_run_dct = None
31 | for run_id in runs:
32 | path = os.path.join(experiment_dir, run_id, "run.json")
33 | run_dct = io_utils.read_file(path)["mlflow"]
34 | if not latest_run_dct:
35 | latest_run_dct = run_dct
36 |         elif run_dct["info"]["start_time"] > latest_run_dct["info"]["start_time"]:
37 | latest_run_dct = run_dct
38 | print(f" Run: {fmt_run(run_dct)}")
39 | print(f" Latest run: {fmt_run(latest_run_dct)}")
40 | runs = [ latest_run_dct ]
41 | print(f" New runs: {len(runs)}")
42 |
43 | experiment_dct["mlflow"]["runs"] = runs
44 |
45 |
--------------------------------------------------------------------------------
/mlflow_export_import/tools/get_model_signature.py:
--------------------------------------------------------------------------------
1 | """
2 | Get the signature of an MLflow model.
3 | """
4 |
5 | import click
6 | from mlflow_export_import.common import io_utils
7 | from mlflow_export_import.common.dump_utils import dump_as_json
8 | from . click_options import opt_model_uri, opt_output_file, opt_use_get_model_info
9 | from . signature_utils import get_model_signature
10 |
11 |
12 | @click.command()
13 | @opt_model_uri
14 | @opt_output_file
15 | @opt_use_get_model_info
16 | def main(model_uri, output_file, use_get_model_info):
17 | """
18 | Get the signature of an MLflow model.
19 | """
20 | print("Options:")
21 | for k,v in locals().items():
22 | print(f" {k}: {v}")
23 | signature = get_model_signature(model_uri, use_get_model_info)
24 | if signature:
25 | print("Model Signature:")
26 | dump_as_json(signature)
27 | if output_file:
28 | io_utils.write_file(output_file, signature)
29 | else:
30 | print(f"WARNING: No model signature for '{model_uri}'")
31 |
32 | if __name__ == "__main__":
33 | main()
34 |
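35 | # Example invocation (a sketch; the model URI is hypothetical):
36 | #   python -m mlflow_export_import.tools.get_model_signature \
37 | #     --model-uri models:/Sklearn_Wine/1 --output-file signature.json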
--------------------------------------------------------------------------------
/mlflow_export_import/tools/list_model_versions_without_signatures.py:
--------------------------------------------------------------------------------
1 | """
2 | List model versions without a model signature.
3 | """
4 |
5 | import click
6 | import pandas as pd
7 | from tabulate import tabulate
8 | import mlflow
9 |
10 | from . click_options import opt_filter, opt_output_file, opt_use_get_model_info
11 | from . tools_utils import search_model_versions
12 | from . signature_utils import get_model_signature
13 |
14 |
15 | def as_pandas_df(filter, use_get_model_info=False):
16 | client = mlflow.MlflowClient()
17 | versions = search_model_versions(client, filter)
18 |
19 | print(f"Found {len(versions)} model versions")
20 | versions_without_signatures = []
21 | for j, vr in enumerate(versions):
22 | model_uri = f"models:/{vr.name}/{vr.version}"
23 | if j%10 == 0:
24 | print(f"Processing {j}/{len(versions)}: {model_uri}")
25 | try:
26 | signature = get_model_signature(model_uri, use_get_model_info)
27 | if not signature:
28 | versions_without_signatures.append([vr.name, vr.version, vr.run_id, ""])
29 | except Exception as e:
30 | versions_without_signatures.append([vr.name, vr.version, vr.run_id, str(e)])
31 | #print(f"Found {len(versions)} model versions")
32 | print(f"Found {len(versions_without_signatures)}/{len(versions)} model versions without signatures")
33 |
34 | df = pd.DataFrame(versions_without_signatures, columns = ["model","version", "run_id", "error"])
35 | return df.sort_values(by=["model", "version"], ascending = [True, False])
36 |
37 |
38 | def show(filter, output_file, use_get_model_info):
39 | df = as_pandas_df(filter, use_get_model_info)
40 | print(tabulate(df, headers="keys", tablefmt="psql", numalign="right", showindex=False))
41 | if output_file:
42 | with open(output_file, "w", encoding="utf-8") as f:
43 | df.to_csv(f, index=False)
44 |
45 |
46 | @click.command()
47 | @opt_filter
48 | @opt_output_file
49 | @opt_use_get_model_info
50 | def main(filter, output_file, use_get_model_info):
51 | """
52 | List model versions without a model signature.
53 | """
54 | print("Options:")
55 | args = locals()
56 | for k,v in args.items():
57 | print(f" {k}: {v}")
58 | show(filter, output_file, use_get_model_info)
59 |
60 |
61 | if __name__ == "__main__":
62 | main()
63 |
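64 | # Example invocation (a sketch):
65 | #   python -m mlflow_export_import.tools.list_model_versions_without_signatures \
66 | #     --filter "name like 'Sklearn_Wine%'" --output-file missing_signatures.csv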
--------------------------------------------------------------------------------
/mlflow_export_import/tools/list_registered_models.py:
--------------------------------------------------------------------------------
1 | """
2 | Lists all registered models.
3 | """
4 |
5 | import json
6 | from mlflow_export_import.client.http_client import MlflowHttpClient
7 |
8 | def main():
9 | client = MlflowHttpClient()
10 |     print("HTTP client:", client)
11 |     rsp = client._get("registered-models/search")
12 |     dct = json.loads(rsp.text)
13 |     print(json.dumps(dct, indent=2) + "\n")
14 |
15 | if __name__ == "__main__":
16 | main()
17 |
--------------------------------------------------------------------------------
/mlflow_export_import/tools/set_model_signature.py:
--------------------------------------------------------------------------------
1 | """
2 | Set the model signature of an MLflow model.
3 |
4 | https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.set_signature
5 | """
6 |
7 | import pandas as pd
8 | import click
9 | import mlflow
10 | from mlflow.models.signature import infer_signature
11 | from mlflow_export_import.common.dump_utils import dump_as_json
12 | from . signature_utils import get_model_signature, to_json_signature
13 |
14 |
15 | def set_signature(model_uri, input_file, output_file, overwrite_signature):
16 | signature = get_model_signature(model_uri)
17 | if signature:
18 | if not overwrite_signature:
19 | print(f"WARNING: Model '{model_uri}' already has a signature. Not overwriting signature.")
20 | return
21 | else:
22 | print(f"WARNING: Model '{model_uri}' already has a signature. Overwriting existing signature.")
23 |     df_input = pd.read_csv(input_file)
24 |     df_output = pd.read_csv(output_file) if output_file else None  # --output-file is optional
25 |     signature = infer_signature(df_input, df_output)
26 | print("New model signature:")
27 | dump_as_json(to_json_signature(signature.to_dict()))
28 |
29 | mlflow.models.set_signature(model_uri, signature)
30 |
31 |
32 | @click.command()
33 | @click.option("--model-uri",
34 | help="""
35 | Model URI such as 'runs:/73ab168e5775409fa3595157a415bb62/my_model' or 'file:/my_mlflow_model'.
36 | Per the MLflow documentation, the 'models:/' scheme is not supported.
37 | """,
38 | type=str,
39 | required=True
40 | )
41 | @click.option("--input-file",
42 | help="Input CSV file with training data samples for signature.",
43 | type=str,
44 | required=True
45 | )
46 | @click.option("--output-file",
47 | help="Output CSV file with prediction data samples for signature.",
48 | type=str,
49 | required=False
50 | )
51 | @click.option("--overwrite-signature",
52 | help="Overwrite existing model signature.",
53 | type=bool,
54 | default=False,
55 | show_default=True
56 | )
57 | def main(model_uri, input_file, output_file, overwrite_signature):
58 | """
59 | Set the signature of an MLflow model.
60 | 'models:/' scheme URIs are not accepted.
61 |     For OSS MLflow, adding a model signature to a run automatically updates any model version created from that run.
62 | """
63 | print("Options:")
64 | for k,v in locals().items():
65 | print(f" {k}: {v}")
66 | set_signature(model_uri, input_file, output_file, overwrite_signature)
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
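Sketch of the signature inference step performed by set_signature, assuming hypothetical sample files 'input.csv' (model inputs) and 'output.csv' (predictions):

    import pandas as pd
    from mlflow.models.signature import infer_signature

    df_input = pd.read_csv("input.csv")
    df_output = pd.read_csv("output.csv")
    # infer_signature derives column names and types from the sample data.
    signature = infer_signature(df_input, df_output)
    print(signature.to_dict())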
--------------------------------------------------------------------------------
/mlflow_export_import/tools/signature_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import yaml
3 | import mlflow
4 |
5 | def to_json_signature(signature):
6 |     def _to_json(s):
7 |         return json.loads(s) if s else s
8 |     return {k: _to_json(v) for k, v in signature.items()}
9 |
10 |
11 | def get_model_signature(model_uri, use_get_model_info=False):
12 | """
13 |     Return a fully exploded dict of the stringified JSON 'signature' field of the MLmodel file.
14 |     :param use_get_model_info: Use mlflow.models.get_model_info(), which downloads *all* model artifacts (quite slow for large models), instead of downloading just 'MLmodel' with mlflow.artifacts.download_artifacts().
15 |     :return: Signature as a dictionary, or None if the model has no signature.
16 | """
17 | if use_get_model_info:
18 | return get_model_signature_use_get_model_info(model_uri)
19 | else:
20 | return get_model_signature_use_download_MLmodel(model_uri)
21 |
22 | def get_model_signature_use_download_MLmodel(model_uri):
23 | artifact_uri = f"{model_uri}/MLmodel"
24 | local_path = mlflow.artifacts.download_artifacts(artifact_uri)
25 | with open(local_path, "r") as f:
26 | mlmodel = yaml.safe_load(f)
27 | sig = mlmodel.get("signature")
28 | return to_json_signature(sig) if sig else None
29 |
30 | def get_model_signature_use_get_model_info(model_uri):
31 | model_info = mlflow.models.get_model_info(model_uri)
32 | if model_info.signature:
33 | sig = model_info.signature.to_dict()
34 | return to_json_signature(sig)
35 | else:
36 | return None
37 |
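Example of the decoding performed by to_json_signature: in an MLmodel file (see the sample MLmodel files below), the 'inputs' and 'outputs' fields are stringified JSON, which is exploded into plain lists:

    from mlflow_export_import.tools.signature_utils import to_json_signature

    raw = {
        "inputs": '[{"name": "alcohol", "type": "double"}]',
        "outputs": '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
    }
    print(to_json_signature(raw))
    # {'inputs': [{'name': 'alcohol', 'type': 'double'}], 'outputs': [{'type': 'tensor', ...}]}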
--------------------------------------------------------------------------------
/mlflow_export_import/tools/tools_utils.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.common import utils
2 | from mlflow_export_import.common.iterators import SearchRegisteredModelsIterator, SearchModelVersionsIterator
3 |
4 | def search_model_versions(client, filter):
5 | if utils.calling_databricks():
6 | models = list(SearchRegisteredModelsIterator(client, filter=filter))
7 | versions = []
8 | for model in models:
9 | try:
10 | _versions = list(SearchModelVersionsIterator(client, filter=f"name='{model.name}'"))
11 | versions += _versions
12 | except Exception as e:
13 | print(f"ERROR: registered model '{model.name}': {e}")
14 | return versions
15 | else:
16 | return list(SearchModelVersionsIterator(client, filter=filter))
17 |
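Usage sketch against an OSS tracking server (on Databricks the per-model loop above is taken instead); the filter value is illustrative:

    import mlflow
    from mlflow_export_import.tools.tools_utils import search_model_versions

    client = mlflow.MlflowClient()
    for vr in search_model_versions(client, filter="name='sklearn_wine'"):
        print(vr.name, vr.version, vr.run_id)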
--------------------------------------------------------------------------------
/mlflow_export_import/version.py:
--------------------------------------------------------------------------------
1 |
2 | __version__ = "1.2.0"
3 |
--------------------------------------------------------------------------------
/mlflow_export_import/workflow_api/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/mlflow_export_import/workflow_api/log_utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | logging.basicConfig(
4 | #format = "%(asctime)s %(levelname)-7s %(message)s",
5 | format = "%(asctime)s %(levelname)s %(message)s",
6 | level = logging.INFO,
7 | datefmt = "%Y-%m-%d %H:%M:%S")
8 |
--------------------------------------------------------------------------------
/mlflow_export_import/workflow_api/run_submit.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import click
3 | import logging
4 | from mlflow_export_import.workflow_api.workflow_api_client import WorkflowApiClient
5 | from mlflow_export_import.workflow_api import utils
6 |
7 | def run(profile, spec_file, sleep_seconds, timeout_seconds, verbose=False):
8 | client = WorkflowApiClient(profile, sleep_seconds, timeout_seconds)
9 |
10 | # Read JSON spec file
11 | job_spec = utils.load_json_file(spec_file)
12 |
13 | # Launch run jobs/submit
14 | res = client.run_submit(job_spec)
15 |
16 | run_id = res["run_id"]
17 | logging.info(f"New run_id: {run_id}")
18 |
19 | # Wait until cluster is created
20 | client.wait_until_cluster_is_created_for_run(run_id)
21 |
22 | # Get cluster ID
23 | dct = client.get_run(run_id)
24 | #cluster_state = dct["cluster_instance"]["cluster_id"]
25 | cluster_id = dct["cluster_instance"]["cluster_id"]
26 | logging.info(f"cluster_id: {cluster_id}")
27 |
28 | # Wait until run is done
29 | client.wait_until_run_is_done(run_id)
30 |
31 | # Get run status
32 | run = client.get_run(run_id)
33 |
34 | # Show final run
35 | if verbose:
36 | utils.dump_as_json("Final run", run)
37 |
38 | # Get cluster log directory
39 | try:
40 | log_dir = run["cluster_spec"]["new_cluster"]["cluster_log_conf"]["dbfs"]["destination"] + "/" + cluster_id
41 | logging.info(f"Log directory: '{log_dir}'")
42 | except KeyError:
43 |         logging.warning("No cluster log directory")
44 |
45 | # Show run result state
46 | result_state = run["state"]["result_state"]
47 | logging.info(f"Run result state: {result_state}")
48 |
49 |
50 |
51 | @click.command()
52 | @click.option("--profile",
53 | help="Databricks profile",
54 | type=str,
55 | default=None,
56 | show_default=True
57 | )
58 | @click.option("--spec-file",
59 | help="JSON job specification file",
60 | type=str,
61 | required=True,
62 | show_default=True
63 | )
64 | @click.option("--sleep-seconds",
65 |     help="Sleep time (seconds) between run status checks",
66 | type=int,
67 | default=5,
68 | show_default=True
69 | )
70 | @click.option("--timeout-seconds",
71 | help="Timeout (seconds)",
72 | type=int,
73 | default=sys.maxsize,
74 | show_default=True
75 | )
76 | @click.option("--verbose",
77 | help="Verbose",
78 | type=bool,
79 | default=False)
80 |
81 | def main(profile, spec_file, sleep_seconds, timeout_seconds, verbose):
82 | print("Options:")
83 | for k,v in locals().items(): print(f" {k}: {v}")
84 | run(profile, spec_file, sleep_seconds, timeout_seconds, verbose)
85 |
86 | if __name__ == "__main__":
87 | main()
88 |
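Sketch of invoking run() directly instead of through the CLI; 'job_spec.json' is a hypothetical jobs/runs/submit spec file and 'e2_demo' a Databricks CLI profile, as in the samples below:

    from mlflow_export_import.workflow_api.run_submit import run

    # Submits the run, waits for cluster creation and completion, then logs
    # the result state.
    run(profile="e2_demo", spec_file="job_spec.json",
        sleep_seconds=5, timeout_seconds=3600, verbose=True)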
--------------------------------------------------------------------------------
/mlflow_export_import/workflow_api/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
4 | def load_json_file(path):
5 | with open(path, "r", encoding="utf-8") as f:
6 |         return json.load(f)
7 |
8 |
9 | def dump_as_json(msg, dct):
10 | print(f"{msg}:")
11 |     print(json.dumps(dct, indent=2) + "\n")
12 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/experiments/1280664374380606/253000ee70914831850defc593ba4740/run.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_run.py",
5 | "export_time": 1684725509,
6 | "_export_time": "2023-05-22 03:18:29",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "mlflow": {
17 | "info": {
18 | "run_uuid": "253000ee70914831850defc593ba4740",
19 | "run_id": "253000ee70914831850defc593ba4740",
20 | "experiment_id": "1280664374380606",
21 | "user_id": "",
22 | "status": "FINISHED",
23 | "start_time": 1683945877431,
24 | "end_time": 1683945880724,
25 | "lifecycle_stage": "active",
26 | "artifact_uri": "dbfs:/databricks/mlflow-tracking/1280664374380606/253000ee70914831850defc593ba4740/artifacts",
27 | "run_name": "851de1f466304650a77c949f5d386d9f",
28 | "_start_time": "2023-05-13 02:44:37",
29 | "_end_time": "2023-05-13 02:44:41"
30 | },
31 | "params": {
32 | "max_depth": "1",
33 | "max_leaf_nodes": "None"
34 | },
35 | "metrics": {
36 | "r2": [
37 | {
38 | "value": 0.1553172302194683,
39 | "timestamp": 1681630579458,
40 | "step": 0
41 | }
42 | ],
43 | "rmse": [
44 | {
45 | "value": 0.7986004372118107,
46 | "timestamp": 1681630579236,
47 | "step": 0
48 | }
49 | ]
50 | },
51 | "tags": {
52 | "mlflow.databricks.cluster.id": "0414-154233-qm0df4rx",
53 | "mlflow.databricks.cluster.info": "{\"cluster_name\":\"Andre_ML_13.0\",\"spark_version\":\"13.0.x-cpu-ml-scala2.12\",\"node_type_id\":\"i3.xlarge\",\"driver_node_type_id\":\"i3.xlarge\",\"autotermination_minutes\":120,\"disk_spec\":{\"disk_count\":0},\"num_workers\":1}",
54 | "mlflow.databricks.cluster.libraries": "{\"installable\":[],\"redacted\":[]}",
55 | "mlflow.databricks.notebook.commandID": "3527702579137640954_8374924253965797983_041de288996c42ef97161546f39184f0",
56 | "mlflow.databricks.notebookID": "1280664374380381",
57 | "mlflow.databricks.notebookPath": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine",
58 | "mlflow.databricks.notebookRevisionID": "1683945880975",
59 | "mlflow.databricks.webappURL": "https://mycompany.cloud.com",
60 | "mlflow.databricks.workspaceID": "2556758628403379",
61 | "mlflow.databricks.workspaceURL": "mycompany.cloud.com",
62 | "mlflow.runName": "851de1f466304650a77c949f5d386d9f",
63 | "mlflow.source.name": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine",
64 | "mlflow.source.type": "NOTEBOOK",
65 | "mlflow.user": "andre@mycompany.com",
66 | "save_signature": "False",
67 | "timestamp": "2023-04-16 07:36:09",
68 | "version.DATABRICKS_RUNTIME_VERSION": "13.0",
69 | "version.mlflow": "2.2.1",
70 | "version.python": "3.10.6",
71 | "version.sklearn": "1.1.1"
72 | }
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/experiments/1280664374380606/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1684725509,
6 | "_export_time": "2023-05-22 03:18:29",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "num_total_runs": 3,
18 | "num_ok_runs": 3,
19 | "num_failed_runs": 0,
20 | "failed_runs": []
21 | },
22 | "mlflow": {
23 | "experiment": {
24 | "experiment_id": "1280664374380606",
25 | "name": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/1280664374380606",
27 | "lifecycle_stage": "active",
28 | "tags": {
29 | "mlflow.experiment.sourceName": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
30 | "mlflow.experimentType": "MLFLOW_EXPERIMENT",
31 | "mlflow.note.content": "WS notebook - WS experiment\n/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
32 | "mlflow.ownerEmail": "andre@mycompany.com",
33 | "mlflow.ownerId": "4566812440727830",
34 | "timestamp": "2023-04-16 07:36:09",
35 | "version_mlflow": "2.2.1"
36 | },
37 | "creation_time": 1681630570495,
38 | "last_update_time": 1683945877431,
39 | "_creation_time": "2023-04-16 07:36:10",
40 | "_last_update_time": "2023-05-13 02:44:37"
41 | },
42 | "runs": [
43 | "253000ee70914831850defc593ba4740",
44 | "85716eddd2ba4d938713b245e80df662",
45 | "851de1f466304650a77c949f5d386d9f"
46 | ]
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/experiments/9195e233f19e49379b16c5f2d2b0c05f/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1684725509,
6 | "_export_time": "2023-05-22 03:18:29",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "num_total_runs": 1,
18 | "num_ok_runs": 1,
19 | "num_failed_runs": 0,
20 | "failed_runs": []
21 | },
22 | "mlflow": {
23 | "experiment": {
24 | "experiment_id": "9195e233f19e49379b16c5f2d2b0c05f",
25 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist",
26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/9195e233f19e49379b16c5f2d2b0c05f",
27 | "lifecycle_stage": "active",
28 | "tags": {
29 | "mlflow.experiment.sourceId": "1765187885495869",
30 | "mlflow.experiment.sourceName": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist",
31 | "mlflow.experiment.sourceType": "REPO_NOTEBOOK",
32 | "mlflow.ownerEmail": "andre@mycompany.com",
33 | "mlflow.ownerId": "4566812440727830"
34 | },
35 | "creation_time": 1681490229478,
36 | "last_update_time": 1684339053373,
37 | "_creation_time": "2023-04-14 16:37:09",
38 | "_last_update_time": "2023-05-17 15:57:33"
39 | },
40 | "runs": [
41 | "a17f0abf5d46464d899f0ffcebbdb7a8"
42 | ]
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/experiments/experiments.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiments.py",
5 | "export_time": 1684725509,
6 | "_export_time": "2023-05-22 03:18:29",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "experiment_names": [
18 | "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
19 | "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist"
20 | ],
21 | "duration": 14.3,
22 | "experiments": 2,
23 | "total_runs": 4,
24 | "ok_runs": 4,
25 | "failed_runs": 0
26 | },
27 | "mlflow": {
28 | "experiments": [
29 | {
30 | "id": "9195e233f19e49379b16c5f2d2b0c05f",
31 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist",
32 | "ok_runs": 1,
33 | "failed_runs": 0,
34 | "duration": 5.3
35 | },
36 | {
37 | "id": "1280664374380606",
38 | "name": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
39 | "ok_runs": 3,
40 | "failed_runs": 0,
41 | "duration": 13.4
42 | }
43 | ]
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/models/experiments/9195e233f19e49379b16c5f2d2b0c05f/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1684724517,
6 | "_export_time": "2023-05-22 03:01:57",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "num_total_runs": 2,
18 | "num_ok_runs": 1,
19 | "num_failed_runs": 1,
20 | "failed_runs": [
21 | "9b901d0d3c214880a4d38d0fceb3092c"
22 | ]
23 | },
24 | "mlflow": {
25 | "experiment": {
26 | "experiment_id": "9195e233f19e49379b16c5f2d2b0c05f",
27 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist",
28 | "artifact_location": "dbfs:/databricks/mlflow-tracking/9195e233f19e49379b16c5f2d2b0c05f",
29 | "lifecycle_stage": "active",
30 | "tags": {
31 | "mlflow.experiment.sourceId": "1765187885495869",
32 | "mlflow.experiment.sourceName": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist",
33 | "mlflow.experiment.sourceType": "REPO_NOTEBOOK",
34 | "mlflow.ownerEmail": "andre@mycompany.com",
35 | "mlflow.ownerId": "4566812440727830"
36 | },
37 | "creation_time": 1681490229478,
38 | "last_update_time": 1684339053373,
39 | "_creation_time": "2023-04-14 16:37:09",
40 | "_last_update_time": "2023-05-17 15:57:33"
41 | },
42 | "runs": [
43 | "a17f0abf5d46464d899f0ffcebbdb7a8"
44 | ],
45 | "permissions": {
46 | "permission_levels": [
47 | {
48 | "permission_level": "CAN_READ",
49 | "description": "Can view the experiment"
50 | },
51 | {
52 | "permission_level": "CAN_EDIT",
53 | "description": "Can view, log runs, and edit the experiment"
54 | },
55 | {
56 | "permission_level": "CAN_MANAGE",
57 | "description": "Can view, log runs, edit, delete, and change permissions of the experiment"
58 | }
59 | ],
60 | "permissions": {}
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/models/experiments/experiments.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiments.py",
5 | "export_time": 1684724517,
6 | "_export_time": "2023-05-22 03:01:57",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "experiment_names": [
18 | "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
19 | "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist"
20 | ],
21 | "duration": 14.2,
22 | "experiments": 2,
23 | "total_runs": 3,
24 | "ok_runs": 2,
25 | "failed_runs": 1
26 | },
27 | "mlflow": {
28 | "experiments": [
29 | {
30 | "id": "1280664374380606",
31 | "name": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
32 | "ok_runs": 1,
33 | "failed_runs": 0,
34 | "duration": 7.1
35 | },
36 | {
37 | "id": "9195e233f19e49379b16c5f2d2b0c05f",
38 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist",
39 | "ok_runs": 1,
40 | "failed_runs": 1,
41 | "duration": 6.2
42 | }
43 | ]
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/models/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_models.py",
5 | "export_time": 1684724517,
6 | "_export_time": "2023-05-22 03:01:57",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "model_names": "Sklearn_Wine_ws,Keras_Mnist",
18 | "stages": "Production,Staging,Archived,None",
19 | "export_all_runs": false,
20 | "export_latest_versions": false,
21 | "export_permissions": true,
22 | "export_deleted_runs": false,
23 | "notebook_formats": [
24 | "SOURCE",
25 | "DBC"
26 | ],
27 | "use_threads": false,
28 | "output_dir": "out",
29 | "models": {
30 | "model_names": [
31 | "Sklearn_Wine_ws",
32 | "Keras_Mnist"
33 | ],
34 | "stages": "Production,Staging,Archived,None",
35 | "export_latest_versions": false,
36 | "notebook_formats": [
37 | "SOURCE",
38 | "DBC"
39 | ],
40 | "use_threads": false,
41 | "output_dir": "out/models",
42 | "num_total_models": 2,
43 | "num_ok_models": 2,
44 | "num_failed_models": 0,
45 | "duration": 20.0,
46 | "failed_models": []
47 | },
48 | "experiments": {
49 | "experiment_names": [
50 | "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws",
51 | "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist"
52 | ],
53 | "duration": 14.2,
54 | "experiments": 2,
55 | "total_runs": 3,
56 | "ok_runs": 2,
57 | "failed_runs": 1
58 | }
59 | },
60 | "mlflow": {}
61 | }
62 |
--------------------------------------------------------------------------------
/samples/databricks/bulk/models/models/models.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_models.py",
5 | "export_time": 1684724517,
6 | "_export_time": "2023-05-22 03:01:57",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "model_names": [
18 | "Sklearn_Wine_ws",
19 | "Keras_Mnist"
20 | ],
21 | "stages": "Production,Staging,Archived,None",
22 | "export_latest_versions": false,
23 | "notebook_formats": [
24 | "SOURCE",
25 | "DBC"
26 | ],
27 | "use_threads": false,
28 | "output_dir": "out/models",
29 | "num_total_models": 2,
30 | "num_ok_models": 2,
31 | "num_failed_models": 0,
32 | "duration": 20.0,
33 | "failed_models": []
34 | },
35 | "mlflow": {
36 | "models": [
37 | "Sklearn_Wine_ws",
38 | "Keras_Mnist"
39 | ]
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/notebook_experiments/repo_notebook/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1684340109,
6 | "_export_time": "2023-05-17 16:15:09",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "num_total_runs": 1,
18 | "num_ok_runs": 1,
19 | "num_failed_runs": 0,
20 | "failed_runs": []
21 | },
22 | "mlflow": {
23 | "experiment": {
24 | "experiment_id": "e090757fcb8f49cb9822f65f2fe7ed91",
25 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine",
26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/e090757fcb8f49cb9822f65f2fe7ed91",
27 | "lifecycle_stage": "active",
28 | "tags": {
29 | "mlflow.experiment.sourceId": "1765187885495747",
30 | "mlflow.experiment.sourceName": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine",
31 | "mlflow.experiment.sourceType": "REPO_NOTEBOOK",
32 | "mlflow.ownerEmail": "andre@mycompany.com",
33 | "mlflow.ownerId": "4566812440727830"
34 | },
35 | "creation_time": 1681489696888,
36 | "last_update_time": 1684340017546,
37 | "_creation_time": "2023-04-14 16:28:17",
38 | "_last_update_time": "2023-05-17 16:13:38"
39 | },
40 | "runs": [
41 | "02aeef6d8cbf449ab50c8e715e320085"
42 | ],
43 | "permissions": {
44 | "permission_levels": [
45 | {
46 | "permission_level": "CAN_READ",
47 | "description": "Can view the experiment"
48 | },
49 | {
50 | "permission_level": "CAN_EDIT",
51 | "description": "Can view, log runs, and edit the experiment"
52 | },
53 | {
54 | "permission_level": "CAN_MANAGE",
55 | "description": "Can view, log runs, edit, delete, and change permissions of the experiment"
56 | }
57 | ],
58 | "permissions": {}
59 | }
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/notebook_experiments/workspace_notebook/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1684718874,
6 | "_export_time": "2023-05-22 01:27:54",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "num_total_runs": 1,
18 | "num_ok_runs": 1,
19 | "num_failed_runs": 0,
20 | "failed_runs": []
21 | },
22 | "mlflow": {
23 | "experiment": {
24 | "experiment_id": "1280664374380381",
25 | "name": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine",
26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/1280664374380381",
27 | "lifecycle_stage": "active",
28 | "tags": {
29 | "mlflow.experiment.sourceName": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine",
30 | "mlflow.experimentType": "NOTEBOOK",
31 | "mlflow.note.content": "WS notebook - NB experiment\n/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine",
32 | "mlflow.ownerEmail": "andre@mycompany.com",
33 | "mlflow.ownerId": "4566812440727830"
34 | },
35 | "creation_time": 1681628993361,
36 | "last_update_time": 1681628993361,
37 | "_creation_time": "2023-04-16 07:09:53",
38 | "_last_update_time": "2023-04-16 07:09:53"
39 | },
40 | "runs": [
41 | "f7816bc76f254f22ab25549a7c2c9b06"
42 | ],
43 | "permissions": {
44 | "permission_levels": [
45 | {
46 | "permission_level": "CAN_READ",
47 | "description": "Can view the experiment"
48 | },
49 | {
50 | "permission_level": "CAN_EDIT",
51 | "description": "Can view, log runs, and edit the experiment"
52 | },
53 | {
54 | "permission_level": "CAN_MANAGE",
55 | "description": "Can view, log runs, edit, delete, and change permissions of the experiment"
56 | }
57 | ],
58 | "permissions": {}
59 | }
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/MLmodel:
--------------------------------------------------------------------------------
1 | artifact_path: model
2 | flavors:
3 | python_function:
4 | env:
5 | conda: conda.yaml
6 | virtualenv: python_env.yaml
7 | loader_module: mlflow.sklearn
8 | model_path: model.pkl
9 | predict_fn: predict
10 | python_version: 3.10.6
11 | sklearn:
12 | code: null
13 | pickled_model: model.pkl
14 | serialization_format: cloudpickle
15 | sklearn_version: 1.1.1
16 | mlflow_version: 2.3.1
17 | model_uuid: b13b14bd62734b31baa2e5664ad86417
18 | run_id: 5e1e2c44039a40afafc760b837a4daab
19 | saved_input_example_info:
20 | artifact_path: input_example.json
21 | pandas_orient: split
22 | type: dataframe
23 | signature:
24 | inputs: '[{"name": "fixed acidity", "type": "double"}, {"name": "volatile acidity",
25 | "type": "double"}, {"name": "citric acid", "type": "double"}, {"name": "residual
26 | sugar", "type": "double"}, {"name": "chlorides", "type": "double"}, {"name": "free
27 | sulfur dioxide", "type": "double"}, {"name": "total sulfur dioxide", "type": "double"},
28 | {"name": "density", "type": "double"}, {"name": "pH", "type": "double"}, {"name":
29 | "sulphates", "type": "double"}, {"name": "alcohol", "type": "double"}]'
30 | outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
31 | utc_time_created: '2023-05-21 19:16:51.054335'
32 |
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/conda.yaml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | dependencies:
4 | - python=3.10.6
5 | - pip<=22.2.2
6 | - pip:
7 | - mlflow<3,>=2.3
8 | - category-encoders==2.6.0
9 | - cffi==1.15.1
10 | - cloudpickle==2.0.0
11 | - databricks-automl-runtime==0.2.16
12 | - defusedxml==0.7.1
13 | - holidays==0.22
14 | - lightgbm==3.3.5
15 | - matplotlib==3.5.2
16 | - psutil==5.9.0
17 | - scikit-learn==1.1.1
18 | - typing-extensions==4.3.0
19 | name: mlflow-env
20 |
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/input_example.json:
--------------------------------------------------------------------------------
1 | {"columns": ["fixed acidity", "volatile acidity", "citric acid", "residual sugar", "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density", "pH", "sulphates", "alcohol"], "data": [[3.8, 0.31, 0.02, 11.1, 0.036, 20.0, 114.0, 0.99248, 3.75, 0.44, 12.4], [4.4, 0.32, 0.39, 4.3, 0.03, 31.0, 127.0, 0.98904, 3.46, 0.36, 12.8], [4.5, 0.19, 0.21, 0.95, 0.033, 89.0, 159.0, 0.99332, 3.34, 0.42, 8.0], [4.6, 0.445, 0.0, 1.4, 0.053, 11.0, 178.0, 0.99426, 3.79, 0.55, 10.2], [4.7, 0.67, 0.09, 1.0, 0.02, 5.0, 9.0, 0.98722, 3.3, 0.34, 13.6]]}
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/python_env.yaml:
--------------------------------------------------------------------------------
1 | python: 3.10.6
2 | build_dependencies:
3 | - pip==22.2.2
4 | - setuptools==63.4.1
5 | - wheel==0.37.1
6 | dependencies:
7 | - -r requirements.txt
8 |
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/requirements.txt:
--------------------------------------------------------------------------------
1 | mlflow<3,>=2.3
2 | category-encoders==2.6.0
3 | cffi==1.15.1
4 | cloudpickle==2.0.0
5 | databricks-automl-runtime==0.2.16
6 | defusedxml==0.7.1
7 | holidays==0.22
8 | lightgbm==3.3.5
9 | matplotlib==3.5.2
10 | psutil==5.9.0
11 | scikit-learn==1.1.1
12 | typing-extensions==4.3.0
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/workspace_experiments/job_repo_notebook/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1681541244,
6 | "_export_time": "2023-04-15 06:47:24",
7 | "mlflow_version": "2.2.1",
8 | "mlflow_tracking_uri": "databricks",
9 | "platform": {
10 | "python_version": "3.10.6",
11 | "system": "Linux",
12 | "processor": "x86_64"
13 | },
14 | "user": "root",
15 | "databricks": {
16 | "DATABRICKS_RUNTIME_VERSION": "13.0"
17 | }
18 | },
19 | "info": {
20 | "num_total_runs": 1,
21 | "num_ok_runs": 1,
22 | "num_failed_runs": 0,
23 | "failed_runs": []
24 | },
25 | "mlflow": {
26 | "experiment": {
27 | "experiment_id": "1280664374378362",
28 | "name": "/Users/andre@mycompany.com/experiments/Sklearn_Wine_job",
29 | "artifact_location": "dbfs:/databricks/mlflow-tracking/1280664374378362",
30 | "lifecycle_stage": "active",
31 | "tags": {
32 | "mlflow.experiment.sourceName": "/Users/andre@mycompany.com/experiments/Sklearn_Wine_job",
33 | "mlflow.experimentType": "MLFLOW_EXPERIMENT",
34 | "mlflow.ownerEmail": "andre@mycompany.com",
35 | "mlflow.ownerId": "4566812440727830",
36 | "timestamp": "2023-04-15 06:21:44",
37 | "version_mlflow": "2.2.1"
38 | },
39 | "creation_time": 1681539704973,
40 | "last_update_time": 1681539706472,
41 | "_creation_time": "2023-04-15 06:21:45",
42 | "_last_update_time": "2023-04-15 06:21:46"
43 | },
44 | "runs": [
45 | "e559b53f67884160b37e27f4475a26e3"
46 | ]
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/samples/databricks/single/experiments/workspace_experiments/workspace_notebook/253000ee70914831850defc593ba4740/run.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_run.py",
5 | "export_time": 1684303741,
6 | "_export_time": "2023-05-17 06:09:01",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "databricks://e2_demo",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "mlflow": {
17 | "info": {
18 | "run_uuid": "253000ee70914831850defc593ba4740",
19 | "run_id": "253000ee70914831850defc593ba4740",
20 | "experiment_id": "1280664374380606",
21 | "user_id": "",
22 | "status": "FINISHED",
23 | "start_time": 1683945877431,
24 | "end_time": 1683945880724,
25 | "lifecycle_stage": "active",
26 | "artifact_uri": "dbfs:/databricks/mlflow-tracking/1280664374380606/253000ee70914831850defc593ba4740/artifacts",
27 | "run_name": "851de1f466304650a77c949f5d386d9f",
28 | "_start_time": "2023-05-13 02:44:37",
29 | "_end_time": "2023-05-13 02:44:41"
30 | },
31 | "params": {
32 | "max_depth": "1",
33 | "max_leaf_nodes": "None"
34 | },
35 | "metrics": {
36 | "r2": [
37 | {
38 | "value": 0.1553172302194683,
39 | "timestamp": 1681630579458,
40 | "step": 0
41 | }
42 | ],
43 | "rmse": [
44 | {
45 | "value": 0.7986004372118107,
46 | "timestamp": 1681630579236,
47 | "step": 0
48 | }
49 | ]
50 | },
51 | "tags": {
52 | "mlflow.databricks.cluster.id": "0414-154233-qm0df4rx",
53 | "mlflow.databricks.cluster.info": "{\"cluster_name\":\"Andre_ML_13.0\",\"spark_version\":\"13.0.x-cpu-ml-scala2.12\",\"node_type_id\":\"i3.xlarge\",\"driver_node_type_id\":\"i3.xlarge\",\"autotermination_minutes\":120,\"disk_spec\":{\"disk_count\":0},\"num_workers\":1}",
54 | "mlflow.databricks.cluster.libraries": "{\"installable\":[],\"redacted\":[]}",
55 | "mlflow.databricks.notebook.commandID": "3527702579137640954_8374924253965797983_041de288996c42ef97161546f39184f0",
56 | "mlflow.databricks.notebookID": "1280664374380381",
57 | "mlflow.databricks.notebookPath": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine",
58 | "mlflow.databricks.notebookRevisionID": "1683945880975",
59 | "mlflow.databricks.webappURL": "https://mycompany.cloud.com",
60 | "mlflow.databricks.workspaceID": "2556758628403379",
61 | "mlflow.databricks.workspaceURL": "mycompany.cloud.com",
62 | "mlflow.runName": "851de1f466304650a77c949f5d386d9f",
63 | "mlflow.source.name": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine",
64 | "mlflow.source.type": "NOTEBOOK",
65 | "mlflow.user": "andre@mycompany.com",
66 | "save_signature": "False",
67 | "timestamp": "2023-04-16 07:36:09",
68 | "version.DATABRICKS_RUNTIME_VERSION": "13.0",
69 | "version.mlflow": "2.2.1",
70 | "version.python": "3.10.6",
71 | "version.sklearn": "1.1.1"
72 | }
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_model_version.py",
5 | "export_file_version": "2",
6 | "export_time": 1721364374,
7 | "_export_time": "2024-07-19 04:46:14",
8 | "mlflow_version": "2.14.3",
9 | "mlflow_tracking_uri": "databricks://e2_demo_fieldeng",
10 | "platform": {
11 | "python_version": "3.8.16",
12 | "system": "Darwin",
13 | "processor": "i386"
14 | },
15 | "user": "andre.mesarovic"
16 | },
17 | "mlflow": {
18 | "experiment": {
19 | "experiment_id": "0828080c9c7b43a7b7624307809cfcda",
20 | "name": "/Repos/andre.mesarovic@databricks.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine_UC",
21 | "artifact_location": "dbfs:/databricks/mlflow-tracking/0828080c9c7b43a7b7624307809cfcda",
22 | "lifecycle_stage": "active",
23 | "last_update_time": 1717552430207,
24 | "creation_time": 1716309738995,
25 | "tags": [
26 | {
27 | "key": "mlflow.experiment.sourceType",
28 | "value": "REPO_NOTEBOOK"
29 | },
30 | {
31 | "key": "mlflow.ownerId",
32 | "value": "4566812440727830"
33 | },
34 | {
35 | "key": "mlflow.sharedViewState.d6a47c70ec552dd064068bd2040c8c53e68e466c44a75fa3353a7747a8c6489c",
36 | "value": "{\"searchFilter\":\"\",\"orderByKey\":\"attributes.start_time\",\"orderByAsc\":false,\"startTime\":\"ALL\",\"lifecycleFilter\":\"Active\",\"datasetsFilter\":[],\"modelVersionFilter\":\"All Runs\",\"selectedColumns\":[\"attributes.`Source`\",\"attributes.`Models`\",\"attributes.`Dataset`\"],\"runsExpanded\":{},\"runsPinned\":[],\"runsHidden\":[],\"runsHiddenMode\":\"FIRST_10_RUNS\",\"viewMaximized\":false,\"runListHidden\":false,\"isAccordionReordered\":false,\"useGroupedValuesInCharts\":true,\"groupBy\":null,\"groupsExpanded\":{},\"autoRefreshEnabled\":true}"
37 | },
38 | {
39 | "key": "mlflow.experiment.sourceName",
40 | "value": "/Repos/andre.mesarovic@databricks.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine_UC"
41 | },
42 | {
43 | "key": "mlflow.ownerId",
44 | "value": "4566812440727830"
45 | },
46 | {
47 | "key": "mlflow.ownerEmail",
48 | "value": "andre.mesarovic@databricks.com"
49 | },
50 | {
51 | "key": "mlflow.experiment.sourceId",
52 | "value": "2824690123548175"
53 | }
54 | ],
55 | "_last_update_time": "2024-06-05 01:53:50",
56 | "_creation_time": "2024-05-21 16:42:19"
57 | }
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/MLmodel:
--------------------------------------------------------------------------------
1 | artifact_path: model
2 | databricks_runtime: '15.1'
3 | flavors:
4 | python_function:
5 | env:
6 | conda: conda.yaml
7 | virtualenv: python_env.yaml
8 | loader_module: mlflow.sklearn
9 | model_path: model.pkl
10 | predict_fn: predict
11 | python_version: 3.11.0
12 | sklearn:
13 | code: null
14 | pickled_model: model.pkl
15 | serialization_format: cloudpickle
16 | sklearn_version: 1.3.0
17 | mlflow_version: 2.13.1
18 | model_size_bytes: 102527
19 | model_uuid: 7c56d8b80973448b8c4e7b5a3b9fc7b6
20 | run_id: 6222162b4c7f47c2820a7e5b520f65a9
21 | saved_input_example_info:
22 | artifact_path: input_example.json
23 | pandas_orient: split
24 | type: dataframe
25 | signature:
26 | inputs: '[{"type": "double", "name": "fixed_acidity", "required": true}, {"type":
27 | "double", "name": "volatile_acidity", "required": true}, {"type": "double", "name":
28 | "citric_acid", "required": true}, {"type": "double", "name": "residual_sugar",
29 | "required": true}, {"type": "double", "name": "chlorides", "required": true},
30 | {"type": "double", "name": "free_sulfur_dioxide", "required": true}, {"type":
31 | "double", "name": "total_sulfur_dioxide", "required": true}, {"type": "double",
32 | "name": "density", "required": true}, {"type": "double", "name": "pH", "required":
33 | true}, {"type": "double", "name": "sulphates", "required": true}, {"type": "double",
34 | "name": "alcohol", "required": true}]'
35 | outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
36 | params: null
37 | utc_time_created: '2024-06-05 01:53:53.910623'
38 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/conda.yaml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | dependencies:
4 | - python=3.11.0
5 | - pip<=23.0.1
6 | - pip:
7 | - mlflow==2.13.1
8 | - cloudpickle==2.2.1
9 | - lz4==4.3.2
10 | - psutil==5.9.0
11 | - scikit-learn==1.3.0
12 | name: mlflow-env
13 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/MLmodel:
--------------------------------------------------------------------------------
1 | artifact_path: model
2 | databricks_runtime: '15.1'
3 | flavors:
4 | python_function:
5 | env:
6 | conda: conda.yaml
7 | virtualenv: python_env.yaml
8 | loader_module: mlflow.sklearn
9 | model_path: model.pkl
10 | predict_fn: predict
11 | python_version: 3.11.0
12 | sklearn:
13 | code: null
14 | pickled_model: model.pkl
15 | serialization_format: cloudpickle
16 | sklearn_version: 1.3.0
17 | mlflow_version: 2.13.1
18 | model_size_bytes: 102527
19 | model_uuid: 7c56d8b80973448b8c4e7b5a3b9fc7b6
20 | run_id: 6222162b4c7f47c2820a7e5b520f65a9
21 | saved_input_example_info:
22 | artifact_path: input_example.json
23 | pandas_orient: split
24 | type: dataframe
25 | signature:
26 | inputs: '[{"type": "double", "name": "fixed_acidity", "required": true}, {"type":
27 | "double", "name": "volatile_acidity", "required": true}, {"type": "double", "name":
28 | "citric_acid", "required": true}, {"type": "double", "name": "residual_sugar",
29 | "required": true}, {"type": "double", "name": "chlorides", "required": true},
30 | {"type": "double", "name": "free_sulfur_dioxide", "required": true}, {"type":
31 | "double", "name": "total_sulfur_dioxide", "required": true}, {"type": "double",
32 | "name": "density", "required": true}, {"type": "double", "name": "pH", "required":
33 | true}, {"type": "double", "name": "sulphates", "required": true}, {"type": "double",
34 | "name": "alcohol", "required": true}]'
35 | outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
36 | params: null
37 | utc_time_created: '2024-06-05 01:53:53.910623'
38 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/conda.yaml:
--------------------------------------------------------------------------------
1 | channels:
2 | - conda-forge
3 | dependencies:
4 | - python=3.11.0
5 | - pip<=23.0.1
6 | - pip:
7 | - mlflow==2.13.1
8 | - cloudpickle==2.2.1
9 | - lz4==4.3.2
10 | - psutil==5.9.0
11 | - scikit-learn==1.3.0
12 | name: mlflow-env
13 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/python_env.yaml:
--------------------------------------------------------------------------------
1 | python: 3.11.0
2 | build_dependencies:
3 | - pip==23.0.1
4 | - setuptools==68.0.0
5 | - wheel==0.38.4
6 | dependencies:
7 | - -r requirements.txt
8 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/requirements.txt:
--------------------------------------------------------------------------------
1 | mlflow==2.13.1
2 | cloudpickle==2.2.1
3 | lz4==4.3.2
4 | psutil==5.9.0
5 | scikit-learn==1.3.0
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/model.pkl
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/python_env.yaml:
--------------------------------------------------------------------------------
1 | python: 3.11.0
2 | build_dependencies:
3 | - pip==23.0.1
4 | - setuptools==68.0.0
5 | - wheel==0.38.4
6 | dependencies:
7 | - -r requirements.txt
8 |
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/requirements.txt:
--------------------------------------------------------------------------------
1 | mlflow==2.13.1
2 | cloudpickle==2.2.1
3 | lz4==4.3.2
4 | psutil==5.9.0
5 | scikit-learn==1.3.0
--------------------------------------------------------------------------------
/samples/databricks/single/versions/sklearn_wine/version.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_model_version.py",
5 | "export_file_version": "2",
6 | "export_time": 1721364374,
7 | "_export_time": "2024-07-19 04:46:14",
8 | "mlflow_version": "2.14.3",
9 | "mlflow_tracking_uri": "databricks://e2_demo_fieldeng",
10 | "platform": {
11 | "python_version": "3.8.16",
12 | "system": "Darwin",
13 | "processor": "i386"
14 | },
15 | "user": "andre.mesarovic"
16 | },
17 | "mlflow": {
18 | "model_version": {
19 | "name": "andre_catalog.ml_models2.sklearn_wine_best",
20 | "version": "15",
21 | "creation_timestamp": 1717552439516,
22 | "last_updated_timestamp": 1717552440318,
23 | "description": "white_2",
24 | "user_id": "andre.mesarovic@databricks.com",
25 | "current_stage": null,
26 | "source": "dbfs:/databricks/mlflow-tracking/0828080c9c7b43a7b7624307809cfcda/6222162b4c7f47c2820a7e5b520f65a9/artifacts/model",
27 | "run_id": "6222162b4c7f47c2820a7e5b520f65a9",
28 | "run_link": null,
29 | "status": "READY",
30 | "status_message": "",
31 | "tags": {
32 | "alias": "white_2"
33 | },
34 | "aliases": [
35 | "white_2"
36 | ],
37 | "_creation_timestamp": "2024-06-05 01:54:00",
38 | "_last_updated_timestamp": "2024-06-05 01:54:00"
39 | }
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/experiments/1/d057cae15f27465988e72c6212e1f226/run.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_run.py",
5 | "export_time": 1671260983,
6 | "_export_time": "2022-12-17 07:09:43",
7 | "mlflow_version": "2.0.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "mlflow": {
16 | "info": {
17 | "run_uuid": "a83cebbccbca41299360c695c5ea72f3",
18 | "run_id": "a83cebbccbca41299360c695c5ea72f3",
19 | "experiment_id": "1",
20 | "user_id": "andre",
21 | "status": "FINISHED",
22 | "start_time": 1671070664322,
23 | "end_time": 1671070667923,
24 | "lifecycle_stage": "active",
25 | "artifact_uri": "/opt/mlflow/server/mlruns/1/a83cebbccbca41299360c695c5ea72f3/artifacts",
26 | "run_name": "train.sh 2.0.1 2022-12-15 02:17:43"
27 | },
28 | "params": {
29 | "max_depth": "None",
30 | "max_leaf_nodes": "32"
31 | },
32 | "metrics": {
33 | "rmse": [
34 | {
35 | "value": 0.7256044469217515,
36 | "timestamp": 1671070665139,
37 | "step": 0
38 | }
39 | ],
40 | "r2": [
41 | {
42 | "value": 0.30267631032833586,
43 | "timestamp": 1671070665152,
44 | "step": 0
45 | }
46 | ],
47 | "mae": [
48 | {
49 | "value": 0.5688309814398113,
50 | "timestamp": 1671070665163,
51 | "step": 0
52 | }
53 | ]
54 | },
55 | "tags": {
56 | "data_path": "../../data/train/wine-quality-white.csv",
57 | "mlflow.log-model.history": "[{\"run_id\": \"a83cebbccbca41299360c695c5ea72f3\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2022-12-15 02:17:45.173770\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.14\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"b0022500ab944161b3f97b9746509418\", \"mlflow_version\": \"2.0.1\"}]",
58 | "mlflow.runName": "train.sh 2.0.1 2022-12-15 02:17:43",
59 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009",
60 | "mlflow.source.name": "/Users/andre/git/andre/mlflow-examples/python/sklearn/wine_quality/train.py",
61 | "mlflow.source.type": "LOCAL",
62 | "mlflow.user": "andre",
63 | "registered_model_name": "sklearn_wine",
64 | "registered_model_version_stage": "Production",
65 | "run_origin": "train.sh",
66 | "save_signature": "False",
67 | "uuid": "CfHSHRv2yXaDioNR46FRoL",
68 | "version.mlflow": "2.0.1",
69 | "version.platform": "macOS-10.16-x86_64-i386-64bit",
70 | "version.python": "3.8.14",
71 | "version.sklearn": "1.1.1"
72 | }
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/experiments/1/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_experiment.py",
5 | "export_time": 1671260983,
6 | "_export_time": "2022-12-17 07:09:43",
7 | "mlflow_version": "2.0.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "num_total_runs": 1,
17 | "num_ok_runs": 1,
18 | "num_failed_runs": 0,
19 | "failed_runs": []
20 | },
21 | "mlflow": {
22 | "experiment": {
23 | "experiment_id": "1",
24 | "name": "sklearn_wine",
25 | "artifact_location": "/opt/mlflow/server/mlruns/1",
26 | "lifecycle_stage": "active",
27 | "tags": {
28 | "experiment_created": "2022-12-15 02:17:43",
29 | "version_mlflow": "2.0.1"
30 | },
31 | "creation_time": 1671070664091,
32 | "last_update_time": 1671070664091
33 | },
34 | "runs": [
35 | "d057cae15f27465988e72c6212e1f226"
36 | ]
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/experiments/2/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_experiment.py",
5 | "export_time": 1671260983,
6 | "_export_time": "2022-12-17 07:09:45",
7 | "mlflow_version": "2.0.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "num_total_runs": 1,
17 | "num_ok_runs": 1,
18 | "num_failed_runs": 0,
19 | "failed_runs": []
20 | },
21 | "mlflow": {
22 | "experiment": {
23 |       "experiment_id": "2",
24 |       "name": "sklearn_iris",
25 |       "artifact_location": "/opt/mlflow/server/mlruns/2",
26 | "lifecycle_stage": "active",
27 | "tags": {
28 |         "experiment_created": "2022-12-15 02:17:45",
29 | "version_mlflow": "2.0.1"
30 | },
31 | "creation_time": 1671070664091,
32 | "last_update_time": 1671070664091
33 | },
34 | "runs": [
35 | "5397ae67ee0c49139bf64834b4d27fab",
36 | "8a6af43e756f433da7a90fd6b4e49c3a"
37 | ]
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/experiments/experiments.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_experiments.py",
5 | "export_time": 1671260983,
6 | "_export_time": "2022-12-17 07:09:43",
7 | "mlflow_version": "2.0.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "duration": 0.2,
17 |     "experiments": 2,
18 | "total_runs": 2,
19 | "ok_runs": 2,
20 | "failed_runs": 0
21 | },
22 | "mlflow": {
23 | "experiments": [
24 | {
25 | "id": "2",
26 | "name": "sklearn_iris",
27 | "ok_runs": 1,
28 | "failed_runs": 0,
29 | "duration": 0.1
30 | },
31 | {
32 | "id": "1",
33 | "name": "sklearn_wine",
34 | "ok_runs": 1,
35 | "failed_runs": 0,
36 | "duration": 0.1
37 | }
38 | ]
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/experiments/1/d057cae15f27465988e72c6212e1f226/run.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_run.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "mlflow": {
16 | "info": {
17 | "run_uuid": "d057cae15f27465988e72c6212e1f226",
18 | "run_id": "d057cae15f27465988e72c6212e1f226",
19 | "experiment_id": "1",
20 | "user_id": "andre",
21 | "status": "FINISHED",
22 | "start_time": 1672601484918,
23 | "end_time": 1672601487986,
24 | "lifecycle_stage": "active",
25 | "artifact_uri": "/opt/mlflow/server/mlruns/1/d057cae15f27465988e72c6212e1f226/artifacts",
26 | "run_name": "2023-01-01 19:31:23 train.sh 2.1.1"
27 | },
28 | "params": {
29 | "max_depth": "4",
30 | "max_leaf_nodes": "32"
31 | },
32 | "metrics": {
33 | "rmse": [
34 | {
35 | "value": 0.7367947360663162,
36 | "timestamp": 1672601485655,
37 | "step": 0
38 | }
39 | ],
40 | "r2": [
41 | {
42 | "value": 0.28100217442439346,
43 | "timestamp": 1672601485668,
44 | "step": 0
45 | }
46 | ],
47 | "mae": [
48 | {
49 | "value": 0.5877424565761121,
50 | "timestamp": 1672601485675,
51 | "step": 0
52 | }
53 | ]
54 | },
55 | "tags": {
56 | "data_path": "https://raw.githubusercontent.com/amesar/mlflow-examples/master/data/train/wine-quality-white.csv",
57 | "dataset": "wine-quality",
58 | "mlflow.log-model.history": "[{\"run_id\": \"d057cae15f27465988e72c6212e1f226\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2023-01-01 19:31:25.684661\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.15\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"46d75d52dc604c5bb49a0c4fae32c1b3\", \"mlflow_version\": \"2.1.1\"}]",
59 | "mlflow.runName": "2023-01-01 19:31:23 train.sh 2.1.1",
60 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009",
61 | "mlflow.source.name": "/Users/andre/git/andre/mlflow-examples/python/sklearn/wine_quality/train.py",
62 | "mlflow.source.type": "LOCAL",
63 | "mlflow.user": "andre",
64 | "output_path": "None",
65 | "registered_model_name": "sklearn_wine",
66 | "registered_model_version_stage": "Production",
67 | "run_origin": "train.sh",
68 | "save_signature": "False",
69 | "uuid": "feX2hCEDYwCJRgypBKDpJZ",
70 | "version.mlflow": "2.1.1",
71 | "version.platform": "macOS-10.16-x86_64-i386-64bit",
72 | "version.python": "3.8.15",
73 | "version.sklearn": "1.1.1"
74 | }
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/experiments/1/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_experiment.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "num_total_runs": 1,
17 | "num_ok_runs": 1,
18 | "num_failed_runs": 0,
19 | "failed_runs": []
20 | },
21 | "mlflow": {
22 | "experiment": {
23 | "experiment_id": "1",
24 | "name": "sklearn_wine",
25 | "artifact_location": "/opt/mlflow/server/mlruns/1",
26 | "lifecycle_stage": "active",
27 | "tags": {
28 | "experiment_created": "2023-01-01 19:31:23",
29 | "version_mlflow": "2.1.1"
30 | },
31 | "creation_time": 1672601484703,
32 | "last_update_time": 1672601484703
33 | },
34 | "runs": [
35 | "d057cae15f27465988e72c6212e1f226"
36 | ]
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/experiments/2/5397ae67ee0c49139bf64834b4d27fab/run.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_run.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "mlflow": {
16 | "info": {
17 | "run_uuid": "5397ae67ee0c49139bf64834b4d27fab",
18 | "run_id": "5397ae67ee0c49139bf64834b4d27fab",
19 | "experiment_id": "2",
20 | "user_id": "andre",
21 | "status": "FINISHED",
22 | "start_time": 1672601579550,
23 | "end_time": 1672601583143,
24 | "lifecycle_stage": "active",
25 | "artifact_uri": "/opt/mlflow/server/mlruns/2/5397ae67ee0c49139bf64834b4d27fab/artifacts",
26 | "run_name": "sklearn_iris"
27 | },
28 | "params": {
29 | "max_depth": "5"
30 | },
31 | "metrics": {
32 | "accuracy_score": [
33 | {
34 | "value": 0.9555555555555556,
35 | "timestamp": 1672601579593,
36 | "step": 0
37 | }
38 | ],
39 | "zero_one_loss": [
40 | {
41 | "value": 0.0444444444444444,
42 | "timestamp": 1672601579602,
43 | "step": 0
44 | }
45 | ]
46 | },
47 | "tags": {
48 | "dataset": "sklearn-iris",
49 | "int": "123",
50 | "mlflow.log-model.history": "[{\"run_id\": \"5397ae67ee0c49139bf64834b4d27fab\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2023-01-01 19:32:59.612140\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.15\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"3bd3c618169546189858df68e9610e0a\", \"mlflow_version\": \"2.1.1\"}]",
51 | "mlflow.runName": "sklearn_iris",
52 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009",
53 | "mlflow.source.name": "train.py",
54 | "mlflow.source.type": "LOCAL",
55 | "mlflow.user": "andre",
56 | "mlflow_version": "2.1.1"
57 | }
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/experiments/2/8a6af43e756f433da7a90fd6b4e49c3a/run.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_run.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "mlflow": {
16 | "info": {
17 | "run_uuid": "5397ae67ee0c49139bf64834b4d27fab",
18 | "run_id": "5397ae67ee0c49139bf64834b4d27fab",
19 | "experiment_id": "2",
20 | "user_id": "andre",
21 | "status": "FINISHED",
22 | "start_time": 1672601579550,
23 | "end_time": 1672601583143,
24 | "lifecycle_stage": "active",
25 | "artifact_uri": "/opt/mlflow/server/mlruns/2/5397ae67ee0c49139bf64834b4d27fab/artifacts",
26 | "run_name": "sklearn_iris"
27 | },
28 | "params": {
29 | "max_depth": "5"
30 | },
31 | "metrics": {
32 | "accuracy_score": [
33 | {
34 | "value": 0.9555555555555556,
35 | "timestamp": 1672601579593,
36 | "step": 0
37 | }
38 | ],
39 | "zero_one_loss": [
40 | {
41 | "value": 0.0444444444444444,
42 | "timestamp": 1672601579602,
43 | "step": 0
44 | }
45 | ]
46 | },
47 | "tags": {
48 | "dataset": "sklearn-iris",
49 | "int": "123",
50 | "mlflow.log-model.history": "[{\"run_id\": \"5397ae67ee0c49139bf64834b4d27fab\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2023-01-01 19:32:59.612140\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.15\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"3bd3c618169546189858df68e9610e0a\", \"mlflow_version\": \"2.1.1\"}]",
51 | "mlflow.runName": "sklearn_iris",
52 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009",
53 | "mlflow.source.name": "train.py",
54 | "mlflow.source.type": "LOCAL",
55 | "mlflow.user": "andre",
56 | "mlflow_version": "2.1.1"
57 | }
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/experiments/2/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_experiment.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "num_total_runs": 2,
17 | "num_ok_runs": 2,
18 | "num_failed_runs": 0,
19 | "failed_runs": []
20 | },
21 | "mlflow": {
22 | "experiment": {
23 | "experiment_id": "2",
24 | "name": "sklearn_iris",
25 | "artifact_location": "/opt/mlflow/server/mlruns/2",
26 | "lifecycle_stage": "active",
27 | "tags": {},
28 | "creation_time": 1672601579372,
29 | "last_update_time": 1672601579372
30 | },
31 | "runs": [
32 | "5397ae67ee0c49139bf64834b4d27fab",
33 | "8a6af43e756f433da7a90fd6b4e49c3a"
34 | ]
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/experiments/experiments.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_experiments.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "duration": 0.1,
17 | "experiments": 2,
18 | "total_runs": 2,
19 | "ok_runs": 2,
20 | "failed_runs": 0
21 | },
22 | "mlflow": {
23 | "experiments": [
24 | {
25 | "id": "2",
26 | "name": "sklearn_iris",
27 | "ok_runs": 1,
28 | "failed_runs": 0,
29 | "duration": 0.1
30 | },
31 | {
32 | "id": "1",
33 | "name": "sklearn_wine",
34 | "ok_runs": 1,
35 | "failed_runs": 0,
36 | "duration": 0.1
37 | }
38 | ]
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_models.py",
5 | "export_time": 1676530201,
6 | "_export_time": "2023-02-16 06:50:01",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "model_names": "sklearn*",
18 | "stages": "production,staging",
19 | "export_all_runs": true,
20 | "export_latest_versions": false,
21 | "notebook_formats": "",
22 | "use_threads": false,
23 | "output_dir": "out",
24 | "models": {
25 | "model_names": [
26 | "sklearn_iris",
27 | "sklearn_wine"
28 | ],
29 | "stages": "production,staging",
30 | "export_run": false,
31 | "export_latest_versions": false,
32 | "notebook_formats": "",
33 | "use_threads": false,
34 | "output_dir": "out/models",
35 | "num_total_models": 2,
36 | "num_ok_models": 2,
37 | "num_failed_models": 0,
38 | "duration": 0.1,
39 | "failed_models": []
40 | },
41 | "experiments": {
42 | "duration": 0.3,
43 | "experiments": 2,
44 | "total_runs": 5,
45 | "ok_runs": 5,
46 | "failed_runs": 0
47 | }
48 | },
49 | "mlflow": {}
50 | }
51 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/models/models.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_models.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "stages": "production,staging",
17 | "notebook_formats": "",
18 | "num_total_models": 2,
19 | "num_ok_models": 2,
20 | "num_failed_models": 0,
21 | "duration": 0.1,
22 | "failed_models": []
23 | },
24 | "mlflow": {
25 | "models": [
26 | "sklearn_iris",
27 | "sklearn_wine"
28 | ]
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/models/sklearn_iris/model.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_model.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "num_target_stages": 2,
17 | "num_target_versions": 0,
18 | "num_src_versions": 1,
19 | "num_dst_versions": 1
20 | },
21 | "mlflow": {
22 | "registered_model": {
23 | "name": "sklearn_iris",
24 | "creation_timestamp": 1672601581580,
25 | "last_updated_timestamp": 1672601581600,
26 | "latest_versions": [
27 | {
28 | "creation_timestamp": 1672601581587,
29 | "current_stage": "Staging",
30 | "description": "",
31 | "last_updated_timestamp": 1672601581600,
32 | "name": "sklearn_iris",
33 | "run_id": "dee04cd7f0054008a3c8fe0260feb814",
34 | "run_link": "",
35 | "source": "/opt/mlflow/server/mlruns/2/dee04cd7f0054008a3c8fe0260feb814/artifacts/artifacts/sklearn-model",
36 | "status": "READY",
37 | "status_message": "",
38 | "tags": {},
39 | "user_id": "",
40 | "version": "1",
41 | "_run_artifact_uri": "/opt/mlflow/server/mlruns/2/dee04cd7f0054008a3c8fe0260feb814/artifacts",
42 | "_experiment_name": "sklearn_iris"
43 | }
44 | ]
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/bulk/models/models/sklearn_wine/model.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.1.2",
4 | "script": "export_model.py",
5 | "export_time": 1672601766,
6 | "_export_time": "2023-01-01 19:36:06",
7 | "mlflow_version": "2.1.1",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "user": "andre",
10 | "platform": {
11 | "python_version": "3.8.15",
12 | "system": "Darwin"
13 | }
14 | },
15 | "info": {
16 | "num_target_stages": 2,
17 | "num_target_versions": 0,
18 | "num_src_versions": 1,
19 | "num_dst_versions": 1
20 | },
21 | "mlflow": {
22 | "registered_model": {
23 | "name": "sklearn_wine",
24 | "creation_timestamp": 1672601487738,
25 | "last_updated_timestamp": 1672601487764,
26 | "description": "Skearn Wine Quality model",
27 | "latest_versions": [
28 | {
29 | "creation_timestamp": 1672601487749,
30 | "current_stage": "Production",
31 | "description": "v1 Production - wine",
32 | "last_updated_timestamp": 1672601487775,
33 | "name": "sklearn_wine",
34 | "run_id": "939157dfaaf24244adafcb72acdfc4a6",
35 | "run_link": "",
36 | "source": "/opt/mlflow/server/mlruns/1/939157dfaaf24244adafcb72acdfc4a6/artifacts/sklearn-model",
37 | "status": "READY",
38 | "status_message": "",
39 | "tags": {
40 | "registered_version_info": "v1 Production - wine"
41 | },
42 | "user_id": "",
43 | "version": "1",
44 | "_run_artifact_uri": "/opt/mlflow/server/mlruns/1/939157dfaaf24244adafcb72acdfc4a6/artifacts",
45 | "_experiment_name": "sklearn_wine"
46 | }
47 | ],
48 | "tags": [
49 | {
50 | "key": "info",
51 | "value": "Skearn Wine Quality model"
52 | }
53 | ]
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/single/experiments/basic/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1684688346,
6 | "_export_time": "2023-05-21 16:59:06",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "num_total_runs": 1,
18 | "num_ok_runs": 1,
19 | "num_failed_runs": 0,
20 | "failed_runs": []
21 | },
22 | "mlflow": {
23 | "experiment": {
24 | "experiment_id": "1",
25 | "name": "sklearn_wine",
26 | "artifact_location": "/opt/mlflow/server/mlruns/1",
27 | "lifecycle_stage": "active",
28 | "tags": {
29 | "experiment_created": "2023-05-20 19:04:09",
30 | "version_mlflow": "2.3.2"
31 | },
32 | "creation_time": 1684623705797,
33 | "last_update_time": 1684623705797,
34 | "_creation_time": "2023-05-20 23:01:46",
35 | "_last_update_time": "2023-05-20 23:01:46"
36 | },
37 | "runs": [
38 | "eb66c160957d4a28b11d3f1b968df9cd"
39 | ]
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/samples/oss_mlflow/single/experiments/src_tags/experiment.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "package_version": "1.2.0",
4 | "script": "export_experiment.py",
5 | "export_time": 1684698448,
6 | "_export_time": "2023-05-21 19:47:28",
7 | "mlflow_version": "2.3.2",
8 | "mlflow_tracking_uri": "http://localhost:5005",
9 | "platform": {
10 | "python_version": "3.8.15",
11 | "system": "Darwin",
12 | "processor": "i386"
13 | },
14 | "user": "andre"
15 | },
16 | "info": {
17 | "num_total_runs": 1,
18 | "num_ok_runs": 1,
19 | "num_failed_runs": 0,
20 | "failed_runs": []
21 | },
22 | "mlflow": {
23 | "experiment": {
24 | "experiment_id": "1",
25 | "name": "sklearn_wine",
26 | "artifact_location": "/opt/mlflow/server/mlruns/1",
27 | "lifecycle_stage": "active",
28 | "tags": {
29 | "experiment_created": "2023-05-20 19:04:09",
30 | "mlflow_exim.field._creation_time": "2023-05-20 23:01:46",
31 | "mlflow_exim.field._last_update_time": "2023-05-20 23:01:46",
32 | "mlflow_exim.field.artifact_location": "/opt/mlflow/server/mlruns/1",
33 | "mlflow_exim.field.creation_time": "1684623705797",
34 | "mlflow_exim.field.experiment_id": "1",
35 | "mlflow_exim.field.last_update_time": "1684623705797",
36 | "mlflow_exim.field.lifecycle_stage": "active",
37 | "mlflow_exim.field.name": "sklearn_wine",
38 | "version_mlflow": "2.3.2"
39 | },
40 | "creation_time": 1684698373087,
41 | "last_update_time": 1684698373087,
42 | "_creation_time": "2023-05-21 19:46:13",
43 | "_last_update_time": "2023-05-21 19:46:13"
44 | },
45 | "runs": [
46 | "4b0ce88fd34e45fc8ca08876127299ce"
47 | ]
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | # MLflow Export Import - Tests
2 |
3 | ## Overview
4 |
5 | The following sets of tests are provided:
6 | * [Open source MLflow tests](open_source/README.md).
7 |   * Launches source and destination tracking servers and then runs tests to ensure that the exported MLflow objects (runs, experiments and registered models) are correctly imported.
8 |   * Numerous tests (100+).
9 | * [Databricks tests](databricks/README.md).
10 | * Remote tests using the Databricks MLflow REST API.
11 | * WIP.
12 | * [Databricks MLflow notebook tests](databricks_notebooks/README.md).
13 | * Simple smoke tests for Databricks notebooks. Launches Databricks jobs to ensure that [Databricks export-import notebooks](../databricks_notebooks/README.md) execute properly.
14 |
15 | ## Setup
16 |
17 | ```
18 | pip install -e ..[tests] --upgrade
19 | ```
20 |
21 | ## Reports and logs
22 |
23 | The test script creates the following files:
24 | * run_tests.log - log of the entire test run.
25 | * run_tests_junit.xml - report for all tests in standard JUnit XML format.
26 | * run_tests_report.html - report for all tests in HTML format.
27 |
28 | **Sample reports**
29 |
30 | Open Source Tests:
31 | * [run_tests_junit.xml](open_source/samples/run_tests_junit.xml)
32 | * [run_tests_report.html](open_source/samples/run_tests_report.html)
33 |
34 | Databricks Tests:
35 | * [run_tests_junit.xml](databricks/samples/run_tests_junit.xml)
36 | * [run_tests_report.html](databricks/samples/run_tests_report.html)
37 |
38 | Failed Databricks Tests:
39 | * [run_tests_junit.xml](databricks/samples/failed/run_tests_junit.xml)
40 | * [run_tests_report.html](databricks/samples/failed/run_tests_report.html)
41 |
42 |
43 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import mlflow
2 |
3 | print(f"mlflow.version: {mlflow.__version__}")
4 |
--------------------------------------------------------------------------------
/tests/core.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 | from dataclasses import dataclass
3 |
4 |
5 | @dataclass()
6 | class MlflowContext:
7 | """
8 |     Context for tests.open_source tests (the original test suite).
9 | """
10 | client_src: Any
11 | client_dst: Any
12 | output_dir: str
13 | output_run_dir: str
14 |
15 |
16 | @dataclass()
17 | class TestContext:
18 | """
19 |     Context for tests.databricks tests (the newer test suite).
20 | """
21 | mlflow_client_src: Any
22 | mlflow_client_dst: Any
23 | dbx_client_src: Any
24 | dbx_client_dst: Any
25 | output_dir: str
26 | output_run_dir: str
27 |
28 |
29 | def to_MlflowContext(test_context):
30 | """
31 |     Convert a TestContext to an MlflowContext in order to reuse the existing test comparison helpers.
32 | """
33 | return MlflowContext(
34 | test_context.mlflow_client_src,
35 | test_context.mlflow_client_dst,
36 | test_context.output_dir,
37 | test_context.output_run_dir
38 | )
39 |
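40 | # Illustrative example (mirroring tests/databricks/_test_registered_model.py): a
41 | # tests.databricks test can reuse the shared comparison helpers by converting its context, e.g.
42 | #   compare_models_with_versions(to_MlflowContext(test_context), src_model, dst_model)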
--------------------------------------------------------------------------------
/tests/databricks/README.md:
--------------------------------------------------------------------------------
1 | # MLflow Export Import - Databricks Tests
2 |
3 | ## Overview
4 |
5 | Remote tests using the Databricks MLflow REST API.
6 |
7 | ## Setup
8 |
9 | For Unity Catalog tests, do the following:
10 | ```
11 | pip install -U mlflow[databricks]
12 | ```
13 |
14 | Configuration is straightforward.
15 | Copy [config.yaml.template](config.yaml.template) to `config.yaml` and adjust.
16 |
17 | For both source and destination workspaces, set the following attributes:
18 |
19 | * profile - Databricks profile
20 | * base_dir - Workspace base directory where all test experiments will be created.
21 |
22 |
23 | ```
24 | workspace_src:
25 | profile: databricks://ws_src_profile
26 |   base_dir: /tmp/test-mlflow-export-import
27 |
28 | workspace_dst:
29 | profile: databricks://ws_dst_profile
30 |   base_dir: /tmp/test-mlflow-export-import
31 | ```
32 |
33 | The `base_dir` folder will be deleted before each test session.
34 |
35 | ## Run tests
36 |
37 | ```
38 | python -u -m pytest -s test_*.py
39 | ```
40 |
41 | The script [run_tests.sh](run_tests.sh) is provided as a convenience.
42 |
43 | ## Debug
44 |
45 | If the environment variable `MLFLOW_EXPORT_IMPORT_OUTPUT_DIR` is set,
46 | it will be used as the test directory instead of `tempfile.TemporaryDirectory()`.
47 |
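48 | For example (the output directory is illustrative):
49 | ```
50 | export MLFLOW_EXPORT_IMPORT_OUTPUT_DIR=/tmp/test-mlflow-export-import
51 | python -u -m pytest -s test_*.py
52 | ```
53 |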
--------------------------------------------------------------------------------
/tests/databricks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/tests/databricks/__init__.py
--------------------------------------------------------------------------------
/tests/databricks/_test_registered_model.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.model.export_model import export_model
2 | from mlflow_export_import.model.import_model import import_model
3 | from mlflow_export_import.common import dump_utils
4 | from mlflow_export_import.common import model_utils
5 |
6 | from tests.core import to_MlflowContext
7 | from tests.compare_utils import compare_models_with_versions
8 | from tests.databricks.init_tests import workspace_src, workspace_dst
9 | from tests.databricks.init_tests import test_context
10 | from tests.databricks import local_utils
11 |
12 | num_versions = 3
13 |
14 | def _init(test_context, is_uc):
15 | src_model_name = local_utils.mk_model_name(workspace_src, is_uc)
16 |
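17 |     # local_utils.create_version() appears to return a (model_version, registered_model)
18 |     # pair, so keep the registered model once and reduce src_vrs to just the versions.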
17 | src_vrs = [ local_utils.create_version(test_context.mlflow_client_src, src_model_name) for _ in range(num_versions) ]
18 | src_model = src_vrs[0][1]
19 | src_vrs = [ vr[0] for vr in src_vrs ]
20 | dump_utils.dump_obj(src_model, "SRC MODEL")
21 | for vr in src_vrs:
22 | dump_utils.dump_obj(vr, f"SRC Version {vr.version}")
23 |
24 | export_model(
25 | mlflow_client = test_context.mlflow_client_src,
26 | model_name = src_model.name,
27 | output_dir = test_context.output_dir
28 | )
29 | dst_model_name = local_utils.mk_model_name(workspace_dst, is_uc)
30 | import_model(
31 | mlflow_client = test_context.mlflow_client_dst,
32 | model_name = dst_model_name,
33 | experiment_name = local_utils.mk_experiment_name(workspace=workspace_dst),
34 | input_dir = test_context.output_dir
35 | )
36 | dst_model = test_context.mlflow_client_dst.get_registered_model(dst_model_name)
37 | dump_utils.dump_obj(dst_model, "DST MODEL")
38 | dst_vrs = model_utils.list_model_versions(test_context.mlflow_client_dst, dst_model.name)
39 | assert len(dst_vrs) == num_versions
40 | for vr in dst_vrs:
41 | dump_utils.dump_obj(vr, f"DST Version {vr.version}")
42 |
43 | return src_model, dst_model
44 |
45 |
46 | def test_registered_model(test_context, is_uc):
47 | src_model, dst_model = _init(test_context, is_uc)
48 | compare_models_with_versions(to_MlflowContext(test_context), src_model, dst_model, compare_names=False)
49 |
--------------------------------------------------------------------------------
/tests/databricks/compare_utils.py:
--------------------------------------------------------------------------------
1 | def compare_experiments(exp1, exp2, client1, client2, num_runs):
2 | assert exp1.name == exp2.name
3 | _compare_experiment_tags(exp1.tags, exp2.tags)
4 | runs1 = client1.search_runs(exp1.experiment_id)
5 | runs2 = client2.search_runs(exp2.experiment_id)
6 | assert len(runs1) == num_runs
7 | assert len(runs1) == len(runs2)
8 | for run1,run2 in zip(runs1, runs2):
9 | compare_runs(run1, run2)
10 |
11 | def _compare_experiment_tags(tags1, tags2):
12 | _assert_tag("mlflow.ownerEmail", tags1, tags2)
13 | #_assert_tag("mlflow.experimentType", tags1, tags2) # might not be the same
14 | _compare_non_mlflow_tags(tags1, tags2)
15 |
16 |
17 | def compare_runs(run1, run2):
18 |     _compare_non_mlflow_tags(run1.data.tags, run2.data.tags)
19 | assert run1.data.params == run2.data.params
20 | assert run1.data.metrics == run2.data.metrics
21 |
22 |
23 | def _get_non_mlflow_tags(tags):
24 | return { k:v for k,v in tags.items() if not k.startswith("mlflow.") }
25 |
26 | def _compare_non_mlflow_tags(tags1, tags2):
27 | tags1 = _get_non_mlflow_tags(tags1)
28 | tags2 = _get_non_mlflow_tags(tags2)
29 | assert tags1 == tags2
30 |
31 | def _assert_tag(key, tags1, tags2):
32 | assert tags1.get(key,None) == tags2.get(key,None)
33 |
--------------------------------------------------------------------------------
/tests/databricks/config.yaml.template:
--------------------------------------------------------------------------------
1 |
2 | workspace_src:
3 | profile: databricks://ws_src_profile
4 |   base_dir: /tmp/test-mlflow-export-import
5 |
6 | workspace_dst:
7 | profile: databricks://ws_dst_profile
8 |   base_dir: /tmp/test-mlflow-export-import
9 |
--------------------------------------------------------------------------------
/tests/databricks/includes.py:
--------------------------------------------------------------------------------
1 | class Dict2Class():
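2 |     """Recursively wrap a dict so its keys are accessible as attributes; nested dicts become nested Dict2Class instances."""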
2 | def __init__(self, dct):
3 | self.dct = dct
4 | for k,v in dct.items():
5 | if isinstance(v,dict):
6 | v = Dict2Class(v)
7 | setattr(self, k, v)
8 | def __str__(self):
9 | return str(self.dct)
10 |
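11 | # Illustrative usage:
12 | #   cfg = Dict2Class({"cluster": {"num_workers": 1}})
13 | #   assert cfg.cluster.num_workers == 1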
--------------------------------------------------------------------------------
/tests/databricks/run_tests.sh:
--------------------------------------------------------------------------------
1 |
2 | run() {
3 | mlflow_version=`mlflow --version | sed -e "s/mlflow, version //" `
4 | echo "MLFLOW.VERSION: $mlflow_version"
5 | python -u -m pytest -s test_*.py
6 | echo "MLFLOW.VERSION: $mlflow_version"
7 | }
8 | run 2>&1 | tee run_tests.log
9 |
--------------------------------------------------------------------------------
/tests/databricks/test_copy_run.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.copy import copy_run
2 | from . init_tests import test_context
3 | from . import local_utils
4 | from . import compare_utils
5 | from . init_tests import workspace_src, workspace_dst
6 |
7 |
8 | def _init_run_test(test_context, workspace_src, workspace_dst):
9 | exp_src = local_utils.create_experiment(test_context.mlflow_client_src)
10 | src_run = local_utils.create_run(test_context.mlflow_client_src, exp_src.experiment_id)
11 | dst_exp_name = local_utils.mk_experiment_name(workspace_dst)
12 | dst_run = copy_run.copy(
13 | src_run.info.run_id,
14 | dst_exp_name,
15 | workspace_src.cfg.profile,
16 | workspace_dst.cfg.profile
17 | )
18 | return src_run, dst_run
19 |
20 |
21 | def test_run_same_workspace(test_context):
22 | src_run, dst_run = _init_run_test(test_context, workspace_src, workspace_src)
23 | compare_utils.compare_runs(src_run, dst_run)
24 |
25 | def test_run_different_workspace(test_context):
26 | src_run, dst_run = _init_run_test(test_context, workspace_src, workspace_dst)
27 | compare_utils.compare_runs(src_run, dst_run)
28 |
--------------------------------------------------------------------------------
/tests/databricks/test_model_version.py:
--------------------------------------------------------------------------------
1 | from tests.databricks.init_tests import test_context
2 | from tests.databricks import _test_model_version
3 |
4 | def test_import_metadata_false(test_context):
5 | _test_model_version.test_import_metadata_false(test_context, False)
6 |
7 | def test_import_metadata_true(test_context):
8 | _test_model_version.test_import_metadata_true(test_context, False)
9 |
--------------------------------------------------------------------------------
/tests/databricks/test_registered_model.py:
--------------------------------------------------------------------------------
1 | from tests.databricks.init_tests import test_context
2 | from tests.databricks import _test_registered_model
3 |
4 | def test_registered_model(test_context):
5 | _test_registered_model.test_registered_model(test_context, False)
6 |
--------------------------------------------------------------------------------
/tests/databricks/uc/run_tests.sh:
--------------------------------------------------------------------------------
1 |
2 | run() {
3 | mlflow_version=`mlflow --version | sed -e "s/mlflow, version //" `
4 | echo "MLFLOW.VERSION: $mlflow_version"
5 | python -u -m pytest -s test_*.py
6 | echo "MLFLOW.VERSION: $mlflow_version"
7 | }
8 | run 2>&1 | tee run_tests.log
9 |
--------------------------------------------------------------------------------
/tests/databricks/uc/test_model_version.py:
--------------------------------------------------------------------------------
1 | from tests.databricks.init_tests import test_context
2 | from tests.databricks import _test_model_version
3 |
4 | def test_import_metadata_false(test_context):
5 | _test_model_version.test_import_metadata_false(test_context, True)
6 |
7 | def test_import_metadata_true(test_context):
8 | _test_model_version.test_import_metadata_true(test_context, True)
9 |
--------------------------------------------------------------------------------
/tests/databricks/uc/test_registered_model.py:
--------------------------------------------------------------------------------
1 | from tests.databricks.init_tests import test_context
2 | from tests.databricks import _test_registered_model
3 |
4 | def test_registered_model(test_context):
5 | _test_registered_model.test_registered_model(test_context, True)
6 |
--------------------------------------------------------------------------------
/tests/databricks/unity_catalog_client.py:
--------------------------------------------------------------------------------
1 | class UnityCatalogClient:
2 | def __init__(self, dbx_client):
3 | self.client = mk_uc_dbx_client(dbx_client)
4 |
5 | def list_models(self, catalog_name=None, schema_name=None):
6 | if catalog_name and schema_name:
7 | params = { "catalog_name": catalog_name, "schema_name": schema_name }
8 | else:
9 | params = { "max_results": 5000 }
10 | rsp = self.client.get("unity-catalog/models", params)
11 |         return rsp.get("registered_models", [])
14 |
15 | def list_model_names(self, catalog_name, schema_name):
16 | return [ m["full_name"] for m in self.list_models(catalog_name, schema_name) ]
17 |
18 | def create_schema(self, catalog_name, schema_name):
19 | params = { "catalog_name": catalog_name, "name": schema_name }
20 | self.client.post("unity-catalog/schemas", params)
21 |
22 | def __repr__(self):
23 | return str(self.client)
24 |
25 |
26 | def mk_uc_dbx_client(client):
27 | from mlflow_export_import.client.http_client import HttpClient
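28 |     # Unity Catalog REST endpoints live under the api/2.1 prefix (unlike the api/2.0 workspace APIs).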
28 | return HttpClient("api/2.1", client.host, client.token)
29 |
--------------------------------------------------------------------------------
/tests/databricks_notebooks/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/databricks_notebooks/config.yaml.template:
--------------------------------------------------------------------------------
1 |
2 | # ----
3 | # Uses the default workspace specified in the DEFAULT profile in ~/.databrickscfg
4 | #
5 | # see run_tests.sh
6 | # export MLFLOW_TRACKING_URI=databricks
7 |
8 | # ----
9 |
10 | # test workspace directory
11 | ws_base_dir: /Users/first.last@mycompany.com/tmp/test-mlflow-exim
12 |
13 | # test DBFS directory
14 | dbfs_base_export_dir: dbfs:/tmp/first.last@mycompany.com/test-mlflow-exim
15 |
16 | # test registered model name
17 | model_name: andre-test-mlflow-exim-Iris-Train
18 |
19 | # test prefix for imported runs
20 | run_name_prefix: andre-test-mlflow-exim
21 |
22 | # ---- toggle either for existing or new cluster
23 |
24 | # - - toggle option 1 - existing cluster
25 |
26 | # cluster: 0318-151752-abed99
27 |
28 | # - - toggle option 2 - new cluster
29 |
30 | cluster: {
31 | cluster_name: test-mlflow-export-import,
32 | spark_version: 12.0.x-cpu-ml-scala2.12,
33 | node_type_id: i3.xlarge,
34 | num_workers: 1,
35 | autotermination_minutes: 20,
36 | }
37 |
--------------------------------------------------------------------------------
/tests/databricks_notebooks/experiment/Iris_Train.py:
--------------------------------------------------------------------------------
1 | # Databricks notebook source
2 | # MAGIC %md ## Iris Train
3 | # MAGIC * Train and register a model for testing purposes.
4 |
5 | # COMMAND ----------
6 |
7 | dbutils.widgets.text("Experiment", "")
8 | experiment_name = dbutils.widgets.get("Experiment")
9 |
10 | dbutils.widgets.text("Registered model", "")
11 | registered_model = dbutils.widgets.get("Registered model")
12 | if registered_model == "": registered_model = None
13 |
14 | experiment_name, registered_model
15 |
16 | # COMMAND ----------
17 |
18 | import mlflow
19 | if experiment_name:
20 | mlflow.set_experiment(experiment_name)
21 |
22 | # COMMAND ----------
23 |
24 | from sklearn import svm, datasets
25 | print("mlflow.version:", mlflow.__version__)
26 |
27 | with mlflow.start_run() as run:
28 | print("run_id:",run.info.run_id)
29 | print("experiment_id:",run.info.experiment_id)
30 | iris = datasets.load_iris()
31 | mlflow.log_metric("degree", 5)
32 | model = svm.SVC(C=2.0, degree=5, kernel="rbf")
33 | model.fit(iris.data, iris.target)
34 | mlflow.sklearn.log_model(model, "model", registered_model_name=registered_model)
35 |
--------------------------------------------------------------------------------
/tests/databricks_notebooks/init_tests.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 | import pytest
3 | import tempfile
4 |
5 | from mlflow_export_import.client import databricks_utils
6 |
7 | from tests import utils_test
8 | from tests.databricks.databricks_tester import DatabricksTester
9 |
10 | cfg = utils_test.read_config_file()
11 |
12 |
13 | _tester = DatabricksTester(
14 | ws_base_dir = cfg["ws_base_dir"],
15 | dbfs_base_export_dir = cfg["dbfs_base_export_dir"],
16 | local_artifacts_compare_dir = cfg.get("local_artifacts_compare_dir", None),
17 | cluster_spec = cfg["cluster"],
18 | model_name = cfg["model_name"],
19 | run_name_prefix = cfg["run_name_prefix"]
20 | )
21 |
22 |
23 | from databricks_cli.dbfs.api import DbfsApi
24 | _dbfs_api = DbfsApi(databricks_utils.get_api_client())
25 |
26 |
27 | TestContext = namedtuple(
28 | "TestContext",
29 | [ "tester", "dbfs_api" ]
30 | )
31 |
32 |
33 | @pytest.fixture(scope="session")
34 | def test_context():
35 | if _tester.local_artifacts_compare_dir: # NOTE: for debugging
36 | utils_test.create_output_dir(_tester.local_artifacts_compare_dir)
37 | else:
38 | with tempfile.TemporaryDirectory() as tmpdir:
39 | _tester.local_artifacts_compare_dir = tmpdir
40 | yield TestContext(_tester, _dbfs_api)
41 | _tester.teardown()
42 |
--------------------------------------------------------------------------------
/tests/databricks_notebooks/run_tests.sh:
--------------------------------------------------------------------------------
1 |
2 | export MLFLOW_TRACKING_URI=databricks
3 |
4 | if [ $# -gt 0 ] ; then
5 | DATABRICKS_PROFILE=$1
6 | export MLFLOW_TRACKING_URI=databricks://$DATABRICKS_PROFILE
7 | fi
8 |
9 | JUNIT_FILE=run_tests_junit.xml
10 | HTML_FILE=run_tests_report.html
11 | LOG_FILE=run_tests.log
12 |
13 | run() {
14 | echo "MLFLOW_TRACKING_URI: $MLFLOW_TRACKING_URI"
15 | time -p pytest -s \
16 | --junitxml=$JUNIT_FILE \
17 | --html=$HTML_FILE \
18 | --self-contained-html \
19 | --override-ini log_cli=true \
20 | `ls test_*.py`
21 | echo
22 | echo "******************************************************"
23 | echo
24 | echo "MLFLOW_TRACKING_URI: $MLFLOW_TRACKING_URI"
25 | echo "LOG_FILE : $LOG_FILE"
26 | echo "JUNIT REPORT: $JUNIT_FILE"
27 | echo "HTML REPORT : $HTML_FILE"
28 | echo
29 | }
30 |
31 | run 2>&1 | tee $LOG_FILE
32 |
--------------------------------------------------------------------------------
/tests/databricks_notebooks/samples/run_tests_junit.xml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/open_source/README.md:
--------------------------------------------------------------------------------
1 | # mlflow-export-import - Open Source Tests
2 |
3 | ## Overview
4 |
5 | Open source MLflow Export Import tests use two MLflow tracking servers:
6 | * Source tracking server for exporting MLflow objects.
7 | * Destination tracking server for importing the exported MLflow objects.
8 |
9 | ## Setup
10 |
11 | See the [Setup](../../README.md#Setup) section.
12 |
13 | ## Test Configuration
14 |
15 | Test environment variables.
16 |
17 | |Name | Required | Description|
18 | |-----|----------|---------|
19 | | MLFLOW_TRACKING_URI_SRC | yes | URI of source tracking server |
20 | | MLFLOW_TRACKING_URI_DST | yes | URI of destination tracking server |
21 | | MLFLOW_EXPORT_IMPORT_OUTPUT_DIR | no | If set, will use this as the export output directory instead of `tempfile.TemporaryDirectory()` |
22 |
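23 | For example, matching the ports used by [run_tests.sh](run_tests.sh):
24 | ```
25 | export MLFLOW_TRACKING_URI_SRC=http://localhost:5010
26 | export MLFLOW_TRACKING_URI_DST=http://localhost:5011
27 | ```
28 |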
23 |
24 |
25 | ## Run tests
26 |
27 | Use the [run_tests.sh](run_tests.sh) script to run the tests, specifying the source and destination tracking server port numbers.
28 | Output will be in the `run_tests.log` file.
29 |
30 | The script does the following:
31 | * Launches a source MLflow tracking server and destination MLflow tracking server in the background.
32 | * Runs tests against these servers with pytest.
33 | * Tears down the two MLflow tracking servers.
34 |
35 | **Example**
36 | ```
37 | run_tests.sh 5010 5011
38 | ```
39 | ```
40 | ======================== 43 passed in 295.36s (0:04:55) ========================
41 |
42 | LOG_FILE : run_tests.log
43 | JUNIT REPORT: run_tests_junit.xml
44 | HTML REPORT : run_tests_report.html
45 | ```
46 |
47 |
--------------------------------------------------------------------------------
/tests/open_source/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/open_source/init_tests.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | import tempfile
4 | import mlflow
5 | from tests import utils_test
6 |
7 | from tests.core import MlflowContext
8 | from mlflow_export_import.common import utils
9 | _logger = utils.getLogger(__name__)
10 |
11 | _logger.info(f"mlflow.tracking_uri {mlflow.tracking.get_tracking_uri()}")
12 |
13 | uri_src = os.environ.get("MLFLOW_TRACKING_URI_SRC",None)
14 | _logger.info(f"MLFLOW_TRACKING_URI_SRC: {uri_src}")
15 | assert uri_src,"Environment variable MLFLOW_TRACKING_URI_SRC must be set"
16 | client_src = mlflow.tracking.MlflowClient(uri_src)
17 | _logger.info(f"client_src: {client_src}")
18 |
19 | uri_dst = os.environ.get("MLFLOW_TRACKING_URI_DST",None)
20 | _logger.info(f"MLFLOW_TRACKING_URI_DST: {uri_dst}")
21 | assert uri_dst, "Environment variable MLFLOW_TRACKING_URI_DST must be set"
22 | client_dst = mlflow.tracking.MlflowClient(uri_dst)
23 | _logger.info(f"client_dst: {client_dst}")
24 |
25 |
26 | @pytest.fixture(scope="session")
27 | def mlflow_context():
28 | with tempfile.TemporaryDirectory() as tmpdir:
29 | assert mlflow.get_tracking_uri() is not None
30 | output_dir = os.environ.get("MLFLOW_EXPORT_IMPORT_OUTPUT_DIR",None) # for debugging
31 | if output_dir:
32 | utils_test.create_output_dir(output_dir)
33 | else:
34 | output_dir = tmpdir
35 | yield MlflowContext(
36 | client_src, client_dst, output_dir, os.path.join(output_dir,"run")
37 | )
38 |
--------------------------------------------------------------------------------
/tests/open_source/kill_server.sh:
--------------------------------------------------------------------------------
1 |
2 | # =============================================
3 | #
4 | # Kill an MLflow tracking server by port number
5 | #
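6 | # Example: kill_server.sh 5010
7 | #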
6 | # =============================================
7 |
8 | if [ $# -lt 1 ] ; then
9 | echo "$0: Expecting MLflow Tracking Server port"
10 | exit 1
11 | fi
12 | port=$1
13 |
14 | pids=`lsof -n -i :$port | awk ' { print ( $2 ) } ' | grep -v PID`
15 | echo "PIDs: $pids"
16 | echo "Killing MLflow Tracking Server running on port $port"
17 | for pid in $pids ; do
18 | echo "Killing PID $pid"
19 | kill $pid
20 | done
21 |
--------------------------------------------------------------------------------
/tests/open_source/run_tests.sh:
--------------------------------------------------------------------------------
1 |
2 | # ===========================================================
3 | #
4 | # Script to run tests against a source and destination MLflow tracking server.
5 | # Expects the source and destination server port numbers as arguments.
6 | #
7 | # Does the following:
8 | # 1. Launches a source and destination tracking server in the background.
9 | # 2. Runs tests against the tracking servers with pytest.
10 | # 3. Kills the tracking servers.
11 | #
12 | # Example:
13 | #
14 | # run_tests.sh 5010 5011
15 | #
16 | # ===========================================================
17 |
18 | if [ $# -lt 2 ] ; then
19 | echo "ERROR: Expecting source and destination MLflow Tracking Server ports"
20 | exit 1
21 | fi
22 | PORT_SRC=$1
23 | PORT_DST=$2
24 |
25 | export MLFLOW_TRACKING_URI=http://localhost:$PORT_SRC
26 | export MLFLOW_TRACKING_URI_SRC=http://localhost:${PORT_SRC}
27 | export MLFLOW_TRACKING_URI_DST=http://localhost:${PORT_DST}
28 |
29 | JUNIT_FILE=run_tests_junit.xml
30 | HTML_FILE=run_tests_report.html
31 | LOG_FILE=run_tests.log
32 |
33 | message() {
34 | echo
35 | echo "******************************************************"
36 | echo "*"
37 | echo "* $*"
38 | echo "*"
39 | echo "******************************************************"
40 | echo
41 | }
42 |
43 | run_tests() {
44 | message "STAGE 2: RUN TESTS"
45 | time -p pytest -s \
46 | --junitxml=$JUNIT_FILE \
47 | --html=$HTML_FILE \
48 | --self-contained-html \
49 | test_*.py
50 | }
51 |
52 | launch_server() {
53 | port=$1
54 | message "STAGE 1: LAUNCH TRACKING SERVER on port $port"
55 |   rm -f mlflow_${port}.db
56 | rm -rf mlruns_${port}
57 | mlflow server \
58 | --host localhost --port ${port} \
59 | --backend-store-uri sqlite:///mlflow_${port}.db \
60 | --default-artifact-root $PWD/mlruns_${port}
61 | }
62 |
63 | kill_server() {
64 | port=$1
65 | message "STAGE 3: KILL TRACKING SERVER on port ${port}"
66 | echo "Killing MLflow Tracking Server pids:"
67 | pids=`lsof -n -i :${port} | awk '{ print ( $2 ) }' | grep -v PID`
68 | for pid in $pids ; do
69 | echo " Killing PID=$pid"
70 | kill $pid
71 | done
72 | rm -rf mlruns_${port}
73 |   rm -f mlflow_${port}.db
74 | }
75 |
76 | run() {
77 | echo "$0: MLFLOW_TRACKING_URI: $MLFLOW_TRACKING_URI"
78 | launch_server $PORT_SRC &
79 | launch_server $PORT_DST &
80 | sleep 5 # wait for the tracking servers to come up
81 | run_tests
82 | kill_server $PORT_SRC
83 | kill_server $PORT_DST
84 | }
85 |
86 | run_all() {
87 | mlflow_version=`mlflow --version | sed -e "s/mlflow, version //" `
88 | echo "MLFLOW.VERSION: $mlflow_version"
89 | time -p run
90 | echo
91 | echo "******************************************************"
92 | echo
93 | echo "MLFLOW.VERSION: $mlflow_version"
94 | echo "LOG_FILE: $LOG_FILE"
95 | echo "JUNIT REPORT: $JUNIT_FILE"
96 | echo "HTML REPORT : $HTML_FILE"
97 | echo
98 | }
99 |
100 | time run_all 2>&1 | tee $LOG_FILE
101 |
102 | exit 0
103 |
--------------------------------------------------------------------------------
/tests/open_source/samples/run_tests_junit.xml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/open_source/test_bulk_all.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.bulk.export_all import export_all
2 | from mlflow_export_import.bulk.import_models import import_models
3 | from tests.open_source.test_bulk_experiments import compare_experiments
4 | from tests.open_source.test_bulk_models import create_model, compare_models_with_versions, get_num_deleted_runs
5 |
6 | from tests.open_source.init_tests import mlflow_context
7 | from tests.compare_utils import compare_runs
8 | from tests.open_source.oss_utils_test import delete_experiments_and_models
9 |
10 | # == Helper functions
11 |
12 | _notebook_formats = "SOURCE,DBC"
13 | _num_models = 2
14 | _num_runs = 2
15 |
16 |
17 | def _run_test(mlflow_context, compare_func=compare_runs, use_threads=False):
18 | delete_experiments_and_models(mlflow_context)
19 |     for _ in range(_num_models):
20 | create_model(mlflow_context.client_src)
21 | export_all(
22 | mlflow_client = mlflow_context.client_src,
23 | output_dir = mlflow_context.output_dir,
24 | notebook_formats = _notebook_formats,
25 | use_threads = use_threads
26 | )
27 | import_models(
28 | mlflow_client = mlflow_context.client_dst,
29 | input_dir = mlflow_context.output_dir,
30 | delete_model = True
31 | )
32 | compare_experiments(mlflow_context, compare_func)
33 | compare_models_with_versions(mlflow_context, compare_func)
34 |
35 |
36 | # == Test basic
37 |
38 | def test_basic(mlflow_context):
39 | _run_test(mlflow_context)
40 |
41 |
42 | def test_basic_threads(mlflow_context):
43 | _run_test(mlflow_context, use_threads=True)
44 |
45 |
46 | # == Test deleted runs
47 |
48 | def test_model_deleted_runs(mlflow_context):
49 | model_name = create_model(mlflow_context.client_src)
50 | versions = mlflow_context.client_src.search_model_versions(filter_string=f"name='{model_name}'")
51 | assert len(versions) == _num_runs
52 |
53 | mlflow_context.client_src.delete_run(versions[0].run_id)
54 | num_deleted = get_num_deleted_runs(mlflow_context.client_src, versions)
55 | assert num_deleted == _num_runs - 1
56 |
57 | export_all(
58 | mlflow_client = mlflow_context.client_src,
59 | output_dir = mlflow_context.output_dir,
60 | export_deleted_runs = True
61 | )
62 | import_models(
63 | mlflow_client = mlflow_context.client_dst,
64 | input_dir = mlflow_context.output_dir,
65 | delete_model = True
66 | )
67 | versions = mlflow_context.client_dst.search_model_versions(filter_string=f"name='{model_name}'")
68 | assert len(versions) == _num_runs
69 |
70 | num_deleted2 = get_num_deleted_runs(mlflow_context.client_dst, versions)
71 | assert num_deleted == num_deleted2
72 |
--------------------------------------------------------------------------------
/tests/open_source/test_bulk_experiments_merge_utils.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.bulk import experiments_merge_utils
2 | import pytest
3 |
4 |
5 | dct1 = {
6 | "system": {
7 | "package_version": "1.2.0",
8 | "script": "export_experiments.py",
9 | "export_time": 1683865840,
10 | "_export_time": "2023-05-12 04:30:40",
11 | "mlflow_version": "2.3.0",
12 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
13 | "platform": {
14 | "python_version": "3.8.15",
15 | "system": "Darwin",
16 | "processor": "i386"
17 | },
18 | "user": "k2"
19 | },
20 | "info": {
21 | "experiment_names": [
22 | "sklearn_wine"
23 | ],
24 | "duration": 0.1,
25 | "experiments": 1,
26 | "total_runs": 1,
27 | "ok_runs": 1,
28 | "failed_runs": 0
29 | },
30 | "mlflow": {
31 | "experiments": [
32 | {
33 | "id": "1",
34 | "name": "sklearn_wine",
35 | "ok_runs": 3,
36 | "failed_runs": 1,
37 | "duration": 0.1
38 | }
39 | ]
40 | }
41 | }
42 |
43 |
44 | dct2 = {
45 | "system": {
46 | "package_version": "1.2.0",
47 | "script": "export_experiments.py",
48 | "export_time": 1683865840,
49 | "_export_time": "2023-05-12 04:30:40",
50 | "mlflow_version": "2.3.0",
51 | "mlflow_tracking_uri": "http://127.0.0.1:5020",
52 | "platform": {
53 | "python_version": "3.8.15",
54 | "system": "Darwin",
55 | "processor": "i386"
56 | },
57 | "user": "k2"
58 | },
59 | "info": {
60 | "experiment_names": [
61 | "Default"
62 | ],
63 | "duration": 0.2,
64 | "experiments": 1,
65 | "total_runs": 0,
66 | "ok_runs": 0,
67 | "failed_runs": 0
68 | },
69 | "mlflow": {
70 | "experiments": [
71 | {
72 | "id": "0",
73 | "name": "Default",
74 | "ok_runs": 0,
75 | "failed_runs": 0,
76 | "duration": 0.0
77 | }
78 | ]
79 | }
80 | }
81 |
82 |
83 | def test_merge_info():
84 | info1 = dct1["info"]
85 | info2 = dct2["info"]
86 | info = experiments_merge_utils.merge_info(info1, info2)
87 | assert info["duration"] == pytest.approx(info1["duration"] + info2["duration"])
88 | assert info["total_runs"] == info1["total_runs"] + info2["total_runs"]
89 | assert info["ok_runs"] == info1["ok_runs"] + info2["ok_runs"]
90 | assert info["failed_runs"] == info1["failed_runs"] + info2["failed_runs"]
91 | assert info["experiments"] == info1["experiments"] + info2["experiments"]
92 |
93 |
94 | def test_merge_mlflow():
95 | mlflow1 = dct1["mlflow"]
96 | mlflow2 = dct2["mlflow"]
97 | mlflow = experiments_merge_utils.merge_mlflow(mlflow1, mlflow2)
98 |
99 | assert len(mlflow["experiments"]) == len(mlflow1["experiments"]) + len(mlflow2["experiments"])
100 | assert mlflow["experiments"] == mlflow1["experiments"] + mlflow2["experiments"]
101 |
--------------------------------------------------------------------------------
/tests/open_source/test_copy_run.py:
--------------------------------------------------------------------------------
1 | from tests.open_source.oss_utils_test import create_simple_run
2 | from tests.compare_utils import compare_runs
3 | from tests.open_source.init_tests import mlflow_context
4 |
5 | from mlflow_export_import.copy import copy_run
6 | from tests.open_source.oss_utils_test import mk_test_object_name_default
7 |
8 |
9 | # == Setup
10 |
11 | def _init_run_test(mlflow_context, dst_mlflow_uri=None):
12 | _, src_run = create_simple_run(mlflow_context.client_src, model_artifact = "model")
13 | dst_exp_name = mk_test_object_name_default()
14 | dst_run = copy_run.copy(
15 | src_run.info.run_id,
16 | dst_exp_name,
17 | mlflow_context.client_src.tracking_uri,
18 | dst_mlflow_uri
19 | )
20 | return src_run, dst_run
21 |
22 |
23 | # == Regular tests
24 |
25 | def test_run_basic_without_dst_mlflow_uri(mlflow_context):
26 | run1, run2 = _init_run_test(mlflow_context)
27 | compare_runs(mlflow_context, run1, run2)
28 |
29 |
30 | def test_run_basic_with_dst_mlflow_uri(mlflow_context):
31 | run1, run2 = _init_run_test(mlflow_context, mlflow_context.client_dst.tracking_uri)
32 | compare_runs(mlflow_context, run1, run2)
33 |
34 |
35 | # == Test for source and exported model prediction equivalence
36 |
37 | from tests.sklearn_utils import X_test
38 | import cloudpickle as pickle
39 | import numpy as np
40 |
41 |
42 | def test_model_predictions(mlflow_context):
43 | _, run1 = create_simple_run(mlflow_context.client_src)
44 | run2 = copy_run._copy(run1.info.run_id, mk_test_object_name_default(), mlflow_context.client_src, mlflow_context.client_dst)
45 |
46 | # Since you cannot load model flavors (such as mlflow.sklearn.load_model()) with the MlflowClient,
47 | # we have to manually load the model pickle file
48 |
49 | path1 = mlflow_context.client_src.download_artifacts(run1.info.run_id, "model/model.pkl")
50 | with open(path1,"rb") as f:
51 | model1 = pickle.load(f)
52 |     path2 = mlflow_context.client_dst.download_artifacts(run2.info.run_id, "model/model.pkl")
53 | with open(path2, "rb") as f:
54 | model2 = pickle.load(f)
55 |
56 | predictions1 = model1.predict(X_test)
57 | predictions2 = model2.predict(X_test)
58 | assert np.array_equal(predictions1, predictions2)
59 |
--------------------------------------------------------------------------------
/tests/open_source/test_find_run_model_names.py:
--------------------------------------------------------------------------------
1 | """
2 | Test find_artifacts.find_run_model_names()
3 | """
4 |
5 | import mlflow
6 | from mlflow_export_import.common.find_artifacts import find_run_model_names
7 | from tests.open_source.oss_utils_test import create_experiment
8 | from tests.sklearn_utils import create_sklearn_model
9 |
10 | client = mlflow.MlflowClient()
11 |
12 |
13 | def test_no_model():
14 | create_experiment(client)
15 | with mlflow.start_run() as run:
16 | mlflow.set_tag("name","foo")
17 | model_paths = find_run_model_names(client, run.info.run_id)
18 | assert len(model_paths) == 0
19 |
20 |
21 | def test_one_model_at_artifact_root():
22 | """ Test when model artifact root is '' """
23 | create_experiment(client)
24 | model = create_sklearn_model()
25 | with mlflow.start_run() as run:
26 | mlflow.sklearn.log_model(model, "")
27 | model_paths = find_run_model_names(client, run.info.run_id)
28 | assert len(model_paths) == 1
29 | assert model_paths[0] == ""
30 |
31 |
32 | def test_one_model():
33 | create_experiment(client)
34 | model = create_sklearn_model()
35 | with mlflow.start_run() as run:
36 | mlflow.sklearn.log_model(model, "model")
37 | model_paths = find_run_model_names(client, run.info.run_id)
38 | assert len(model_paths) == 1
39 | assert model_paths[0] == "model"
40 |
41 |
42 | def test_two_models():
43 | create_experiment(client)
44 | model = create_sklearn_model()
45 | with mlflow.start_run() as run:
46 | mlflow.sklearn.log_model(model, "model")
47 | mlflow.sklearn.log_model(model, "model-onnx")
48 | model_paths = find_run_model_names(client, run.info.run_id)
49 | assert len(model_paths) == 2
50 | assert model_paths[0] == "model"
51 | assert model_paths[1] == "model-onnx"
52 |
53 |
54 | def test_two_models_nested():
55 | create_experiment(client)
56 | model = create_sklearn_model()
57 | with mlflow.start_run() as run:
58 | mlflow.sklearn.log_model(model, "model")
59 | mlflow.sklearn.log_model(model, "other_models/model-onnx")
60 | model_paths = find_run_model_names(client, run.info.run_id)
61 | assert len(model_paths) == 2
62 | assert model_paths[0] == "model"
63 | assert model_paths[1] == "other_models/model-onnx"
64 |
--------------------------------------------------------------------------------
/tests/open_source/test_renames.py:
--------------------------------------------------------------------------------
1 |
2 | # Test experiment and model rename core logic
3 |
4 | from mlflow_export_import.bulk import rename_utils
5 |
6 | def test_equal():
7 | renames = { "/User/foo": "/User/bar" }
8 | new_name = rename_utils.rename("/User/foo" , renames)
9 | assert new_name == "/User/bar"
10 |
11 | def test_prefix():
12 | renames = { "/User/foo": "/User/bar" }
13 | new_name = rename_utils.rename("/User/foo/home" , renames)
14 | assert new_name == "/User/bar/home"
15 |
16 | def test_no_match():
17 | renames = { "/User/foo": "/User/bar" }
18 | new_name = rename_utils.rename("foo" , renames)
19 | assert new_name == "foo"
20 |
21 | def test_blank_key():
22 | renames = { "": "/User/bar" }
23 | new_name = rename_utils.rename("foo" , renames)
24 | assert new_name == "foo"
25 |
26 | def test_blank_key_2():
27 | renames = { "/User/foo": "/User/bar" }
28 | new_name = rename_utils.rename("" , renames)
29 | assert new_name == ""
30 |
--------------------------------------------------------------------------------
/tests/open_source/test_ws_permissions.py:
--------------------------------------------------------------------------------
1 | from mlflow_export_import.common.ws_permissions_utils import _map_acl_element, map_acl
2 |
3 |
4 | # == Setup data
5 |
6 | group_name_element = {
7 | "group_name": "admins",
8 | "all_permissions": [
9 | {
10 | "permission_level": "CAN_MANAGE",
11 | "inherited": True,
12 | "inherited_from_object": [
13 | "/directories/"
14 | ]
15 | }
16 | ]
17 | }
18 |
19 | user_name_element = {
20 | "user_name": "k2@karakoram.com",
21 | "all_permissions": [
22 | {
23 | "permission_level": "CAN_EDIT",
24 | "inherited": True,
25 | "inherited_from_object": [
26 | "/directories/"
27 | ]
28 | }
29 | ]
30 | }
31 |
32 | user_name_element_2 = {
33 | "user_name": "k2@karakoram.com",
34 | "all_permissions": [
35 | {
36 | "permission_level": "CAN_MANAGE",
37 | "inherited": False
38 | },
39 | {
40 | "permission_level": "CAN_MANAGE",
41 | "inherited": True,
42 | "inherited_from_object": [
43 | "/directories/767933989557963"
44 | ]
45 | }
46 | ]
47 | }
48 |
49 | mixed_acl = [ group_name_element, user_name_element ]
50 | mixed_acl_2 = [ group_name_element, user_name_element_2 ]
51 |
52 |
53 | # == Tests
54 |
55 | def test_acl_element_group_name():
56 | acl2 = _map_acl_element(group_name_element)
57 | assert acl2 == [
58 | {
59 | "group_name": "admins",
60 | "permission_level": "CAN_MANAGE"
61 | }
62 | ]
63 |
64 | def test_acl_element_user_name():
65 | acl2 = _map_acl_element(user_name_element)
66 | assert acl2 == [
67 | {
68 | "user_name": "k2@karakoram.com",
69 | "permission_level": "CAN_EDIT"
70 | }
71 | ]
72 |
73 |
74 | def test_acl_element_user_name_2():
75 | acl2 = _map_acl_element(user_name_element_2)
76 | assert acl2 == [
77 | {
78 | "user_name": "k2@karakoram.com",
79 | "permission_level": "CAN_MANAGE"
80 | },
81 | {
82 | "user_name": "k2@karakoram.com",
83 | "permission_level": "CAN_MANAGE"
84 | }
85 | ]
86 |
87 |
88 | def test_acl_mixed():
89 | assert map_acl(mixed_acl) == [
90 | {
91 | "group_name": "admins",
92 | "permission_level": "CAN_MANAGE"
93 | },
94 | {
95 | "user_name": "k2@karakoram.com",
96 | "permission_level": "CAN_EDIT"
97 | }
98 | ]
99 |
100 |
101 | def test_acl_mixed_2():
102 | assert map_acl(mixed_acl_2) == [
103 | {
104 | "group_name": "admins",
105 | "permission_level": "CAN_MANAGE"
106 | },
107 | {
108 | "user_name": "k2@karakoram.com",
109 | "permission_level": "CAN_MANAGE"
110 | },
111 | {
112 | "user_name": "k2@karakoram.com",
113 | "permission_level": "CAN_MANAGE"
114 | }
115 | ]
116 |
117 |
118 | def test_empty():
119 | assert map_acl({}) == []
120 |
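121 |
122 | # Illustrative sketch (an assumption, not the actual ws_permissions_utils
123 | # code): consistent with the tests above, _map_acl_element flattens one ACL
124 | # entry into a list of {principal, permission_level} dicts - one per item in
125 | # "all_permissions" - and map_acl concatenates the results.
126 |
127 | def _map_acl_element_sketch(elt):
128 |     principal = {k: v for k, v in elt.items() if k != "all_permissions"}
129 |     return [
130 |         {**principal, "permission_level": p["permission_level"]}
131 |         for p in elt.get("all_permissions", [])
132 |     ]
133 |
134 | def _map_acl_sketch(acl):
135 |     return [p for elt in (acl or []) for p in _map_acl_element_sketch(elt)]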
--------------------------------------------------------------------------------
/tests/sklearn_utils.py:
--------------------------------------------------------------------------------
1 | from sklearn.tree import DecisionTreeClassifier
2 | from sklearn import datasets
3 | from sklearn.model_selection import train_test_split
4 |
5 | _dataset = datasets.load_iris()
6 | X_train, X_test, _y_train, _y_test = train_test_split(_dataset.data, _dataset.target, test_size=0.3)
7 | feature_names = _dataset.feature_names
8 |
9 |
10 | def create_sklearn_model(max_depth=5):
11 | model = DecisionTreeClassifier(max_depth=max_depth)
12 | model.fit(X_train, _y_train)
13 | return model
14 |
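15 |
16 | # Usage sketch (illustrative): the model can be logged with an inferred
17 | # signature, e.g. when exercising the signature tools. Assumes an active
18 | # MLflow run.
19 | #
20 | #   from mlflow.models import infer_signature
21 | #   model = create_sklearn_model()
22 | #   signature = infer_signature(X_train, model.predict(X_test))
23 | #   mlflow.sklearn.log_model(model, "model", signature=signature)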
--------------------------------------------------------------------------------
/tests/utils_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import yaml
4 | import shortuuid
5 | import pandas as pd
6 | import mlflow
7 | from mlflow_export_import.common.mlflow_utils import MlflowTrackingUriTweak
8 | from . import sklearn_utils
9 |
10 | TEST_OBJECT_PREFIX = "test_exim"
11 |
12 | def mk_test_object_name_default():
13 | return f"{TEST_OBJECT_PREFIX}_{mk_uuid()}"
14 |
15 | def mk_uuid():
16 | return shortuuid.uuid()
17 |
18 |
19 | def create_output_dir(output_dir):
20 | if os.path.exists(output_dir):
21 | shutil.rmtree(output_dir)
22 | os.makedirs(output_dir)
23 |
24 | # Compare two directory trees recursively; only the "MLmodel" file is allowed to differ.
25 | def compare_dirs(d1, d2):
26 | from filecmp import dircmp
27 | def _compare_dirs(dcmp):
28 | if len(dcmp.diff_files) > 0 or len(dcmp.left_only) > 0 or len(dcmp.right_only) > 0:
29 | if len(dcmp.diff_files) == 1:
30 | if dcmp.diff_files[0] == "MLmodel": # run_id differs because we changed it to the imported run_id
31 | return True
32 | return False
33 | for sub_dcmp in dcmp.subdirs.values():
34 | if not _compare_dirs(sub_dcmp):
35 | return False
36 | return True
37 | return _compare_dirs(dircmp(d1, d2))
38 |
39 |
40 | def create_run_artifact_dirs(output_dir):
41 | dir1 = create_run_artifact_dir(output_dir, "run1")
42 | dir2 = create_run_artifact_dir(output_dir, "run2")
43 | return dir1, dir2
44 |
45 |
46 | def create_run_artifact_dir(output_dir, run_name):
47 |     path = os.path.join(output_dir, "artifacts", run_name)
48 |     create_output_dir(path)
49 |     return path
50 |
51 |
52 | def create_iris_dataset():
53 | data_path = "in_memory"
54 | df = pd.DataFrame(data=sklearn_utils.X_train, columns=sklearn_utils.feature_names)
55 | return mlflow.data.from_pandas(df, source=data_path)
56 |
57 |
58 | def read_config_file(path="config.yaml"):
59 | with open(path, encoding="utf-8") as f:
60 | dct = yaml.safe_load(f)
61 | print(f"Config for '{path}':")
62 | for k,v in dct.items():
63 | print(f" {k}: {v}")
64 | return dct
65 |
66 | # Create a tree of nested runs (depth max_depth, fan-out max_width); returns the root run.
67 | def create_nested_runs(client, experiment_id, max_depth=1, max_width=1, level=0, indent=""):
68 | run_name = "run"
69 | if level >= max_depth:
70 | return
71 | run_name = f"{run_name}_{level}"
72 | nested = level > 0
73 |     with MlflowTrackingUriTweak(client):
74 | with mlflow.start_run(experiment_id=experiment_id, run_name=run_name, nested=nested) as run:
75 | mlflow.log_param("alpha", "0.123")
76 |             mlflow.log_metric("m", 0.123)
77 | mlflow.set_tag("run_name", run_name)
78 | mlflow.set_tag("ori_run_id", run.info.run_id)
79 | model = sklearn_utils.create_sklearn_model()
80 | mlflow.sklearn.log_model(model, "model")
81 | for _ in range(max_width):
82 | create_nested_runs(client, experiment_id, max_depth, max_width, level+1, indent+" ")
83 | return client.get_run(run.info.run_id)
84 |
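85 |
86 | # Usage sketch (illustrative): build a root run with two children, each child
87 | # with two children of its own. Assumes `client` is an MlflowClient and
88 | # `experiment_id` an existing experiment.
89 | #
90 | #   root = create_nested_runs(client, experiment_id, max_depth=3, max_width=2)
91 | #   print(root.info.run_id)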
--------------------------------------------------------------------------------