├── CHANGELOG.md ├── LICENSE.txt ├── README.md ├── README_bulk.md ├── README_copy.md ├── README_export_format.md ├── README_governance.md ├── README_limitations.md ├── README_options.md ├── README_single.md ├── README_tools.md ├── databricks_notebooks ├── README.md ├── _README.py ├── bulk │ ├── Check_Model_Versions_Runs.py │ ├── Common.py │ ├── Export_All.py │ ├── Export_Experiments.py │ ├── Export_Registered_Models.py │ ├── Import_Experiments.py │ ├── Import_Registered_Models.py │ └── _README.py ├── copy │ ├── Common.py │ ├── Copy_Model_Version.py │ ├── Copy_Run.py │ ├── Create_Model_Version.py │ ├── MLflow_Copy_Model_Version.py │ ├── _README.py │ ├── experimental │ │ ├── Common.py │ │ └── Copy_Model_Version.py │ └── tests │ │ └── Test_Copy_Model_Version.py ├── scripts │ ├── Common.py │ ├── Console_Scripts.py │ └── _README.py ├── single │ ├── Common.py │ ├── Export_Experiment.py │ ├── Export_Model_Version.py │ ├── Export_Registered_Model.py │ ├── Export_Run.py │ ├── Import_Experiment.py │ ├── Import_Model_Version.py │ ├── Import_Registered_Model.py │ ├── Import_Run.py │ └── _README.py └── tools │ ├── Common.py │ ├── Get_Model_Signature.py │ ├── List_Model_Versions_Without_Signature.py │ ├── Set_Model_Signature.py │ └── _README.py ├── diagrams ├── Copy_Model_Version_NonUC.png ├── Copy_Model_Version_UC.png └── architecture.png ├── mlflow_export_import ├── __init__.py ├── bulk │ ├── __init__.py │ ├── bulk_utils.py │ ├── experiments_merge_utils.py │ ├── export_all.py │ ├── export_experiments.py │ ├── export_models.py │ ├── import_experiments.py │ ├── import_models.py │ ├── model_utils.py │ └── rename_utils.py ├── client │ ├── __init__.py │ ├── client_utils.py │ ├── databricks_cli_utils.py │ ├── databricks_utils.py │ ├── http_client.py │ ├── mlflow_auth_utils.py │ └── user_agent_header.py ├── common │ ├── __init__.py │ ├── click_options.py │ ├── default_logging_config.py │ ├── dump_utils.py │ ├── filesystem.py │ ├── find_artifacts.py │ ├── io_utils.py │ ├── 
iterators.py │ ├── logging_utils.py │ ├── mlflow_utils.py │ ├── model_utils.py │ ├── pkg_version.py │ ├── source_tags.py │ ├── timestamp_utils.py │ ├── uc_permissions_utils.py │ ├── utils.py │ └── ws_permissions_utils.py ├── copy │ ├── __init__.py │ ├── click_options.py │ ├── copy_model_version.py │ ├── copy_run.py │ └── copy_utils.py ├── experiment │ ├── __init__.py │ ├── export_experiment.py │ ├── import_experiment.py │ ├── nested_runs_utils.py │ └── oss_nested_runs_utils.py ├── model │ ├── __init__.py │ ├── export_model.py │ └── import_model.py ├── model_version │ ├── __init__.py │ ├── click_options.py │ ├── export_model_version.py │ └── import_model_version.py ├── notebook │ ├── __init__.py │ └── download_notebook.py ├── run │ ├── __init__.py │ ├── export_run.py │ ├── import_run.py │ ├── run_data_importer.py │ └── run_utils.py ├── tools │ ├── __init__.py │ ├── click_options.py │ ├── experimental │ │ ├── README.md │ │ ├── filter_one_model.py │ │ ├── rewrite_export.py │ │ └── samples │ │ │ └── custom_export_rewriters.py │ ├── get_model_signature.py │ ├── list_model_versions_without_signatures.py │ ├── list_registered_models.py │ ├── set_model_signature.py │ ├── signature_utils.py │ └── tools_utils.py ├── version.py └── workflow_api │ ├── README.md │ ├── __init__.py │ ├── log_utils.py │ ├── run_submit.py │ ├── utils.py │ └── workflow_api_client.py ├── samples ├── databricks │ ├── bulk │ │ ├── experiments │ │ │ ├── 1280664374380606 │ │ │ │ ├── 253000ee70914831850defc593ba4740 │ │ │ │ │ └── run.json │ │ │ │ └── experiment.json │ │ │ ├── 9195e233f19e49379b16c5f2d2b0c05f │ │ │ │ ├── a17f0abf5d46464d899f0ffcebbdb7a8 │ │ │ │ │ └── run.json │ │ │ │ └── experiment.json │ │ │ └── experiments.json │ │ └── models │ │ │ ├── experiments │ │ │ ├── 1280664374380606 │ │ │ │ ├── 851de1f466304650a77c949f5d386d9f │ │ │ │ │ └── run.json │ │ │ │ └── experiment.json │ │ │ ├── 9195e233f19e49379b16c5f2d2b0c05f │ │ │ │ ├── a17f0abf5d46464d899f0ffcebbdb7a8 │ │ │ │ │ └── run.json │ │ │ │ 
└── experiment.json │ │ │ └── experiments.json │ │ │ ├── manifest.json │ │ │ └── models │ │ │ ├── Keras_MNIST │ │ │ └── model.json │ │ │ ├── Sklearn_WineQuality │ │ │ └── model.json │ │ │ └── models.json │ └── single │ │ ├── experiments │ │ ├── notebook_experiments │ │ │ ├── repo_notebook │ │ │ │ ├── 02aeef6d8cbf449ab50c8e715e320085 │ │ │ │ │ └── run.json │ │ │ │ └── experiment.json │ │ │ └── workspace_notebook │ │ │ │ ├── experiment.json │ │ │ │ └── f7816bc76f254f22ab25549a7c2c9b06 │ │ │ │ └── run.json │ │ └── workspace_experiments │ │ │ ├── automl_workspace_notebook │ │ │ └── 5e1e2c44039a40afafc760b837a4daab │ │ │ │ ├── artifacts │ │ │ │ ├── estimator.html │ │ │ │ └── model │ │ │ │ │ ├── MLmodel │ │ │ │ │ ├── conda.yaml │ │ │ │ │ ├── input_example.json │ │ │ │ │ ├── python_env.yaml │ │ │ │ │ └── requirements.txt │ │ │ │ └── run.json │ │ │ ├── job_repo_notebook │ │ │ └── experiment.json │ │ │ ├── repo_notebook │ │ │ ├── bad4988ed1184aad953fd14efee72fa2 │ │ │ │ └── run.json │ │ │ └── experiment.json │ │ │ ├── workspace_notebook │ │ │ ├── 253000ee70914831850defc593ba4740 │ │ │ │ └── run.json │ │ │ └── experiment.json │ │ │ └── workspace_notebook_src_tags │ │ │ ├── 68850173104649149678090f75d36d0a │ │ │ └── run.json │ │ │ └── experiment.json │ │ ├── models │ │ ├── basic │ │ │ └── model.json │ │ ├── deleted_runs │ │ │ └── model.json │ │ └── src_tags │ │ │ └── model.json │ │ └── versions │ │ └── sklearn_wine │ │ ├── experiment.json │ │ ├── model.json │ │ ├── run │ │ ├── artifacts │ │ │ └── model │ │ │ │ ├── MLmodel │ │ │ │ ├── conda.yaml │ │ │ │ ├── input_example.json │ │ │ │ ├── metadata │ │ │ │ ├── MLmodel │ │ │ │ ├── conda.yaml │ │ │ │ ├── python_env.yaml │ │ │ │ └── requirements.txt │ │ │ │ ├── model.pkl │ │ │ │ ├── python_env.yaml │ │ │ │ └── requirements.txt │ │ └── run.json │ │ └── version.json └── oss_mlflow │ ├── bulk │ ├── experiments │ │ ├── 1 │ │ │ ├── d057cae15f27465988e72c6212e1f226 │ │ │ │ └── run.json │ │ │ └── experiment.json │ │ ├── 2 │ │ │ ├── 
5397ae67ee0c49139bf64834b4d27fab │ │ │ │ └── run.json │ │ │ ├── 8a6af43e756f433da7a90fd6b4e49c3a │ │ │ │ └── run.json │ │ │ └── experiment.json │ │ └── experiments.json │ └── models │ │ ├── experiments │ │ ├── 1 │ │ │ ├── d057cae15f27465988e72c6212e1f226 │ │ │ │ └── run.json │ │ │ └── experiment.json │ │ ├── 2 │ │ │ ├── 5397ae67ee0c49139bf64834b4d27fab │ │ │ │ └── run.json │ │ │ ├── 8a6af43e756f433da7a90fd6b4e49c3a │ │ │ │ └── run.json │ │ │ └── experiment.json │ │ └── experiments.json │ │ ├── manifest.json │ │ └── models │ │ ├── models.json │ │ ├── sklearn_iris │ │ └── model.json │ │ └── sklearn_wine │ │ └── model.json │ └── single │ ├── experiments │ ├── basic │ │ ├── eb66c160957d4a28b11d3f1b968df9cd │ │ │ └── run.json │ │ └── experiment.json │ └── src_tags │ │ ├── 4b0ce88fd34e45fc8ca08876127299ce │ │ └── run.json │ │ └── experiment.json │ └── models │ ├── basic │ └── model.json │ └── src_tags │ └── model.json ├── setup.py └── tests ├── README.md ├── __init__.py ├── compare_utils.py ├── core.py ├── data ├── iris_score.csv └── iris_train.csv ├── databricks ├── README.md ├── __init__.py ├── _test_model_version.py ├── _test_registered_model.py ├── compare_utils.py ├── config.yaml.template ├── includes.py ├── init_tests.py ├── local_utils.py ├── run_tests.sh ├── test_copy_model_version.py ├── test_copy_run.py ├── test_experiments.py ├── test_model_version.py ├── test_registered_model.py ├── uc │ ├── run_tests.sh │ ├── test_copy_model_version.py │ ├── test_model_version.py │ └── test_registered_model.py └── unity_catalog_client.py ├── databricks_notebooks ├── README.md ├── __init__.py ├── config.yaml.template ├── databricks_tester.py ├── experiment │ └── Iris_Train.py ├── init_tests.py ├── run_tests.sh ├── samples │ ├── failed │ │ ├── run_tests_junit.xml │ │ └── run_tests_report.html │ ├── run_tests_junit.xml │ └── run_tests_report.html └── test_basic.py ├── open_source ├── README.md ├── __init__.py ├── init_tests.py ├── kill_server.sh ├── oss_utils_test.py ├── 
run_tests.sh ├── samples │ ├── run_tests_junit.xml │ └── run_tests_report.html ├── test_bulk_all.py ├── test_bulk_experiments.py ├── test_bulk_experiments_export_param.py ├── test_bulk_experiments_merge_utils.py ├── test_bulk_models.py ├── test_copy_model_version.py ├── test_copy_run.py ├── test_exceptions.py ├── test_experiments.py ├── test_find_run_model_names.py ├── test_iterators.py ├── test_model_signature.py ├── test_model_version.py ├── test_models.py ├── test_models_archive_vesions.py ├── test_renames.py ├── test_run_data_limits.py ├── test_runs.py ├── test_uc_permissions.py └── test_ws_permissions.py ├── sklearn_utils.py └── utils_test.py /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## 1.2.0 (2023-02-16) 4 | 5 | The 1.2.0 version of MLflow Export Import is a major milestone release. 6 | 7 | This release contains an important breaking change from the 1.x API, additional major features and improvements. 8 | 9 | Features: 10 | 11 | - New streamlined export format for MLflow objects (experiments, runs and registered models) 12 | - Exporting artifacts of a specific version of a model 13 | - Import source system fields and tags 14 | - More Databricks notebook examples: Export_All and Export_Models notebooks 15 | - Added download notebook CLI utility 16 | - Plenty of bug fixes 17 | 18 | Breaking Changes: 19 | 20 | - [Core] The JSON export file format has been overhauled and made consistent across different MLflow objects. 21 | 1.x export files cannot be read by the 2.x release. 
22 | 23 | Documentation updates: 24 | - Major updates to README files 25 | - Aligned sample JSON files with new format 26 | -------------------------------------------------------------------------------- /README_options.md: -------------------------------------------------------------------------------- 1 | 2 | # Options 3 | 4 | ## Common options 5 | 6 | `notebook-formats` - If exporting a Databricks run, the run's notebook revision can be saved in the specified formats (comma-delimited argument). Each format is saved in the notebooks folder of the run's artifact root directory as `notebook.{format}`. Supported formats are SOURCE, HTML, JUPYTER and DBC. See Databricks [Export Format](https://docs.databricks.com/dev-tools/api/latest/workspace.html#notebookexportformat) documentation. 7 | 8 | `use-src-user-id` - Set the destination user ID to the source user ID. Source user ID is ignored when importing into Databricks since the user is automatically picked up from your Databricks access token. 9 | 10 | `use-src-user-id` - Set the destination user field to the source user field. Only valid for open source MLflow. 11 | When importing into Databricks, the source user field is ignored since it is automatically picked up from your Databricks access token. 12 | There is no MLflow API endpoint to explicitly set the user field for any objects such as Run or Experiment. 13 | 14 | `import-source-tags` - Import source information for a registered model and its versions as tags in the destination object. 15 | See section below. 16 | 17 | ## MLflow Export Import Source Tags 18 | 19 | For ML governance purposes, original source run information is saved under the `mlflow_export_import` tag prefix in the destination MLflow object. 20 | 21 | 22 | For details see [README_governance.md](README_governance.md). 
23 | -------------------------------------------------------------------------------- /databricks_notebooks/bulk/Check_Model_Versions_Runs.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## Check Model Versions Runs 3 | # MAGIC 4 | # MAGIC Check if versions runs are deleted. 5 | # MAGIC * Soft delete - run is marked as `deleted`(tombstoned) but still exists in database for 30 days 6 | # MAGIC * Hard delete - run has been physically deleted 7 | # MAGIC 8 | # MAGIC Widget: 9 | # MAGIC * `1. Models` 10 | # MAGIC * `2. Export latest versions` 11 | # MAGIC * `yes`: get only latest versions per stage 12 | # MAGIC * `no`: get all versions for all stages 13 | # MAGIC * `3. Bail` 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run ./Common 18 | 19 | # COMMAND ---------- 20 | 21 | dbutils.widgets.text("1. Models", "") 22 | models = dbutils.widgets.get("1. Models") 23 | 24 | dbutils.widgets.dropdown("2. Export latest versions","yes",["yes","no"]) 25 | export_latest_versions = dbutils.widgets.get("2. Export latest versions") == "yes" 26 | 27 | dbutils.widgets.text("3. Bail", "") 28 | bail = dbutils.widgets.get("3. Bail") 29 | bail = None if bail=="" else int(bail) 30 | 31 | print("models:", models) 32 | print("export_latest_versions:", export_latest_versions) 33 | print("bail:", bail) 34 | 35 | # COMMAND ---------- 36 | 37 | assert_widget(models, "1. 
def get_notebook_formats(num):
    """
    Create the "<num>. Notebook formats" multiselect widget and return the selection.

    :param num: Ordinal used as the numeric prefix of the widget name.
    :return: List of the selected notebook format names; empty if nothing is selected.
    """
    widget_name = f"{num}. Notebook formats"
    all_notebook_formats = ["SOURCE", "DBC", "HTML", "JUPYTER"]
    # The first format ("SOURCE") is the widget's default selection.
    dbutils.widgets.multiselect(widget_name, all_notebook_formats[0], all_notebook_formats)
    selected = dbutils.widgets.get(widget_name)
    # Splitting an empty selection yields [""]; drop every empty entry, not just the
    # first one, so stray commas can never produce an empty format name.
    return [fmt for fmt in selected.split(",") if fmt]
5 | # MAGIC 6 | # MAGIC Widgets 7 | # MAGIC * `1. Experiments` - comma delimited list of either experiment IDs or experiment names. `all` will export all experiments. Or filename (ending with .txt) with experiment names/IDs. 8 | # MAGIC * `2. Output directory` - shared directory between source and destination workspaces. 9 | # MAGIC * `3. Run start date` - Export runs after this UTC date (inclusive). Example: `2023-04-05`. 10 | # MAGIC * `4. Export permissions` - export Databricks permissions. 11 | # MAGIC * `5. Export deleted runs` 12 | # MAGIC * `6. Notebook formats` 13 | # MAGIC * `7. Use threads` 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run ./Common 18 | 19 | # COMMAND ---------- 20 | 21 | dbutils.widgets.text("1. Experiments", "") 22 | experiments = dbutils.widgets.get("1. Experiments") 23 | 24 | dbutils.widgets.text("2. Output directory", "") 25 | output_dir = dbutils.widgets.get("2. Output directory") 26 | output_dir = output_dir.replace("dbfs:","/dbfs") 27 | 28 | dbutils.widgets.text("3. Run start date", "") 29 | run_start_date = dbutils.widgets.get("3. Run start date") 30 | 31 | dbutils.widgets.dropdown("4. Export permissions","no",["yes","no"]) 32 | export_permissions = dbutils.widgets.get("4. Export permissions") == "yes" 33 | 34 | dbutils.widgets.dropdown("5. Export deleted runs","no",["yes","no"]) 35 | export_deleted_runs = dbutils.widgets.get("5. Export deleted runs") == "yes" 36 | 37 | notebook_formats = get_notebook_formats(6) 38 | 39 | dbutils.widgets.dropdown("7. Use threads","False",["True","False"]) 40 | use_threads = dbutils.widgets.get("7. 
Use threads") == "True" 41 | 42 | if run_start_date=="": run_start_date = None 43 | 44 | print("experiments:", experiments) 45 | print("output_dir:", output_dir) 46 | print("run_start_date:", run_start_date) 47 | print("export_permissions:", export_permissions) 48 | print("export_deleted_runs:", export_deleted_runs) 49 | print("notebook_formats:", notebook_formats) 50 | print("use_threads:", use_threads) 51 | 52 | # COMMAND ---------- 53 | 54 | assert_widget(experiments, "1. Experiments") 55 | assert_widget(output_dir, "2. Output directory") 56 | 57 | # COMMAND ---------- 58 | 59 | from mlflow_export_import.bulk.export_experiments import export_experiments 60 | 61 | export_experiments( 62 | experiments = experiments, 63 | output_dir = output_dir, 64 | run_start_time = run_start_date, 65 | export_permissions = export_permissions, 66 | export_deleted_runs = export_deleted_runs, 67 | notebook_formats = notebook_formats, 68 | use_threads = use_threads 69 | ) 70 | 71 | # COMMAND ---------- 72 | 73 | # MAGIC %md ### Display exported files 74 | 75 | # COMMAND ---------- 76 | 77 | import os 78 | output_dir = output_dir.replace("dbfs:", "/dbfs") 79 | os.environ['OUTPUT_DIR'] = output_dir 80 | output_dir 81 | 82 | # COMMAND ---------- 83 | 84 | # MAGIC %sh 85 | # MAGIC echo "OUTPUT_DIR: $OUTPUT_DIR" ; echo 86 | # MAGIC ls $OUTPUT_DIR 87 | 88 | # COMMAND ---------- 89 | 90 | # MAGIC %sh cat $OUTPUT_DIR/experiments.json 91 | 92 | # COMMAND ---------- 93 | 94 | # MAGIC %sh ls -lR $OUTPUT_DIR 95 | -------------------------------------------------------------------------------- /databricks_notebooks/bulk/Import_Experiments.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## Import Experiments 3 | # MAGIC 4 | # MAGIC Widgets 5 | # MAGIC * `1. Input directory` - directory of exported experiments. 6 | # MAGIC * `2. Experiment rename file` - Experiment rename file. 7 | # MAGIC * `3. 
Import permissions` 8 | # MAGIC * `4. Import source tags` 9 | # MAGIC * `5. Use threads` - use multi-threaded import. 10 | # MAGIC 11 | # MAGIC See https://github.com/mlflow/mlflow-export-import/blob/master/README_bulk.md#Import-experiments. 12 | 13 | # COMMAND ---------- 14 | 15 | # MAGIC %run ./Common 16 | 17 | # COMMAND ---------- 18 | 19 | dbutils.widgets.text("1. Input directory", "") 20 | input_dir = dbutils.widgets.get("1. Input directory") 21 | input_dir = input_dir.replace("dbfs:","/dbfs") 22 | 23 | dbutils.widgets.text("2. Experiment rename file","") 24 | val = dbutils.widgets.get("2. Experiment rename file") 25 | experiment_rename_file = val or None 26 | 27 | dbutils.widgets.dropdown("3. Import permissions","no",["yes","no"]) 28 | import_permissions = dbutils.widgets.get("3. Import permissions") == "yes" 29 | 30 | dbutils.widgets.dropdown("4. Import source tags","no",["yes","no"]) 31 | import_source_tags = dbutils.widgets.get("4. Import source tags") == "yes" 32 | 33 | dbutils.widgets.dropdown("5. Use threads","no",["yes","no"]) 34 | use_threads = dbutils.widgets.get("5. Use threads") == "yes" 35 | 36 | print("input_dir:", input_dir) 37 | print("experiment_rename_file:", experiment_rename_file) 38 | print("import_permissions:", import_permissions) 39 | print("import_source_tags:", import_source_tags) 40 | print("use_threads:", use_threads) 41 | 42 | # COMMAND ---------- 43 | 44 | assert_widget(input_dir, "1. 
# Databricks notebook source
# MAGIC %md ## Import Registered Models
# MAGIC
# MAGIC Widgets
# MAGIC * `1. Input directory` - directory of exported models.
# MAGIC * `2. Delete model` - delete the current contents of model
# MAGIC * `3. Model rename file` - Model rename file.
# MAGIC * `4. Experiment rename file` - Experiment rename file.
# MAGIC * `5. Import permissions`
# MAGIC * `6. Import source tags`
# MAGIC * `7. Use threads` - use multi-threaded import.
# MAGIC
# MAGIC See https://github.com/mlflow/mlflow-export-import/blob/master/README_bulk.md#Import-registered-models

# COMMAND ----------

# MAGIC %run ./Common

# COMMAND ----------

# Each widget is declared and then read immediately; the numeric prefix is part of
# the widget name and must match the list in the header cell above.

dbutils.widgets.text("1. Input directory", "")
input_dir = dbutils.widgets.get("1. Input directory")
# Rewrite the "dbfs:" scheme prefix to the "/dbfs" path prefix.
input_dir = input_dir.replace("dbfs:","/dbfs")

dbutils.widgets.dropdown("2. Delete model","no",["yes","no"])
delete_model = dbutils.widgets.get("2. Delete model") == "yes"

# An empty rename-file widget is passed on as None.
dbutils.widgets.text("3. Model rename file","")
val = dbutils.widgets.get("3. Model rename file")
model_rename_file = val or None

dbutils.widgets.text("4. Experiment rename file","")
val = dbutils.widgets.get("4. Experiment rename file")
experiment_rename_file = val or None

dbutils.widgets.dropdown("5. Import permissions","no",["yes","no"])
import_permissions = dbutils.widgets.get("5. Import permissions") == "yes"

dbutils.widgets.dropdown("6. Import source tags","no",["yes","no"])
import_source_tags = dbutils.widgets.get("6. Import source tags") == "yes"

# Numbered 7 (was "6."): the old name duplicated the ordinal of "6. Import source tags"
# and did not match the documented widget name.
dbutils.widgets.dropdown("7. Use threads","no",["yes","no"])
use_threads = dbutils.widgets.get("7. Use threads") == "yes"

print("input_dir:", input_dir)
print("delete_model:", delete_model)
print("model_rename_file: ", model_rename_file)
print("experiment_rename_file:", experiment_rename_file)
print("import_permissions:", import_permissions)
print("import_source_tags:", import_source_tags)
print("use_threads:", use_threads)

# COMMAND ----------

# The input directory is the only mandatory widget.
assert_widget(input_dir, "1. Input directory")

# COMMAND ----------

from mlflow_export_import.bulk.import_models import import_models

import_models(
    input_dir = input_dir,
    delete_model = delete_model,
    model_renames = model_rename_file,
    experiment_renames = experiment_rename_file,
    import_permissions = import_permissions,
    import_source_tags = import_source_tags,
    use_threads = use_threads
)
# Databricks notebook source
# MAGIC %md ## Copy Run
# MAGIC
# MAGIC ##### Overview
# MAGIC
# MAGIC Copy an MLflow run to either the current or to another workspace.
# MAGIC
# MAGIC ##### Widgets
# MAGIC
# MAGIC * `1. Source run ID` - Source run ID.
# MAGIC * `2. Destination experiment` - Destination experiment name of the run.
# MAGIC * `3. Destination workspace` - Destination workspace - default is current workspace.

# COMMAND ----------

# MAGIC %md #### Setup

# COMMAND ----------

# MAGIC %run ./Common

# COMMAND ----------

dbutils.widgets.text("1. Source run ID", "")
src_run_id = dbutils.widgets.get("1. Source run ID")

dbutils.widgets.text("2. Destination experiment", "")
dst_experiment_name = dbutils.widgets.get("2. Destination experiment")

dbutils.widgets.text("3. Destination workspace", "databricks")
dst_run_workspace = dbutils.widgets.get("3. Destination workspace")
# A cleared widget falls back to "databricks".
dst_run_workspace = dst_run_workspace or "databricks"

print("src_run_id:", src_run_id)
print("dst_experiment_name:", dst_experiment_name)
print("dst_run_workspace:", dst_run_workspace)

# COMMAND ----------

# The names reported here must match the widget names declared above so the error
# message points at a widget that actually exists.
assert_widget(src_run_id, "1. Source run ID")
assert_widget(dst_experiment_name, "2. Destination experiment")

# COMMAND ----------

# MAGIC %md #### Copy Run

# COMMAND ----------

from mlflow_export_import.copy.copy_run import copy

# NOTE(review): the third argument is presumably the source workspace/tracking URI,
# fixed to "databricks" - confirm against copy_run.copy.
dst_run = copy(src_run_id, dst_experiment_name, "databricks", dst_run_workspace)

# COMMAND ----------

dst_run

# COMMAND ----------

# The run link is only displayed when the destination is "databricks".
if dst_run_workspace == "databricks":
    display_run_uri(dst_run.info.run_id)
else:
    print(f"Cannot display run '{dst_run.info.run_id}' since it is in a remote workspace.")
Destination Model") 32 | 33 | print("src_model_uri: ", src_model_uri) 34 | print("dst_model_name: ", dst_model_name) 35 | 36 | # COMMAND ---------- 37 | 38 | if "." in src_model_uri: 39 | mlflow.set_registry_uri("databricks-uc") 40 | else: 41 | mlflow.set_registry_uri("databricks") 42 | client = mlflow.MlflowClient() 43 | print("client._registry_uri:", client._registry_uri) 44 | 45 | # COMMAND ---------- 46 | 47 | dst_vr = client.copy_model_version(src_model_uri, dst_model_name) 48 | 49 | # COMMAND ---------- 50 | 51 | dst_vr 52 | -------------------------------------------------------------------------------- /databricks_notebooks/copy/_README.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## MLflow Export Import - Copy Notebooks 3 | # MAGIC 4 | # MAGIC Copy an MLflow object to either the current or to another workspace and/or model registry. 5 | # MAGIC 6 | # MAGIC ##### Core Notebooks 7 | # MAGIC * [Copy_Model_Version]($Copy_Model_Version) - Copy an MLflow model version (deep copy). 8 | # MAGIC * [Test_Copy_Model_Version]($tests/Test_Copy_Model_Version) 9 | # MAGIC * [Copy_Run]($Copy_Run) - Copy an MLflow run. 10 | # MAGIC * [Common]($Common) - Common utilities. 11 | # MAGIC 12 | # MAGIC ##### MLflow Copy Model Version 13 | # MAGIC * [MLflow_Copy_Model_Version]($MLflow_Copy_Model_Version) - notebook 14 | # MAGIC * Uses the standard [MlflowClient.copy_model_version](https://mlflow.org/docs/latest/python_api/mlflow.client.html#mlflow.client.MlflowClient.copy_model_version) method. 15 | # MAGIC * Does not use mlflow-export-import. 16 | # MAGIC 17 | # MAGIC ##### Create Model Version 18 | # MAGIC * [Create_Model_Version]($Create_Model_Version) - notebook 19 | # MAGIC * Creates a model version from an MLflow model "source" URI in the current or in another model registry. 20 | # MAGIC * Does not use mlflow-export-import. 
def assert_widget(value, name):
    """
    Fail fast when a required notebook widget has been left blank.

    :param value: Text read from the widget.
    :param name: Widget name quoted in the error message.
    :raises Exception: If `value` is empty or contains only whitespace.
    """
    # Anything left after stripping trailing whitespace means the widget was filled in.
    if value.rstrip():
        return
    raise Exception(f"ERROR: '{name}' widget is required")
def copy_model_version(
        src_model_name,
        src_model_version,
        dst_model_name,
        dst_experiment_name,
        src_run_workspace = "databricks",
        copy_lineage_tags = False,
        verbose = False
    ):
    """
    Copy a model version, deriving the registry URIs from the model names.

    The destination tracking URI is always "databricks". A model name that
    `is_unity_catalog_model` accepts selects the "databricks-uc" registry.

    :param src_model_name: Source registered model name.
    :param src_model_version: Source model version.
    :param dst_model_name: Destination registered model name.
    :param dst_experiment_name: Destination experiment name, forwarded to `copy`.
    :param src_run_workspace: Forwarded to `copy` as the source tracking URI; also used
        as the source registry URI when it is neither "databricks" nor "databricks-uc"
        and the source model is not a Unity Catalog model.
    :param copy_lineage_tags: Forwarded to `copy` unchanged.
    :param verbose: Forwarded to `copy` unchanged.
    :return: Whatever `mlflow_export_import.copy.copy_model_version.copy` returns.
    """
    from mlflow_export_import.common.model_utils import is_unity_catalog_model
    from mlflow_export_import.copy.copy_model_version import copy

    # Source registry: Unity Catalog wins; otherwise a Databricks workspace keyword maps
    # to the workspace registry and any other value is used as the registry URI itself.
    if is_unity_catalog_model(src_model_name):
        source_registry = "databricks-uc"
    elif src_run_workspace in ("databricks", "databricks-uc"):
        source_registry = "databricks"
    else:
        source_registry = src_run_workspace

    # Destination registry depends only on the destination model name.
    if is_unity_catalog_model(dst_model_name):
        target_registry = "databricks-uc"
    else:
        target_registry = "databricks"

    return copy(
        src_model_name,
        src_model_version,
        dst_model_name,
        dst_experiment_name,
        src_tracking_uri = src_run_workspace,
        dst_tracking_uri = "databricks",
        src_registry_uri = source_registry,
        dst_registry_uri = target_registry,
        copy_lineage_tags = copy_lineage_tags,
        verbose = verbose
    )
-------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # Create standard .databrickscfg in custom location and specify with $DATABRICKS_CONFIG_FILE 3 | 4 | def create_databrick_config_file(secrets_scope, secrets_key, databricks_config_file=None): 5 | """ Create a .databrickscfg file so you can work in shell mode with Python scripts. """ 6 | context = dbutils.notebook.entry_point.getDbutils().notebook().getContext() 7 | token = dbutils.secrets.get(scope=secrets_scope, key=secrets_key) 8 | host_name = context.tags().get("browserHostName").get() 9 | user = context.tags().get("user").get() 10 | 11 | import os 12 | if not databricks_config_file: 13 | databricks_config_file = os.path.join("/tmp", f".databrickscfg-{user}") 14 | print(f"DATABRICKS_CONFIG_FILE: {databricks_config_file}") 15 | os.environ["DATABRICKS_CONFIG_FILE"] = databricks_config_file 16 | dbutils.fs.put(f"file:///{databricks_config_file}",f"[DEFAULT]\nhost=https://{host_name}\ntoken = "+token,overwrite=True) 17 | 18 | # COMMAND ---------- 19 | 20 | def assert_widget(value, name): 21 | if len(value.rstrip())==0: 22 | raise Exception(f"ERROR: '{name}' widget is required") 23 | 24 | # COMMAND ---------- 25 | 26 | assert_widget(secrets_scope, "1. Secrets scope") 27 | assert_widget(secrets_token_key, "2. 
Secrets PAT key") 28 | 29 | # COMMAND ---------- 30 | 31 | create_databrick_config_file(secrets_scope, secrets_token_key) 32 | 33 | # COMMAND ---------- 34 | 35 | # MAGIC %pip install git+https:///github.com/mlflow/mlflow-export-import/#egg=mlflow-export-import 36 | 37 | # COMMAND ---------- 38 | 39 | # MAGIC %sh mlflow --version 40 | -------------------------------------------------------------------------------- /databricks_notebooks/scripts/Console_Scripts.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## Console Scripts - Single 3 | # MAGIC 4 | # MAGIC * Use this notebook as a starting point template for executing console scripts. 5 | # MAGIC * See [github.com/mlflow/mlflow-export-import/blob/master/README_single.md](https://github.com/mlflow/mlflow-export-import/blob/master/README_single.md). 6 | # MAGIC * You'll first need to specify a [Databricks secret](https://docs.databricks.com/security/secrets/secrets.html) to your [PAT](https://docs.databricks.com/administration-guide/access-control/tokens.html) (personal access token) to execute CLI commands. 7 | 8 | # COMMAND ---------- 9 | 10 | # MAGIC %md ### Setup 11 | 12 | # COMMAND ---------- 13 | 14 | dbutils.widgets.text("1. Secrets scope", "") 15 | secrets_scope = dbutils.widgets.get("1. Secrets scope") 16 | dbutils.widgets.text("2. Secrets PAT key", "") 17 | secrets_token_key = dbutils.widgets.get("2. 
Secrets PAT key") 18 | secrets_scope, secrets_token_key 19 | 20 | # COMMAND ---------- 21 | 22 | # MAGIC %run ./Common 23 | 24 | # COMMAND ---------- 25 | 26 | # MAGIC %sh 27 | # MAGIC echo "DATABRICKS_CONFIG_FILE: $DATABRICKS_CONFIG_FILE" 28 | # MAGIC cat $DATABRICKS_CONFIG_FILE 29 | 30 | # COMMAND ---------- 31 | 32 | # MAGIC %md ### Single notebooks 33 | 34 | # COMMAND ---------- 35 | 36 | # MAGIC %md #### Experiment 37 | 38 | # COMMAND ---------- 39 | 40 | # MAGIC %sh export-experiment --help 41 | 42 | # COMMAND ---------- 43 | 44 | # MAGIC %sh import-experiment --help 45 | 46 | # COMMAND ---------- 47 | 48 | # MAGIC %md #### export-model 49 | 50 | # COMMAND ---------- 51 | 52 | # MAGIC %sh export-model --help 53 | 54 | # COMMAND ---------- 55 | 56 | # MAGIC %sh import-model --help 57 | 58 | # COMMAND ---------- 59 | 60 | # MAGIC %md #### export-run 61 | 62 | # COMMAND ---------- 63 | 64 | # MAGIC %sh export-run --help 65 | 66 | # COMMAND ---------- 67 | 68 | # MAGIC %sh import-run --help 69 | 70 | # COMMAND ---------- 71 | 72 | # MAGIC %md ### Bulk notebooks 73 | 74 | # COMMAND ---------- 75 | 76 | # MAGIC %md #### Experiments 77 | 78 | # COMMAND ---------- 79 | 80 | # MAGIC %sh export-experiments --help 81 | 82 | # COMMAND ---------- 83 | 84 | # MAGIC %sh import-experiments --help 85 | 86 | # COMMAND ---------- 87 | 88 | # MAGIC %md #### Models 89 | 90 | # COMMAND ---------- 91 | 92 | # MAGIC %sh export-models --help 93 | 94 | # COMMAND ---------- 95 | 96 | # MAGIC %sh import-models --help 97 | 98 | # COMMAND ---------- 99 | 100 | # MAGIC %md #### All 101 | 102 | # COMMAND ---------- 103 | 104 | # MAGIC %sh export-all --help 105 | 106 | # COMMAND ---------- 107 | 108 | # MAGIC %sh import-all --help 109 | -------------------------------------------------------------------------------- /databricks_notebooks/scripts/_README.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## 
MLflow Export Import - Console Script Notebooks 3 | # MAGIC 4 | # MAGIC Console Script notebooks - command-line scripts that use the shell (%sh) 5 | # MAGIC 6 | # MAGIC * [Console_Scripts]($Console_Scripts) 7 | # MAGIC * [Common]($Common) 8 | # MAGIC 9 | # MAGIC You'll need to specify a [Databricks secret](https://docs.databricks.com/security/secrets/secrets.html) to your [PAT](https://docs.databricks.com/administration-guide/access-control/tokens.html) (personal access token) in the notebook to execute CLI commands. 10 | # MAGIC 11 | # MAGIC Last updated: 2023-03-26 12 | -------------------------------------------------------------------------------- /databricks_notebooks/single/Export_Model_Version.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ### Export Model Version 3 | # MAGIC 4 | # MAGIC ##### Overview 5 | # MAGIC * Export a model version and its run. 6 | # MAGIC 7 | # MAGIC ##### Widgets 8 | # MAGIC * `1. Model name` - Registered model name. 9 | # MAGIC * `2. Model version` - Model version. 10 | # MAGIC * `3. Output directory` - Output directory. 11 | # MAGIC * `4. Export version MLflow model` - Export a model version's "cached" registry MLflow model (clone of run's MLflow model). 12 | # MAGIC * `5. Notebook formats` - Notebook formats to export. 13 | 14 | # COMMAND ---------- 15 | 16 | # MAGIC %md ### Include setup 17 | 18 | # COMMAND ---------- 19 | 20 | # MAGIC %run ./Common 21 | 22 | # COMMAND ---------- 23 | 24 | # MAGIC %md ### Widget setup 25 | 26 | # COMMAND ---------- 27 | 28 | dbutils.widgets.text("1. Model name", "") 29 | model_name = dbutils.widgets.get("1. Model name") 30 | 31 | dbutils.widgets.text("2. Model version", "") 32 | version = dbutils.widgets.get("2. Model version") 33 | 34 | dbutils.widgets.text("3. Output directory", "") 35 | output_dir = dbutils.widgets.get("3. Output directory") 36 | 37 | dbutils.widgets.dropdown("4. 
Export version MLflow model","no",["yes","no"]) 38 | export_version_model = dbutils.widgets.get("4. Export version MLflow model") == "yes" 39 | 40 | notebook_formats = get_notebook_formats(5) # widget "7. Notebook formats" 41 | 42 | print("model_name:", model_name) 43 | print("version:", version) 44 | print("output_dir:", output_dir) 45 | print("export_version_model:", export_version_model) 46 | print("notebook_formats:", notebook_formats) 47 | 48 | # COMMAND ---------- 49 | 50 | assert_widget(model_name, "1. Model name") 51 | assert_widget(model_name, "2. Model version") 52 | assert_widget(output_dir, "3. Output directory") 53 | 54 | # COMMAND ---------- 55 | 56 | # MAGIC %md ### Turn on Unity Catalog mode if necessary 57 | 58 | # COMMAND ---------- 59 | 60 | activate_unity_catalog(model_name) 61 | 62 | # COMMAND ---------- 63 | 64 | # MAGIC %md ### Display model UI link 65 | 66 | # COMMAND ---------- 67 | 68 | display_registered_model_uri(model_name) 69 | 70 | # COMMAND ---------- 71 | 72 | # MAGIC %md ### Export the model version 73 | 74 | # COMMAND ---------- 75 | 76 | from mlflow_export_import.model_version.export_model_version import export_model_version 77 | 78 | export_model_version( 79 | model_name = model_name, 80 | version = version, 81 | output_dir = output_dir, 82 | export_version_model = export_version_model, 83 | notebook_formats = notebook_formats 84 | ) 85 | 86 | # COMMAND ---------- 87 | 88 | # MAGIC %md ### Display exported files 89 | 90 | # COMMAND ---------- 91 | 92 | import os 93 | output_dir = mk_local_path(output_dir) 94 | os.environ['OUTPUT_DIR'] = output_dir 95 | 96 | # COMMAND ---------- 97 | 98 | # MAGIC %sh echo $OUTPUT_DIR 99 | 100 | # COMMAND ---------- 101 | 102 | # MAGIC %sh ls -l $OUTPUT_DIR 103 | 104 | # COMMAND ---------- 105 | 106 | # MAGIC %sh 107 | # MAGIC cat $OUTPUT_DIR/model_version.json 108 | -------------------------------------------------------------------------------- /databricks_notebooks/single/Export_Run.py: 
-------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ### Export Run 3 | # MAGIC 4 | # MAGIC ##### Overview 5 | # MAGIC * Exports a run and its artifacts to a folder. 6 | # MAGIC * Output file `run.json` contains run metadata to be able to rehydrate the run. 7 | # MAGIC * Notebooks are also exported in several formats. 8 | # MAGIC 9 | # MAGIC #### Output folder 10 | # MAGIC ``` 11 | # MAGIC +-artifacts/ 12 | # MAGIC | +-sklearn-model/ 13 | # MAGIC | | +-model.pkl 14 | # MAGIC | | +-conda.yaml 15 | # MAGIC | | 16 | # MAGIC +-run.json 17 | # MAGIC ``` 18 | # MAGIC 19 | # MAGIC ##### Widgets 20 | # MAGIC * `1. Run ID` 21 | # MAGIC * `2. Output base directory` - Base output directory of the exported run. 22 | # MAGIC * `3. Notebook formats` - Standard Databricks notebook formats such as SOURCE, HTML, JUPYTER, DBC. 23 | 24 | # COMMAND ---------- 25 | 26 | # MAGIC %run ./Common 27 | 28 | # COMMAND ---------- 29 | 30 | # MAGIC %md ### Setup 31 | 32 | # COMMAND ---------- 33 | 34 | dbutils.widgets.text("1. Run ID", "") 35 | run_id = dbutils.widgets.get("1. Run ID") 36 | 37 | dbutils.widgets.text("2. Output base directory", "") 38 | output_dir = dbutils.widgets.get("2. Output base directory") 39 | output_dir += f"/{run_id}" 40 | 41 | notebook_formats = get_notebook_formats(3) 42 | 43 | print("run_id:", run_id) 44 | print("output_dir:", output_dir) 45 | print("notebook_formats:", notebook_formats) 46 | 47 | # COMMAND ---------- 48 | 49 | assert_widget(run_id, "1. Run ID") 50 | assert_widget(output_dir, "2. 
Output base directory") 51 | 52 | # COMMAND ---------- 53 | 54 | # MAGIC %md ### Display run UI link 55 | 56 | # COMMAND ---------- 57 | 58 | display_run_uri(run_id) 59 | 60 | # COMMAND ---------- 61 | 62 | # MAGIC %md ### Export the run 63 | 64 | # COMMAND ---------- 65 | 66 | from mlflow_export_import.run.export_run import export_run 67 | 68 | export_run( 69 | run_id = run_id, 70 | output_dir = output_dir, 71 | notebook_formats = notebook_formats 72 | ) 73 | 74 | # COMMAND ---------- 75 | 76 | # MAGIC %md ### Display exported run files 77 | 78 | # COMMAND ---------- 79 | 80 | import os 81 | output_dir = output_dir.replace("dbfs:","/dbfs") 82 | os.environ['OUTPUT_DIR'] = output_dir 83 | 84 | # COMMAND ---------- 85 | 86 | # MAGIC %sh echo $OUTPUT_DIR 87 | 88 | # COMMAND ---------- 89 | 90 | # MAGIC %sh ls -l $OUTPUT_DIR 91 | 92 | # COMMAND ---------- 93 | 94 | # MAGIC %sh cat $OUTPUT_DIR/run.json 95 | 96 | # COMMAND ---------- 97 | 98 | # MAGIC %sh ls -lR $OUTPUT_DIR/artifacts 99 | -------------------------------------------------------------------------------- /databricks_notebooks/single/Import_Experiment.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ### Import Experiment 3 | # MAGIC 4 | # MAGIC **Widgets** 5 | # MAGIC * `1. Input directory` - Input directory containing an exported experiment. 6 | # MAGIC * `2. Destination experiment name` - will create experiment if it doesn't exist. 7 | # MAGIC * `3. Import permissions` 8 | # MAGIC * `4. Import source tags` 9 | 10 | # COMMAND ---------- 11 | 12 | # MAGIC %md ### Include setup 13 | 14 | # COMMAND ---------- 15 | 16 | # MAGIC %run ./Common 17 | 18 | # COMMAND ---------- 19 | 20 | # MAGIC %md ### Widget setup 21 | 22 | # COMMAND ---------- 23 | 24 | 25 | dbutils.widgets.text("1. Destination experiment name", "") 26 | experiment_name = dbutils.widgets.get("1. Destination experiment name") 27 | 28 | dbutils.widgets.text("2. 
Input directory", "") 29 | input_dir = dbutils.widgets.get("2. Input directory") 30 | 31 | dbutils.widgets.dropdown("3. Import permissions","no",["yes","no"]) 32 | import_permissions = dbutils.widgets.get("3. Import permissions") == "yes" 33 | 34 | dbutils.widgets.dropdown("4. Import source tags","no",["yes","no"]) 35 | import_source_tags = dbutils.widgets.get("4. Import source tags") == "yes" 36 | 37 | print("input_dir:", input_dir) 38 | print("experiment_name:", experiment_name) 39 | print("import_permissions:", import_permissions) 40 | print("import_source_tags:", import_source_tags) 41 | 42 | # COMMAND ---------- 43 | 44 | assert_widget(experiment_name, "1. Destination experiment name") 45 | assert_widget(input_dir, "2. Input directory") 46 | 47 | # COMMAND ---------- 48 | 49 | # MAGIC %md ### Import experiment 50 | 51 | # COMMAND ---------- 52 | 53 | from mlflow_export_import.experiment.import_experiment import import_experiment 54 | 55 | import_experiment( 56 | experiment_name = experiment_name, 57 | input_dir = input_dir, 58 | import_permissions = import_permissions, 59 | import_source_tags = import_source_tags 60 | ) 61 | 62 | # COMMAND ---------- 63 | 64 | # MAGIC %md ### Display experiment UI link 65 | 66 | # COMMAND ---------- 67 | 68 | display_experiment_info(experiment_name) 69 | -------------------------------------------------------------------------------- /databricks_notebooks/single/Import_Run.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## Import Run 3 | # MAGIC 4 | # MAGIC Import run from the folder that was created by the [Export_Run]($Export_Run) notebook. 5 | # MAGIC 6 | # MAGIC #### Widgets 7 | # MAGIC * `1. Destination experiment name` - Import run into this experiment. Will create if it doesn't exist. 8 | # MAGIC * `2. Input directory` - Input directory containing an exported run. 9 | # MAGIC * `3. 
Import source tags` 10 | 11 | # COMMAND ---------- 12 | 13 | # MAGIC %md ### Include setup 14 | 15 | # COMMAND ---------- 16 | 17 | # MAGIC %run ./Common 18 | 19 | # COMMAND ---------- 20 | 21 | # MAGIC %md ### Widget setup 22 | 23 | # COMMAND ---------- 24 | 25 | dbutils.widgets.text("1. Destination experiment name", "") 26 | experiment_name = dbutils.widgets.get("1. Destination experiment name") 27 | 28 | dbutils.widgets.text("2. Input directory", "") 29 | input_dir = dbutils.widgets.get("2. Input directory") 30 | 31 | dbutils.widgets.dropdown("3. Import source tags","no",["yes","no"]) 32 | import_source_tags = dbutils.widgets.get("3. Import source tags") == "yes" 33 | 34 | print("input_dir:", input_dir) 35 | print("experiment_name:", experiment_name) 36 | print("import_source_tags:", import_source_tags) 37 | 38 | # COMMAND ---------- 39 | 40 | assert_widget(experiment_name, "1. Destination experiment name") 41 | assert_widget(input_dir, "2. Input base directory") 42 | 43 | # COMMAND ---------- 44 | 45 | # MAGIC %md ### Import Run 46 | 47 | # COMMAND ---------- 48 | 49 | from mlflow_export_import.run.import_run import import_run 50 | 51 | run, _ = import_run( 52 | experiment_name = experiment_name, 53 | input_dir = input_dir, 54 | import_source_tags = import_source_tags 55 | ) 56 | print("Run ID:", run.info.run_id) 57 | 58 | # COMMAND ---------- 59 | 60 | # MAGIC %md ### Display run UI link 61 | 62 | # COMMAND ---------- 63 | 64 | display_run_uri(run.info.run_id) 65 | 66 | # COMMAND ---------- 67 | 68 | # MAGIC %md ### Check imported source tags 69 | 70 | # COMMAND ---------- 71 | 72 | if import_source_tags: 73 | import pandas as pd 74 | run = mlflow_client.get_run(run.info.run_id) 75 | data = [ (k, v) for k,v in run.data.tags.items() if k.startswith("mlflow_exim") ] 76 | df = pd.DataFrame(data, columns = ["Key","Value"]) 77 | display(df) 78 | -------------------------------------------------------------------------------- /databricks_notebooks/single/_README.py: 
-------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## MLflow Export Import - Single Notebooks 3 | # MAGIC 4 | # MAGIC Export and import one MLflow object. 5 | # MAGIC 6 | # MAGIC **Notebooks** 7 | # MAGIC * Run 8 | # MAGIC * [Export_Run]($./Export_Run) - export a run in its entirety (run.info, run.data and artifacts). 9 | # MAGIC * [Import_Run]($./Import_Run) 10 | # MAGIC * Experiment 11 | # MAGIC * [Export_Experiment]($./Export_Experiment) - export an experiment and its runs (run.info, run.data and artifacts). 12 | # MAGIC * [Import_Experiment]($./Import_Experiment) 13 | # MAGIC * Registered Model 14 | # MAGIC * [Export_Registered_Model]($./Export_Registered_Model) - export a registered model, its versions and their runs. 15 | # MAGIC * [Import_Registered_Model]($./Import_Registered_Model) 16 | # MAGIC * Model Version 17 | # MAGIC * [Export_Model_Version]($./Export_Model_Version) - export a model version and its run. 18 | # MAGIC * [Import_Model_Version]($./Import_Model_Version) - import a model version. 19 | # MAGIC * [Common]($./Common) - helper utility methods. 20 | # MAGIC 21 | # MAGIC **More information** 22 | # MAGIC 23 | # MAGIC * [Console_Scripts_Single]($../console_scripts/Console_Scripts_Single) to execute scripts from shell (%sh). 24 | # MAGIC * [github.com/mlflow/mlflow-export-import/blob/master/README_single.md](https://github.com/mlflow/mlflow-export-import/blob/master/README_single.md). 
25 | 26 | # COMMAND ---------- 27 | 28 | # MAGIC %md ##### Last updated: _2024-07-21_ 29 | -------------------------------------------------------------------------------- /databricks_notebooks/tools/Common.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %pip install -U mlflow-skinny 3 | # MAGIC %pip install -U git+https:///github.com/mlflow/mlflow-export-import/#egg=mlflow-export-import 4 | # MAGIC dbutils.library.restartPython() 5 | 6 | # COMMAND ---------- 7 | 8 | import json 9 | def dump_json(dct,title=""): 10 | print(json.dumps(dct, indent=2)) 11 | if title: 12 | print(f"{title}:") 13 | 14 | # COMMAND ---------- 15 | 16 | def is_unity_catalog_model(model_name): 17 | return "." in model_name 18 | 19 | def split_model_uri(model_uri): 20 | toks = model_uri.split("/") 21 | return toks[1], toks[2] 22 | 23 | # COMMAND ---------- 24 | 25 | import mlflow 26 | 27 | def set_registry_uri(model_name): 28 | if model_name.startswith("models:/"): 29 | model_name = split_model_uri(model_name)[0] 30 | if is_unity_catalog_model(model_name): 31 | mlflow.set_registry_uri("databricks-uc") 32 | else: 33 | mlflow.set_registry_uri("databricks") 34 | print("mlflow.registry_uri:", mlflow.get_registry_uri()) 35 | 36 | # COMMAND ---------- 37 | 38 | def to_json_signature(signature): 39 | def _normalize(lst): 40 | import json 41 | return json.loads(lst) if lst else lst 42 | return { k:_normalize(v) for k,v in signature.items()} 43 | 44 | # COMMAND ---------- 45 | 46 | def assert_widget(value, name): 47 | if len(value.rstrip())==0: 48 | raise RuntimeError(f"ERROR: '{name}' widget is required") 49 | -------------------------------------------------------------------------------- /databricks_notebooks/tools/Get_Model_Signature.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ### Get Model Signature 3 | # MAGIC 4 | # MAGIC Get 
the signature of an MLflow model. 5 | # MAGIC 6 | # MAGIC ##### MLflow models can live in a variety of places. Sample MLflow model URIs: 7 | # MAGIC * `models:/andre_catalog.ml_models2.sklearn_wine_best/15` 8 | # MAGIC * `models:/Sklearn_Wine_best/1` 9 | # MAGIC * `runs:/030075d9727945259c7d283e47fee4a9/model` 10 | # MAGIC * `/Volumes/andre_catalog/volumes/mlflow_export_import/single/sklearn_wine_best/run/artifacts/model` 11 | # MAGIC * `/dbfs/home/first.last@databricks.com/mlflow_export_import/single/sklearn_wine_best/model` 12 | # MAGIC * `s3://my-bucket/mlflow-models/sklearn_wine_best` 13 | # MAGIC 14 | # MAGIC ##### The signature is located in the MLmodel artifact of the MLflow model. 15 | # MAGIC * For a run, you can view the signature in the "Artifacts" tab of the run UI page. 16 | # MAGIC * For a model version, you can only view (in the UI) the signature via the run. 17 | # MAGIC * To get the actual signature of the deployed model, you need to use the API method `mlflow.models.get_model_info()`. 
18 | # MAGIC 19 | # MAGIC ##### Documentation: 20 | # MAGIC * [mlflow.models.ModelSignature](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.ModelSignature) 21 | # MAGIC * [mlflow.models.get_model_info](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.get_model_info) 22 | # MAGIC 23 | # MAGIC ##### Github: 24 | # MAGIC 25 | 26 | # COMMAND ---------- 27 | 28 | # MAGIC %md #### Setup 29 | 30 | # COMMAND ---------- 31 | 32 | # MAGIC %run ./Common 33 | 34 | # COMMAND ---------- 35 | 36 | dbutils.widgets.text("Model URI", "") 37 | model_uri = dbutils.widgets.get("Model URI") 38 | print("model_uri:", model_uri) 39 | 40 | # COMMAND ---------- 41 | 42 | assert_widget(model_uri, "Model URI") 43 | set_registry_uri(model_uri) 44 | 45 | # COMMAND ---------- 46 | 47 | # MAGIC %md #### Get `model_info.signature` 48 | 49 | # COMMAND ---------- 50 | 51 | from mlflow_export_import.tools.signature_utils import get_model_signature 52 | signature = get_model_signature(model_uri) 53 | signature 54 | 55 | # COMMAND ---------- 56 | 57 | if signature: 58 | dump_json(signature) 59 | else: 60 | print(f"Model '{model_uri}' does not have a signature") 61 | dbutils.notebook.exit(None) 62 | -------------------------------------------------------------------------------- /databricks_notebooks/tools/List_Model_Versions_Without_Signature.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ### List Model Versions Without Signature 3 | # MAGIC 4 | # MAGIC List Workspace Model Registry model version that don't have a signature. 5 | # MAGIC 6 | # MAGIC #### Widgets 7 | # MAGIC * `1. Filter` - Filter is for [search_registered_models()](https://mlflow.org/docs/latest/python_api/mlflow.client.html#mlflow.client.MlflowClient.search_registered_models) such as `name like 'Sklearn_Wine%'` 8 | # MAGIC * `2. 
Output file` - save output as CSV file 9 | 10 | # COMMAND ---------- 11 | 12 | # MAGIC %run ./Common 13 | 14 | # COMMAND ---------- 15 | 16 | import mlflow 17 | mlflow_client = mlflow.MlflowClient() 18 | mlflow.set_registry_uri("databricks") 19 | print("mlflow.version:", mlflow.__version__) 20 | 21 | # COMMAND ---------- 22 | 23 | dbutils.widgets.text("1. Filter","name like 'Sklearn_Wine%'") 24 | filter = dbutils.widgets.get("1. Filter") 25 | filter = filter or None 26 | 27 | dbutils.widgets.text("2. Output file","") 28 | output_file = dbutils.widgets.get("2. Output file") 29 | 30 | print("filter:", filter) 31 | print("output_file:", output_file) 32 | 33 | # COMMAND ---------- 34 | 35 | from mlflow_export_import.tools.list_model_versions_without_signatures import as_pandas_df 36 | 37 | df = as_pandas_df(filter) 38 | display(df) 39 | 40 | # COMMAND ---------- 41 | 42 | if output_file: 43 | with open(output_file, "w", encoding="utf-8") as f: 44 | df.to_csv(f, index=False) 45 | -------------------------------------------------------------------------------- /databricks_notebooks/tools/_README.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## MLflow Export Import - Tools 3 | # MAGIC 4 | # MAGIC ##### Model Signature Tools 5 | # MAGIC * [Get_Model_Signature]($Get_Model_Signature) - Get the model signature for a model URI. 6 | # MAGIC * [Set_Model_Signature]($Set_Model_Signature) - Set the model signature for a run's MLflow model. 
7 | # MAGIC * [List_Model_Versions_Without_Signature]($List_Model_Versions_Without_Signature) 8 | # MAGIC * [Common]($Common) 9 | # MAGIC 10 | # MAGIC ##### Documentation: 11 | # MAGIC * [mlflow.models.ModelSignature](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.ModelSignature) 12 | # MAGIC * [mlflow.models.get_model_info](https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.get_model_info) 13 | # MAGIC 14 | # MAGIC ##### Github 15 | # MAGIC * https://github.com/mlflow/mlflow-export-import/blob/master/mlflow_export_import/tools/signature_utils.py 16 | # MAGIC 17 | # MAGIC ##### Last updated: _2024-07-03_ 18 | -------------------------------------------------------------------------------- /diagrams/Copy_Model_Version_NonUC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/diagrams/Copy_Model_Version_NonUC.png -------------------------------------------------------------------------------- /diagrams/Copy_Model_Version_UC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/diagrams/Copy_Model_Version_UC.png -------------------------------------------------------------------------------- /diagrams/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/diagrams/architecture.png -------------------------------------------------------------------------------- /mlflow_export_import/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import mlflow 4 | from mlflow_export_import import version 5 | 6 | __version__ = version.__version__ 
7 | 8 | # monkey patch mlflow.tracking.MlflowClient to return tracking URI in __repr__ 9 | 10 | def add_repr_to_MlflowClient(): 11 | def custom_repr(self): 12 | try: 13 | msg = { "tracking_uri": self.tracking_uri, "registry_uri": self._registry_uri } 14 | except AttributeError as e: 15 | msg = { "error": str(e) } 16 | return json.dumps(msg) 17 | mlflow.client.MlflowClient.__repr__ = custom_repr 18 | 19 | 20 | add_repr_to_MlflowClient() 21 | -------------------------------------------------------------------------------- /mlflow_export_import/bulk/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/bulk/__init__.py -------------------------------------------------------------------------------- /mlflow_export_import/bulk/bulk_utils.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.common.iterators import SearchRegisteredModelsIterator 2 | from mlflow_export_import.common.iterators import SearchExperimentsIterator 3 | 4 | 5 | def _get_list(names, func_list): 6 | """ 7 | Returns a list of entities specified by the 'names' filter. 8 | :param names: Filter of desired list of entities. Can be: "all", comma-delimited string, list of entities or trailing wildcard "*". 9 | :param func_list: Function that lists the entities primary keys - for experiments it is experiment_id, for registered models it is model name. 10 | :return: List of entities. 
11 | """ 12 | if isinstance(names, str): 13 | if names == "all": 14 | return func_list() 15 | elif names.endswith("*"): 16 | prefix = names[:-1] 17 | return [ x for x in func_list() if x.startswith(prefix) ] 18 | else: 19 | return names.split(",") 20 | else: 21 | return names 22 | 23 | 24 | def get_experiment_ids(mlflow_client, experiment_ids): 25 | def list_entities(): 26 | return [ exp.experiment_id for exp in SearchExperimentsIterator(mlflow_client) ] 27 | return _get_list(experiment_ids, list_entities) 28 | 29 | 30 | def get_model_names(mlflow_client, model_names): 31 | def list_entities(): 32 | return [ model.name for model in SearchRegisteredModelsIterator(mlflow_client) ] 33 | return _get_list(model_names, list_entities) 34 | -------------------------------------------------------------------------------- /mlflow_export_import/bulk/experiments_merge_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Merge existing experiments.json generated by export_models with 3 | new export_experiments when both are called by export_all 4 | """ 5 | 6 | def merge_info(info1, info2): 7 | info = info1.copy() 8 | 9 | # export_file_version 2 change 10 | if "status" in info: 11 | status = info["status"] 12 | status2 = info2["status"] 13 | else: 14 | status, status2 = info, info2 15 | 16 | status["duration"] += status2["duration"] 17 | status["duration"] = round(status["duration"],1) 18 | status["experiments"] += status2["experiments"] 19 | status["total_runs"] += status2["total_runs"] 20 | status["ok_runs"] += status2["ok_runs"] 21 | status["failed_runs"] += status2["failed_runs"] 22 | 23 | return info 24 | 25 | 26 | def merge_mlflow(mlflow1, mlflow2): 27 | return { "experiments": mlflow1["experiments"] + mlflow2["experiments"] } 28 | -------------------------------------------------------------------------------- /mlflow_export_import/bulk/model_utils.py: 
-------------------------------------------------------------------------------- 1 | import mlflow 2 | 3 | from mlflow_export_import.common import utils 4 | from mlflow_export_import.bulk import bulk_utils 5 | from mlflow_export_import.common.iterators import SearchModelVersionsIterator 6 | 7 | _logger = utils.getLogger(__name__) 8 | 9 | 10 | def get_experiments_runs_of_models(client, model_names, show_experiments=False, show_runs=False): 11 | """ Get experiments and runs to export. """ 12 | model_names = bulk_utils.get_model_names(client, model_names) 13 | _logger.info(f"{len(model_names)} Models:") 14 | for model_name in model_names: 15 | _logger.info(f" {model_name}") 16 | exps_and_runs = {} 17 | for model_name in model_names: 18 | versions = SearchModelVersionsIterator(client, filter=f"name='{model_name}'") 19 | for vr in versions: 20 | try: 21 | run = client.get_run(vr.run_id) 22 | exps_and_runs.setdefault(run.info.experiment_id,[]).append(run.info.run_id) 23 | except mlflow.exceptions.MlflowException as e: 24 | if e.error_code == "RESOURCE_DOES_NOT_EXIST": 25 | _logger.warning(f"run '{vr.run_id}' of version {vr.version} of model '{model_name}' does not exist") 26 | else: 27 | _logger.warning(f"run '{vr.run_id}' of version {vr.version} of model '{model_name}': Error.code: {e.error_code}. 
Error.message: {e.message}") 28 | if show_experiments: 29 | show_experiments_runs_of_models(exps_and_runs, show_runs) 30 | return exps_and_runs 31 | 32 | 33 | def show_experiments_runs_of_models(exps_and_runs, show_runs=False): 34 | _logger.info("Experiments for models:") 35 | for k,v in exps_and_runs.items(): 36 | _logger.info(f" Experiment: {k}") 37 | for x in v: 38 | if show_runs: _logger.info(f" {x}") 39 | -------------------------------------------------------------------------------- /mlflow_export_import/bulk/rename_utils.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.common import MlflowExportImportException 2 | from mlflow_export_import.common import utils 3 | from mlflow_export_import.common.filesystem import mk_local_path 4 | 5 | _logger = utils.getLogger(__name__) 6 | 7 | 8 | def read_rename_file(path): 9 | with open(mk_local_path(path), "r", encoding="utf-8") as f: 10 | dct = {} 11 | for line in f: 12 | toks = line.rstrip().split(",") 13 | dct[toks[0]] = toks[1] 14 | return dct 15 | 16 | 17 | def rename(name, replacements, object_name="object"): 18 | if not replacements: 19 | return name 20 | for k,v in replacements.items(): 21 | if k != "" and name.startswith(k): 22 | new_name = name.replace(k,v) 23 | _logger.info(f"Renaming {object_name} '{name}' to '{new_name}'") 24 | return new_name 25 | return name 26 | 27 | 28 | def get_renames(filename_or_dict): 29 | if filename_or_dict is None: 30 | return None 31 | if isinstance(filename_or_dict,str): 32 | return read_rename_file(filename_or_dict) 33 | elif isinstance(filename_or_dict, dict): 34 | return filename_or_dict 35 | else: 36 | raise MlflowExportImportException(f"Unknown name replacement type '{type(filename_or_dict)}'", http_status_code=400) 37 | -------------------------------------------------------------------------------- /mlflow_export_import/client/__init__.py: 
-------------------------------------------------------------------------------- 1 | USER_AGENT = "mlflow-export-import/1.0.0" 2 | -------------------------------------------------------------------------------- /mlflow_export_import/client/client_utils.py: -------------------------------------------------------------------------------- 1 | import mlflow 2 | from . http_client import HttpClient, MlflowHttpClient, DatabricksHttpClient 3 | 4 | 5 | def create_http_client(mlflow_client, model_name=None): 6 | """ 7 | Create MLflow HTTP client from MlflowClient. 8 | If model_name is a Unity Catalog (UC) model, the returned client is UC-enabled. 9 | """ 10 | from mlflow_export_import.common import model_utils 11 | creds = mlflow_client._tracking_client.store.get_host_creds() 12 | if model_name and model_utils.is_unity_catalog_model(model_name): 13 | return HttpClient("api/2.0/mlflow/unity-catalog", creds.host, creds.token) 14 | else: 15 | return MlflowHttpClient(creds.host, creds.token) 16 | 17 | 18 | def create_dbx_client(mlflow_client): 19 | """ 20 | Create Databricks HTTP client from MlflowClient. 21 | """ 22 | creds = mlflow_client._tracking_client.store.get_host_creds() 23 | return DatabricksHttpClient(creds.host, creds.token) 24 | 25 | 26 | def create_mlflow_client(): 27 | """ 28 | Create MLflowClient. If MLFLOW_TRACKING_URI is UC, then set MlflowClient.tracking_uri to the non-UC variant. 
29 | """ 30 | registry_uri = mlflow.get_registry_uri() 31 | if registry_uri: 32 | tracking_uri = mlflow.get_tracking_uri() 33 | nonuc_tracking_uri = tracking_uri.replace("databricks-uc","databricks") # NOTE: legacy 34 | return mlflow.MlflowClient(nonuc_tracking_uri, registry_uri) 35 | else: 36 | return mlflow.MlflowClient() 37 | -------------------------------------------------------------------------------- /mlflow_export_import/client/databricks_cli_utils.py: -------------------------------------------------------------------------------- 1 | from databricks_cli.configure import provider 2 | from mlflow.utils.databricks_utils import is_in_databricks_runtime 3 | 4 | 5 | def get_host_token_for_profile(profile=None): 6 | """ 7 | :param profile: Databricks profile as in ~/.databrickscfg or None for the default profile 8 | :return: tuple of (host, token) from the ~/.databrickscfg profile 9 | """ 10 | if profile: 11 | cfg = provider.get_config_for_profile(profile) 12 | if not cfg.host and is_in_databricks_runtime(): 13 | cfg = provider.get_config() 14 | else: 15 | cfg = provider.get_config() 16 | return (cfg.host, cfg.token) 17 | 18 | 19 | if __name__ == "__main__": 20 | import sys 21 | profile = sys.argv[1] if len(sys.argv) > 1 else None 22 | print("profile:",profile) 23 | tuple = get_host_token_for_profile(profile) 24 | print("host and token:", tuple) 25 | -------------------------------------------------------------------------------- /mlflow_export_import/client/databricks_utils.py: -------------------------------------------------------------------------------- 1 | from databricks_cli.sdk.api_client import ApiClient 2 | from mlflow_export_import.client import mlflow_auth_utils 3 | 4 | 5 | def get_api_client(): 6 | (host, token) = mlflow_auth_utils.get_mlflow_host_token() 7 | return ApiClient(None, None, host, token) 8 | -------------------------------------------------------------------------------- /mlflow_export_import/client/mlflow_auth_utils.py: 
def get_mlflow_host_token():
    """
    Resolve the host and token for the current MLflow tracking URI.

    A URI starting with 'http' is returned as (uri, None).
    A URI starting with 'databricks' ('databricks' or 'databricks://MY_PROFILE') is
    resolved to (host, token) through the Databricks CLI profile lookup.

    :return: Tuple of (host, token). (None, None) if the Databricks profile lookup fails.
    :raises MlflowExportImportException: If the URI is empty or starts with neither
        'http' nor 'databricks'.
    """

    import mlflow
    tracking_uri = mlflow.tracking.get_tracking_uri()
    if not tracking_uri:
        _raise_exception(tracking_uri)
    if not tracking_uri.startswith("databricks"):
        if tracking_uri.startswith("http"):
            return (tracking_uri, None)
        _raise_exception(tracking_uri)

    try:
        # 'databricks://MY_PROFILE' -> 'MY_PROFILE'; plain 'databricks' -> default profile
        parts = tracking_uri.split("//")
        profile = parts[1] if len(parts) > 1 else None
        return databricks_cli_utils.get_host_token_for_profile(profile)
    # databricks_cli.utils.InvalidConfigurationError
    # requests.exceptions.InvalidSchema(f"No connection adapters were found for {url!r}")
    except Exception as err:
        # Best effort: log the failure and let the caller handle the missing host/token.
        _logger.warning(err)
        return (None, None)
3 | """ 4 | 5 | from mlflow.tracking.request_header.abstract_request_header_provider import RequestHeaderProvider 6 | from mlflow_export_import.client import USER_AGENT 7 | 8 | class MlflowExportImportRequestHeaderProvider(RequestHeaderProvider): 9 | def in_context(self): 10 | return True 11 | def request_headers(self): 12 | return { "User-Agent": USER_AGENT } 13 | -------------------------------------------------------------------------------- /mlflow_export_import/common/__init__.py: -------------------------------------------------------------------------------- 1 | from mlflow.exceptions import MlflowException 2 | import json 3 | 4 | class MlflowExportImportException(Exception): 5 | DEFAULT_HTTP_STATUS_CODE = -1 6 | 7 | def __init__(self, ex, message=None, http_status_code=DEFAULT_HTTP_STATUS_CODE, **kwargs): 8 | self.message = str(ex) # if arg 'message' is not None else is src_exception's message 9 | self.src_message = None # message from source exception if arg 'message' is not None 10 | self.src_exception = None # source exception if exists 11 | self.http_status_code = http_status_code 12 | custom_kwargs = {} 13 | if issubclass(ex.__class__,Exception): 14 | self.src_exception = ex 15 | if issubclass(ex.__class__,MlflowException): 16 | self.http_status_code = ex.get_http_status_code() 17 | custom_kwargs = { "mlflow_error_code": ex.error_code } 18 | if message: 19 | self.message = message 20 | self.src_message = str(ex) 21 | 22 | self.kwargs = { "message": self.message, "http_status_code": self.http_status_code } 23 | self.kwargs = {**self.kwargs, **kwargs, **custom_kwargs} 24 | if self.src_message: 25 | self.kwargs["src_message"] = self.src_message 26 | 27 | def _add(self, dct, k, v): 28 | if v: dct[k] = v 29 | 30 | def __str__(self): 31 | return json.dumps(self.kwargs) 32 | -------------------------------------------------------------------------------- /mlflow_export_import/common/default_logging_config.py: 
-------------------------------------------------------------------------------- 1 | config = { 2 | "version": 1, 3 | "formatters": { 4 | "simple": { 5 | "format": "%(asctime)s - %(levelname)s - %(message)s", 6 | "datefmt": "%d-%b-%y %H:%M:%S" 7 | } 8 | }, 9 | "handlers": { 10 | "console": { 11 | "class": "logging.StreamHandler", 12 | "level": "DEBUG", 13 | "formatter": "simple", 14 | "stream": "ext://sys.stdout" 15 | }, 16 | "file": { 17 | "class": "logging.FileHandler", 18 | "filename": "/tmp/mlflow-export-import.log", 19 | "level": "INFO", 20 | "formatter": "simple" 21 | } 22 | }, 23 | "loggers": { 24 | "sampleLogger": { 25 | "level": "DEBUG", 26 | "handlers": [ 27 | "console" 28 | ], 29 | "propagate": False 30 | } 31 | }, 32 | "root": { 33 | "level": "DEBUG", 34 | "handlers": [ 35 | "console", 36 | "file" 37 | ] 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /mlflow_export_import/common/dump_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def dump_mlflow_client(client, msg=""): 5 | import mlflow 6 | print(f"MlflowClient {msg}:") 7 | print(" client.tracking_uri: ", client.tracking_uri) 8 | print(" client._registry_uri:", client._registry_uri) 9 | creds = client._tracking_client.store.get_host_creds() 10 | dump_obj(creds, "Credentials", " ") 11 | print(" mlflow fluent:") 12 | print(" mlflow.tracking_uri: ", mlflow.get_tracking_uri()) 13 | print(" mlflow.registry_uri: ", mlflow.get_registry_uri()) 14 | 15 | 16 | def dump_obj(obj, title=None, indent=""): 17 | if isinstance(obj, dict) or isinstance(obj, list): 18 | #dump_dict(obj, title) 19 | dump_as_json(obj, title) 20 | return 21 | if obj: 22 | title = title if title else type(obj).__name__ 23 | print(f"{indent}{title}") 24 | for k,v in obj.__dict__.items(): 25 | print(f"{indent} {k}: {v}") 26 | else: 27 | title = title if title else "Object" 28 | title = f"{title}: None" 29 | 
def mk_dbfs_path(path):
    """ Convert a local '/dbfs' mount path to its 'dbfs:' URI form. """
    return path.replace("/dbfs","dbfs:")


def mk_local_path(path):
    """ Convert a 'dbfs:' URI to its local '/dbfs' mount path. Other paths are returned unchanged. """
    return path.replace("dbfs:","/dbfs")


def exists(path):
    """
    Check whether a path exists on the local filesystem.

    :param path: Local path or 'dbfs:' URI (translated to its '/dbfs' mount path first).
    :return: True if the path exists, False otherwise.
    """
    # The result must be returned; without 'return' the function always yields None.
    return os.path.exists(mk_local_path(path))
def find_run_model_names(mlflow_client, run_id):
    """
    Return a list of model artifact directory paths of an MLflow run.
    Looks for any directory with an 'MLmodel' file and returns its directory.

    :param mlflow_client: Client object providing list_artifacts(run_id, path).
    :param run_id: Run ID.
    :return: List of directory paths; "" denotes an 'MLmodel' file at the artifact root.
    """
    matches = find_artifacts(mlflow_client, run_id, "", "MLmodel")
    # Strip only the trailing file name. Chained str.replace() calls would also
    # mangle directory names that merely contain the text 'MLmodel'.
    return [ m.rpartition("/")[0] for m in matches ]


def find_artifacts(mlflow_client, run_id, path, target, max_level=sys.maxsize):
    """
    Recursively search the artifacts of a run for entries whose base name equals 'target'.

    :param mlflow_client: Client object providing list_artifacts(run_id, path).
    :param run_id: Run ID.
    :param path: Relative artifact path to start the search from.
    :param target: File or directory name to look for.
    :param max_level: Maximum number of artifact levels to list.
    :return: List of matching artifact paths.
    """
    return _find_artifacts(mlflow_client, run_id, path, target, max_level, 0, [])


def _find_artifacts(mlflow_client, run_id, path, target, max_level, level, matches):
    """ Recursive worker for find_artifacts(): appends hits to 'matches' and returns it. """
    if level+1 > max_level:
        return matches
    for art in mlflow_client.list_artifacts(run_id, path):
        filename = os.path.basename(art.path)
        if filename == target:
            matches.append(art.path)
        # NOTE: as of mlflow 2.11.x a new directory 'metadata' appears with duplicate
        # MLmodel and related files in it, so that directory is not descended into.
        if art.is_dir and filename != "metadata":
            _find_artifacts(mlflow_client, run_id, art.path, target, max_level, level+1, matches)
    return matches
def _create_default_log_config(output_path=None, log_format=None):
    """
    Build a logging dictConfig from the package default configuration.

    :param output_path: Path of the output log file. If not set, no file handler is configured.
    :param log_format: Optional format string that overrides the 'simple' formatter's format.
    :return: New config dict; the module-level default config is left untouched.
    """
    import copy
    from mlflow_export_import.common.default_logging_config import config
    # Deep copy: a shallow copy shares the nested dicts and lists with the
    # module-level default, so the edits below would permanently alter it
    # (and a second call would fail on the already removed 'file' entry).
    cfg = copy.deepcopy(config)
    if log_format:
        cfg["formatters"]["simple"]["format"] = log_format

    if output_path:
        cfg["handlers"]["file"]["filename"] = output_path
    else:
        cfg["root"]["handlers"].remove("file")
        # Drop the handler definition as well: dictConfig instantiates every entry
        # under 'handlers', which would open the default log file even though
        # no logger writes to it.
        del cfg["handlers"]["file"]

    return cfg
class ExportTags:
    """ Source export tags prefixes. """
    PREFIX_ROOT = "mlflow_exim"
    PREFIX_FIELD = f"{PREFIX_ROOT}.field"
    PREFIX_RUN_INFO = f"{PREFIX_ROOT}.run_info"
    PREFIX_MLFLOW_TAG = f"{PREFIX_ROOT}.mlflow_tag"


def fmt_timestamps(tag, dct, tags):
    """
    Add a timestamp field of 'dct' to 'tags' twice: the raw value as a string under
    '<PREFIX_FIELD>.<tag>' and a formatted form under '<PREFIX_FIELD>._<tag>'.
    """
    from mlflow_export_import.common import timestamp_utils
    ts = dct[tag]
    tags[f"{ExportTags.PREFIX_FIELD}.{tag}"] = str(ts)
    tags[f"{ExportTags.PREFIX_FIELD}._{tag}"] = timestamp_utils.fmt_ts_millis(ts, True)


def set_source_tags_for_field(dct, tags):
    """
    Add an object's fields as source tags.
    Every key of 'dct' except 'tags' is written to 'tags' as '<PREFIX_FIELD>.<key>' with a string value.
    """
    for k,v in dct.items():
        if k != "tags":
            tags[f"{ExportTags.PREFIX_FIELD}.{k}"] = str(v)


def mk_source_tags_mlflow_tag(tags):
    """
    Create 'mlflow_exim.mlflow_tag' source tags from 'mlflow.' tags.

    :param tags: Dict of tag key -> value.
    :return: Dict holding only the tags whose key starts with 'mlflow.', re-keyed as
        '<PREFIX_MLFLOW_TAG>.<key without the leading mlflow.>' with string values.
    """
    prefix = "mlflow."
    # Slice off only the leading prefix; str.replace() would also delete any
    # later 'mlflow.' inside the key.
    return {
        f"{ExportTags.PREFIX_MLFLOW_TAG}.{k[len(prefix):]}": str(v)
        for k,v in tags.items() if k.startswith(prefix)
    }
def utc_str_to_millis(sdt):
    """ Convert UTC string to epoch milliseconds. """
    return utc_str_to_seconds(sdt) * 1000


def utc_str_to_seconds(sdt):
    """
    Convert UTC string to epoch seconds.

    :param sdt: ISO-format date/time string. A string without a UTC offset is taken as UTC;
        a string with an offset is honored as given.
    :return: Seconds since 1970-01-01 00:00:00 UTC as a float.
    """
    from datetime import timezone
    dt = datetime.fromisoformat(sdt)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    # Both operands are offset-aware, so strings carrying an explicit offset no
    # longer fail with a naive/aware subtraction TypeError.
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    return (dt - epoch).total_seconds()


def adjust_timestamps(dct, keys):
    """
    Add human readable keys for millisecond timestamps.
    For each key present in 'dct', a formatted companion entry is stored under '_<key>'.
    """
    keys = set(keys)
    for key in keys:
        if key in dct:
            dct[f"_{key}"] = fmt_ts_millis(dct[key])


def format_seconds(seconds):
    """
    Format a duration in seconds as minutes and seconds, e.g. '6m 40s' or '40s'.
    Minutes are not rolled over into hours.
    """
    if seconds >= 60:
        # Round the total before splitting so the seconds part can never round
        # up to 60 (e.g. 119.6 gives '2m 0s', not '1m 60s').
        minutes, secs = divmod(round(seconds), 60)
        return f"{minutes}m {secs}s"
    # Sub-minute durations keep a fraction; very short ones get one extra digit.
    prec = 2 if seconds < .1 else 1
    seconds = round(seconds,prec)
    return f"{seconds}s"
click.option("--dst-registry-uri", 53 | help="Destination MLflow registry URI.", 54 | type=str, 55 | required=True 56 | )(function) 57 | return function 58 | 59 | def opt_dst_experiment_name(function): 60 | function = click.option("--dst-experiment-name", 61 | help="Destination experiment name. If specified, will copy old version's run to a new run. Else, use old version's run for new version.", 62 | type=str, 63 | required=False 64 | )(function) 65 | return function 66 | 67 | def opt_copy_permissions(function): 68 | function = click.option("--copy-permissions", 69 | help="Copy model permissions (only if target model does not exist).", 70 | type=bool, 71 | default=False, 72 | show_default=True 73 | )(function) 74 | return function 75 | 76 | def opt_copy_stages_and_aliases(function): 77 | function = click.option("--copy-stages-and-aliases", 78 | help="Import stages and aliases.", 79 | type=bool, 80 | default=False, 81 | show_default=True 82 | )(function) 83 | return function 84 | 85 | def opt_copy_lineage_tags(function): 86 | function = click.option("--copy-lineage-tags", 87 | help="Add source lineage info to destination version as tags starting with 'mlflow_exim'.", 88 | type=bool, 89 | default=False, 90 | show_default=True 91 | )(function) 92 | return function 93 | -------------------------------------------------------------------------------- /mlflow_export_import/copy/copy_run.py: -------------------------------------------------------------------------------- 1 | import click 2 | import tempfile 3 | import mlflow 4 | 5 | from mlflow_export_import.run.export_run import export_run 6 | from mlflow_export_import.run.import_run import import_run 7 | from mlflow_export_import.common import utils 8 | from mlflow_export_import.common.click_options import opt_run_id, opt_experiment_name 9 | from . import copy_utils 10 | from . 
def get_model_name(artifact_path):
    """
    Return 'my-model' from '/foo/artifacts/my-model'

    :param artifact_path: Artifact location containing an 'artifacts' path segment.
    :return: The part of the path after the first 'artifacts' and its following separator.
        If 'artifacts' does not occur, the path is returned unchanged.
        NOTE(review): fallback chosen to avoid returning an arbitrary slice - confirm it suits callers.
    """
    _, marker, tail = artifact_path.partition("artifacts")
    if not marker:
        # str.find() would give -1 here and the old index arithmetic sliced the
        # path at an unrelated position.
        return artifact_path
    return tail[1:]  # skip the separator that follows 'artifacts'


def create_registered_model(client, model_name):
    """
    Return True if model already exists, False otherwise.
    Creates the registered model when it does not exist; any other MLflow error is re-raised.
    """
    try:
        client.create_registered_model(model_name)
        return False
    except MlflowException as e: # NOTE: for non-UC is RestException
        if e.error_code != "RESOURCE_ALREADY_EXISTS":
            raise
        return True


def create_experiment(client, experiment_name):
    """
    Create an experiment, or look it up by name if it already exists.

    :return: Experiment ID.
    """
    try:
        return client.create_experiment(experiment_name)
    except MlflowException as e:
        if e.error_code != "RESOURCE_ALREADY_EXISTS":
            raise
        experiment = client.get_experiment_by_name(experiment_name)
        return experiment.experiment_id


def add_tag(src_tags, dst_tags, key, prefix):
    """ Copy src_tags[key] to dst_tags['<prefix>.<key>'] if the source value is present and not None. """
    val = src_tags.get(key)
    if val is not None:
        dst_tags[f"{prefix}.{key}"] = val


def obj_to_dict(obj):
    """ Return the object's attribute dict; model versions get their aliases converted to plain strings. """
    if isinstance(obj, mlflow.entities.model_registry.model_version.ModelVersion):
        dct = adjust_model_version(obj.__dict__)
    else:
        dct = obj.__dict__
    return dct


def adjust_model_version(vr):
    """ Copy a model version attribute dict, turning the '_aliases' container into a list of strings. """
    dct = {}
    for k,v in vr.items():
        if k == "_aliases": # type - google._upb._message.RepeatedScalarContainer
            dct[k] = [ str(x) for x in v ]
        else:
            dct[k] = v
    return dct


def mk_client(tracking_uri, registry_uri=None):
    """
    Create an MlflowClient for the given tracking and registry URIs.

    :param tracking_uri: Tracking URI; 'databricks-uc' is mapped to 'databricks'. May be None.
    :param registry_uri: Registry URI. May be None.
    :return: MlflowClient; the default client if neither URI is set.
    """
    if not tracking_uri and not registry_uri:
        return mlflow.MlflowClient()
    # Only a registry URI may have been supplied, so guard the string operation.
    if tracking_uri:
        tracking_uri = tracking_uri.replace("databricks-uc", "databricks")
    return mlflow.MlflowClient(tracking_uri, registry_uri)
def get_nested_runs(client, runs):
    """
    Return the nested run descendants for a list of runs.

    :param client: MLflow client passed through to the nested run lookups.
    :param runs: List of runs (objects exposing 'info.run_id' and 'info.experiment_id').
    :return: List of runs. When calling Databricks, the result of the 'mlflow.rootRunId'
        tag search; otherwise the input runs followed by their nested runs.
    """
    if utils.calling_databricks():
        return get_nested_runs_by_rootRunId(client, runs)
    else:
        # Imported lazily, only on the OSS MLflow path.
        from . import oss_nested_runs_utils
        return runs + oss_nested_runs_utils.get_nested_runs(client, runs)
22 | """ 23 | descendant_runs= [] 24 | for run in runs: 25 | filter = f"tags.mlflow.rootRunId = '{run.info.run_id}'" 26 | _descendant_runs = list(SearchRunsIterator(client, run.info.experiment_id, filter=filter)) 27 | if _descendant_runs: 28 | descendant_runs += _descendant_runs 29 | else: 30 | descendant_runs.append(run) 31 | return descendant_runs 32 | -------------------------------------------------------------------------------- /mlflow_export_import/experiment/oss_nested_runs_utils.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.common.iterators import SearchRunsIterator 2 | 3 | 4 | def get_nested_runs(client, runs, parent_runs=None): 5 | nested_runs = [] 6 | for run in runs: 7 | nested_runs += _get_nested_runs_for_run(client, run, parent_runs) 8 | return nested_runs 9 | 10 | def get_nested_runs_for_experiment(client, experiment_id): 11 | filter = f"tags.mlflow.parentRunId like '%'" 12 | return list(SearchRunsIterator(client, experiment_id, filter=filter)) 13 | 14 | 15 | def _get_nested_runs_for_run(client, run, parent_runs=None): 16 | nested_runs = _build_nested_runs(client, run.info.experiment_id, parent_runs) 17 | run_ids = _get_run_ids(run.info.run_id, nested_runs) 18 | return [ client.get_run(run_id) for run_id in run_ids ] 19 | 20 | def _get_run_ids(root_id, nested_runs): 21 | nested_run_ids = nested_runs.get(root_id) 22 | if not nested_run_ids: 23 | return set() 24 | all_nested_run_ids = nested_run_ids 25 | for run_id in nested_run_ids: 26 | _nested_run_ids = _get_run_ids(run_id, nested_runs) 27 | if _nested_run_ids: 28 | all_nested_run_ids += _nested_run_ids 29 | return set(all_nested_run_ids) 30 | 31 | def _build_nested_runs(client, experiment_id, parent_runs=None): 32 | """ 33 | Flat dict of all descendant run IDs and their child runs 34 | dict: run_id: list of run_id's child runs (per mlflow.parentRunId tag) 35 | """ 36 | if not parent_runs: 37 | parent_runs = 
get_nested_runs_for_experiment(client, experiment_id) 38 | dct = { run.info.run_id:run.data.tags["mlflow.parentRunId"] for run in parent_runs } 39 | nested_runs = {} 40 | for run_id,parent_id in dct.items(): 41 | nested_runs.setdefault(parent_id, []).append(run_id) 42 | return nested_runs 43 | -------------------------------------------------------------------------------- /mlflow_export_import/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/model/__init__.py -------------------------------------------------------------------------------- /mlflow_export_import/model_version/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/model_version/__init__.py -------------------------------------------------------------------------------- /mlflow_export_import/model_version/click_options.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | 4 | # == Export model version 5 | 6 | def opt_version(function): 7 | function = click.option("--version", 8 | help="Registered model version.", 9 | type=str, 10 | required=True 11 | )(function) 12 | return function 13 | 14 | 15 | def opt_vrm_export_version_model(function): 16 | function = click.option("--vrm-export-version-model", 17 | help="Export the MLflow model (from model registry) of a model version.", 18 | type=bool, 19 | default=False 20 | )(function) 21 | return function 22 | 23 | def opt_vrm_model_artifact_path(function): 24 | function = click.option("--vrm-model-artifact-path", 25 | help="Destination artifact path of the Mlflow model of a model version.", 26 | type=str, 27 | required=False 28 | )(function) 29 | return function 30 | 31 | 
def opt_skip_download_run_artifacts(function): 32 | function = click.option("--skip-download-run-artifacts", 33 | help="Skip downloading run artifacts (for fine-tuned LLM models)", 34 | type=bool, 35 | default=False 36 | )(function) 37 | return function 38 | 39 | 40 | # == Import model version 41 | 42 | def opt_create_model(function): 43 | function = click.option("--create-model", 44 | help="Create registered model before creating model version.", 45 | type=bool, 46 | default=False, 47 | show_default=True 48 | )(function) 49 | return function 50 | 51 | def opt_experiment_name(function): 52 | function = click.option("--experiment-name", 53 | help="Destination experiment name for the version's run.", 54 | type=str, 55 | required=True 56 | )(function) 57 | return function 58 | 59 | def opt_import_stages_and_aliases(function): 60 | function = click.option("--import-stages-and-aliases", 61 | help="Import stages and aliases.", 62 | type=bool, 63 | default=False, 64 | show_default=True 65 | )(function) 66 | return function 67 | 68 | def opt_import_metadata(function): 69 | function = click.option("--import-metadata", 70 | help="Import registered model and experiment metadata (description and tags).", 71 | type=bool, 72 | default=False, 73 | show_default=True 74 | )(function) 75 | return function 76 | -------------------------------------------------------------------------------- /mlflow_export_import/notebook/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/notebook/__init__.py -------------------------------------------------------------------------------- /mlflow_export_import/notebook/download_notebook.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads a Databricks notebook with optional revision. 
3 | """ 4 | 5 | import os 6 | import json 7 | import click 8 | 9 | from mlflow_export_import.common.click_options import opt_output_dir 10 | from mlflow_export_import.common import utils, io_utils 11 | from mlflow_export_import.common import MlflowExportImportException 12 | from mlflow_export_import.client.http_client import DatabricksHttpClient 13 | 14 | _logger = utils.getLogger(__name__) 15 | 16 | 17 | def download_notebook(output_dir, notebook_workspace_path, revision_id, notebook_formats, dbx_client): 18 | notebook_dir = os.path.join(output_dir) 19 | os.makedirs(notebook_dir, exist_ok=True) 20 | for format in notebook_formats: 21 | _download_notebook(notebook_workspace_path, notebook_dir, format, format.lower(), revision_id, dbx_client) 22 | 23 | 24 | def _download_notebook(notebook_workspace_path, output_dir, format, extension, revision_id, dbx_client): 25 | params = { 26 | "path": notebook_workspace_path, 27 | "direct_download": True, 28 | "format": format 29 | } 30 | if revision_id: 31 | params ["revision"] = { "revision_timestamp": revision_id } # NOTE: not publicly documented 32 | notebook_name = os.path.basename(notebook_workspace_path) 33 | try: 34 | rsp = dbx_client._get("workspace/export", json.dumps(params)) 35 | notebook_path = os.path.join(output_dir, f"{notebook_name}.{extension}") 36 | io_utils.write_file(notebook_path, rsp.content) 37 | except MlflowExportImportException as e: 38 | _logger.warning(f"Cannot download notebook '{notebook_workspace_path}'. {e}") 39 | 40 | 41 | @click.command() 42 | @opt_output_dir 43 | @click.option("--notebook", 44 | help="Notebook path.", 45 | type=str, 46 | required=True 47 | ) 48 | @click.option("--revision", 49 | help="Notebook revision. If not specified will download the latest revision.", 50 | type=str, 51 | required=False 52 | ) 53 | @click.option("--notebook-formats", 54 | help="Databricks notebook formats. 
Values are SOURCE, HTML, JUPYTER or DBC (comma seperated).", 55 | type=str, 56 | default="SOURCE", 57 | show_default=True 58 | ) 59 | def main(output_dir, notebook, revision, notebook_formats): 60 | _logger.info("Options:") 61 | for k,v in locals().items(): 62 | _logger.info(f" {k}: {v}") 63 | dbx_client = DatabricksHttpClient() 64 | notebook_formats = utils.string_to_list(notebook_formats) 65 | download_notebook(output_dir, notebook, revision, notebook_formats, dbx_client) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /mlflow_export_import/run/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /mlflow_export_import/run/run_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import tempfile 4 | from mlflow_export_import.common import mlflow_utils, io_utils 5 | from mlflow_export_import.common.find_artifacts import find_run_model_names 6 | 7 | def get_model_name(artifact_path): 8 | idx = artifact_path.find("artifacts") 9 | idx += len("artifacts") + 1 10 | return artifact_path[idx:] 11 | 12 | 13 | def update_mlmodel_run_id(mlflow_client, run_id): 14 | """ 15 | :param: mlflow_client 16 | :param: run_id 17 | Workaround to fix the run_id in the destination MLmodel file since there is no method to get all model artifacts of a run. 18 | Since an MLflow run does not keep track of its models, there is no method to retrieve the artifact path to all its models. 19 | This workaround recursively searches the run's root artifact directory for all MLmodel files, and assumes their directory 20 | represents a path to the model. 
21 | """ 22 | mlmodel_paths = find_run_model_names(mlflow_client, run_id) 23 | for model_path in mlmodel_paths: 24 | download_uri = f"runs:/{run_id}/{model_path}/MLmodel" 25 | local_path = mlflow_utils.download_artifacts(mlflow_client, download_uri) 26 | mlmodel = io_utils.read_file(local_path, "yaml") 27 | mlmodel["run_id"] = run_id 28 | with tempfile.TemporaryDirectory() as dir: 29 | output_path = os.path.join(dir, "MLmodel") 30 | io_utils.write_file(output_path, mlmodel, "yaml") 31 | if model_path == "MLmodel": 32 | model_path = "" 33 | mlflow_client.log_artifact(run_id, output_path, model_path) 34 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/mlflow_export_import/tools/__init__.py -------------------------------------------------------------------------------- /mlflow_export_import/tools/click_options.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | def opt_input_file(function): 4 | function = click.option("--input-file", 5 | help="Input file.", 6 | type=str, 7 | required=True 8 | )(function) 9 | return function 10 | 11 | def opt_output_file(function): 12 | function = click.option("--output-file", 13 | help="Output file.", 14 | type=str, 15 | required=False 16 | )(function) 17 | return function 18 | 19 | def opt_model_uri(function): 20 | function = click.option("--model-uri", 21 | help="Model URI such as 'models:/my_model/3' or 'runs:/73ab168e5775409fa3595157a415bb62/my_model'.", 22 | type=str, 23 | required=True 24 | )(function) 25 | return function 26 | 27 | def opt_filter(function): 28 | function = click.option("--filter", 29 | help="For OSS MLflow this is a filter for search_model_versions(), for Databricks it is for 
search_registered_models() due to Databricks MLflow search limitations.", 30 | type=str, 31 | required=False 32 | )(function) 33 | return function 34 | 35 | def opt_use_get_model_info(function): 36 | function = click.option("--use-get-model-info", 37 | help="Use mlflow.models.get_model_info() which apparently downloads *all* artifacts (quite slow for large models) instead of just downloading 'MLmodel' using mlflow.artifacts.download_artifacts().", 38 | type=bool, 39 | default=False, 40 | show_default=True 41 | )(function) 42 | return function 43 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/experimental/samples/custom_export_rewriters.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sample post-processing rewriters for models and experiments: 3 | 1. for registered model truncate versions to one 4 | 2. for experiment truncate runs to one 5 | """ 6 | 7 | import os 8 | from mlflow_export_import.common import io_utils 9 | 10 | 11 | def rewrite_model(model_dct, models_dir): 12 | """ processes model.json """ 13 | versions = model_dct["mlflow"]["registered_model"]["versions"] 14 | print(f" Original versions: {len(versions)}") 15 | versions = versions[:1] 16 | print(f" New versions: {len(versions)}") 17 | model_dct["mlflow"]["registered_model"]["versions"] = versions 18 | 19 | 20 | def rewrite_experiment(experiment_dct, experiment_dir): 21 | """ processes experiment.json """ 22 | def fmt_run(run_dct): 23 | from mlflow_export_import.common.timestamp_utils import fmt_ts_millis 24 | info = run_dct["info"] 25 | return f'run_id: {info["run_id"]} start_time: {info["start_time"]} {fmt_ts_millis(info["start_time"])}' 26 | runs = experiment_dct["mlflow"]["runs"] 27 | print(f" Original runs: {len(runs)}") 28 | 29 | # do some custom processing such as returning the latest run 30 | latest_run_dct = None 31 | for run_id in runs: 32 | path = os.path.join(experiment_dir, 
run_id, "run.json") 33 | run_dct = io_utils.read_file(path)["mlflow"] 34 | if not latest_run_dct: 35 | latest_run_dct = run_dct 36 | else if latest_run_dct is not None and latest_run_dct["info"]["start_time"] > run_dct["info"]["start_time"]: 37 | latest_run_dct = run_dct 38 | print(f" Run: {fmt_run(run_dct)}") 39 | print(f" Latest run: {fmt_run(latest_run_dct)}") 40 | runs = [ latest_run_dct ] 41 | print(f" New runs: {len(runs)}") 42 | 43 | experiment_dct["mlflow"]["runs"] = runs 44 | 45 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/get_model_signature.py: -------------------------------------------------------------------------------- 1 | """ 2 | Get the signature of an MLflow model. 3 | """ 4 | 5 | import click 6 | from mlflow_export_import.common import io_utils 7 | from mlflow_export_import.common.dump_utils import dump_as_json 8 | from . click_options import opt_model_uri, opt_output_file, opt_use_get_model_info 9 | from . signature_utils import get_model_signature 10 | 11 | 12 | @click.command() 13 | @opt_model_uri 14 | @opt_output_file 15 | @opt_use_get_model_info 16 | def main(model_uri, output_file, use_get_model_info): 17 | """ 18 | Get the signature of an MLflow model. 19 | """ 20 | print("Options:") 21 | for k,v in locals().items(): 22 | print(f" {k}: {v}") 23 | signature = get_model_signature(model_uri, use_get_model_info) 24 | if signature: 25 | print("Model Signature:") 26 | dump_as_json(signature) 27 | if output_file: 28 | io_utils.write_file(output_file, signature) 29 | else: 30 | print(f"WARNING: No model signature for '{model_uri}'") 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/list_model_versions_without_signatures.py: -------------------------------------------------------------------------------- 1 | """ 2 | List model versions without a model signature. 
3 | """ 4 | 5 | import click 6 | import pandas as pd 7 | from tabulate import tabulate 8 | import mlflow 9 | 10 | from . click_options import opt_filter, opt_output_file, opt_use_get_model_info 11 | from . tools_utils import search_model_versions 12 | from . signature_utils import get_model_signature 13 | 14 | 15 | def as_pandas_df(filter, use_get_model_info=False): 16 | client = mlflow.MlflowClient() 17 | versions = search_model_versions(client, filter) 18 | 19 | print(f"Found {len(versions)} model versions") 20 | versions_without_signatures = [] 21 | for j, vr in enumerate(versions): 22 | model_uri = f"models:/{vr.name}/{vr.version}" 23 | if j%10 == 0: 24 | print(f"Processing {j}/{len(versions)}: {model_uri}") 25 | try: 26 | signature = get_model_signature(model_uri, use_get_model_info) 27 | if not signature: 28 | versions_without_signatures.append([vr.name, vr.version, vr.run_id, ""]) 29 | except Exception as e: 30 | versions_without_signatures.append([vr.name, vr.version, vr.run_id, str(e)]) 31 | #print(f"Found {len(versions)} model versions") 32 | print(f"Found {len(versions_without_signatures)}/{len(versions)} model versions without signatures") 33 | 34 | df = pd.DataFrame(versions_without_signatures, columns = ["model","version", "run_id", "error"]) 35 | return df.sort_values(by=["model", "version"], ascending = [True, False]) 36 | 37 | 38 | def show(filter, output_file, use_get_model_info): 39 | df = as_pandas_df(filter, use_get_model_info) 40 | print(tabulate(df, headers="keys", tablefmt="psql", numalign="right", showindex=False)) 41 | if output_file: 42 | with open(output_file, "w", encoding="utf-8") as f: 43 | df.to_csv(f, index=False) 44 | 45 | 46 | @click.command() 47 | @opt_filter 48 | @opt_output_file 49 | @opt_use_get_model_info 50 | def main(filter, output_file, use_get_model_info): 51 | """ 52 | List model versions without a model signature. 
53 | """ 54 | print("Options:") 55 | args = locals() 56 | for k,v in args.items(): 57 | print(f" {k}: {v}") 58 | show(filter, output_file, use_get_model_info) 59 | 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/list_registered_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lists all registered models. 3 | """ 4 | 5 | import json 6 | from mlflow_export_import.client.http_client import MlflowHttpClient 7 | 8 | def main(): 9 | client = MlflowHttpClient() 10 | print("HTTP client:",client) 11 | rsp = client._get("registered-models/search") 12 | dct = json.loads(rsp.text) 13 | print(json.dumps(dct,indent=2)+"\n") 14 | 15 | if __name__ == "__main__": 16 | main() 17 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/set_model_signature.py: -------------------------------------------------------------------------------- 1 | """ 2 | Set the model signature of an MLflow model. 3 | 4 | https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.set_signature 5 | """ 6 | 7 | import pandas as pd 8 | import click 9 | import mlflow 10 | from mlflow.models.signature import infer_signature 11 | from mlflow_export_import.common.dump_utils import dump_as_json 12 | from . signature_utils import get_model_signature, to_json_signature 13 | 14 | 15 | def set_signature(model_uri, input_file, output_file, overwrite_signature): 16 | signature = get_model_signature(model_uri) 17 | if signature: 18 | if not overwrite_signature: 19 | print(f"WARNING: Model '{model_uri}' already has a signature. Not overwriting signature.") 20 | return 21 | else: 22 | print(f"WARNING: Model '{model_uri}' already has a signature. 
Overwriting existing signature.") 23 | df_input = pd.read_csv(input_file) 24 | df_output = pd.read_csv(output_file) 25 | signature = infer_signature(df_input, df_output) 26 | print("New model signature:") 27 | dump_as_json(to_json_signature(signature.to_dict())) 28 | 29 | mlflow.models.set_signature(model_uri, signature) 30 | 31 | 32 | @click.command() 33 | @click.option("--model-uri", 34 | help=""" 35 | Model URI such as 'runs:/73ab168e5775409fa3595157a415bb62/my_model' or 'file:/my_mlflow_model. 36 | Per MLflow documentation 'models:/' scheme is not supported. 37 | """, 38 | type=str, 39 | required=True 40 | ) 41 | @click.option("--input-file", 42 | help="Input CSV file with training data samples for signature.", 43 | type=str, 44 | required=True 45 | ) 46 | @click.option("--output-file", 47 | help="Output CSV file with prediction data samples for signature.", 48 | type=str, 49 | required=False 50 | ) 51 | @click.option("--overwrite-signature", 52 | help="Overwrite existing model signature.", 53 | type=bool, 54 | default=False, 55 | show_default=True 56 | ) 57 | def main(model_uri, input_file, output_file, overwrite_signature): 58 | """ 59 | Set the signature of an MLflow model. 60 | 'models:/' scheme URIs are not accepted. 61 | For OSS MLflow, if you add a model signature to a run, it will automatically update any model version that was created from the run. 
62 | """ 63 | print("Options:") 64 | for k,v in locals().items(): 65 | print(f" {k}: {v}") 66 | set_signature(model_uri, input_file, output_file, overwrite_signature) 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/signature_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import yaml 3 | import mlflow 4 | 5 | def to_json_signature(signature): 6 | def _to_json(lst): 7 | return json.loads(lst) if lst else lst 8 | return { k:_to_json(v) for k,v in signature.items()} 9 | 10 | 11 | def get_model_signature(model_uri, use_get_model_info=False): 12 | """ 13 | Return a fully exploded dict of the stringified JSON signature field of MLmodel. 14 | :param use_get_model_info: Use mlflow.models.get_model_info() which apparently downloads *all* artifacts (quite slow for large models) instead of just downloading 'MLmodel' using mlflow.artifacts.download_artifacts(). 15 | :return: Returns signature as dictionary, or None if the model has no signature.
16 | """ 17 | if use_get_model_info: 18 | return get_model_signature_use_get_model_info(model_uri) 19 | else: 20 | return get_model_signature_use_download_MLmodel(model_uri) 21 | 22 | def get_model_signature_use_download_MLmodel(model_uri): 23 | artifact_uri = f"{model_uri}/MLmodel" 24 | local_path = mlflow.artifacts.download_artifacts(artifact_uri) 25 | with open(local_path, "r") as f: 26 | mlmodel = yaml.safe_load(f) 27 | sig = mlmodel.get("signature") 28 | return to_json_signature(sig) if sig else None 29 | 30 | def get_model_signature_use_get_model_info(model_uri): 31 | model_info = mlflow.models.get_model_info(model_uri) 32 | if model_info.signature: 33 | sig = model_info.signature.to_dict() 34 | return to_json_signature(sig) 35 | else: 36 | return None 37 | -------------------------------------------------------------------------------- /mlflow_export_import/tools/tools_utils.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.common import utils 2 | from mlflow_export_import.common.iterators import SearchRegisteredModelsIterator, SearchModelVersionsIterator 3 | 4 | def search_model_versions(client, filter): 5 | if utils.calling_databricks(): 6 | models = list(SearchRegisteredModelsIterator(client, filter=filter)) 7 | versions = [] 8 | for model in models: 9 | try: 10 | _versions = list(SearchModelVersionsIterator(client, filter=f"name='{model.name}'")) 11 | versions += _versions 12 | except Exception as e: 13 | print(f"ERROR: registered model '{model.name}': {e}") 14 | return versions 15 | else: 16 | return list(SearchModelVersionsIterator(client, filter=filter)) 17 | -------------------------------------------------------------------------------- /mlflow_export_import/version.py: -------------------------------------------------------------------------------- 1 | 2 | __version__ = "1.2.0" 3 | -------------------------------------------------------------------------------- 
/mlflow_export_import/workflow_api/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /mlflow_export_import/workflow_api/log_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logging.basicConfig( 4 | #format = "%(asctime)s %(levelname)-7s %(message)s", 5 | format = "%(asctime)s %(levelname)s %(message)s", 6 | level = logging.INFO, 7 | datefmt = "%Y-%m-%d %H:%M:%S") 8 | -------------------------------------------------------------------------------- /mlflow_export_import/workflow_api/run_submit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import click 3 | import logging 4 | from mlflow_export_import.workflow_api.workflow_api_client import WorkflowApiClient 5 | from mlflow_export_import.workflow_api import utils 6 | 7 | def run(profile, spec_file, sleep_seconds, timeout_seconds, verbose=False): 8 | client = WorkflowApiClient(profile, sleep_seconds, timeout_seconds) 9 | 10 | # Read JSON spec file 11 | job_spec = utils.load_json_file(spec_file) 12 | 13 | # Launch run jobs/submit 14 | res = client.run_submit(job_spec) 15 | 16 | run_id = res["run_id"] 17 | logging.info(f"New run_id: {run_id}") 18 | 19 | # Wait until cluster is created 20 | client.wait_until_cluster_is_created_for_run(run_id) 21 | 22 | # Get cluster ID 23 | dct = client.get_run(run_id) 24 | #cluster_state = dct["cluster_instance"]["cluster_id"] 25 | cluster_id = dct["cluster_instance"]["cluster_id"] 26 | logging.info(f"cluster_id: {cluster_id}") 27 | 28 | # Wait until run is done 29 | client.wait_until_run_is_done(run_id) 30 | 31 | # Get run status 32 | run = client.get_run(run_id) 33 | 34 | # Show final run 35 | if verbose: 36 | utils.dump_as_json("Final run", run) 37 | 38 | # Get cluster log directory 39 | try: 40 | log_dir = 
run["cluster_spec"]["new_cluster"]["cluster_log_conf"]["dbfs"]["destination"] + "/" + cluster_id 41 | logging.info(f"Log directory: '{log_dir}'") 42 | except KeyError: 43 | logging.warning(f"No cluster log directory") 44 | 45 | # Show run result state 46 | result_state = run["state"]["result_state"] 47 | logging.info(f"Run result state: {result_state}") 48 | 49 | 50 | 51 | @click.command() 52 | @click.option("--profile", 53 | help="Databricks profile", 54 | type=str, 55 | default=None, 56 | show_default=True 57 | ) 58 | @click.option("--spec-file", 59 | help="JSON job specification file", 60 | type=str, 61 | required=True, 62 | show_default=True 63 | ) 64 | @click.option("--sleep-seconds", 65 | help="Sleep time for checking run status(seconds)", 66 | type=int, 67 | default=5, 68 | show_default=True 69 | ) 70 | @click.option("--timeout-seconds", 71 | help="Timeout (seconds)", 72 | type=int, 73 | default=sys.maxsize, 74 | show_default=True 75 | ) 76 | @click.option("--verbose", 77 | help="Verbose", 78 | type=bool, 79 | default=False) 80 | 81 | def main(profile, spec_file, sleep_seconds, timeout_seconds, verbose): 82 | print("Options:") 83 | for k,v in locals().items(): print(f" {k}: {v}") 84 | run(profile, spec_file, sleep_seconds, timeout_seconds, verbose) 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /mlflow_export_import/workflow_api/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def load_json_file(path): 5 | with open(path, "r", encoding="utf-8") as f: 6 | return json.loads(f.read()) 7 | 8 | 9 | def dump_as_json(msg, dct): 10 | print(f"{msg}:") 11 | print(json.dumps(dct,indent=2)+"\n") 12 | -------------------------------------------------------------------------------- /samples/databricks/bulk/experiments/1280664374380606/253000ee70914831850defc593ba4740/run.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_run.py", 5 | "export_time": 1684725509, 6 | "_export_time": "2023-05-22 03:18:29", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "mlflow": { 17 | "info": { 18 | "run_uuid": "253000ee70914831850defc593ba4740", 19 | "run_id": "253000ee70914831850defc593ba4740", 20 | "experiment_id": "1280664374380606", 21 | "user_id": "", 22 | "status": "FINISHED", 23 | "start_time": 1683945877431, 24 | "end_time": 1683945880724, 25 | "lifecycle_stage": "active", 26 | "artifact_uri": "dbfs:/databricks/mlflow-tracking/1280664374380606/253000ee70914831850defc593ba4740/artifacts", 27 | "run_name": "851de1f466304650a77c949f5d386d9f", 28 | "_start_time": "2023-05-13 02:44:37", 29 | "_end_time": "2023-05-13 02:44:41" 30 | }, 31 | "params": { 32 | "max_depth": "1", 33 | "max_leaf_nodes": "None" 34 | }, 35 | "metrics": { 36 | "r2": [ 37 | { 38 | "value": 0.1553172302194683, 39 | "timestamp": 1681630579458, 40 | "step": 0 41 | } 42 | ], 43 | "rmse": [ 44 | { 45 | "value": 0.7986004372118107, 46 | "timestamp": 1681630579236, 47 | "step": 0 48 | } 49 | ] 50 | }, 51 | "tags": { 52 | "mlflow.databricks.cluster.id": "0414-154233-qm0df4rx", 53 | "mlflow.databricks.cluster.info": "{\"cluster_name\":\"Andre_ML_13.0\",\"spark_version\":\"13.0.x-cpu-ml-scala2.12\",\"node_type_id\":\"i3.xlarge\",\"driver_node_type_id\":\"i3.xlarge\",\"autotermination_minutes\":120,\"disk_spec\":{\"disk_count\":0},\"num_workers\":1}", 54 | "mlflow.databricks.cluster.libraries": "{\"installable\":[],\"redacted\":[]}", 55 | "mlflow.databricks.notebook.commandID": "3527702579137640954_8374924253965797983_041de288996c42ef97161546f39184f0", 56 | "mlflow.databricks.notebookID": "1280664374380381", 
57 | "mlflow.databricks.notebookPath": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine", 58 | "mlflow.databricks.notebookRevisionID": "1683945880975", 59 | "mlflow.databricks.webappURL": "https://mycompany.cloud.com", 60 | "mlflow.databricks.workspaceID": "2556758628403379", 61 | "mlflow.databricks.workspaceURL": "mycompany.cloud.com", 62 | "mlflow.runName": "851de1f466304650a77c949f5d386d9f", 63 | "mlflow.source.name": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine", 64 | "mlflow.source.type": "NOTEBOOK", 65 | "mlflow.user": "andre@mycompany.com", 66 | "save_signature": "False", 67 | "timestamp": "2023-04-16 07:36:09", 68 | "version.DATABRICKS_RUNTIME_VERSION": "13.0", 69 | "version.mlflow": "2.2.1", 70 | "version.python": "3.10.6", 71 | "version.sklearn": "1.1.1" 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /samples/databricks/bulk/experiments/1280664374380606/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiment.py", 5 | "export_time": 1684725509, 6 | "_export_time": "2023-05-22 03:18:29", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "num_total_runs": 3, 18 | "num_ok_runs": 3, 19 | "num_failed_runs": 0, 20 | "failed_runs": [] 21 | }, 22 | "mlflow": { 23 | "experiment": { 24 | "experiment_id": "1280664374380606", 25 | "name": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/1280664374380606", 27 | "lifecycle_stage": "active", 28 | "tags": { 29 | "mlflow.experiment.sourceName": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 30 | 
"mlflow.experimentType": "MLFLOW_EXPERIMENT", 31 | "mlflow.note.content": "WS notebook - WS experiment\n/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 32 | "mlflow.ownerEmail": "andre@mycompany.com", 33 | "mlflow.ownerId": "4566812440727830", 34 | "timestamp": "2023-04-16 07:36:09", 35 | "version_mlflow": "2.2.1" 36 | }, 37 | "creation_time": 1681630570495, 38 | "last_update_time": 1683945877431, 39 | "_creation_time": "2023-04-16 07:36:10", 40 | "_last_update_time": "2023-05-13 02:44:37" 41 | }, 42 | "runs": [ 43 | "253000ee70914831850defc593ba4740", 44 | "85716eddd2ba4d938713b245e80df662", 45 | "851de1f466304650a77c949f5d386d9f" 46 | ] 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /samples/databricks/bulk/experiments/9195e233f19e49379b16c5f2d2b0c05f/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiment.py", 5 | "export_time": 1684725509, 6 | "_export_time": "2023-05-22 03:18:29", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "num_total_runs": 1, 18 | "num_ok_runs": 1, 19 | "num_failed_runs": 0, 20 | "failed_runs": [] 21 | }, 22 | "mlflow": { 23 | "experiment": { 24 | "experiment_id": "9195e233f19e49379b16c5f2d2b0c05f", 25 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist", 26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/9195e233f19e49379b16c5f2d2b0c05f", 27 | "lifecycle_stage": "active", 28 | "tags": { 29 | "mlflow.experiment.sourceId": "1765187885495869", 30 | "mlflow.experiment.sourceName": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist", 31 | "mlflow.experiment.sourceType": 
"REPO_NOTEBOOK", 32 | "mlflow.ownerEmail": "andre@mycompany.com", 33 | "mlflow.ownerId": "4566812440727830" 34 | }, 35 | "creation_time": 1681490229478, 36 | "last_update_time": 1684339053373, 37 | "_creation_time": "2023-04-14 16:37:09", 38 | "_last_update_time": "2023-05-17 15:57:33" 39 | }, 40 | "runs": [ 41 | "a17f0abf5d46464d899f0ffcebbdb7a8" 42 | ] 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /samples/databricks/bulk/experiments/experiments.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiments.py", 5 | "export_time": 1684725509, 6 | "_export_time": "2023-05-22 03:18:29", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "experiment_names": [ 18 | "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 19 | "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist" 20 | ], 21 | "duration": 14.3, 22 | "experiments": 2, 23 | "total_runs": 4, 24 | "ok_runs": 4, 25 | "failed_runs": 0 26 | }, 27 | "mlflow": { 28 | "experiments": [ 29 | { 30 | "id": "9195e233f19e49379b16c5f2d2b0c05f", 31 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist", 32 | "ok_runs": 1, 33 | "failed_runs": 0, 34 | "duration": 5.3 35 | }, 36 | { 37 | "id": "1280664374380606", 38 | "name": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 39 | "ok_runs": 3, 40 | "failed_runs": 0, 41 | "duration": 13.4 42 | } 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /samples/databricks/bulk/models/experiments/9195e233f19e49379b16c5f2d2b0c05f/experiment.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiment.py", 5 | "export_time": 1684724517, 6 | "_export_time": "2023-05-22 03:01:57", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "num_total_runs": 2, 18 | "num_ok_runs": 1, 19 | "num_failed_runs": 1, 20 | "failed_runs": [ 21 | "9b901d0d3c214880a4d38d0fceb3092c" 22 | ] 23 | }, 24 | "mlflow": { 25 | "experiment": { 26 | "experiment_id": "9195e233f19e49379b16c5f2d2b0c05f", 27 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist", 28 | "artifact_location": "dbfs:/databricks/mlflow-tracking/9195e233f19e49379b16c5f2d2b0c05f", 29 | "lifecycle_stage": "active", 30 | "tags": { 31 | "mlflow.experiment.sourceId": "1765187885495869", 32 | "mlflow.experiment.sourceName": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist", 33 | "mlflow.experiment.sourceType": "REPO_NOTEBOOK", 34 | "mlflow.ownerEmail": "andre@mycompany.com", 35 | "mlflow.ownerId": "4566812440727830" 36 | }, 37 | "creation_time": 1681490229478, 38 | "last_update_time": 1684339053373, 39 | "_creation_time": "2023-04-14 16:37:09", 40 | "_last_update_time": "2023-05-17 15:57:33" 41 | }, 42 | "runs": [ 43 | "a17f0abf5d46464d899f0ffcebbdb7a8" 44 | ], 45 | "permissions": { 46 | "permission_levels": [ 47 | { 48 | "permission_level": "CAN_READ", 49 | "description": "Can view the experiment" 50 | }, 51 | { 52 | "permission_level": "CAN_EDIT", 53 | "description": "Can view, log runs, and edit the experiment" 54 | }, 55 | { 56 | "permission_level": "CAN_MANAGE", 57 | "description": "Can view, log runs, edit, delete, and change permissions of the experiment" 58 | } 59 | ], 60 | "permissions": {} 61 | } 
62 | } 63 | } 64 | -------------------------------------------------------------------------------- /samples/databricks/bulk/models/experiments/experiments.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiments.py", 5 | "export_time": 1684724517, 6 | "_export_time": "2023-05-22 03:01:57", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "experiment_names": [ 18 | "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 19 | "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist" 20 | ], 21 | "duration": 14.2, 22 | "experiments": 2, 23 | "total_runs": 3, 24 | "ok_runs": 2, 25 | "failed_runs": 1 26 | }, 27 | "mlflow": { 28 | "experiments": [ 29 | { 30 | "id": "1280664374380606", 31 | "name": "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 32 | "ok_runs": 1, 33 | "failed_runs": 0, 34 | "duration": 7.1 35 | }, 36 | { 37 | "id": "9195e233f19e49379b16c5f2d2b0c05f", 38 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist", 39 | "ok_runs": 1, 40 | "failed_runs": 1, 41 | "duration": 6.2 42 | } 43 | ] 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /samples/databricks/bulk/models/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_models.py", 5 | "export_time": 1684724517, 6 | "_export_time": "2023-05-22 03:01:57", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 
| }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "model_names": "Sklearn_Wine_ws,Keras_Mnist", 18 | "stages": "Production,Staging,Archived,None", 19 | "export_all_runs": false, 20 | "export_latest_versions": false, 21 | "export_permissions": true, 22 | "export_deleted_runs": false, 23 | "notebook_formats": [ 24 | "SOURCE", 25 | "DBC" 26 | ], 27 | "use_threads": false, 28 | "output_dir": "out", 29 | "models": { 30 | "model_names": [ 31 | "Sklearn_Wine_ws", 32 | "Keras_Mnist" 33 | ], 34 | "stages": "Production,Staging,Archived,None", 35 | "export_latest_versions": false, 36 | "notebook_formats": [ 37 | "SOURCE", 38 | "DBC" 39 | ], 40 | "use_threads": false, 41 | "output_dir": "out/models", 42 | "num_total_models": 2, 43 | "num_ok_models": 2, 44 | "num_failed_models": 0, 45 | "duration": 20.0, 46 | "failed_models": [] 47 | }, 48 | "experiments": { 49 | "experiment_names": [ 50 | "/Users/andre@mycompany.com/experiments/sklearn_wine/Sklearn_Wine_ws", 51 | "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Keras_Mnist" 52 | ], 53 | "duration": 14.2, 54 | "experiments": 2, 55 | "total_runs": 3, 56 | "ok_runs": 2, 57 | "failed_runs": 1 58 | } 59 | }, 60 | "mlflow": {} 61 | } 62 | -------------------------------------------------------------------------------- /samples/databricks/bulk/models/models/models.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_models.py", 5 | "export_time": 1684724517, 6 | "_export_time": "2023-05-22 03:01:57", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "model_names": [ 18 | "Sklearn_Wine_ws", 19 | "Keras_Mnist" 20 | ], 21 | "stages": "Production,Staging,Archived,None", 22 | "export_latest_versions": false, 23 | 
"notebook_formats": [ 24 | "SOURCE", 25 | "DBC" 26 | ], 27 | "use_threads": false, 28 | "output_dir": "out/models", 29 | "num_total_models": 2, 30 | "num_ok_models": 2, 31 | "num_failed_models": 0, 32 | "duration": 20.0, 33 | "failed_models": [] 34 | }, 35 | "mlflow": { 36 | "models": [ 37 | "Sklearn_Wine_ws", 38 | "Keras_Mnist" 39 | ] 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /samples/databricks/single/experiments/notebook_experiments/repo_notebook/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiment.py", 5 | "export_time": 1684340109, 6 | "_export_time": "2023-05-17 16:15:09", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "num_total_runs": 1, 18 | "num_ok_runs": 1, 19 | "num_failed_runs": 0, 20 | "failed_runs": [] 21 | }, 22 | "mlflow": { 23 | "experiment": { 24 | "experiment_id": "e090757fcb8f49cb9822f65f2fe7ed91", 25 | "name": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine", 26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/e090757fcb8f49cb9822f65f2fe7ed91", 27 | "lifecycle_stage": "active", 28 | "tags": { 29 | "mlflow.experiment.sourceId": "1765187885495747", 30 | "mlflow.experiment.sourceName": "/Repos/andre@mycompany.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine", 31 | "mlflow.experiment.sourceType": "REPO_NOTEBOOK", 32 | "mlflow.ownerEmail": "andre@mycompany.com", 33 | "mlflow.ownerId": "4566812440727830" 34 | }, 35 | "creation_time": 1681489696888, 36 | "last_update_time": 1684340017546, 37 | "_creation_time": "2023-04-14 16:28:17", 38 | "_last_update_time": "2023-05-17 16:13:38" 39 | }, 40 | "runs": [ 41 | 
"02aeef6d8cbf449ab50c8e715e320085" 42 | ], 43 | "permissions": { 44 | "permission_levels": [ 45 | { 46 | "permission_level": "CAN_READ", 47 | "description": "Can view the experiment" 48 | }, 49 | { 50 | "permission_level": "CAN_EDIT", 51 | "description": "Can view, log runs, and edit the experiment" 52 | }, 53 | { 54 | "permission_level": "CAN_MANAGE", 55 | "description": "Can view, log runs, edit, delete, and change permissions of the experiment" 56 | } 57 | ], 58 | "permissions": {} 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /samples/databricks/single/experiments/notebook_experiments/workspace_notebook/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiment.py", 5 | "export_time": 1684718874, 6 | "_export_time": "2023-05-22 01:27:54", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "num_total_runs": 1, 18 | "num_ok_runs": 1, 19 | "num_failed_runs": 0, 20 | "failed_runs": [] 21 | }, 22 | "mlflow": { 23 | "experiment": { 24 | "experiment_id": "1280664374380381", 25 | "name": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine", 26 | "artifact_location": "dbfs:/databricks/mlflow-tracking/1280664374380381", 27 | "lifecycle_stage": "active", 28 | "tags": { 29 | "mlflow.experiment.sourceName": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine", 30 | "mlflow.experimentType": "NOTEBOOK", 31 | "mlflow.note.content": "WS notebook - NB experiment\n/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine", 32 | "mlflow.ownerEmail": "andre@mycompany.com", 33 | "mlflow.ownerId": "4566812440727830" 34 | }, 35 | "creation_time": 1681628993361, 36 | 
"last_update_time": 1681628993361, 37 | "_creation_time": "2023-04-16 07:09:53", 38 | "_last_update_time": "2023-04-16 07:09:53" 39 | }, 40 | "runs": [ 41 | "f7816bc76f254f22ab25549a7c2c9b06" 42 | ], 43 | "permissions": { 44 | "permission_levels": [ 45 | { 46 | "permission_level": "CAN_READ", 47 | "description": "Can view the experiment" 48 | }, 49 | { 50 | "permission_level": "CAN_EDIT", 51 | "description": "Can view, log runs, and edit the experiment" 52 | }, 53 | { 54 | "permission_level": "CAN_MANAGE", 55 | "description": "Can view, log runs, edit, delete, and change permissions of the experiment" 56 | } 57 | ], 58 | "permissions": {} 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/MLmodel: -------------------------------------------------------------------------------- 1 | artifact_path: model 2 | flavors: 3 | python_function: 4 | env: 5 | conda: conda.yaml 6 | virtualenv: python_env.yaml 7 | loader_module: mlflow.sklearn 8 | model_path: model.pkl 9 | predict_fn: predict 10 | python_version: 3.10.6 11 | sklearn: 12 | code: null 13 | pickled_model: model.pkl 14 | serialization_format: cloudpickle 15 | sklearn_version: 1.1.1 16 | mlflow_version: 2.3.1 17 | model_uuid: b13b14bd62734b31baa2e5664ad86417 18 | run_id: 5e1e2c44039a40afafc760b837a4daab 19 | saved_input_example_info: 20 | artifact_path: input_example.json 21 | pandas_orient: split 22 | type: dataframe 23 | signature: 24 | inputs: '[{"name": "fixed acidity", "type": "double"}, {"name": "volatile acidity", 25 | "type": "double"}, {"name": "citric acid", "type": "double"}, {"name": "residual 26 | sugar", "type": "double"}, {"name": "chlorides", "type": "double"}, {"name": "free 27 | sulfur dioxide", "type": "double"}, {"name": "total sulfur dioxide", "type": "double"}, 28 | {"name": "density", "type": "double"}, 
{"name": "pH", "type": "double"}, {"name": 29 | "sulphates", "type": "double"}, {"name": "alcohol", "type": "double"}]' 30 | outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]' 31 | utc_time_created: '2023-05-21 19:16:51.054335' 32 | -------------------------------------------------------------------------------- /samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/conda.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.10.6 5 | - pip<=22.2.2 6 | - pip: 7 | - mlflow<3,>=2.3 8 | - category-encoders==2.6.0 9 | - cffi==1.15.1 10 | - cloudpickle==2.0.0 11 | - databricks-automl-runtime==0.2.16 12 | - defusedxml==0.7.1 13 | - holidays==0.22 14 | - lightgbm==3.3.5 15 | - matplotlib==3.5.2 16 | - psutil==5.9.0 17 | - scikit-learn==1.1.1 18 | - typing-extensions==4.3.0 19 | name: mlflow-env 20 | -------------------------------------------------------------------------------- /samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/input_example.json: -------------------------------------------------------------------------------- 1 | {"columns": ["fixed acidity", "volatile acidity", "citric acid", "residual sugar", "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density", "pH", "sulphates", "alcohol"], "data": [[3.8, 0.31, 0.02, 11.1, 0.036, 20.0, 114.0, 0.99248, 3.75, 0.44, 12.4], [4.4, 0.32, 0.39, 4.3, 0.03, 31.0, 127.0, 0.98904, 3.46, 0.36, 12.8], [4.5, 0.19, 0.21, 0.95, 0.033, 89.0, 159.0, 0.99332, 3.34, 0.42, 8.0], [4.6, 0.445, 0.0, 1.4, 0.053, 11.0, 178.0, 0.99426, 3.79, 0.55, 10.2], [4.7, 0.67, 0.09, 1.0, 0.02, 5.0, 9.0, 0.98722, 3.3, 0.34, 13.6]]} -------------------------------------------------------------------------------- 
/samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/python_env.yaml: -------------------------------------------------------------------------------- 1 | python: 3.10.6 2 | build_dependencies: 3 | - pip==22.2.2 4 | - setuptools==63.4.1 5 | - wheel==0.37.1 6 | dependencies: 7 | - -r requirements.txt 8 | -------------------------------------------------------------------------------- /samples/databricks/single/experiments/workspace_experiments/automl_workspace_notebook/5e1e2c44039a40afafc760b837a4daab/artifacts/model/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow<3,>=2.3 2 | category-encoders==2.6.0 3 | cffi==1.15.1 4 | cloudpickle==2.0.0 5 | databricks-automl-runtime==0.2.16 6 | defusedxml==0.7.1 7 | holidays==0.22 8 | lightgbm==3.3.5 9 | matplotlib==3.5.2 10 | psutil==5.9.0 11 | scikit-learn==1.1.1 12 | typing-extensions==4.3.0 -------------------------------------------------------------------------------- /samples/databricks/single/experiments/workspace_experiments/job_repo_notebook/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiment.py", 5 | "export_time": 1681541244, 6 | "_export_time": "2023-04-15 06:47:24", 7 | "mlflow_version": "2.2.1", 8 | "mlflow_tracking_uri": "databricks", 9 | "platform": { 10 | "python_version": "3.10.6", 11 | "system": "Linux", 12 | "processor": "x86_64" 13 | }, 14 | "user": "root", 15 | "databricks": { 16 | "DATABRICKS_RUNTIME_VERSION": "13.0" 17 | } 18 | }, 19 | "info": { 20 | "num_total_runs": 1, 21 | "num_ok_runs": 1, 22 | "num_failed_runs": 0, 23 | "failed_runs": [] 24 | }, 25 | "mlflow": { 26 | "experiment": { 27 | "experiment_id": "1280664374378362", 28 | "name": "/Users/andre@mycompany.com/experiments/Sklearn_Wine_job", 29 | "artifact_location": 
"dbfs:/databricks/mlflow-tracking/1280664374378362", 30 | "lifecycle_stage": "active", 31 | "tags": { 32 | "mlflow.experiment.sourceName": "/Users/andre@mycompany.com/experiments/Sklearn_Wine_job", 33 | "mlflow.experimentType": "MLFLOW_EXPERIMENT", 34 | "mlflow.ownerEmail": "andre@mycompany.com", 35 | "mlflow.ownerId": "4566812440727830", 36 | "timestamp": "2023-04-15 06:21:44", 37 | "version_mlflow": "2.2.1" 38 | }, 39 | "creation_time": 1681539704973, 40 | "last_update_time": 1681539706472, 41 | "_creation_time": "2023-04-15 06:21:45", 42 | "_last_update_time": "2023-04-15 06:21:46" 43 | }, 44 | "runs": [ 45 | "e559b53f67884160b37e27f4475a26e3" 46 | ] 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /samples/databricks/single/experiments/workspace_experiments/workspace_notebook/253000ee70914831850defc593ba4740/run.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_run.py", 5 | "export_time": 1684303741, 6 | "_export_time": "2023-05-17 06:09:01", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "databricks://e2_demo", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "mlflow": { 17 | "info": { 18 | "run_uuid": "253000ee70914831850defc593ba4740", 19 | "run_id": "253000ee70914831850defc593ba4740", 20 | "experiment_id": "1280664374380606", 21 | "user_id": "", 22 | "status": "FINISHED", 23 | "start_time": 1683945877431, 24 | "end_time": 1683945880724, 25 | "lifecycle_stage": "active", 26 | "artifact_uri": "dbfs:/databricks/mlflow-tracking/1280664374380606/253000ee70914831850defc593ba4740/artifacts", 27 | "run_name": "851de1f466304650a77c949f5d386d9f", 28 | "_start_time": "2023-05-13 02:44:37", 29 | "_end_time": "2023-05-13 02:44:41" 30 | }, 31 | "params": { 32 | "max_depth": "1", 33 | "max_leaf_nodes": 
"None" 34 | }, 35 | "metrics": { 36 | "r2": [ 37 | { 38 | "value": 0.1553172302194683, 39 | "timestamp": 1681630579458, 40 | "step": 0 41 | } 42 | ], 43 | "rmse": [ 44 | { 45 | "value": 0.7986004372118107, 46 | "timestamp": 1681630579236, 47 | "step": 0 48 | } 49 | ] 50 | }, 51 | "tags": { 52 | "mlflow.databricks.cluster.id": "0414-154233-qm0df4rx", 53 | "mlflow.databricks.cluster.info": "{\"cluster_name\":\"Andre_ML_13.0\",\"spark_version\":\"13.0.x-cpu-ml-scala2.12\",\"node_type_id\":\"i3.xlarge\",\"driver_node_type_id\":\"i3.xlarge\",\"autotermination_minutes\":120,\"disk_spec\":{\"disk_count\":0},\"num_workers\":1}", 54 | "mlflow.databricks.cluster.libraries": "{\"installable\":[],\"redacted\":[]}", 55 | "mlflow.databricks.notebook.commandID": "3527702579137640954_8374924253965797983_041de288996c42ef97161546f39184f0", 56 | "mlflow.databricks.notebookID": "1280664374380381", 57 | "mlflow.databricks.notebookPath": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine", 58 | "mlflow.databricks.notebookRevisionID": "1683945880975", 59 | "mlflow.databricks.webappURL": "https://mycompany.cloud.com", 60 | "mlflow.databricks.workspaceID": "2556758628403379", 61 | "mlflow.databricks.workspaceURL": "mycompany.cloud.com", 62 | "mlflow.runName": "851de1f466304650a77c949f5d386d9f", 63 | "mlflow.source.name": "/Users/andre@mycompany.com/mlflow/mlflow-examples-basic/Sklearn_Wine", 64 | "mlflow.source.type": "NOTEBOOK", 65 | "mlflow.user": "andre@mycompany.com", 66 | "save_signature": "False", 67 | "timestamp": "2023-04-16 07:36:09", 68 | "version.DATABRICKS_RUNTIME_VERSION": "13.0", 69 | "version.mlflow": "2.2.1", 70 | "version.python": "3.10.6", 71 | "version.sklearn": "1.1.1" 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | 
"package_version": "1.2.0", 4 | "script": "export_model_version.py", 5 | "export_file_version": "2", 6 | "export_time": 1721364374, 7 | "_export_time": "2024-07-19 04:46:14", 8 | "mlflow_version": "2.14.3", 9 | "mlflow_tracking_uri": "databricks://e2_demo_fieldeng", 10 | "platform": { 11 | "python_version": "3.8.16", 12 | "system": "Darwin", 13 | "processor": "i386" 14 | }, 15 | "user": "andre.mesarovic" 16 | }, 17 | "mlflow": { 18 | "experiment": { 19 | "experiment_id": "0828080c9c7b43a7b7624307809cfcda", 20 | "name": "/Repos/andre.mesarovic@databricks.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine_UC", 21 | "artifact_location": "dbfs:/databricks/mlflow-tracking/0828080c9c7b43a7b7624307809cfcda", 22 | "lifecycle_stage": "active", 23 | "last_update_time": 1717552430207, 24 | "creation_time": 1716309738995, 25 | "tags": [ 26 | { 27 | "key": "mlflow.experiment.sourceType", 28 | "value": "REPO_NOTEBOOK" 29 | }, 30 | { 31 | "key": "mlflow.ownerId", 32 | "value": "4566812440727830" 33 | }, 34 | { 35 | "key": "mlflow.sharedViewState.d6a47c70ec552dd064068bd2040c8c53e68e466c44a75fa3353a7747a8c6489c", 36 | "value": "{\"searchFilter\":\"\",\"orderByKey\":\"attributes.start_time\",\"orderByAsc\":false,\"startTime\":\"ALL\",\"lifecycleFilter\":\"Active\",\"datasetsFilter\":[],\"modelVersionFilter\":\"All Runs\",\"selectedColumns\":[\"attributes.`Source`\",\"attributes.`Models`\",\"attributes.`Dataset`\"],\"runsExpanded\":{},\"runsPinned\":[],\"runsHidden\":[],\"runsHiddenMode\":\"FIRST_10_RUNS\",\"viewMaximized\":false,\"runListHidden\":false,\"isAccordionReordered\":false,\"useGroupedValuesInCharts\":true,\"groupBy\":null,\"groupsExpanded\":{},\"autoRefreshEnabled\":true}" 37 | }, 38 | { 39 | "key": "mlflow.experiment.sourceName", 40 | "value": "/Repos/andre.mesarovic@databricks.com/mlflow-examples/databricks/notebooks/basic/Sklearn_Wine_UC" 41 | }, 42 | { 43 | "key": "mlflow.ownerId", 44 | "value": "4566812440727830" 45 | }, 46 | { 47 | "key": 
"mlflow.ownerEmail", 48 | "value": "andre.mesarovic@databricks.com" 49 | }, 50 | { 51 | "key": "mlflow.experiment.sourceId", 52 | "value": "2824690123548175" 53 | } 54 | ], 55 | "_last_update_time": "2024-06-05 01:53:50", 56 | "_creation_time": "2024-05-21 16:42:19" 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/MLmodel: -------------------------------------------------------------------------------- 1 | artifact_path: model 2 | databricks_runtime: '15.1' 3 | flavors: 4 | python_function: 5 | env: 6 | conda: conda.yaml 7 | virtualenv: python_env.yaml 8 | loader_module: mlflow.sklearn 9 | model_path: model.pkl 10 | predict_fn: predict 11 | python_version: 3.11.0 12 | sklearn: 13 | code: null 14 | pickled_model: model.pkl 15 | serialization_format: cloudpickle 16 | sklearn_version: 1.3.0 17 | mlflow_version: 2.13.1 18 | model_size_bytes: 102527 19 | model_uuid: 7c56d8b80973448b8c4e7b5a3b9fc7b6 20 | run_id: 6222162b4c7f47c2820a7e5b520f65a9 21 | saved_input_example_info: 22 | artifact_path: input_example.json 23 | pandas_orient: split 24 | type: dataframe 25 | signature: 26 | inputs: '[{"type": "double", "name": "fixed_acidity", "required": true}, {"type": 27 | "double", "name": "volatile_acidity", "required": true}, {"type": "double", "name": 28 | "citric_acid", "required": true}, {"type": "double", "name": "residual_sugar", 29 | "required": true}, {"type": "double", "name": "chlorides", "required": true}, 30 | {"type": "double", "name": "free_sulfur_dioxide", "required": true}, {"type": 31 | "double", "name": "total_sulfur_dioxide", "required": true}, {"type": "double", 32 | "name": "density", "required": true}, {"type": "double", "name": "pH", "required": 33 | true}, {"type": "double", "name": "sulphates", "required": true}, {"type": "double", 34 | "name": "alcohol", "required": true}]' 35 | outputs: '[{"type": "tensor", "tensor-spec": 
{"dtype": "float64", "shape": [-1]}}]' 36 | params: null 37 | utc_time_created: '2024-06-05 01:53:53.910623' 38 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/conda.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.0 5 | - pip<=23.0.1 6 | - pip: 7 | - mlflow==2.13.1 8 | - cloudpickle==2.2.1 9 | - lz4==4.3.2 10 | - psutil==5.9.0 11 | - scikit-learn==1.3.0 12 | name: mlflow-env 13 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/MLmodel: -------------------------------------------------------------------------------- 1 | artifact_path: model 2 | databricks_runtime: '15.1' 3 | flavors: 4 | python_function: 5 | env: 6 | conda: conda.yaml 7 | virtualenv: python_env.yaml 8 | loader_module: mlflow.sklearn 9 | model_path: model.pkl 10 | predict_fn: predict 11 | python_version: 3.11.0 12 | sklearn: 13 | code: null 14 | pickled_model: model.pkl 15 | serialization_format: cloudpickle 16 | sklearn_version: 1.3.0 17 | mlflow_version: 2.13.1 18 | model_size_bytes: 102527 19 | model_uuid: 7c56d8b80973448b8c4e7b5a3b9fc7b6 20 | run_id: 6222162b4c7f47c2820a7e5b520f65a9 21 | saved_input_example_info: 22 | artifact_path: input_example.json 23 | pandas_orient: split 24 | type: dataframe 25 | signature: 26 | inputs: '[{"type": "double", "name": "fixed_acidity", "required": true}, {"type": 27 | "double", "name": "volatile_acidity", "required": true}, {"type": "double", "name": 28 | "citric_acid", "required": true}, {"type": "double", "name": "residual_sugar", 29 | "required": true}, {"type": "double", "name": "chlorides", "required": true}, 30 | {"type": "double", "name": "free_sulfur_dioxide", "required": true}, {"type": 31 | "double", "name": "total_sulfur_dioxide", 
"required": true}, {"type": "double", 32 | "name": "density", "required": true}, {"type": "double", "name": "pH", "required": 33 | true}, {"type": "double", "name": "sulphates", "required": true}, {"type": "double", 34 | "name": "alcohol", "required": true}]' 35 | outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]' 36 | params: null 37 | utc_time_created: '2024-06-05 01:53:53.910623' 38 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/conda.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.0 5 | - pip<=23.0.1 6 | - pip: 7 | - mlflow==2.13.1 8 | - cloudpickle==2.2.1 9 | - lz4==4.3.2 10 | - psutil==5.9.0 11 | - scikit-learn==1.3.0 12 | name: mlflow-env 13 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/python_env.yaml: -------------------------------------------------------------------------------- 1 | python: 3.11.0 2 | build_dependencies: 3 | - pip==23.0.1 4 | - setuptools==68.0.0 5 | - wheel==0.38.4 6 | dependencies: 7 | - -r requirements.txt 8 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/metadata/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow==2.13.1 2 | cloudpickle==2.2.1 3 | lz4==4.3.2 4 | psutil==5.9.0 5 | scikit-learn==1.3.0 -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/samples/databricks/single/versions/sklearn_wine/run/artifacts/model/model.pkl -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/python_env.yaml: -------------------------------------------------------------------------------- 1 | python: 3.11.0 2 | build_dependencies: 3 | - pip==23.0.1 4 | - setuptools==68.0.0 5 | - wheel==0.38.4 6 | dependencies: 7 | - -r requirements.txt 8 | -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/run/artifacts/model/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow==2.13.1 2 | cloudpickle==2.2.1 3 | lz4==4.3.2 4 | psutil==5.9.0 5 | scikit-learn==1.3.0 -------------------------------------------------------------------------------- /samples/databricks/single/versions/sklearn_wine/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_model_version.py", 5 | "export_file_version": "2", 6 | "export_time": 1721364374, 7 | "_export_time": "2024-07-19 04:46:14", 8 | "mlflow_version": "2.14.3", 9 | "mlflow_tracking_uri": "databricks://e2_demo_fieldeng", 10 | "platform": { 11 | "python_version": "3.8.16", 12 | "system": "Darwin", 13 | "processor": "i386" 14 | }, 15 | "user": "andre.mesarovic" 16 | }, 17 | "mlflow": { 18 | "model_version": { 19 | "name": "andre_catalog.ml_models2.sklearn_wine_best", 20 | "version": "15", 21 | "creation_timestamp": 1717552439516, 22 | "last_updated_timestamp": 1717552440318, 23 | "description": "white_2", 24 | "user_id": "andre.mesarovic@databricks.com", 25 | "current_stage": null, 26 | "source": 
"dbfs:/databricks/mlflow-tracking/0828080c9c7b43a7b7624307809cfcda/6222162b4c7f47c2820a7e5b520f65a9/artifacts/model", 27 | "run_id": "6222162b4c7f47c2820a7e5b520f65a9", 28 | "run_link": null, 29 | "status": "READY", 30 | "status_message": "", 31 | "tags": { 32 | "alias": "white_2" 33 | }, 34 | "aliases": [ 35 | "white_2" 36 | ], 37 | "_creation_timestamp": "2024-06-05 01:54:00", 38 | "_last_updated_timestamp": "2024-06-05 01:54:00" 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/experiments/1/d057cae15f27465988e72c6212e1f226/run.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_run.py", 5 | "export_time": 1671260983, 6 | "_export_time": "2022-12-17 07:09:43", 7 | "mlflow_version": "2.0.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "mlflow": { 16 | "info": { 17 | "run_uuid": "a83cebbccbca41299360c695c5ea72f3", 18 | "run_id": "a83cebbccbca41299360c695c5ea72f3", 19 | "experiment_id": "1", 20 | "user_id": "andre", 21 | "status": "FINISHED", 22 | "start_time": 1671070664322, 23 | "end_time": 1671070667923, 24 | "lifecycle_stage": "active", 25 | "artifact_uri": "/opt/mlflow/server/mlruns/1/a83cebbccbca41299360c695c5ea72f3/artifacts", 26 | "run_name": "train.sh 2.0.1 2022-12-15 02:17:43" 27 | }, 28 | "params": { 29 | "max_depth": "None", 30 | "max_leaf_nodes": "32" 31 | }, 32 | "metrics": { 33 | "rmse": [ 34 | { 35 | "value": 0.7256044469217515, 36 | "timestamp": 1671070665139, 37 | "step": 0 38 | } 39 | ], 40 | "r2": [ 41 | { 42 | "value": 0.30267631032833586, 43 | "timestamp": 1671070665152, 44 | "step": 0 45 | } 46 | ], 47 | "mae": [ 48 | { 49 | "value": 0.5688309814398113, 50 | "timestamp": 1671070665163, 51 | "step": 0 52 | } 53 | ] 54 | }, 55 | 
"tags": { 56 | "data_path": "../../data/train/wine-quality-white.csv", 57 | "mlflow.log-model.history": "[{\"run_id\": \"a83cebbccbca41299360c695c5ea72f3\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2022-12-15 02:17:45.173770\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.14\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"b0022500ab944161b3f97b9746509418\", \"mlflow_version\": \"2.0.1\"}]", 58 | "mlflow.runName": "train.sh 2.0.1 2022-12-15 02:17:43", 59 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009", 60 | "mlflow.source.name": "/Users/andre/git/andre/mlflow-examples/python/sklearn/wine_quality/train.py", 61 | "mlflow.source.type": "LOCAL", 62 | "mlflow.user": "andre", 63 | "registered_model_name": "sklearn_wine", 64 | "registered_model_version_stage": "Production", 65 | "run_origin": "train.sh", 66 | "save_signature": "False", 67 | "uuid": "CfHSHRv2yXaDioNR46FRoL", 68 | "version.mlflow": "2.0.1", 69 | "version.platform": "macOS-10.16-x86_64-i386-64bit", 70 | "version.python": "3.8.14", 71 | "version.sklearn": "1.1.1" 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/experiments/1/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_experiment.py", 5 | "export_time": 1671260983, 6 | "_export_time": "2022-12-17 07:09:43", 7 | "mlflow_version": "2.0.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | 
"info": { 16 | "num_total_runs": 1, 17 | "num_ok_runs": 1, 18 | "num_failed_runs": 0, 19 | "failed_runs": [] 20 | }, 21 | "mlflow": { 22 | "experiment": { 23 | "experiment_id": "1", 24 | "name": "sklearn_wine", 25 | "artifact_location": "/opt/mlflow/server/mlruns/1", 26 | "lifecycle_stage": "active", 27 | "tags": { 28 | "experiment_created": "2022-12-15 02:17:43", 29 | "version_mlflow": "2.0.1" 30 | }, 31 | "creation_time": 1671070664091, 32 | "last_update_time": 1671070664091 33 | }, 34 | "runs": [ 35 | "d057cae15f27465988e72c6212e1f226" 36 | ] 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/experiments/2/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_experiment.py", 5 | "export_time": 1671260983, 6 | "_export_time": "2022-12-17 07:09:45", 7 | "mlflow_version": "2.0.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": { 16 | "num_total_runs": 1, 17 | "num_ok_runs": 1, 18 | "num_failed_runs": 0, 19 | "failed_runs": [] 20 | }, 21 | "mlflow": { 22 | "experiment": { 23 | "experiment_id": "1", 24 | "name": "sklearn_wine", 25 | "artifact_location": "/opt/mlflow/server/mlruns/1", 26 | "lifecycle_stage": "active", 27 | "tags": { 28 | "experiment_created": "2022-12-15 02:17:4555 29 | "version_mlflow": "2.0.1" 30 | }, 31 | "creation_time": 1671070664091, 32 | "last_update_time": 1671070664091 33 | }, 34 | "runs": [ 35 | "5397ae67ee0c49139bf64834b4d27fab", 36 | "8a6af43e756f433da7a90fd6b4e49c3a" 37 | ] 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/experiments/experiments.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"system": { 3 | "package_version": "1.1.2", 4 | "script": "export_experiments.py", 5 | "export_time": 1671260983, 6 | "_export_time": "2022-12-17 07:09:43", 7 | "mlflow_version": "2.0.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": { 16 | "duration": 0.2, 17 | "experiments": 3, 18 | "total_runs": 2, 19 | "ok_runs": 2, 20 | "failed_runs": 0 21 | }, 22 | "mlflow": { 23 | "experiments": [ 24 | { 25 | "id": "2", 26 | "name": "sklearn_iris", 27 | "ok_runs": 1, 28 | "failed_runs": 0, 29 | "duration": 0.1 30 | }, 31 | { 32 | "id": "1", 33 | "name": "sklearn_wine", 34 | "ok_runs": 1, 35 | "failed_runs": 0, 36 | "duration": 0.1 37 | } 38 | ] 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/experiments/1/d057cae15f27465988e72c6212e1f226/run.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_run.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "mlflow": { 16 | "info": { 17 | "run_uuid": "d057cae15f27465988e72c6212e1f226", 18 | "run_id": "d057cae15f27465988e72c6212e1f226", 19 | "experiment_id": "1", 20 | "user_id": "andre", 21 | "status": "FINISHED", 22 | "start_time": 1672601484918, 23 | "end_time": 1672601487986, 24 | "lifecycle_stage": "active", 25 | "artifact_uri": "/opt/mlflow/server/mlruns/1/d057cae15f27465988e72c6212e1f226/artifacts", 26 | "run_name": "2023-01-01 19:31:23 train.sh 2.1.1" 27 | }, 28 | "params": { 29 | "max_depth": "4", 30 | "max_leaf_nodes": "32" 31 | }, 32 | "metrics": { 33 | "rmse": [ 34 | { 35 | "value": 
0.7367947360663162, 36 | "timestamp": 1672601485655, 37 | "step": 0 38 | } 39 | ], 40 | "r2": [ 41 | { 42 | "value": 0.28100217442439346, 43 | "timestamp": 1672601485668, 44 | "step": 0 45 | } 46 | ], 47 | "mae": [ 48 | { 49 | "value": 0.5877424565761121, 50 | "timestamp": 1672601485675, 51 | "step": 0 52 | } 53 | ] 54 | }, 55 | "tags": { 56 | "data_path": "https://raw.githubusercontent.com/amesar/mlflow-examples/master/data/train/wine-quality-white.csv", 57 | "dataset": "wine-quality", 58 | "mlflow.log-model.history": "[{\"run_id\": \"d057cae15f27465988e72c6212e1f226\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2023-01-01 19:31:25.684661\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.15\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"46d75d52dc604c5bb49a0c4fae32c1b3\", \"mlflow_version\": \"2.1.1\"}]", 59 | "mlflow.runName": "2023-01-01 19:31:23 train.sh 2.1.1", 60 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009", 61 | "mlflow.source.name": "/Users/andre/git/andre/mlflow-examples/python/sklearn/wine_quality/train.py", 62 | "mlflow.source.type": "LOCAL", 63 | "mlflow.user": "andre", 64 | "output_path": "None", 65 | "registered_model_name": "sklearn_wine", 66 | "registered_model_version_stage": "Production", 67 | "run_origin": "train.sh", 68 | "save_signature": "False", 69 | "uuid": "feX2hCEDYwCJRgypBKDpJZ", 70 | "version.mlflow": "2.1.1", 71 | "version.platform": "macOS-10.16-x86_64-i386-64bit", 72 | "version.python": "3.8.15", 73 | "version.sklearn": "1.1.1" 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- 
/samples/oss_mlflow/bulk/models/experiments/1/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_experiment.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": { 16 | "num_total_runs": 1, 17 | "num_ok_runs": 1, 18 | "num_failed_runs": 0, 19 | "failed_runs": [] 20 | }, 21 | "mlflow": { 22 | "experiment": { 23 | "experiment_id": "1", 24 | "name": "sklearn_wine", 25 | "artifact_location": "/opt/mlflow/server/mlruns/1", 26 | "lifecycle_stage": "active", 27 | "tags": { 28 | "experiment_created": "2023-01-01 19:31:23", 29 | "version_mlflow": "2.1.1" 30 | }, 31 | "creation_time": 1672601484703, 32 | "last_update_time": 1672601484703 33 | }, 34 | "runs": [ 35 | "d057cae15f27465988e72c6212e1f226" 36 | ] 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/experiments/2/5397ae67ee0c49139bf64834b4d27fab/run.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_run.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "mlflow": { 16 | "info": { 17 | "run_uuid": "5397ae67ee0c49139bf64834b4d27fab", 18 | "run_id": "5397ae67ee0c49139bf64834b4d27fab", 19 | "experiment_id": "2", 20 | "user_id": "andre", 21 | "status": "FINISHED", 22 | "start_time": 1672601579550, 23 | "end_time": 1672601583143, 24 | "lifecycle_stage": 
"active", 25 | "artifact_uri": "/opt/mlflow/server/mlruns/2/5397ae67ee0c49139bf64834b4d27fab/artifacts", 26 | "run_name": "sklearn_iris" 27 | }, 28 | "params": { 29 | "max_depth": "5" 30 | }, 31 | "metrics": { 32 | "accuracy_score": [ 33 | { 34 | "value": 0.9555555555555556, 35 | "timestamp": 1672601579593, 36 | "step": 0 37 | } 38 | ], 39 | "zero_one_loss": [ 40 | { 41 | "value": 0.0444444444444444, 42 | "timestamp": 1672601579602, 43 | "step": 0 44 | } 45 | ] 46 | }, 47 | "tags": { 48 | "dataset": "sklearn-iris", 49 | "int": "123", 50 | "mlflow.log-model.history": "[{\"run_id\": \"5397ae67ee0c49139bf64834b4d27fab\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2023-01-01 19:32:59.612140\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.15\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"3bd3c618169546189858df68e9610e0a\", \"mlflow_version\": \"2.1.1\"}]", 51 | "mlflow.runName": "sklearn_iris", 52 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009", 53 | "mlflow.source.name": "train.py", 54 | "mlflow.source.type": "LOCAL", 55 | "mlflow.user": "andre", 56 | "mlflow_version": "2.1.1" 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/experiments/2/8a6af43e756f433da7a90fd6b4e49c3a/run.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_run.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 
11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "mlflow": { 16 | "info": { 17 | "run_uuid": "5397ae67ee0c49139bf64834b4d27fab", 18 | "run_id": "5397ae67ee0c49139bf64834b4d27fab", 19 | "experiment_id": "2", 20 | "user_id": "andre", 21 | "status": "FINISHED", 22 | "start_time": 1672601579550, 23 | "end_time": 1672601583143, 24 | "lifecycle_stage": "active", 25 | "artifact_uri": "/opt/mlflow/server/mlruns/2/5397ae67ee0c49139bf64834b4d27fab/artifacts", 26 | "run_name": "sklearn_iris" 27 | }, 28 | "params": { 29 | "max_depth": "5" 30 | }, 31 | "metrics": { 32 | "accuracy_score": [ 33 | { 34 | "value": 0.9555555555555556, 35 | "timestamp": 1672601579593, 36 | "step": 0 37 | } 38 | ], 39 | "zero_one_loss": [ 40 | { 41 | "value": 0.0444444444444444, 42 | "timestamp": 1672601579602, 43 | "step": 0 44 | } 45 | ] 46 | }, 47 | "tags": { 48 | "dataset": "sklearn-iris", 49 | "int": "123", 50 | "mlflow.log-model.history": "[{\"run_id\": \"5397ae67ee0c49139bf64834b4d27fab\", \"artifact_path\": \"sklearn-model\", \"utc_time_created\": \"2023-01-01 19:32:59.612140\", \"flavors\": {\"python_function\": {\"model_path\": \"model.pkl\", \"predict_fn\": \"predict\", \"loader_module\": \"mlflow.sklearn\", \"python_version\": \"3.8.15\", \"env\": {\"conda\": \"conda.yaml\", \"virtualenv\": \"python_env.yaml\"}}, \"sklearn\": {\"pickled_model\": \"model.pkl\", \"sklearn_version\": \"1.1.1\", \"serialization_format\": \"cloudpickle\", \"code\": null}}, \"model_uuid\": \"3bd3c618169546189858df68e9610e0a\", \"mlflow_version\": \"2.1.1\"}]", 51 | "mlflow.runName": "sklearn_iris", 52 | "mlflow.source.git.commit": "d38984e536ac5102ccd495fdafe487a418378009", 53 | "mlflow.source.name": "train.py", 54 | "mlflow.source.type": "LOCAL", 55 | "mlflow.user": "andre", 56 | "mlflow_version": "2.1.1" 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/experiments/2/experiment.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_experiment.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": { 16 | "num_total_runs": 2, 17 | "num_ok_runs": 2, 18 | "num_failed_runs": 0, 19 | "failed_runs": [] 20 | }, 21 | "mlflow": { 22 | "experiment": { 23 | "experiment_id": "2", 24 | "name": "sklearn_iris", 25 | "artifact_location": "/opt/mlflow/server/mlruns/2", 26 | "lifecycle_stage": "active", 27 | "tags": {}, 28 | "creation_time": 1672601579372, 29 | "last_update_time": 1672601579372 30 | }, 31 | "runs": [ 32 | "5397ae67ee0c49139bf64834b4d27fab", 33 | "8a6af43e756f433da7a90fd6b4e49c3a" 34 | ] 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/experiments/experiments.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_experiments.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": { 16 | "duration": 0.1, 17 | "experiments": 2, 18 | "total_runs": 2, 19 | "ok_runs": 2, 20 | "failed_runs": 0 21 | }, 22 | "mlflow": { 23 | "experiments": [ 24 | { 25 | "id": "2", 26 | "name": "sklearn_iris", 27 | "ok_runs": 1, 28 | "failed_runs": 0, 29 | "duration": 0.1 30 | }, 31 | { 32 | "id": "1", 33 | "name": "sklearn_wine", 34 | "ok_runs": 1, 35 | "failed_runs": 0, 36 | "duration": 0.1 37 | } 38 | ] 39 | } 40 | } 41 | 
-------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_models.py", 5 | "export_time": 1676530201, 6 | "_export_time": "2023-02-16 06:50:01", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "model_names": "sklearn*", 18 | "stages": "production,staging", 19 | "export_all_runs": true, 20 | "export_latest_versions": false, 21 | "notebook_formats": "", 22 | "use_threads": false, 23 | "output_dir": "out", 24 | "models": { 25 | "model_names": [ 26 | "sklearn_iris", 27 | "sklearn_wine" 28 | ], 29 | "stages": "production,staging", 30 | "export_run": false, 31 | "export_latest_versions": false, 32 | "notebook_formats": "", 33 | "use_threads": false, 34 | "output_dir": "out/models", 35 | "num_total_models": 2, 36 | "num_ok_models": 2, 37 | "num_failed_models": 0, 38 | "duration": 0.1, 39 | "failed_models": [] 40 | }, 41 | "experiments": { 42 | "duration": 0.3, 43 | "experiments": 2, 44 | "total_runs": 5, 45 | "ok_runs": 5, 46 | "failed_runs": 0 47 | } 48 | }, 49 | "mlflow": {} 50 | } 51 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/models/models.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_models.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": 
{ 16 | "stages": "production,staging", 17 | "notebook_formats": "", 18 | "num_total_models": 2, 19 | "num_ok_models": 2, 20 | "num_failed_models": 0, 21 | "duration": 0.1, 22 | "failed_models": [] 23 | }, 24 | "mlflow": { 25 | "models": [ 26 | "sklearn_iris", 27 | "sklearn_wine" 28 | ] 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /samples/oss_mlflow/bulk/models/models/sklearn_iris/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_model.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": { 16 | "num_target_stages": 2, 17 | "num_target_versions": 0, 18 | "num_src_versions": 1, 19 | "num_dst_versions": 1 20 | }, 21 | "mlflow": { 22 | "registered_model": { 23 | "name": "sklearn_iris", 24 | "creation_timestamp": 1672601581580, 25 | "last_updated_timestamp": 1672601581600, 26 | "latest_versions": [ 27 | { 28 | "creation_timestamp": 1672601581587, 29 | "current_stage": "Staging", 30 | "description": "", 31 | "last_updated_timestamp": 1672601581600, 32 | "name": "sklearn_iris", 33 | "run_id": "dee04cd7f0054008a3c8fe0260feb814", 34 | "run_link": "", 35 | "source": "/opt/mlflow/server/mlruns/2/dee04cd7f0054008a3c8fe0260feb814/artifacts/artifacts/sklearn-model", 36 | "status": "READY", 37 | "status_message": "", 38 | "tags": {}, 39 | "user_id": "", 40 | "version": "1", 41 | "_run_artifact_uri": "/opt/mlflow/server/mlruns/2/dee04cd7f0054008a3c8fe0260feb814/artifacts", 42 | "_experiment_name": "sklearn_iris" 43 | } 44 | ] 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- 
/samples/oss_mlflow/bulk/models/models/sklearn_wine/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.1.2", 4 | "script": "export_model.py", 5 | "export_time": 1672601766, 6 | "_export_time": "2023-01-01 19:36:06", 7 | "mlflow_version": "2.1.1", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "user": "andre", 10 | "platform": { 11 | "python_version": "3.8.15", 12 | "system": "Darwin" 13 | } 14 | }, 15 | "info": { 16 | "num_target_stages": 2, 17 | "num_target_versions": 0, 18 | "num_src_versions": 1, 19 | "num_dst_versions": 1 20 | }, 21 | "mlflow": { 22 | "registered_model": { 23 | "name": "sklearn_wine", 24 | "creation_timestamp": 1672601487738, 25 | "last_updated_timestamp": 1672601487764, 26 | "description": "Skearn Wine Quality model", 27 | "latest_versions": [ 28 | { 29 | "creation_timestamp": 1672601487749, 30 | "current_stage": "Production", 31 | "description": "v1 Production - wine", 32 | "last_updated_timestamp": 1672601487775, 33 | "name": "sklearn_wine", 34 | "run_id": "939157dfaaf24244adafcb72acdfc4a6", 35 | "run_link": "", 36 | "source": "/opt/mlflow/server/mlruns/1/939157dfaaf24244adafcb72acdfc4a6/artifacts/sklearn-model", 37 | "status": "READY", 38 | "status_message": "", 39 | "tags": { 40 | "registered_version_info": "v1 Production - wine" 41 | }, 42 | "user_id": "", 43 | "version": "1", 44 | "_run_artifact_uri": "/opt/mlflow/server/mlruns/1/939157dfaaf24244adafcb72acdfc4a6/artifacts", 45 | "_experiment_name": "sklearn_wine" 46 | } 47 | ], 48 | "tags": [ 49 | { 50 | "key": "info", 51 | "value": "Skearn Wine Quality model" 52 | } 53 | ] 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /samples/oss_mlflow/single/experiments/basic/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": 
"export_experiment.py", 5 | "export_time": 1684688346, 6 | "_export_time": "2023-05-21 16:59:06", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "num_total_runs": 1, 18 | "num_ok_runs": 1, 19 | "num_failed_runs": 0, 20 | "failed_runs": [] 21 | }, 22 | "mlflow": { 23 | "experiment": { 24 | "experiment_id": "1", 25 | "name": "sklearn_wine", 26 | "artifact_location": "/opt/mlflow/server/mlruns/1", 27 | "lifecycle_stage": "active", 28 | "tags": { 29 | "experiment_created": "2023-05-20 19:04:09", 30 | "version_mlflow": "2.3.2" 31 | }, 32 | "creation_time": 1684623705797, 33 | "last_update_time": 1684623705797, 34 | "_creation_time": "2023-05-20 23:01:46", 35 | "_last_update_time": "2023-05-20 23:01:46" 36 | }, 37 | "runs": [ 38 | "eb66c160957d4a28b11d3f1b968df9cd" 39 | ] 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /samples/oss_mlflow/single/experiments/src_tags/experiment.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "package_version": "1.2.0", 4 | "script": "export_experiment.py", 5 | "export_time": 1684698448, 6 | "_export_time": "2023-05-21 19:47:28", 7 | "mlflow_version": "2.3.2", 8 | "mlflow_tracking_uri": "http://localhost:5005", 9 | "platform": { 10 | "python_version": "3.8.15", 11 | "system": "Darwin", 12 | "processor": "i386" 13 | }, 14 | "user": "andre" 15 | }, 16 | "info": { 17 | "num_total_runs": 1, 18 | "num_ok_runs": 1, 19 | "num_failed_runs": 0, 20 | "failed_runs": [] 21 | }, 22 | "mlflow": { 23 | "experiment": { 24 | "experiment_id": "1", 25 | "name": "sklearn_wine", 26 | "artifact_location": "/opt/mlflow/server/mlruns/1", 27 | "lifecycle_stage": "active", 28 | "tags": { 29 | "experiment_created": "2023-05-20 19:04:09", 30 | 
"mlflow_exim.field._creation_time": "2023-05-20 23:01:46", 31 | "mlflow_exim.field._last_update_time": "2023-05-20 23:01:46", 32 | "mlflow_exim.field.artifact_location": "/opt/mlflow/server/mlruns/1", 33 | "mlflow_exim.field.creation_time": "1684623705797", 34 | "mlflow_exim.field.experiment_id": "1", 35 | "mlflow_exim.field.last_update_time": "1684623705797", 36 | "mlflow_exim.field.lifecycle_stage": "active", 37 | "mlflow_exim.field.name": "sklearn_wine", 38 | "version_mlflow": "2.3.2" 39 | }, 40 | "creation_time": 1684698373087, 41 | "last_update_time": 1684698373087, 42 | "_creation_time": "2023-05-21 19:46:13", 43 | "_last_update_time": "2023-05-21 19:46:13" 44 | }, 45 | "runs": [ 46 | "4b0ce88fd34e45fc8ca08876127299ce" 47 | ] 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Mlflow Export Import - Tests 2 | 3 | ## Overview 4 | 5 | Following sets of tests: 6 | * [Open source MLflow tests](open_source/README.md). 7 | * Launches a source and destination tracking server and then runs tests to ensure that the exported MLflow objects (runs, experiments and registered models) are correctly imported. 8 | * Numerous tests - 100+. 9 | * [Databricks tests](databricks/README.md). 10 | * Remote tests using the Databricks MLflow REST API. 11 | * WIP. 12 | * [Databricks MLflow notebook tests](databricks_notebooks/README.md). 13 | * Simple smoke tests for Databricks notebooks. Launches Databricks jobs to ensure that [Databricks export-import notebooks](../databricks_notebooks/README.md) execute properly. 14 | 15 | ## Setup 16 | 17 | ``` 18 | pip install -e ..[tests] --upgrade 19 | ``` 20 | 21 | ## Reports and logs 22 | 23 | The test script creates the following files: 24 | * run_tests.log - log of the entire test run. 25 | * run_tests_junit.xml - report for all tests in standard JUnit XML format. 
@dataclass
class MlflowContext:
    """
    Context used by the original tests.open_source tests.

    Holds the source and destination clients plus the two output directories.
    """
    client_src: Any
    client_dst: Any
    output_dir: str
    output_run_dir: str


@dataclass
class TestContext:
    """
    Context used by the newer tests.databricks tests.

    Carries separate MLflow and Databricks clients for the source and
    destination sides, plus the two output directories.
    """
    mlflow_client_src: Any
    mlflow_client_dst: Any
    dbx_client_src: Any
    dbx_client_dst: Any
    output_dir: str
    output_run_dir: str


def to_MlflowContext(test_context):
    """
    Build an MlflowContext from a TestContext so that the existing test
    comparisons written against MlflowContext can be reused.

    Only the MLflow clients and the output directories are carried over;
    the Databricks clients of the TestContext are not part of the result.
    """
    return MlflowContext(
        client_src=test_context.mlflow_client_src,
        client_dst=test_context.mlflow_client_dst,
        output_dir=test_context.output_dir,
        output_run_dir=test_context.output_run_dir,
    )
47 | -------------------------------------------------------------------------------- /tests/databricks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlflow/mlflow-export-import/a572e944ebee82fe9b37c5e7b35541d01c26253b/tests/databricks/__init__.py -------------------------------------------------------------------------------- /tests/databricks/_test_registered_model.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.model.export_model import export_model 2 | from mlflow_export_import.model.import_model import import_model 3 | from mlflow_export_import.common import dump_utils 4 | from mlflow_export_import.common import model_utils 5 | 6 | from tests.core import to_MlflowContext 7 | from tests.compare_utils import compare_models_with_versions 8 | from tests.databricks.init_tests import workspace_src, workspace_dst 9 | from tests.databricks.init_tests import test_context 10 | from tests.databricks import local_utils 11 | 12 | num_versions = 3 13 | 14 | def _init(test_context, is_uc): 15 | src_model_name = local_utils.mk_model_name(workspace_src, is_uc) 16 | 17 | src_vrs = [ local_utils.create_version(test_context.mlflow_client_src, src_model_name) for _ in range(num_versions) ] 18 | src_model = src_vrs[0][1] 19 | src_vrs = [ vr[0] for vr in src_vrs ] 20 | dump_utils.dump_obj(src_model, "SRC MODEL") 21 | for vr in src_vrs: 22 | dump_utils.dump_obj(vr, f"SRC Version {vr.version}") 23 | 24 | export_model( 25 | mlflow_client = test_context.mlflow_client_src, 26 | model_name = src_model.name, 27 | output_dir = test_context.output_dir 28 | ) 29 | dst_model_name = local_utils.mk_model_name(workspace_dst, is_uc) 30 | import_model( 31 | mlflow_client = test_context.mlflow_client_dst, 32 | model_name = dst_model_name, 33 | experiment_name = local_utils.mk_experiment_name(workspace=workspace_dst), 34 | input_dir = 
def compare_runs(run1, run2):
    """
    Assert that two runs hold the same tags, params and metrics.

    Tags whose key starts with "mlflow." are ignored; params and metrics
    must be equal as a whole.

    :param run1: First run. Must expose data.tags, data.params and data.metrics.
    :param run2: Second run, with the same attributes as run1.
    :raises AssertionError: If the non-MLflow tags, the params or the metrics differ.
    """
    # The tags of run1 must be checked against those of run2; passing the same
    # run on both sides would make this check always succeed.
    _compare_non_mlflow_tags(run1.data.tags, run2.data.tags)
    assert run1.data.params == run2.data.params
    assert run1.data.metrics == run2.data.metrics


def _get_non_mlflow_tags(tags):
    """Return a new dict with every tag whose key does not start with "mlflow."."""
    return { k:v for k,v in tags.items() if not k.startswith("mlflow.") }

def _compare_non_mlflow_tags(tags1, tags2):
    """Assert that both tag dicts are equal once "mlflow."-prefixed keys are dropped."""
    tags1 = _get_non_mlflow_tags(tags1)
    tags2 = _get_non_mlflow_tags(tags2)
    assert tags1 == tags2
class Dict2Class:
    """
    Expose the entries of a dict as attributes of an object.

    Every key becomes an attribute; a value that is itself a dict is wrapped
    in another Dict2Class, so nested entries can be reached with chained
    attribute access. The original dict is kept in the ``dct`` attribute.
    """

    def __init__(self, dct):
        self.dct = dct
        for name, value in dct.items():
            # Nested dicts are wrapped recursively; any other value is stored as is.
            attr = Dict2Class(value) if isinstance(value, dict) else value
            setattr(self, name, attr)

    def __str__(self):
        # String form is that of the wrapped dict.
        return str(self.dct)
def _init_run_test(test_context, workspace_src, workspace_dst):
    """
    Create a run in a new source experiment and copy it to a destination experiment.

    The source experiment and run are created with the source MLflow client of
    test_context. The destination experiment name is derived from workspace_dst,
    and the copy is made using the profiles of the two workspaces.

    Returns a tuple of the source run and the value returned by copy_run.copy.
    """
    src_client = test_context.mlflow_client_src
    src_exp = local_utils.create_experiment(src_client)
    src_run = local_utils.create_run(src_client, src_exp.experiment_id)
    dst_experiment_name = local_utils.mk_experiment_name(workspace_dst)
    src_profile = workspace_src.cfg.profile
    dst_profile = workspace_dst.cfg.profile
    dst_run = copy_run.copy(src_run.info.run_id, dst_experiment_name, src_profile, dst_profile)
    return src_run, dst_run
-------------------------------------------------------------------------------- /tests/databricks/uc/run_tests.sh: -------------------------------------------------------------------------------- 1 | 2 | run() { 3 | mlflow_version=`mlflow --version | sed -e "s/mlflow, version //" ` 4 | echo "MLFLOW.VERSION: $mlflow_version" 5 | python -u -m pytest -s test_*.py 6 | echo "MLFLOW.VERSION: $mlflow_version" 7 | } 8 | run 2>&1 | tee run_tests.log 9 | -------------------------------------------------------------------------------- /tests/databricks/uc/test_model_version.py: -------------------------------------------------------------------------------- 1 | from tests.databricks.init_tests import test_context 2 | from tests.databricks import _test_model_version 3 | 4 | def test_import_metadata_false(test_context): 5 | _test_model_version.test_import_metadata_false(test_context, True) 6 | 7 | def test_import_metadata_true(test_context): 8 | _test_model_version.test_import_metadata_true(test_context, True) 9 | -------------------------------------------------------------------------------- /tests/databricks/uc/test_registered_model.py: -------------------------------------------------------------------------------- 1 | from tests.databricks.init_tests import test_context 2 | from tests.databricks import _test_registered_model 3 | 4 | def test_registered_model(test_context): 5 | _test_registered_model.test_registered_model(test_context, True) 6 | -------------------------------------------------------------------------------- /tests/databricks/unity_catalog_client.py: -------------------------------------------------------------------------------- 1 | class UnityCatalogClient: 2 | def __init__(self, dbx_client): 3 | self.client = mk_uc_dbx_client(dbx_client) 4 | 5 | def list_models(self, catalog_name=None, schema_name=None): 6 | if catalog_name and schema_name: 7 | params = { "catalog_name": catalog_name, "schema_name": schema_name } 8 | else: 9 | params = { "max_results": 
5000 } 10 | rsp = self.client.get("unity-catalog/models", params) 11 | if len(rsp) == 0: 12 | return rsp 13 | return rsp["registered_models"] 14 | 15 | def list_model_names(self, catalog_name, schema_name): 16 | return [ m["full_name"] for m in self.list_models(catalog_name, schema_name) ] 17 | 18 | def create_schema(self, catalog_name, schema_name): 19 | params = { "catalog_name": catalog_name, "name": schema_name } 20 | self.client.post("unity-catalog/schemas", params) 21 | 22 | def __repr__(self): 23 | return str(self.client) 24 | 25 | 26 | def mk_uc_dbx_client(client): 27 | from mlflow_export_import.client.http_client import HttpClient 28 | return HttpClient("api/2.1", client.host, client.token) 29 | -------------------------------------------------------------------------------- /tests/databricks_notebooks/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/databricks_notebooks/config.yaml.template: -------------------------------------------------------------------------------- 1 | 2 | # ---- 3 | # Uses the default workspace specified in the DEFAULT profile in ~/.databrickscfg 4 | # 5 | # see run_tests.sh 6 | # export MLFLOW_TRACKING_URI=databricks 7 | 8 | # ---- 9 | 10 | # test workspace directory 11 | ws_base_dir: /Users/first.last@mycompany.com/tmp/test-mlflow-exim 12 | 13 | # test DBFS directory 14 | dbfs_base_export_dir: dbfs:/tmp/first.last@mycompany.com/test-mlflow-exim 15 | 16 | # test registered model name 17 | model_name: andre-test-mlflow-exim-Iris-Train 18 | 19 | # test prefix for imported runs 20 | run_name_prefix: andre-test-mlflow-exim 21 | 22 | # ---- toggle either for existing or new cluster 23 | 24 | # - - toggle option 1 - existing cluster 25 | 26 | # cluster: 0318-151752-abed99 27 | 28 | # - - toggle option 2 - new cluster 29 | 30 | cluster: { 31 | cluster_name: test-mlflow-export-import, 32 | spark_version: 
12.0.x-cpu-ml-scala2.12, 33 | node_type_id: i3.xlarge, 34 | num_workers: 1, 35 | autotermination_minutes: 20, 36 | } 37 | -------------------------------------------------------------------------------- /tests/databricks_notebooks/experiment/Iris_Train.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %md ## Iris Train 3 | # MAGIC * Train and register a model for testing purposes. 4 | 5 | # COMMAND ---------- 6 | 7 | dbutils.widgets.text("Experiment", "") 8 | experiment_name = dbutils.widgets.get("Experiment") 9 | 10 | dbutils.widgets.text("Registered model", "") 11 | registered_model = dbutils.widgets.get("Registered model") 12 | if registered_model == "": registered_model = None 13 | 14 | experiment_name, registered_model 15 | 16 | # COMMAND ---------- 17 | 18 | import mlflow 19 | if experiment_name: 20 | mlflow.set_experiment(experiment_name) 21 | 22 | # COMMAND ---------- 23 | 24 | from sklearn import svm, datasets 25 | print("mlflow.version:", mlflow.__version__) 26 | 27 | with mlflow.start_run() as run: 28 | print("run_id:",run.info.run_id) 29 | print("experiment_id:",run.info.experiment_id) 30 | iris = datasets.load_iris() 31 | mlflow.log_metric("degree", 5) 32 | model = svm.SVC(C=2.0, degree=5, kernel="rbf") 33 | model.fit(iris.data, iris.target) 34 | mlflow.sklearn.log_model(model, "model", registered_model_name=registered_model) 35 | -------------------------------------------------------------------------------- /tests/databricks_notebooks/init_tests.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import pytest 3 | import tempfile 4 | 5 | from mlflow_export_import.client import databricks_utils 6 | 7 | from tests import utils_test 8 | from tests.databricks.databricks_tester import DatabricksTester 9 | 10 | cfg = utils_test.read_config_file() 11 | 12 | 13 | _tester = DatabricksTester( 14 | 
ws_base_dir = cfg["ws_base_dir"], 15 | dbfs_base_export_dir = cfg["dbfs_base_export_dir"], 16 | local_artifacts_compare_dir = cfg.get("local_artifacts_compare_dir", None), 17 | cluster_spec = cfg["cluster"], 18 | model_name = cfg["model_name"], 19 | run_name_prefix = cfg["run_name_prefix"] 20 | ) 21 | 22 | 23 | from databricks_cli.dbfs.api import DbfsApi 24 | _dbfs_api = DbfsApi(databricks_utils.get_api_client()) 25 | 26 | 27 | TestContext = namedtuple( 28 | "TestContext", 29 | [ "tester", "dbfs_api" ] 30 | ) 31 | 32 | 33 | @pytest.fixture(scope="session") 34 | def test_context(): 35 | if _tester.local_artifacts_compare_dir: # NOTE: for debugging 36 | utils_test.create_output_dir(_tester.local_artifacts_compare_dir) 37 | else: 38 | with tempfile.TemporaryDirectory() as tmpdir: 39 | _tester.local_artifacts_compare_dir = tmpdir 40 | yield TestContext(_tester, _dbfs_api) 41 | _tester.teardown() 42 | -------------------------------------------------------------------------------- /tests/databricks_notebooks/run_tests.sh: -------------------------------------------------------------------------------- 1 | 2 | export MLFLOW_TRACKING_URI=databricks 3 | 4 | if [ $# -gt 0 ] ; then 5 | DATABRICKS_PROFILE=$1 6 | export MLFLOW_TRACKING_URI=databricks://$DATABRICKS_PROFILE 7 | fi 8 | 9 | JUNIT_FILE=run_tests_junit.xml 10 | HTML_FILE=run_tests_report.html 11 | LOG_FILE=run_tests.log 12 | 13 | run() { 14 | echo "MLFLOW_TRACKING_URI: $MLFLOW_TRACKING_URI" 15 | time -p pytest -s \ 16 | --junitxml=$JUNIT_FILE \ 17 | --html=$HTML_FILE \ 18 | --self-contained-html \ 19 | --override-ini log_cli=true \ 20 | `ls test_*.py` 21 | echo 22 | echo "******************************************************" 23 | echo 24 | echo "MLFLOW_TRACKING_URI: $MLFLOW_TRACKING_URI" 25 | echo "LOG_FILE : $LOG_FILE" 26 | echo "JUNIT REPORT: $JUNIT_FILE" 27 | echo "HTML REPORT : $HTML_FILE" 28 | echo 29 | } 30 | 31 | run | 2>&1 tee $LOG_FILE 32 | 
-------------------------------------------------------------------------------- /tests/databricks_notebooks/samples/run_tests_junit.xml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/open_source/README.md: -------------------------------------------------------------------------------- 1 | # mlflow-export-import - Open Source Tests 2 | 3 | ## Overview 4 | 5 | Open source MLflow Export Import tests use two MLflow tracking servers: 6 | * Source tracking server for exporting MLflow objects. 7 | * Destination tracking server for importing the exported MLflow objects. 8 | 9 | ## Setup 10 | 11 | See the [Setup](../../README.md#Setup) section. 12 | 13 | ## Test Configuration 14 | 15 | Test environment variables. 16 | 17 | |Name | Required | Description| 18 | |-----|----------|---------| 19 | | MLFLOW_TRACKING_URI_SRC | yes | URI of source tracking server | 20 | | MLFLOW_TRACKING_URI_DST | yes | URI of destination tracking server | 21 | | MLFLOW_EXPORT_IMPORT_OUTPUT_DIR | no | If set, will use this as the export output directory instead of `tempfile.TemporaryDirectory()` | 22 | 23 | 24 | 25 | ## Run tests 26 | 27 | Use the [run_tests.sh](run_tests.sh) script to run the tests, specifying the source and destination tracking server port numbers. 28 | Output will be in the `run_tests.log` file. 29 | 30 | The script does the following: 31 | * Launches a source MLflow tracking server and destination MLflow tracking server in the background. 32 | * Runs tests against these servers with pytest. 33 | * Tears down the two MLflow tracking servers. 
34 | 35 | **Example** 36 | ``` 37 | run_tests.sh 5010 5011 38 | ``` 39 | ``` 40 | ======================== 43 passed in 295.36s (0:04:55) ======================== 41 | 42 | LOG_FILE : run_tests.log 43 | JUNIT REPORT: run_tests_junit.xml 44 | HTML REPORT : run_tests_report.html 45 | ``` 46 | 47 | -------------------------------------------------------------------------------- /tests/open_source/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/open_source/init_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | import tempfile 4 | import mlflow 5 | from tests import utils_test 6 | 7 | from tests.core import MlflowContext 8 | from mlflow_export_import.common import utils 9 | _logger = utils.getLogger(__name__) 10 | 11 | _logger.info(f"mlflow.tracking_uri {mlflow.tracking.get_tracking_uri()}") 12 | 13 | uri_src = os.environ.get("MLFLOW_TRACKING_URI_SRC",None) 14 | _logger.info(f"MLFLOW_TRACKING_URI_SRC: {uri_src}") 15 | assert uri_src,"Environment variable MLFLOW_TRACKING_URI_SRC must be set" 16 | client_src = mlflow.tracking.MlflowClient(uri_src) 17 | _logger.info(f"client_src: {client_src}") 18 | 19 | uri_dst = os.environ.get("MLFLOW_TRACKING_URI_DST",None) 20 | _logger.info(f"MLFLOW_TRACKING_URI_DST: {uri_dst}") 21 | assert uri_dst 22 | client_dst = mlflow.tracking.MlflowClient(uri_dst) 23 | _logger.info(f"client_dst: {client_dst}") 24 | 25 | 26 | @pytest.fixture(scope="session") 27 | def mlflow_context(): 28 | with tempfile.TemporaryDirectory() as tmpdir: 29 | assert mlflow.get_tracking_uri() is not None 30 | output_dir = os.environ.get("MLFLOW_EXPORT_IMPORT_OUTPUT_DIR",None) # for debugging 31 | if output_dir: 32 | utils_test.create_output_dir(output_dir) 33 | else: 34 | output_dir = tmpdir 35 | yield MlflowContext( 36 | client_src, client_dst, 
output_dir, os.path.join(output_dir,"run") 37 | ) 38 | -------------------------------------------------------------------------------- /tests/open_source/kill_server.sh: -------------------------------------------------------------------------------- 1 | 2 | # ============================================= 3 | # 4 | # Kill an MLflow tracking server by port number 5 | # 6 | # ============================================= 7 | 8 | if [ $# -lt 1 ] ; then 9 | echo "$0: Expecting MLflow Tracking Server port" 10 | exit 1 11 | fi 12 | port=$1 13 | 14 | pids=`lsof -n -i :$port | awk ' { print ( $2 ) } ' | grep -v PID` 15 | echo "PIDs: $pids" 16 | echo "Killing MLflow Tracking Server running on port $port" 17 | for pid in $pids ; do 18 | echo "Killing PID $pid" 19 | kill $pid 20 | done 21 | -------------------------------------------------------------------------------- /tests/open_source/run_tests.sh: -------------------------------------------------------------------------------- 1 | 2 | # =========================================================== 3 | # 4 | # Script to run tests against a source and destination MLflow tracking server. 5 | # Expects the source and destination server port numbers as arguments. 6 | # 7 | # Does the following: 8 | # 1. Launches a source and destination tracking server in the background. 9 | # 2. Runs tests against the tracking servers with pytest. 10 | # 3. Kills the tracking servers. 
11 | # 12 | # Example: 13 | # 14 | # run_tests.sh 5010 5011 15 | # 16 | # =========================================================== 17 | 18 | if [ $# -lt 2 ] ; then 19 | echo "ERROR: Expecting source and destination MLflow Tracking Server ports" 20 | exit 1 21 | fi 22 | PORT_SRC=$1 23 | PORT_DST=$2 24 | 25 | export MLFLOW_TRACKING_URI=http://localhost:$PORT_SRC 26 | export MLFLOW_TRACKING_URI_SRC=http://localhost:${PORT_SRC} 27 | export MLFLOW_TRACKING_URI_DST=http://localhost:${PORT_DST} 28 | 29 | JUNIT_FILE=run_tests_junit.xml 30 | HTML_FILE=run_tests_report.html 31 | LOG_FILE=run_tests.log 32 | 33 | message() { 34 | echo 35 | echo "******************************************************" 36 | echo "*" 37 | echo "* $*" 38 | echo "*" 39 | echo "******************************************************" 40 | echo 41 | } 42 | 43 | run_tests() { 44 | message "STAGE 2: RUN TESTS" 45 | time -p pytest -s \ 46 | --junitxml=$JUNIT_FILE \ 47 | --html=$HTML_FILE \ 48 | --self-contained-html \ 49 | test_*.py 50 | } 51 | 52 | launch_server() { 53 | port=$1 54 | message "STAGE 1: LAUNCH TRACKING SERVER on port $port" 55 | rm mlflow_${port}.db 56 | rm -rf mlruns_${port} 57 | mlflow server \ 58 | --host localhost --port ${port} \ 59 | --backend-store-uri sqlite:///mlflow_${port}.db \ 60 | --default-artifact-root $PWD/mlruns_${port} 61 | } 62 | 63 | kill_server() { 64 | port=$1 65 | message "STAGE 3: KILL TRACKING SERVER on port ${port}" 66 | echo "Killing MLflow Tracking Server pids:" 67 | pids=`lsof -n -i :${port} | awk '{ print ( $2 ) }' | grep -v PID` 68 | for pid in $pids ; do 69 | echo " Killing PID=$pid" 70 | kill $pid 71 | done 72 | rm -rf mlruns_${port} 73 | rm mlflow_${port}.db 74 | } 75 | 76 | run() { 77 | echo "$0: MLFLOW_TRACKING_URI: $MLFLOW_TRACKING_URI" 78 | launch_server $PORT_SRC & 79 | launch_server $PORT_DST & 80 | sleep 5 # wait for the tracking servers to come up 81 | run_tests 82 | kill_server $PORT_SRC 83 | kill_server $PORT_DST 84 | } 85 | 86 | run_all() { 
87 | mlflow_version=`mlflow --version | sed -e "s/mlflow, version //" ` 88 | echo "MLFLOW.VERSION: $mlflow_version" 89 | time -p run 90 | echo 91 | echo "******************************************************" 92 | echo 93 | echo "MLFLOW.VERSION: $mlflow_version" 94 | echo "LOG_FILE: $LOG_FILE" 95 | echo "JUNIT REPORT: $JUNIT_FILE" 96 | echo "HTML REPORT : $HTML_FILE" 97 | echo 98 | } 99 | 100 | time run_all 2>&1 | tee run_tests.log 101 | 102 | exit 0 103 | -------------------------------------------------------------------------------- /tests/open_source/samples/run_tests_junit.xml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/open_source/test_bulk_all.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.bulk.export_all import export_all 2 | from mlflow_export_import.bulk.import_models import import_models 3 | from tests.open_source.test_bulk_experiments import compare_experiments 4 | from tests.open_source.test_bulk_models import create_model, compare_models_with_versions, get_num_deleted_runs 5 | 6 | from tests.open_source.init_tests import mlflow_context 7 | from tests.compare_utils import compare_runs 8 | from tests.open_source.oss_utils_test import delete_experiments_and_models 9 | 10 | # == Helper functions 11 | 12 | _notebook_formats = "SOURCE,DBC" 13 | _num_models = 2 14 | _num_runs = 2 15 | 16 | 17 | def _run_test(mlflow_context, compare_func=compare_runs, use_threads=False): 18 | delete_experiments_and_models(mlflow_context) 19 | for _ in range( _num_models): 20 | create_model(mlflow_context.client_src) 21 | export_all( 22 | mlflow_client = mlflow_context.client_src, 23 | output_dir = mlflow_context.output_dir, 24 | notebook_formats = _notebook_formats, 25 | use_threads = use_threads 26 | ) 27 | import_models( 28 | mlflow_client = mlflow_context.client_dst, 29 | 
input_dir = mlflow_context.output_dir, 30 | delete_model = True 31 | ) 32 | compare_experiments(mlflow_context, compare_func) 33 | compare_models_with_versions(mlflow_context, compare_func) 34 | 35 | 36 | # == Test basic 37 | 38 | def test_basic(mlflow_context): 39 | _run_test(mlflow_context) 40 | 41 | 42 | def test_basic_threads(mlflow_context): 43 | _run_test(mlflow_context, use_threads=True) 44 | 45 | 46 | # == Test deleted runs 47 | 48 | def test_model_deleted_runs(mlflow_context): 49 | model_name = create_model(mlflow_context.client_src) 50 | versions = mlflow_context.client_src.search_model_versions(filter_string=f"name='{model_name}'") 51 | assert len(versions) == _num_runs 52 | 53 | mlflow_context.client_src.delete_run(versions[0].run_id) 54 | num_deleted = get_num_deleted_runs(mlflow_context.client_src, versions) 55 | assert num_deleted == _num_runs - 1 56 | 57 | export_all( 58 | mlflow_client = mlflow_context.client_src, 59 | output_dir = mlflow_context.output_dir, 60 | export_deleted_runs = True 61 | ) 62 | import_models( 63 | mlflow_client = mlflow_context.client_dst, 64 | input_dir = mlflow_context.output_dir, 65 | delete_model = True 66 | ) 67 | versions = mlflow_context.client_dst.search_model_versions(filter_string=f"name='{model_name}'") 68 | assert len(versions) == _num_runs 69 | 70 | num_deleted2 = get_num_deleted_runs(mlflow_context.client_dst, versions) 71 | assert num_deleted == num_deleted2 72 | -------------------------------------------------------------------------------- /tests/open_source/test_bulk_experiments_merge_utils.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.bulk import experiments_merge_utils 2 | import pytest 3 | 4 | 5 | dct1 = { 6 | "system": { 7 | "package_version": "1.2.0", 8 | "script": "export_experiments.py", 9 | "export_time": 1683865840, 10 | "_export_time": "2023-05-12 04:30:40", 11 | "mlflow_version": "2.3.0", 12 | "mlflow_tracking_uri": 
"http://127.0.0.1:5020", 13 | "platform": { 14 | "python_version": "3.8.15", 15 | "system": "Darwin", 16 | "processor": "i386" 17 | }, 18 | "user": "k2" 19 | }, 20 | "info": { 21 | "experiment_names": [ 22 | "sklearn_wine" 23 | ], 24 | "duration": 0.1, 25 | "experiments": 1, 26 | "total_runs": 1, 27 | "ok_runs": 1, 28 | "failed_runs": 0 29 | }, 30 | "mlflow": { 31 | "experiments": [ 32 | { 33 | "id": "1", 34 | "name": "sklearn_wine", 35 | "ok_runs": 3, 36 | "failed_runs": 1, 37 | "duration": 0.1 38 | } 39 | ] 40 | } 41 | } 42 | 43 | 44 | dct2 = { 45 | "system": { 46 | "package_version": "1.2.0", 47 | "script": "export_experiments.py", 48 | "export_time": 1683865840, 49 | "_export_time": "2023-05-12 04:30:40", 50 | "mlflow_version": "2.3.0", 51 | "mlflow_tracking_uri": "http://127.0.0.1:5020", 52 | "platform": { 53 | "python_version": "3.8.15", 54 | "system": "Darwin", 55 | "processor": "i386" 56 | }, 57 | "user": "k2" 58 | }, 59 | "info": { 60 | "experiment_names": [ 61 | "Default" 62 | ], 63 | "duration": 0.2, 64 | "experiments": 1, 65 | "total_runs": 0, 66 | "ok_runs": 0, 67 | "failed_runs": 0 68 | }, 69 | "mlflow": { 70 | "experiments": [ 71 | { 72 | "id": "0", 73 | "name": "Default", 74 | "ok_runs": 0, 75 | "failed_runs": 0, 76 | "duration": 0.0 77 | } 78 | ] 79 | } 80 | } 81 | 82 | 83 | def test_merge_info(): 84 | info1 = dct1["info"] 85 | info2 = dct2["info"] 86 | info = experiments_merge_utils.merge_info(info1, info2) 87 | assert info["duration"] == pytest.approx(info1["duration"] + info2["duration"]) 88 | assert info["total_runs"] == info1["total_runs"] + info2["total_runs"] 89 | assert info["ok_runs"] == info1["ok_runs"] + info2["ok_runs"] 90 | assert info["failed_runs"] == info1["failed_runs"] + info2["failed_runs"] 91 | assert info["experiments"] == info1["experiments"] + info2["experiments"] 92 | 93 | 94 | def test_merge_mlflow(): 95 | mlflow1 = dct1["mlflow"] 96 | mlflow2 = dct2["mlflow"] 97 | mlflow = experiments_merge_utils.merge_mlflow(mlflow1, 
mlflow2) 98 | 99 | assert len(mlflow["experiments"]) == len(mlflow1["experiments"]) + len(mlflow2["experiments"]) 100 | assert mlflow["experiments"] == mlflow1["experiments"] + mlflow2["experiments"] 101 | -------------------------------------------------------------------------------- /tests/open_source/test_copy_run.py: -------------------------------------------------------------------------------- 1 | from tests.open_source.oss_utils_test import create_simple_run 2 | from tests.compare_utils import compare_runs 3 | from tests.open_source.init_tests import mlflow_context 4 | 5 | from mlflow_export_import.copy import copy_run 6 | from tests.open_source.oss_utils_test import mk_test_object_name_default 7 | 8 | 9 | # == Setup 10 | 11 | def _init_run_test(mlflow_context, dst_mlflow_uri=None): 12 | _, src_run = create_simple_run(mlflow_context.client_src, model_artifact = "model") 13 | dst_exp_name = mk_test_object_name_default() 14 | dst_run = copy_run.copy( 15 | src_run.info.run_id, 16 | dst_exp_name, 17 | mlflow_context.client_src.tracking_uri, 18 | dst_mlflow_uri 19 | ) 20 | return src_run, dst_run 21 | 22 | 23 | # == Regular tests 24 | 25 | def test_run_basic_without_dst_mlflow_uri(mlflow_context): 26 | run1, run2 = _init_run_test(mlflow_context) 27 | compare_runs(mlflow_context, run1, run2) 28 | 29 | 30 | def test_run_basic_with_dst_mlflow_uri(mlflow_context): 31 | run1, run2 = _init_run_test(mlflow_context, mlflow_context.client_dst.tracking_uri) 32 | compare_runs(mlflow_context, run1, run2) 33 | 34 | 35 | # == Test for source and exported model prediction equivalence 36 | 37 | from tests.sklearn_utils import X_test 38 | import cloudpickle as pickle 39 | import numpy as np 40 | 41 | 42 | def test_model_predictions(mlflow_context): 43 | _, run1 = create_simple_run(mlflow_context.client_src) 44 | run2 = copy_run._copy(run1.info.run_id, mk_test_object_name_default(), mlflow_context.client_src, mlflow_context.client_dst) 45 | 46 | # Since you cannot load model 
flavors (such as mlflow.sklearn.load_model()) with the MlflowClient, 47 | # we have to manually load the model pickle file 48 | 49 | path1 = mlflow_context.client_src.download_artifacts(run1.info.run_id, "model/model.pkl") 50 | with open(path1,"rb") as f: 51 | model1 = pickle.load(f) 52 | path2 = mlflow_context.client_src.download_artifacts(run2.info.run_id, "model/model.pkl") 53 | with open(path2, "rb") as f: 54 | model2 = pickle.load(f) 55 | 56 | predictions1 = model1.predict(X_test) 57 | predictions2 = model2.predict(X_test) 58 | assert np.array_equal(predictions1, predictions2) 59 | -------------------------------------------------------------------------------- /tests/open_source/test_find_run_model_names.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test find_artifacts.find_run_model_names() 3 | """ 4 | 5 | import mlflow 6 | from mlflow_export_import.common.find_artifacts import find_run_model_names 7 | from tests.open_source.oss_utils_test import create_experiment 8 | from tests.sklearn_utils import create_sklearn_model 9 | 10 | client = mlflow.MlflowClient() 11 | 12 | 13 | def test_no_model(): 14 | create_experiment(client) 15 | with mlflow.start_run() as run: 16 | mlflow.set_tag("name","foo") 17 | model_paths = find_run_model_names(client, run.info.run_id) 18 | assert len(model_paths) == 0 19 | 20 | 21 | def test_one_model_at_artifact_root(): 22 | """ Test when model artifact root is '' """ 23 | create_experiment(client) 24 | model = create_sklearn_model() 25 | with mlflow.start_run() as run: 26 | mlflow.sklearn.log_model(model, "") 27 | model_paths = find_run_model_names(client, run.info.run_id) 28 | assert len(model_paths) == 1 29 | assert model_paths[0] == "" 30 | 31 | 32 | def test_one_model(): 33 | create_experiment(client) 34 | model = create_sklearn_model() 35 | with mlflow.start_run() as run: 36 | mlflow.sklearn.log_model(model, "model") 37 | model_paths = find_run_model_names(client, 
run.info.run_id) 38 | assert len(model_paths) == 1 39 | assert model_paths[0] == "model" 40 | 41 | 42 | def test_two_models(): 43 | create_experiment(client) 44 | model = create_sklearn_model() 45 | with mlflow.start_run() as run: 46 | mlflow.sklearn.log_model(model, "model") 47 | mlflow.sklearn.log_model(model, "model-onnx") 48 | model_paths = find_run_model_names(client, run.info.run_id) 49 | assert len(model_paths) == 2 50 | assert model_paths[0] == "model" 51 | assert model_paths[1] == "model-onnx" 52 | 53 | 54 | def test_two_models_nested(): 55 | create_experiment(client) 56 | model = create_sklearn_model() 57 | with mlflow.start_run() as run: 58 | mlflow.sklearn.log_model(model, "model") 59 | mlflow.sklearn.log_model(model, "other_models/model-onnx") 60 | model_paths = find_run_model_names(client, run.info.run_id) 61 | assert len(model_paths) == 2 62 | assert model_paths[0] == "model" 63 | assert model_paths[1] == "other_models/model-onnx" 64 | -------------------------------------------------------------------------------- /tests/open_source/test_renames.py: -------------------------------------------------------------------------------- 1 | 2 | # Test experiment and model rename core logic 3 | 4 | from mlflow_export_import.bulk import rename_utils 5 | 6 | def test_equal(): 7 | renames = { "/User/foo": "/User/bar" } 8 | new_name = rename_utils.rename("/User/foo" , renames) 9 | assert new_name == "/User/bar" 10 | 11 | def test_prefix(): 12 | renames = { "/User/foo": "/User/bar" } 13 | new_name = rename_utils.rename("/User/foo/home" , renames) 14 | assert new_name == "/User/bar/home" 15 | 16 | def test_no_match(): 17 | renames = { "/User/foo": "/User/bar" } 18 | new_name = rename_utils.rename("foo" , renames) 19 | assert new_name == "foo" 20 | 21 | def test_blank_key(): 22 | renames = { "": "/User/bar" } 23 | new_name = rename_utils.rename("foo" , renames) 24 | assert new_name == "foo" 25 | 26 | def test_blank_key_2(): 27 | renames = { "/User/foo": "/User/bar" 
} 28 | new_name = rename_utils.rename("" , renames) 29 | assert new_name == "" 30 | -------------------------------------------------------------------------------- /tests/open_source/test_ws_permissions.py: -------------------------------------------------------------------------------- 1 | from mlflow_export_import.common.ws_permissions_utils import _map_acl_element, map_acl 2 | 3 | 4 | # == Setup data 5 | 6 | group_name_element = { 7 | "group_name": "admins", 8 | "all_permissions": [ 9 | { 10 | "permission_level": "CAN_MANAGE", 11 | "inherited": True, 12 | "inherited_from_object": [ 13 | "/directories/" 14 | ] 15 | } 16 | ] 17 | } 18 | 19 | user_name_element = { 20 | "user_name": "k2@karakoram.com", 21 | "all_permissions": [ 22 | { 23 | "permission_level": "CAN_EDIT", 24 | "inherited": True, 25 | "inherited_from_object": [ 26 | "/directories/" 27 | ] 28 | } 29 | ] 30 | } 31 | 32 | user_name_element_2 = { 33 | "user_name": "k2@karakoram.com", 34 | "all_permissions": [ 35 | { 36 | "permission_level": "CAN_MANAGE", 37 | "inherited": False 38 | }, 39 | { 40 | "permission_level": "CAN_MANAGE", 41 | "inherited": True, 42 | "inherited_from_object": [ 43 | "/directories/767933989557963" 44 | ] 45 | } 46 | ] 47 | } 48 | 49 | mixed_acl = [ group_name_element, user_name_element ] 50 | mixed_acl_2 = [ group_name_element, user_name_element_2 ] 51 | 52 | 53 | # == Tests 54 | 55 | def test_acl_element_group_name(): 56 | acl2 = _map_acl_element(group_name_element) 57 | assert acl2 == [ 58 | { 59 | "group_name": "admins", 60 | "permission_level": "CAN_MANAGE" 61 | } 62 | ] 63 | 64 | def test_acl_element_user_name(): 65 | acl2 = _map_acl_element(user_name_element) 66 | assert acl2 == [ 67 | { 68 | "user_name": "k2@karakoram.com", 69 | "permission_level": "CAN_EDIT" 70 | } 71 | ] 72 | 73 | 74 | def test_acl_element_user_name_2(): 75 | acl2 = _map_acl_element(user_name_element_2) 76 | assert acl2 == [ 77 | { 78 | "user_name": "k2@karakoram.com", 79 | "permission_level": 
"CAN_MANAGE" 80 | }, 81 | { 82 | "user_name": "k2@karakoram.com", 83 | "permission_level": "CAN_MANAGE" 84 | } 85 | ] 86 | 87 | 88 | def test_acl_mixed(): 89 | assert map_acl(mixed_acl) == [ 90 | { 91 | "group_name": "admins", 92 | "permission_level": "CAN_MANAGE" 93 | }, 94 | { 95 | "user_name": "k2@karakoram.com", 96 | "permission_level": "CAN_EDIT" 97 | } 98 | ] 99 | 100 | 101 | def test_acl_mixed_2(): 102 | assert map_acl(mixed_acl_2) == [ 103 | { 104 | "group_name": "admins", 105 | "permission_level": "CAN_MANAGE" 106 | }, 107 | { 108 | "user_name": "k2@karakoram.com", 109 | "permission_level": "CAN_MANAGE" 110 | }, 111 | { 112 | "user_name": "k2@karakoram.com", 113 | "permission_level": "CAN_MANAGE" 114 | } 115 | ] 116 | 117 | 118 | def test_empty(): 119 | assert map_acl({}) == [] 120 | -------------------------------------------------------------------------------- /tests/sklearn_utils.py: -------------------------------------------------------------------------------- 1 | from sklearn.tree import DecisionTreeClassifier 2 | from sklearn import datasets 3 | from sklearn.model_selection import train_test_split 4 | 5 | _dataset = datasets.load_iris() 6 | X_train, X_test, _y_train, _y_test = train_test_split(_dataset.data, _dataset.target, test_size=0.3) 7 | feature_names = _dataset.feature_names 8 | 9 | 10 | def create_sklearn_model(max_depth=5): 11 | model = DecisionTreeClassifier(max_depth=max_depth) 12 | model.fit(X_train, _y_train) 13 | return model 14 | -------------------------------------------------------------------------------- /tests/utils_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import yaml 4 | import shortuuid 5 | import pandas as pd 6 | import mlflow 7 | from mlflow_export_import.common.mlflow_utils import MlflowTrackingUriTweak 8 | from . 
TEST_OBJECT_PREFIX = "test_exim"


def mk_test_object_name_default():
    """Return a unique test object name: ``TEST_OBJECT_PREFIX``, "_", and a short UUID."""
    return f"{TEST_OBJECT_PREFIX}_{mk_uuid()}"


def mk_uuid():
    """Return a new identifier string from ``shortuuid.uuid()``."""
    return shortuuid.uuid()


def create_output_dir(output_dir):
    """Create ``output_dir`` as an empty directory, removing any existing tree first."""
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)


def compare_dirs(d1, d2):
    """Return True if two directory trees match, ignoring differing ``MLmodel`` files.

    A differing ``MLmodel`` file is tolerated at any level because its run_id
    is rewritten to the imported run_id. Every other difference -- a file
    present on only one side, or any other file whose content differs --
    makes the trees unequal, and all common subdirectories are always checked.
    """
    from filecmp import dircmp

    def _same(dcmp):
        # Entries that exist on one side only are always a mismatch, even
        # when MLmodel is the sole differing common file.
        if dcmp.left_only or dcmp.right_only:
            return False
        # run_id differs because we changed it to the imported run_id
        if any(name != "MLmodel" for name in dcmp.diff_files):
            return False
        # Keep descending: a tolerated MLmodel difference at this level must
        # not hide real differences further down the tree.
        return all(_same(sub_dcmp) for sub_dcmp in dcmp.subdirs.values())

    return _same(dircmp(d1, d2))


def create_run_artifact_dirs(output_dir):
    """Create fresh artifact directories for "run1" and "run2"; return both paths."""
    dir1 = create_run_artifact_dir(output_dir, "run1")
    dir2 = create_run_artifact_dir(output_dir, "run2")
    return dir1, dir2


def create_run_artifact_dir(output_dir, run_name):
    """Create an empty ``<output_dir>/artifacts/<run_name>`` directory and return its path."""
    artifact_dir = os.path.join(output_dir, "artifacts", run_name)
    create_output_dir(artifact_dir)
    return artifact_dir


def create_iris_dataset():
    """Wrap the iris training features in an MLflow dataset built from a pandas DataFrame.

    The dataset source is the literal string "in_memory".
    """
    data_path = "in_memory"
    df = pd.DataFrame(data=sklearn_utils.X_train, columns=sklearn_utils.feature_names)
    return mlflow.data.from_pandas(df, source=data_path)


def read_config_file(path="config.yaml"):
    """Load a YAML config file, print its top-level entries, and return it as a dict.

    An empty file yields an empty dict instead of failing on ``None``.
    """
    with open(path, encoding="utf-8") as f:
        dct = yaml.safe_load(f)
    if dct is None:
        # safe_load returns None for an empty document.
        dct = {}
    print(f"Config for '{path}':")
    for k, v in dct.items():
        print(f"  {k}: {v}")
    return dct


def create_nested_runs(client, experiment_id, max_depth=1, max_width=1, level=0, indent=""):
    """Recursively create a tree of MLflow runs and return the run created at this level.

    Each run logs one param, one metric, two tags and a sklearn model, then
    creates ``max_width`` child runs one level deeper while it is still active.

    :param client: MLflow client used for the tracking-URI context and the final ``get_run``.
    :param experiment_id: Experiment in which the runs are created.
    :param max_depth: Number of levels to create; recursion stops when ``level`` reaches it.
    :param max_width: Number of child runs created under each run.
    :param level: Current depth (0 for the root); also used as the run-name suffix.
    :param indent: Passed down with one extra space per level; not otherwise used here.
    :return: The run fetched via ``client.get_run``, or None when ``level >= max_depth``.
    """
    if level >= max_depth:
        return None
    run_name = f"run_{level}"
    nested = level > 0
    # NOTE(review): MlflowTrackingUriTweak presumably points the fluent mlflow
    # API at the client's tracking server for the duration of the block -- confirm.
    with MlflowTrackingUriTweak(client):
        with mlflow.start_run(experiment_id=experiment_id, run_name=run_name, nested=nested) as run:
            mlflow.log_param("alpha", "0.123")
            mlflow.log_metric("m", 0.123)
            mlflow.set_tag("run_name", run_name)
            mlflow.set_tag("ori_run_id", run.info.run_id)
            model = sklearn_utils.create_sklearn_model()
            mlflow.sklearn.log_model(model, "model")
            # Children are created while this run is still active so that
            # nested=True attaches them to it.
            for _ in range(max_width):
                create_nested_runs(client, experiment_id, max_depth, max_width, level + 1, indent + " ")
    # Fetched after the run context has exited.
    return client.get_run(run.info.run_id)