├── .github └── workflows │ ├── codeql.yml │ ├── full-stack-test.yml │ ├── metaflow.s3_tests.yml │ ├── publish.yml │ ├── test-card-build.yml │ ├── test-stubs.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── ADOPTERS.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── R ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R │ ├── decorators-aws.R │ ├── decorators-environment.R │ ├── decorators-errors.R │ ├── decorators.R │ ├── flags.R │ ├── flow.R │ ├── flow_client.R │ ├── imports.R │ ├── install.R │ ├── metadata.R │ ├── metaflow_client.R │ ├── namespace.R │ ├── package.R │ ├── parameter.R │ ├── run.R │ ├── run_client.R │ ├── step.R │ ├── step_client.R │ ├── task_client.R │ ├── utils.R │ └── zzz.R ├── README.md ├── check_as_cran.sh ├── doc │ ├── metaflow.R │ ├── metaflow.Rmd │ └── metaflow.html ├── inst │ ├── run.R │ ├── run_batch.R │ └── tutorials │ │ ├── 00-helloworld │ │ ├── README.md │ │ └── helloworld.R │ │ ├── 01-playlist │ │ ├── README.md │ │ ├── movies.csv │ │ ├── playlist.R │ │ └── playlist.Rmd │ │ ├── 02-statistics │ │ ├── README.md │ │ ├── movies.csv │ │ ├── stats.R │ │ └── stats.Rmd │ │ ├── 03-playlist-redux │ │ ├── README.md │ │ ├── movies.csv │ │ └── playlist.R │ │ ├── 04-helloaws │ │ ├── README.md │ │ ├── helloaws.R │ │ └── helloaws.Rmd │ │ ├── 05-statistics-redux │ │ └── README.md │ │ ├── 06-worldview │ │ ├── README.md │ │ └── worldview.Rmd │ │ ├── 07-autopilot │ │ ├── README.md │ │ └── autopilot.Rmd │ │ └── README.md ├── man │ ├── add_decorators.Rd │ ├── batch.Rd │ ├── cash-.metaflow.flowspec.FlowSpec.Rd │ ├── cash-set-.metaflow.flowspec.FlowSpec.Rd │ ├── catch.Rd │ ├── container_image.Rd │ ├── current.Rd │ ├── decorator.Rd │ ├── decorator_arguments.Rd │ ├── environment_variables.Rd │ ├── flow_client.Rd │ ├── fmt_decorator.Rd │ ├── gather_inputs.Rd │ ├── get_metadata.Rd │ ├── get_namespace.Rd │ ├── install_metaflow.Rd │ ├── is_valid_python_identifier.Rd │ ├── list_flows.Rd │ ├── merge_artifacts.Rd │ ├── metaflow-package.Rd │ ├── 
metaflow.Rd │ ├── metaflow_location.Rd │ ├── metaflow_object.Rd │ ├── mf_client.Rd │ ├── mf_deserialize.Rd │ ├── mf_serialize.Rd │ ├── new_flow.Rd │ ├── new_run.Rd │ ├── new_step.Rd │ ├── new_task.Rd │ ├── parameter.Rd │ ├── pipe.Rd │ ├── pull_tutorials.Rd │ ├── py_version.Rd │ ├── r_version.Rd │ ├── remove_metaflow_env.Rd │ ├── reset_default_metadata.Rd │ ├── retry.Rd │ ├── run.Rd │ ├── run_client.Rd │ ├── set_default_namespace.Rd │ ├── set_metadata.Rd │ ├── set_namespace.Rd │ ├── step.Rd │ ├── step_client.Rd │ ├── sub-sub-.metaflow.flowspec.FlowSpec.Rd │ ├── sub-subset-.metaflow.flowspec.FlowSpec.Rd │ ├── task_client.Rd │ ├── test.Rd │ └── version_info.Rd ├── tests │ ├── contexts.json │ ├── formatter.R │ ├── graphs │ │ ├── branch.json │ │ ├── foreach.json │ │ ├── linear.json │ │ ├── nested_branches.json │ │ ├── nested_foreach.json │ │ └── small_foreach.json │ ├── run_integration_tests.R │ ├── run_tests.R │ ├── tests │ │ ├── basic_artifacts.R │ │ ├── basic_foreach.R │ │ ├── basic_parameter.R │ │ ├── complex_artifacts.R │ │ ├── merge_artifacts.R │ │ ├── merge_artifacts_propagation.R │ │ └── nested_foreach.R │ ├── testthat.R │ ├── testthat │ │ ├── helper.R │ │ ├── test-command-args.R │ │ ├── test-decorators-aws.R │ │ ├── test-decorators-environment.R │ │ ├── test-decorators-error.R │ │ ├── test-decorators.R │ │ ├── test-flags.R │ │ ├── test-flow.R │ │ ├── test-metaflow.R │ │ ├── test-parameter.R │ │ ├── test-run-cmd.R │ │ ├── test-run.R │ │ ├── test-sfn-cli-parsing.R │ │ ├── test-step.R │ │ ├── test-utils-format.R │ │ └── test-utils.R │ └── utils.R └── vignettes │ └── metaflow.Rmd ├── README.md ├── SECURITY.md ├── devtools ├── Makefile ├── Tiltfile └── pick_services.sh ├── docs ├── Environment escape.md ├── cards.md ├── concurrency.md ├── datastore.md ├── lifecycle.dot ├── lifecycle.png ├── metaflow.svg ├── metaflow_sidecar_arch.png ├── multicloud.png ├── prototype-to-prod.png ├── sidecars.md └── update_lifecycle_png ├── metaflow-complete.sh ├── metaflow ├── R.py 
├── __init__.py ├── _vendor │ ├── __init__.py │ ├── click.LICENSE │ ├── click │ │ ├── __init__.py │ │ ├── _bashcomplete.py │ │ ├── _compat.py │ │ ├── _termui_impl.py │ │ ├── _textwrap.py │ │ ├── _unicodefun.py │ │ ├── _winconsole.py │ │ ├── core.py │ │ ├── decorators.py │ │ ├── exceptions.py │ │ ├── formatting.py │ │ ├── globals.py │ │ ├── parser.py │ │ ├── termui.py │ │ ├── testing.py │ │ ├── types.py │ │ └── utils.py │ ├── importlib_metadata.LICENSE │ ├── importlib_metadata │ │ ├── __init__.py │ │ ├── _adapters.py │ │ ├── _collections.py │ │ ├── _compat.py │ │ ├── _functools.py │ │ ├── _itertools.py │ │ ├── _meta.py │ │ ├── _text.py │ │ └── py.typed │ ├── packaging.LICENSE │ ├── packaging │ │ ├── __init__.py │ │ ├── _elffile.py │ │ ├── _manylinux.py │ │ ├── _musllinux.py │ │ ├── _parser.py │ │ ├── _structures.py │ │ ├── _tokenizer.py │ │ ├── markers.py │ │ ├── py.typed │ │ ├── requirements.py │ │ ├── specifiers.py │ │ ├── tags.py │ │ ├── utils.py │ │ └── version.py │ ├── pip.LICENSE │ ├── typeguard.LICENSE │ ├── typeguard │ │ ├── __init__.py │ │ ├── _checkers.py │ │ ├── _config.py │ │ ├── _decorators.py │ │ ├── _exceptions.py │ │ ├── _functions.py │ │ ├── _importhook.py │ │ ├── _memo.py │ │ ├── _pytest_plugin.py │ │ ├── _suppression.py │ │ ├── _transformer.py │ │ ├── _union_transformer.py │ │ ├── _utils.py │ │ └── py.typed │ ├── typing_extensions.LICENSE │ ├── typing_extensions.py │ ├── v3_6 │ │ ├── __init__.py │ │ ├── importlib_metadata.LICENSE │ │ ├── importlib_metadata │ │ │ ├── __init__.py │ │ │ ├── _adapters.py │ │ │ ├── _collections.py │ │ │ ├── _compat.py │ │ │ ├── _functools.py │ │ │ ├── _itertools.py │ │ │ ├── _meta.py │ │ │ ├── _text.py │ │ │ └── py.typed │ │ ├── typing_extensions.LICENSE │ │ ├── typing_extensions.py │ │ ├── zipp.LICENSE │ │ └── zipp.py │ ├── v3_7 │ │ ├── __init__.py │ │ ├── importlib_metadata.LICENSE │ │ ├── importlib_metadata │ │ │ ├── __init__.py │ │ │ ├── _adapters.py │ │ │ ├── _collections.py │ │ │ ├── _compat.py │ │ │ ├── 
_functools.py │ │ │ ├── _itertools.py │ │ │ ├── _meta.py │ │ │ ├── _text.py │ │ │ └── py.typed │ │ ├── typeguard.LICENSE │ │ ├── typeguard │ │ │ ├── __init__.py │ │ │ ├── _checkers.py │ │ │ ├── _config.py │ │ │ ├── _decorators.py │ │ │ ├── _exceptions.py │ │ │ ├── _functions.py │ │ │ ├── _importhook.py │ │ │ ├── _memo.py │ │ │ ├── _pytest_plugin.py │ │ │ ├── _suppression.py │ │ │ ├── _transformer.py │ │ │ ├── _union_transformer.py │ │ │ ├── _utils.py │ │ │ └── py.typed │ │ ├── typing_extensions.LICENSE │ │ ├── typing_extensions.py │ │ ├── zipp.LICENSE │ │ └── zipp.py │ ├── vendor_any.txt │ ├── vendor_v3_6.txt │ ├── vendor_v3_7.txt │ ├── zipp.LICENSE │ └── zipp.py ├── cards.py ├── cli.py ├── cli_args.py ├── cli_components │ ├── __init__.py │ ├── dump_cmd.py │ ├── init_cmd.py │ ├── run_cmds.py │ ├── step_cmd.py │ └── utils.py ├── client │ ├── __init__.py │ ├── core.py │ └── filecache.py ├── clone_util.py ├── cmd │ ├── __init__.py │ ├── code │ │ └── __init__.py │ ├── configure_cmd.py │ ├── develop │ │ ├── __init__.py │ │ ├── stub_generator.py │ │ └── stubs.py │ ├── main_cli.py │ ├── make_wrapper.py │ ├── tutorials_cmd.py │ └── util.py ├── cmd_with_io.py ├── datastore │ ├── __init__.py │ ├── content_addressed_store.py │ ├── datastore_set.py │ ├── datastore_storage.py │ ├── exceptions.py │ ├── flow_datastore.py │ ├── inputs.py │ └── task_datastore.py ├── debug.py ├── decorators.py ├── event_logger.py ├── events.py ├── exception.py ├── extension_support │ ├── __init__.py │ ├── _empty_file.py │ ├── cmd.py │ ├── integrations.py │ └── plugins.py ├── flowspec.py ├── graph.py ├── includefile.py ├── info_file.py ├── integrations.py ├── lint.py ├── metadata_provider │ ├── __init__.py │ ├── heartbeat.py │ ├── metadata.py │ └── util.py ├── metaflow_config.py ├── metaflow_config_funcs.py ├── metaflow_current.py ├── metaflow_environment.py ├── metaflow_git.py ├── metaflow_profile.py ├── metaflow_version.py ├── mflog │ ├── __init__.py │ ├── mflog.py │ ├── save_logs.py │ ├── 
save_logs_periodically.py │ └── tee.py ├── monitor.py ├── multicore_utils.py ├── package.py ├── parameters.py ├── plugins │ ├── __init__.py │ ├── airflow │ │ ├── __init__.py │ │ ├── airflow.py │ │ ├── airflow_cli.py │ │ ├── airflow_decorator.py │ │ ├── airflow_utils.py │ │ ├── dag.py │ │ ├── exception.py │ │ ├── plumbing │ │ │ ├── __init__.py │ │ │ └── set_parameters.py │ │ └── sensors │ │ │ ├── __init__.py │ │ │ ├── base_sensor.py │ │ │ ├── external_task_sensor.py │ │ │ └── s3_sensor.py │ ├── argo │ │ ├── __init__.py │ │ ├── argo_client.py │ │ ├── argo_events.py │ │ ├── argo_workflows.py │ │ ├── argo_workflows_cli.py │ │ ├── argo_workflows_decorator.py │ │ ├── argo_workflows_deployer.py │ │ ├── argo_workflows_deployer_objects.py │ │ ├── capture_error.py │ │ ├── generate_input_paths.py │ │ └── jobset_input_paths.py │ ├── aws │ │ ├── __init__.py │ │ ├── aws_client.py │ │ ├── aws_utils.py │ │ ├── batch │ │ │ ├── __init__.py │ │ │ ├── batch.py │ │ │ ├── batch_cli.py │ │ │ ├── batch_client.py │ │ │ └── batch_decorator.py │ │ ├── secrets_manager │ │ │ ├── __init__.py │ │ │ └── aws_secrets_manager_secrets_provider.py │ │ └── step_functions │ │ │ ├── __init__.py │ │ │ ├── dynamo_db_client.py │ │ │ ├── event_bridge_client.py │ │ │ ├── production_token.py │ │ │ ├── schedule_decorator.py │ │ │ ├── set_batch_environment.py │ │ │ ├── step_functions.py │ │ │ ├── step_functions_cli.py │ │ │ ├── step_functions_client.py │ │ │ ├── step_functions_decorator.py │ │ │ ├── step_functions_deployer.py │ │ │ └── step_functions_deployer_objects.py │ ├── azure │ │ ├── __init__.py │ │ ├── azure_credential.py │ │ ├── azure_exceptions.py │ │ ├── azure_secret_manager_secrets_provider.py │ │ ├── azure_tail.py │ │ ├── azure_utils.py │ │ ├── blob_service_client_factory.py │ │ └── includefile_support.py │ ├── cards │ │ ├── __init__.py │ │ ├── card_cli.py │ │ ├── card_client.py │ │ ├── card_creator.py │ │ ├── card_datastore.py │ │ ├── card_decorator.py │ │ ├── card_modules │ │ │ ├── __init__.py │ │ 
│ ├── base.html │ │ │ ├── basic.py │ │ │ ├── bundle.css │ │ │ ├── card.py │ │ │ ├── chevron │ │ │ │ ├── LICENCE.txt │ │ │ │ ├── __init__.py │ │ │ │ ├── main.py │ │ │ │ ├── metadata.py │ │ │ │ ├── renderer.py │ │ │ │ └── tokenizer.py │ │ │ ├── components.py │ │ │ ├── convert_to_native_type.py │ │ │ ├── main.js │ │ │ ├── renderer_tools.py │ │ │ └── test_cards.py │ │ ├── card_resolver.py │ │ ├── card_server.py │ │ ├── card_viewer │ │ │ └── viewer.html │ │ ├── component_serializer.py │ │ ├── exception.py │ │ └── ui │ │ │ ├── .eslintignore │ │ │ ├── .eslintrc.cjs │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── cypress.json │ │ │ ├── cypress │ │ │ ├── fixtures │ │ │ │ └── example.json │ │ │ ├── integration │ │ │ │ ├── demo_spec.ts │ │ │ │ └── utils_spec.ts │ │ │ ├── plugins │ │ │ │ └── index.js │ │ │ └── support │ │ │ │ ├── commands.js │ │ │ │ └── index.js │ │ │ ├── demo │ │ │ ├── card-example.json │ │ │ └── index.html │ │ │ ├── package-lock.json │ │ │ ├── package.json │ │ │ ├── prism.css │ │ │ ├── prism.js │ │ │ ├── rollup.config.jsBACKUP │ │ │ ├── src │ │ │ ├── App.svelte │ │ │ ├── app.css │ │ │ ├── aws-exports.cjs │ │ │ ├── components │ │ │ │ ├── artifact-row.svelte │ │ │ │ ├── artifacts.svelte │ │ │ │ ├── aside-nav.svelte │ │ │ │ ├── aside.svelte │ │ │ │ ├── card-component-renderer.svelte │ │ │ │ ├── dag │ │ │ │ │ ├── connector.svelte │ │ │ │ │ ├── connectors.svelte │ │ │ │ │ ├── constants.svelte │ │ │ │ │ ├── dag.css │ │ │ │ │ ├── dag.svelte │ │ │ │ │ ├── step-wrapper.svelte │ │ │ │ │ └── step.svelte │ │ │ │ ├── heading.svelte │ │ │ │ ├── image.svelte │ │ │ │ ├── log.svelte │ │ │ │ ├── logo.svelte │ │ │ │ ├── main.svelte │ │ │ │ ├── markdown.svelte │ │ │ │ ├── modal.svelte │ │ │ │ ├── page.svelte │ │ │ │ ├── progress-bar.svelte │ │ │ │ ├── python-code.svelte │ │ │ │ ├── section.svelte │ │ │ │ ├── subtitle.svelte │ │ │ │ ├── table-data-renderer.svelte │ │ │ │ ├── table-horizontal.svelte │ │ │ │ ├── table-vertical.svelte │ │ │ │ ├── table.svelte │ │ │ │ ├── 
text.svelte │ │ │ │ ├── title.svelte │ │ │ │ └── vega-chart.svelte │ │ │ ├── constants.ts │ │ │ ├── global.css │ │ │ ├── global.d.ts │ │ │ ├── main.ts │ │ │ ├── store.ts │ │ │ ├── types.ts │ │ │ └── utils.ts │ │ │ ├── svelte.config.js │ │ │ ├── tsconfig.json │ │ │ ├── tsconfig.node.json │ │ │ └── vite.config.ts │ ├── catch_decorator.py │ ├── datastores │ │ ├── __init__.py │ │ ├── azure_storage.py │ │ ├── gs_storage.py │ │ ├── local_storage.py │ │ └── s3_storage.py │ ├── datatools │ │ ├── __init__.py │ │ ├── local.py │ │ └── s3 │ │ │ ├── __init__.py │ │ │ ├── s3.py │ │ │ ├── s3op.py │ │ │ ├── s3tail.py │ │ │ └── s3util.py │ ├── debug_logger.py │ ├── debug_monitor.py │ ├── env_escape │ │ ├── __init__.py │ │ ├── client.py │ │ ├── client_modules.py │ │ ├── communication │ │ │ ├── __init__.py │ │ │ ├── bytestream.py │ │ │ ├── channel.py │ │ │ ├── socket_bytestream.py │ │ │ └── utils.py │ │ ├── configurations │ │ │ ├── emulate_test_lib │ │ │ │ ├── __init__.py │ │ │ │ ├── overrides.py │ │ │ │ └── server_mappings.py │ │ │ └── test_lib_impl │ │ │ │ ├── __init__.py │ │ │ │ └── test_lib.py │ │ ├── consts.py │ │ ├── data_transferer.py │ │ ├── exception_transferer.py │ │ ├── override_decorators.py │ │ ├── server.py │ │ ├── stub.py │ │ └── utils.py │ ├── environment_decorator.py │ ├── events_decorator.py │ ├── frameworks │ │ ├── __init__.py │ │ └── pytorch.py │ ├── gcp │ │ ├── __init__.py │ │ ├── gcp_secret_manager_secrets_provider.py │ │ ├── gs_exceptions.py │ │ ├── gs_storage_client_factory.py │ │ ├── gs_tail.py │ │ ├── gs_utils.py │ │ └── includefile_support.py │ ├── kubernetes │ │ ├── __init__.py │ │ ├── kube_utils.py │ │ ├── kubernetes.py │ │ ├── kubernetes_cli.py │ │ ├── kubernetes_client.py │ │ ├── kubernetes_decorator.py │ │ ├── kubernetes_job.py │ │ ├── kubernetes_jobsets.py │ │ ├── spot_metadata_cli.py │ │ └── spot_monitor_sidecar.py │ ├── logs_cli.py │ ├── metadata_providers │ │ ├── __init__.py │ │ ├── local.py │ │ └── service.py │ ├── package_cli.py │ ├── 
parallel_decorator.py │ ├── project_decorator.py │ ├── pypi │ │ ├── __init__.py │ │ ├── bootstrap.py │ │ ├── conda_decorator.py │ │ ├── conda_environment.py │ │ ├── micromamba.py │ │ ├── parsers.py │ │ ├── pip.py │ │ ├── pypi_decorator.py │ │ ├── pypi_environment.py │ │ └── utils.py │ ├── resources_decorator.py │ ├── retry_decorator.py │ ├── secrets │ │ ├── __init__.py │ │ ├── inline_secrets_provider.py │ │ └── secrets_decorator.py │ ├── storage_executor.py │ ├── tag_cli.py │ ├── test_unbounded_foreach_decorator.py │ ├── timeout_decorator.py │ └── uv │ │ ├── __init__.py │ │ ├── bootstrap.py │ │ └── uv_environment.py ├── procpoll.py ├── py.typed ├── pylint_wrapper.py ├── runner │ ├── __init__.py │ ├── click_api.py │ ├── deployer.py │ ├── deployer_impl.py │ ├── metaflow_runner.py │ ├── nbdeploy.py │ ├── nbrun.py │ ├── subprocess_manager.py │ └── utils.py ├── runtime.py ├── sidecar │ ├── __init__.py │ ├── sidecar.py │ ├── sidecar_messages.py │ ├── sidecar_subprocess.py │ └── sidecar_worker.py ├── system │ ├── __init__.py │ ├── system_logger.py │ ├── system_monitor.py │ └── system_utils.py ├── tagging_util.py ├── task.py ├── tracing │ ├── __init__.py │ ├── propagator.py │ ├── span_exporter.py │ └── tracing_modules.py ├── tuple_util.py ├── tutorials │ ├── 00-helloworld │ │ ├── README.md │ │ └── helloworld.py │ ├── 01-playlist │ │ ├── README.md │ │ ├── movies.csv │ │ ├── playlist.ipynb │ │ └── playlist.py │ ├── 02-statistics │ │ ├── README.md │ │ ├── movies.csv │ │ ├── stats.ipynb │ │ └── stats.py │ ├── 03-playlist-redux │ │ ├── README.md │ │ └── playlist.py │ ├── 04-playlist-plus │ │ ├── README.md │ │ └── playlist.py │ ├── 05-hello-cloud │ │ ├── README.md │ │ ├── hello-cloud.ipynb │ │ └── hello-cloud.py │ ├── 06-statistics-redux │ │ ├── README.md │ │ └── stats.ipynb │ ├── 07-worldview │ │ ├── README.md │ │ └── worldview.ipynb │ └── 08-autopilot │ │ ├── README.md │ │ └── autopilot.ipynb ├── unbounded_foreach.py ├── user_configs │ ├── __init__.py │ ├── 
config_decorators.py │ ├── config_options.py │ └── config_parameters.py ├── util.py ├── vendor.py └── version.py ├── setup.cfg ├── setup.py ├── stubs ├── MANIFEST.in ├── README.md ├── setup.py └── test │ ├── setup.cfg │ └── test_stubs.yml ├── test ├── README.md ├── cmd │ └── diff │ │ └── test_metaflow_diff.py ├── core │ ├── contexts.json │ ├── graphs │ │ ├── branch.json │ │ ├── foreach.json │ │ ├── linear.json │ │ ├── nested_branches.json │ │ ├── nested_foreach.json │ │ ├── parallel.json │ │ └── small_foreach.json │ ├── metaflow_extensions │ │ └── test_org │ │ │ ├── config │ │ │ └── mfextinit_test_org.py │ │ │ ├── exceptions │ │ │ └── mfextinit_test_org.py │ │ │ ├── plugins │ │ │ ├── cards │ │ │ │ ├── brokencard │ │ │ │ │ └── __init__.py │ │ │ │ └── simplecard │ │ │ │ │ └── __init__.py │ │ │ ├── flow_options.py │ │ │ ├── frameworks │ │ │ │ ├── __init__.py │ │ │ │ └── pytorch.py │ │ │ ├── mfextinit_test_org.py │ │ │ ├── nondecoplugin │ │ │ │ └── __init__.py │ │ │ └── test_step_decorator.py │ │ │ └── toplevel │ │ │ ├── mfextinit_test_org.py │ │ │ └── test_org_toplevel.py │ ├── metaflow_test │ │ ├── __init__.py │ │ ├── cli_check.py │ │ ├── formatter.py │ │ └── metadata_check.py │ ├── run_tests.py │ └── tests │ │ ├── basic_artifact.py │ │ ├── basic_config_parameters.py │ │ ├── basic_config_silly.txt │ │ ├── basic_foreach.py │ │ ├── basic_include.py │ │ ├── basic_log.py │ │ ├── basic_parallel.py │ │ ├── basic_parameters.py │ │ ├── basic_tags.py │ │ ├── basic_unbounded_foreach.py │ │ ├── card_component_refresh_test.py │ │ ├── card_default_editable.py │ │ ├── card_default_editable_customize.py │ │ ├── card_default_editable_with_id.py │ │ ├── card_error.py │ │ ├── card_extension_test.py │ │ ├── card_id_append.py │ │ ├── card_import.py │ │ ├── card_multiple.py │ │ ├── card_refresh_test.py │ │ ├── card_resume.py │ │ ├── card_simple.py │ │ ├── card_timeout.py │ │ ├── catch_retry.py │ │ ├── constants.py │ │ ├── current_singleton.py │ │ ├── custom_decorators.py │ │ ├── 
detect_segfault.py │ │ ├── dynamic_parameters.py │ │ ├── extensions.py │ │ ├── flow_options.py │ │ ├── large_artifact.py │ │ ├── large_mflog.py │ │ ├── lineage.py │ │ ├── merge_artifacts.py │ │ ├── merge_artifacts_include.py │ │ ├── merge_artifacts_propagation.py │ │ ├── nested_foreach.py │ │ ├── nested_unbounded_foreach.py │ │ ├── param_names.py │ │ ├── project_branch.py │ │ ├── project_production.py │ │ ├── resume_end_step.py │ │ ├── resume_foreach_inner.py │ │ ├── resume_foreach_join.py │ │ ├── resume_foreach_split.py │ │ ├── resume_originpath.py │ │ ├── resume_start_step.py │ │ ├── resume_succeeded_step.py │ │ ├── resume_ubf_basic_foreach.py │ │ ├── resume_ubf_foreach_join.py │ │ ├── run_id_file.py │ │ ├── runtime_dag.py │ │ ├── s3_failure.py │ │ ├── secrets_decorator.py │ │ ├── tag_catch.py │ │ ├── tag_mutation.py │ │ ├── task_exception.py │ │ ├── timeout_decorator.py │ │ └── wide_foreach.py ├── data │ ├── __init__.py │ └── s3 │ │ ├── __init__.py │ │ ├── s3_data.py │ │ ├── test_s3.py │ │ └── test_s3op.py ├── env_escape │ └── example.py ├── extensions │ ├── README.md │ ├── install_packages.sh │ └── packages │ │ ├── card_via_extinit │ │ ├── README.md │ │ ├── metaflow_extensions │ │ │ └── card_via_extinit │ │ │ │ └── plugins │ │ │ │ └── cards │ │ │ │ ├── card_a │ │ │ │ └── __init__.py │ │ │ │ ├── card_b │ │ │ │ └── __init__.py │ │ │ │ └── mfextinit_X.py │ │ └── setup.py │ │ ├── card_via_init │ │ ├── README.md │ │ ├── metaflow_extensions │ │ │ └── card_via_init │ │ │ │ └── plugins │ │ │ │ └── cards │ │ │ │ └── __init__.py │ │ └── setup.py │ │ └── card_via_ns_subpackage │ │ ├── README.md │ │ ├── metaflow_extensions │ │ └── card_via_ns_subpackage │ │ │ └── plugins │ │ │ └── cards │ │ │ └── nssubpackage │ │ │ └── __init__.py │ │ └── setup.py ├── parallel │ ├── parallel_test_flow.py │ └── pytorch_parallel_test_flow.py ├── test_config │ ├── basic_config_silly.txt │ ├── card_config.py │ ├── config2.json │ ├── config_card.py │ ├── config_corner_cases.py │ ├── 
config_parser.py │ ├── config_parser_requirements.txt │ ├── config_simple.json │ ├── config_simple.py │ ├── config_simple2.py │ ├── helloconfig.py │ ├── mutable_flow.py │ ├── no_default.py │ ├── photo_config.json │ ├── runner_flow.py │ └── test.py └── unit │ ├── test_compute_resource_attributes.py │ ├── test_conda_decorator.py │ ├── test_config_value.py │ ├── test_kubernetes.py │ ├── test_local_metadata_provider.py │ ├── test_multicore_utils.py │ ├── test_pypi_decorator.py │ ├── test_pypi_parsers.py │ └── test_secrets_decorator.py ├── test_runner └── tox.ini /.github/workflows/full-stack-test.yml: -------------------------------------------------------------------------------- 1 | name: Test Metaflow with complete Kubernetes stack 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Check out source 17 | uses: actions/checkout@v4 18 | 19 | - name: Install Metaflow 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install . kubernetes 23 | 24 | 25 | - name: Bring up the environment 26 | run: | 27 | echo "Starting environment in the background..." 28 | MINIKUBE_CPUS=2 metaflow-dev all-up & 29 | # Give time to spin up. Adjust as needed: 30 | sleep 150 31 | 32 | - name: Wait & run flow 33 | run: | 34 | # When the environment is up, metaflow-dev shell will wait for readiness 35 | # and then drop into a shell. 
We feed commands via a heredoc: 36 | cat <%") 8 | export(batch) 9 | export(catch) 10 | export(container_image) 11 | export(current) 12 | export(decorator) 13 | export(environment_variables) 14 | export(flow_client) 15 | export(gather_inputs) 16 | export(get_metadata) 17 | export(get_namespace) 18 | export(install_metaflow) 19 | export(list_flows) 20 | export(merge_artifacts) 21 | export(metaflow) 22 | export(metaflow_location) 23 | export(mf_client) 24 | export(new_flow) 25 | export(new_run) 26 | export(new_step) 27 | export(new_task) 28 | export(parameter) 29 | export(pull_tutorials) 30 | export(r_version) 31 | export(remove_metaflow_env) 32 | export(reset_default_metadata) 33 | export(resources) 34 | export(retry) 35 | export(run) 36 | export(run_client) 37 | export(set_default_namespace) 38 | export(set_metadata) 39 | export(set_namespace) 40 | export(step) 41 | export(step_client) 42 | export(task_client) 43 | export(test) 44 | export(version_info) 45 | importFrom(magrittr,"%>%") 46 | -------------------------------------------------------------------------------- /R/R/imports.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | NULL -------------------------------------------------------------------------------- /R/R/namespace.R: -------------------------------------------------------------------------------- 1 | #' Switch to a namespace specified by the given tag. 2 | #' 3 | #' @param ns namespace 4 | #' 5 | #' @details NULL maps to global namespace. 6 | #' 7 | #' @export 8 | set_namespace <- function(ns = NULL) { 9 | pkg.env$mf$namespace(ns) 10 | } 11 | 12 | #' Return the current namespace (tag). 
13 | #' 14 | #' @export 15 | get_namespace <- function() { 16 | pkg.env$mf$get_namespace() 17 | } 18 | 19 | #' Set the default namespace. 20 | #' 21 | #' @export 22 | set_default_namespace <- function() { 23 | pkg.env$mf$default_namespace() 24 | } 25 | -------------------------------------------------------------------------------- /R/R/package.R: -------------------------------------------------------------------------------- 1 | #' @description R binding for Metaflow. Metaflow is a human-friendly Python/R library 2 | #' that helps scientists and engineers build and manage real-life data science projects. 3 | #' Metaflow was originally developed at Netflix to boost productivity of data scientists 4 | #' who work on a wide variety of projects from classical statistics to state-of-the-art deep learning. 5 | #' @aliases metaflow-r 6 | "_PACKAGE" 7 | 8 | # directly setting global var would cause a NOTE from R CMD check 9 | set_global_variable <- function(key, val, pos = 1) { 10 | assign(key, val, envir = as.environment(pos)) 11 | } 12 | 13 | #' Instantiate a flow 14 | #' 15 | #' @param cls flow class name 16 | #' @param ... flow decorators 17 | #' @return flow object 18 | #' @section Usage: 19 | #' \preformatted{ 20 | #' metaflow("HelloFlow") 21 | #' } 22 | #' @export 23 | metaflow <- function(cls, ...) { 24 | set_global_variable(cls, Flow$new(cls, list(...))) 25 | get(cls, pos = 1) 26 | } 27 | -------------------------------------------------------------------------------- /R/README.md: -------------------------------------------------------------------------------- 1 | # Metaflow 2 | 3 | Metaflow is a human-friendly R package that helps scientists and engineers build and manage real-life data science projects. Metaflow was originally developed at Netflix to boost productivity of data scientists who work on a wide variety of projects from classical statistics to state-of-the-art deep learning. 
4 | 5 | For more information, see [Metaflow's website](https://metaflow.org). 6 | 7 | ## Getting Started 8 | 9 | Getting up and running with Metaflow is easy. Install Metaflow from [github](https://github.com/Netflix/metaflow/tree/master/R): 10 | 11 | >```R 12 | >devtools::install_github("Netflix/metaflow", subdir="R") 13 | >metaflow::install_metaflow() 14 | >``` 15 | 16 | and access tutorials by typing: 17 | 18 | >```R 19 | >metaflow::pull_tutorials() 20 | >``` 21 | 22 | or jump straight into the [docs](https://docs.metaflow.org/v/r). 23 | 24 | ## Get in Touch 25 | There are several ways to get in touch with us: 26 | 27 | * Open an issue at: https://github.com/Netflix/metaflow 28 | * Email us at: help@metaflow.org 29 | * Chat with us on: http://chat.metaflow.org 30 | -------------------------------------------------------------------------------- /R/check_as_cran.sh: -------------------------------------------------------------------------------- 1 | rm -rf cran_check 2 | mkdir -p cran_check; 3 | cp -r inst ./cran_check/ 4 | cp -r man ./cran_check/ 5 | cp -r R ./cran_check/ 6 | cp -r vignettes ./cran_check/ 7 | cp DESCRIPTION ./cran_check/ 8 | cp NAMESPACE ./cran_check/ 9 | cp LICENSE ./cran_check/ 10 | cd cran_check; R CMD build . 
; R CMD check --as-cran metaflow_*.tar.gz 11 | -------------------------------------------------------------------------------- /R/doc/metaflow.R: -------------------------------------------------------------------------------- 1 | ## ---- include = FALSE--------------------------------------------------------- 2 | knitr::opts_chunk$set( 3 | collapse = TRUE, 4 | comment = "#>" 5 | ) 6 | 7 | -------------------------------------------------------------------------------- /R/doc/metaflow.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "metaflow" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{metaflow} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | Please refer to \url{docs.metaflow.org} for detailed documentation and tutorials. 17 | -------------------------------------------------------------------------------- /R/inst/run.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(metaflow)) 2 | 3 | flowRDS_file <- "flow.RDS" 4 | flowRDS_arg <- Filter(function(arg) { 5 | startsWith(arg, "--flowRDS") 6 | }, commandArgs()) 7 | if (length(flowRDS_arg) == 1) { 8 | flowRDS_file <- strsplit(flowRDS_arg[1], "=")[[1]][2] 9 | } else { 10 | stop("missing --flowRDS file command in the command line arguments") 11 | } 12 | 13 | if (!file.exists(flowRDS_file)) { 14 | stop(sprintf("Cannot locate flow RDS file: %s", flowRDS_file)) 15 | } 16 | 17 | flow <- readRDS(flowRDS_file) 18 | 19 | rfuncs <- flow$get_functions() 20 | r_functions <- reticulate::dict(rfuncs, convert = TRUE) 21 | flow_script <- flow$get_flow() 22 | 23 | for (fname in names(rfuncs)) { 24 | assign(fname, rfuncs[[fname]], envir = .GlobalEnv) 25 | } 26 | 27 | runtime_args <- function(arg) { 28 | 
return(!startsWith(arg, "--flowRDS")) 29 | } 30 | 31 | mf <- reticulate::import("metaflow", delay_load = TRUE) 32 | 33 | mf$R$run( 34 | flow_script, r_functions, 35 | flowRDS_file, 36 | Filter(runtime_args, commandArgs(trailingOnly = TRUE)), 37 | c(commandArgs(trailingOnly = FALSE), flowRDS_arg), 38 | metaflow_location(flowRDS = flowRDS_file), 39 | container_image(), 40 | r_version(), 41 | paste(R.version.string), 42 | paste(getRversion()) 43 | ) 44 | -------------------------------------------------------------------------------- /R/inst/tutorials/00-helloworld/README.md: -------------------------------------------------------------------------------- 1 | # Episode 00-helloworld: Metaflow says Hi! 2 | 3 | **This flow is a simple linear workflow that verifies your installation by 4 | printing out 'Metaflow says: Hi!' to the terminal.** 5 | 6 | #### Showcasing: 7 | - Basics of Metaflow. 8 | - Step decorator. 9 | 10 | 11 | #### To play this episode: 12 | 1. ```cd tutorials/00-helloworld``` 13 | 2. ```Rscript helloworld.R show``` 14 | 3. ```Rscript helloworld.R run``` 15 | 16 | If you are using RStudio, you can run this script by directly executing `source("helloworld.R")`. -------------------------------------------------------------------------------- /R/inst/tutorials/00-helloworld/helloworld.R: -------------------------------------------------------------------------------- 1 | # A flow where Metaflow prints 'Hi'. 2 | # Run this flow to validate that Metaflow is installed correctly. 3 | 4 | library(metaflow) 5 | 6 | # This is the 'start' step. All flows must have a step named 7 | # 'start' that is the first step in the flow. 8 | start <- function(self){ 9 | print("HelloFlow is starting.") 10 | } 11 | 12 | # A step for metaflow to introduce itself. 13 | hello <- function(self){ 14 | print("Metaflow says: Hi!") 15 | } 16 | 17 | # This is the 'end' step. All flows must have an 'end' step, 18 | # which is the last step in the flow. 
19 | end <- function(self){ 20 | print("HelloFlow is all done.") 21 | } 22 | 23 | metaflow("HelloFlow") %>% 24 | step(step = "start", 25 | r_function = start, 26 | next_step = "hello") %>% 27 | step(step = "hello", 28 | r_function = hello, 29 | next_step = "end") %>% 30 | step(step = "end", 31 | r_function = end) %>% 32 | run() 33 | -------------------------------------------------------------------------------- /R/inst/tutorials/01-playlist/README.md: -------------------------------------------------------------------------------- 1 | # Episode 01-playlist: Let's build you a movie playlist. 2 | 3 | **This flow loads a movie metadata CSV file and builds a playlist for your 4 | favorite movie genre. Everything in Metaflow is versioned, so you can run it 5 | multiple times and view all the historical playlists with the Metaflow client 6 | in an R Markdown Notebook.** 7 | 8 | #### Showcasing: 9 | - Basic Metaflow Parameters. 10 | - Running workflow branches in parallel and joining results. 11 | - Using the Metaflow client in an R Markdown Notebook. 12 | 13 | #### To play this episode: 14 | ##### Execute the flow: 15 | Inside a terminal: 16 | 1. ```cd tutorials/01-playlist/``` 17 | 2. ```Rscript playlist.R show``` 18 | 3. ```Rscript playlist.R run``` 19 | 4. ```Rscript playlist.R run --genre comedy``` 20 | 21 | If you are using RStudio, you can replace the `run()` in last line in `playlist.R` with `run(genre="comedy")`, and run the episode by executing `source("playlist.R")` in RStudio. 22 | 23 | ##### Inspect the results 24 | Open the R Markdown file ```playlist.Rmd``` in RStudio and execute the markdown cells. 
-------------------------------------------------------------------------------- /R/inst/tutorials/01-playlist/playlist.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Episode 01-playlist: Let's build you a movie playlist" 3 | output: html_notebook 4 | --- 5 | 6 | PlayListFlow is a movie playlist generator, and this notebook shows how you can use the Metaflow client to access data from the versioned Metaflow runs. In this example, you can view all the historical playlists. 7 | 8 | ```{r} 9 | suppressPackageStartupMessages(library(metaflow)) 10 | message("Current metadata provider: ", get_metadata()) 11 | message("Current namespace: ", get_namespace()) 12 | ``` 13 | 14 | ## Print your latest generated playlist 15 | ```{r} 16 | flow <- flow_client$new("PlayListFlow") 17 | 18 | run_id <- flow$latest_successful_run 19 | message("Using run: ", run_id) 20 | 21 | run <- run_client$new(flow, run_id) 22 | 23 | message("Bonus pick: ", run$artifact("bonus")) 24 | 25 | message("Playlist generated on ", run$finished_at) 26 | message("Playlist for movies in genre: ", run$artifact("genre")) 27 | 28 | playlist <- run$artifact("playlist") 29 | print(head(playlist)) 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /R/inst/tutorials/02-statistics/README.md: -------------------------------------------------------------------------------- 1 | # Episode 02-statistics: Is this Data Science? 2 | 3 | **Use metaflow to load the movie metadata CSV file into a data frame and compute some movie genre-specific statistics. These statistics are then used in 4 | later examples to improve our playlist generator. You can optionally use the 5 | Metaflow client to eyeball the results in a Markdown Notebook, and make some simple 6 | plots.** 7 | 8 | #### Showcasing: 9 | - Fan-out over a set of parameters using Metaflow foreach. 10 | - Plotting results in a Markdown Notebook. 
11 | 12 | #### Before playing this episode: 13 | 1. Configure your metadata provider to a user-wise global provider, if you haven't done it already. 14 | ```bash 15 | $mkdir -p /path/to/home/.metaflow 16 | $export METAFLOW_DEFAULT_METADATA=local 17 | ``` 18 | 19 | #### To play this episode: 20 | ##### Execute the flow: 21 | In a terminal: 22 | 1. ```cd tutorials/02-statistics``` 23 | 2. ```Rscript stats.R show``` 24 | 3. ```Rscript stats.R run``` 25 | 26 | If you are using RStudio, you can run this script by directly executing `source("stats.R")`. 27 | 28 | ##### Inspect the results: 29 | Open the R Markdown file ```stats.Rmd``` in RStudio and execute the markdown cells. -------------------------------------------------------------------------------- /R/inst/tutorials/02-statistics/stats.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Episode 02: Is this Data Science?" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | MovieStatsFlow loads the movie metadata CSV file into a Pandas Dataframe and computes some movie genre-specific statistics. You can use this notebook and the Metaflow client to eyeball the results and make some simple plots. 
9 | 10 | ```{r} 11 | suppressPackageStartupMessages(library(metaflow)) 12 | message("Current metadata provider: ", get_metadata()) 13 | message("Current namespace: ", get_namespace()) 14 | ``` 15 | 16 | ## Get the movie statistics from the latest run of MovieStatsFlow 17 | 18 | ```{r} 19 | flow <- flow_client$new("MovieStatsFlow") 20 | run_id <- flow$latest_successful_run 21 | run <- run_client$new(flow, run_id) 22 | 23 | df <- run$artifact("stats") 24 | print(head(df)) 25 | ``` 26 | 27 | 28 | 29 | ## Create a bar plot of median gross box office of top 5 movies 30 | ```{r} 31 | df <- df[order(df$median, decreasing = TRUE), ] 32 | print(head(df)) 33 | 34 | barplot(df$median[1:5], names.arg=df$genres[1:5]) 35 | ``` 36 | -------------------------------------------------------------------------------- /R/inst/tutorials/03-playlist-redux/README.md: -------------------------------------------------------------------------------- 1 | # Episode 03-playlist-redux: Follow the Money. 2 | 3 | **Use Metaflow to load the statistics generated from 'Episode 02' and recommend movies from a genre with highest median gross box office** 4 | 5 | #### Showcasing: 6 | - Using data artifacts generated from other flows. 7 | 8 | #### Before playing this episode: 9 | 1. Run 'Episode 02-statistics: Is this Data Science?' 10 | 2. Configure your metadata provider to a user-wise global provider, if you haven't done it already. 11 | ```bash 12 | $mkdir -p /path/to/home/.metaflow 13 | $export METAFLOW_DEFAULT_METADATA=local 14 | ``` 15 | 16 | #### To play this episode: 17 | In a terminal: 18 | 1. ```cd tutorials/03-playlist-redux``` 19 | 2. ```Rscript playlist.R show``` 20 | 3. ```Rscript playlist.R run``` 21 | 22 | If you are using RStudio, you can run this script by directly executing `source("playlist.R")`. 
23 | 24 | In this ```PlayListReduxFlow```, we reuse the genre median gross box office statistics computed from ```MoviesStatsFlow```, pick the genre with the highest median gross box office, and create a randomized playlist of movies of this picked genre. -------------------------------------------------------------------------------- /R/inst/tutorials/04-helloaws/README.md: -------------------------------------------------------------------------------- 1 | # Episode 04-helloaws: Look Mom, We're in the Cloud. 2 | 3 | **This flow is a simple linear workflow that verifies your AWS 4 | configuration. The 'start' and 'end' steps will run locally, while the 'hello' 5 | step will run remotely on AWS batch. After configuring Metaflow to run on AWS, 6 | data and metadata about your runs will be stored remotely. This means you can 7 | use the client to access information about any flow from anywhere.** 8 | 9 | #### Showcasing: 10 | - AWS batch decorator. 11 | - Accessing data artifacts generated remotely in a local notebook. 12 | - retry decorator. 13 | 14 | #### Before playing this episode: 15 | 1. Configure your sandbox: https://docs.metaflow.org/metaflow-on-aws/metaflow-sandbox 16 | 17 | #### To play this episode: 18 | ##### Execute the flow: 19 | In a terminal: 20 | 1. ```cd tutorials/04-helloaws``` 21 | 2. ```Rscript helloaws.R run``` 22 | 23 | If you are using RStudio, you can run this script by directly executing `source("helloaws.R")`. 24 | 25 | ##### Inspect the results: 26 | Open the R Markdown file ```helloaws.Rmd``` in RStudio and execute the markdown cells. -------------------------------------------------------------------------------- /R/inst/tutorials/04-helloaws/helloaws.R: -------------------------------------------------------------------------------- 1 | # A flow where Metaflow prints 'Hi'. 2 | # Run this flow to validate that Metaflow is installed correctly. 3 | 4 | library(metaflow) 5 | 6 | # This is the 'start' step. 
All flows must have a step named 7 | # 'start' that is the first step in the flow. 8 | start <- function(self){ 9 | message("HelloAWS is starting.") 10 | message("Using metadata provider: ", get_metadata()) 11 | } 12 | 13 | # A step for metaflow to introduce itself. 14 | hello <- function(self){ 15 | self$message <- "We're on the cloud! Metaflow says: Hi!" 16 | print(self$message) 17 | message("Using metadata provider: ", get_metadata()) 18 | } 19 | 20 | # This is the 'end' step. All flows must have an 'end' step, 21 | # which is the last step in the flow. 22 | end <- function(self){ 23 | message("HelloAWS is all done.") 24 | } 25 | 26 | metaflow("HelloAWSFlow") %>% 27 | step(step = "start", 28 | r_function = start, 29 | next_step = "hello") %>% 30 | step(step = "hello", 31 | decorator("retry", times=2), 32 | decorator("batch", cpu=2, memory=2048), 33 | r_function = hello, 34 | next_step = "end") %>% 35 | step(step = "end", 36 | r_function = end) %>% 37 | run() 38 | -------------------------------------------------------------------------------- /R/inst/tutorials/04-helloaws/helloaws.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Episode 04-helloaws: Look Mom, We're in the Cloud" 3 | output: html_notebook 4 | --- 5 | In HelloAWSFlow, the 'start' and 'end' steps were run locally, while the 'hello' step was run remotely on AWS batch. Since we are using AWS, data artifacts and metadata were stored remotely. This means you can use the client to access information about any flow from anywhere. This notebook shows you how. 6 | 7 | ## Import the metaflow client 8 | ```{r} 9 | library(metaflow) 10 | message("Current metaadata provider: ", get_metadata()) 11 | ``` 12 | 13 | Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*.
14 | 15 | ## Print the message generated from the flow 16 | ```{r} 17 | flow <- flow_client$new("HelloAWSFlow") 18 | run <- run_client$new(flow, flow$latest_successful_run) 19 | message("Using run: ", run$pathspec) 20 | message(run$artifact("message")) 21 | ``` 22 | -------------------------------------------------------------------------------- /R/inst/tutorials/06-worldview/README.md: -------------------------------------------------------------------------------- 1 | # Episode 06-worldview: Way up here. 2 | 3 | **This episode shows how you can use a notebook to setup a simple dashboard to 4 | monitor all of your Metaflow flows.** 5 | 6 | #### Showcasing: 7 | - The metaflow client API. 8 | 9 | #### Before playing this episode: 10 | 1. Configure your sandbox: https://docs.metaflow.org/metaflow-on-aws/metaflow-sandbox 11 | 12 | #### To play this episode: 13 | 1. ```cd tutorials/06-worldview/``` 14 | 2. Open ```worldview.Rmd``` in RStudio on your local computer -------------------------------------------------------------------------------- /R/inst/tutorials/06-worldview/worldview.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Episode 06: Way up here." 3 | output: html_notebook 4 | --- 5 | 6 | This notebook shows how you can see some basic information about all Metaflow flows that you've run. 7 | 8 | ## Check metadata provider and your namespace 9 | We will be able to see all flows registered with this metadata provider across all namespaces. If you're sharing the AWS metadata provider with your colleagues, you will be able to see all of your colleagues' flows as well. 
10 | ```{r} 11 | suppressPackageStartupMessages(library(metaflow)) 12 | message("Current metadata provider: ", get_metadata()) 13 | ``` 14 | 15 | ## List all flows with their latest completion time and status 16 | ```{r} 17 | set_namespace(NULL) 18 | flow_names <- metaflow::list_flows() 19 | for (name in unlist(flow_names)){ 20 | flow <- flow_client$new(name) 21 | 22 | run <- run_client$new(flow, flow$latest_run) 23 | 24 | message("Run id: ", run$id, " Last run: ", run$finished_at, " Successful: ", run$successful) 25 | } 26 | ``` 27 | 28 | 29 | ## Give some detailed information on HelloAWSFlow 30 | ```{r} 31 | flow <- flow_client$new("HelloAWSFlow") 32 | for (run_id in flow$runs){ 33 | run <- run_client$new(flow, run_id) 34 | message("Run id: ", run$id, " Successful: ", run$successful) 35 | message("Tags: ") 36 | print(run$tags) 37 | } 38 | ``` -------------------------------------------------------------------------------- /R/inst/tutorials/07-autopilot/autopilot.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Episode 7: Autopilot" 3 | output: html_notebook 4 | --- 5 | 6 | **This notebook shows how you can track Metaflow flows that have been scheduled to execute in the cloud.** 7 | 8 | ## Import the metaflow client 9 | ```{r} 10 | suppressPackageStartupMessages(library(metaflow)) 11 | message("Current metadata provider: ", metaflow::get_metadata()) 12 | ``` 13 | 14 | ## Plot a timeline view of a scheduled run of MovieStatsFlow 15 | When you triggered your flow on AWS Step Functions using `step-functions trigger`, you would have seen an output similar to - 16 | ```{bash} 17 | ... 18 | Workflow MovieStatsFlow triggered on AWS Step Functions (run-id sfn-dolor-sit-amet). 19 | ... 20 | ``` 21 | Paste the run-id below (run_id = 'sfn-dolor-sit-amet') and run the following after the run finishes on Step Function. 
22 | ```{r} 23 | set_namespace(NULL) 24 | run = flow_client$new('MovieStatsFlow')$run('sfn-dolor-sit-amet') 25 | print(run$steps) 26 | ``` 27 | 28 | ## Steps View 29 | ```{r} 30 | for (step_name in run$steps){ 31 | step = run$step(step_name) 32 | step$summary() 33 | } 34 | ``` 35 | -------------------------------------------------------------------------------- /R/inst/tutorials/README.md: -------------------------------------------------------------------------------- 1 | # Tutorials for Metaflow R 2 | 3 | This set of tutorials provides a hands-on introduction to Metaflow. The [basic concepts](https://docs.metaflow.org/v/r/metaflow/basics) are introduced in practice, and you can find out more details about the functionality showcased in these tutorials in Basics of Metaflow and the following sections. 4 | 5 | ## Setting up 6 | Metaflow comes packaged with the tutorials, so getting started is easy. You can pull a copy of the tutorials to your current directory by running the following command in R: 7 | ```R 8 | metaflow::pull_tutorials() 9 | ``` 10 | This creates a directory tutorials in your current working directory with a subdirectory for each tutorial. 11 | 12 | Each tutorial has a brief description and instructions included in the `README.md` in each subfolder. 
-------------------------------------------------------------------------------- /R/man/add_decorators.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decorators.R 3 | \name{add_decorators} 4 | \alias{add_decorators} 5 | \title{Format a list of decorators as a character vector} 6 | \usage{ 7 | add_decorators(decorators) 8 | } 9 | \arguments{ 10 | \item{decorators}{List of decorators, as created by the 11 | \code{\link{decorator}} function.} 12 | } 13 | \value{ 14 | character vector 15 | } 16 | \description{ 17 | Format a list of decorators as a character vector 18 | } 19 | \section{Python decorators}{ 20 | Metaflow decorators are so called because they 21 | translate directly to Python decorators that are applied to a step. So, for 22 | example, \code{decorator("batch", cpu = 1)} in R becomes \verb{@batch(cpu = 1)} in 23 | Python. A new line is appended as well, as Python decorators are placed 24 | above the function they take as an input. 
25 | } 26 | 27 | \examples{ 28 | \dontrun{ 29 | add_decorators(list(decorator("batch", cpu = 4), decorator("retry"))) 30 | #> c("@batch(cpu=4)", "\n", "@retry", "\n") 31 | } 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /R/man/cash-.metaflow.flowspec.FlowSpec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{$.metaflow.flowspec.FlowSpec} 4 | \alias{$.metaflow.flowspec.FlowSpec} 5 | \title{Overload getter for self object} 6 | \usage{ 7 | \method{$}{metaflow.flowspec.FlowSpec}(self, name) 8 | } 9 | \arguments{ 10 | \item{self}{the metaflow self object for each step function} 11 | 12 | \item{name}{attribute name} 13 | } 14 | \description{ 15 | Overload getter for self object 16 | } 17 | \section{Usage}{ 18 | 19 | \preformatted{ 20 | print(self$var) 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /R/man/cash-set-.metaflow.flowspec.FlowSpec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{$<-.metaflow.flowspec.FlowSpec} 4 | \alias{$<-.metaflow.flowspec.FlowSpec} 5 | \title{Overload setter for self object} 6 | \usage{ 7 | \method{$}{metaflow.flowspec.FlowSpec}(self, name) <- value 8 | } 9 | \arguments{ 10 | \item{self}{the metaflow self object for each step function} 11 | 12 | \item{name}{attribute name} 13 | 14 | \item{value}{value to assign to the attribute} 15 | } 16 | \description{ 17 | Overload setter for self object 18 | } 19 | \section{Usage}{ 20 | 21 | \preformatted{ 22 | self$var <- "hello" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/man/container_image.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{container_image} 4 | \alias{container_image} 5 | \title{Return the default container image to use for remote execution on AWS Batch. 6 | By default we use docker images maintained on https://hub.docker.com/r/rocker/ml.} 7 | \usage{ 8 | container_image() 9 | } 10 | \description{ 11 | Return the default container image to use for remote execution on AWS Batch. 12 | By default we use docker images maintained on https://hub.docker.com/r/rocker/ml. 13 | } 14 | -------------------------------------------------------------------------------- /R/man/current.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{current} 4 | \alias{current} 5 | \title{Helper utility to access current IDs of interest} 6 | \usage{ 7 | current(value) 8 | } 9 | \arguments{ 10 | \item{value}{one of flow_name, run_id, origin_run_id, 11 | step_name, task_id, pathspec, namespace, 12 | username, retry_count} 13 | } 14 | \description{ 15 | Helper utility to access current IDs of interest 16 | } 17 | \examples{ 18 | \dontrun{ 19 | current("flow_name") 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /R/man/decorator_arguments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decorators.R 3 | \name{decorator_arguments} 4 | \alias{decorator_arguments} 5 | \title{Format the arguments of a decorator as inputs to a Python function} 6 | \usage{ 7 | decorator_arguments(args, .convert_args = TRUE) 8 | } 9 | \arguments{ 10 | \item{args}{Named list of arguments, as would be provided to the \code{...} of a 11 | function.} 12 |
13 | \item{.convert_args}{Boolean. If \code{TRUE} (the default), argument values will 14 | be converted to analogous Python values, with strings quoted and escaped. 15 | Disable this if argument values are already formatted for Python.} 16 | } 17 | \value{ 18 | atomic character of arguments, separated by a comma 19 | } 20 | \description{ 21 | Format the arguments of a decorator as inputs to a Python function 22 | } 23 | \section{Python decorators}{ 24 | Metaflow decorators are so called because they 25 | translate directly to Python decorators that are applied to a step. So, for 26 | example, \code{decorator("batch", cpu = 1)} in R becomes \verb{@batch(cpu = 1)} in 27 | Python. A new line is appended as well, as Python decorators are placed 28 | above the function they take as an input. 29 | } 30 | 31 | \examples{ 32 | \dontrun{ 33 | decorator_arguments(list(cpu = 1, memory = 1000)) 34 | #> "cpu=1, memory=1000" 35 | } 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /R/man/environment_variables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/decorators-environment.R 3 | \name{environment_variables} 4 | \alias{environment_variables} 5 | \title{Decorator that sets environment variables during step execution} 6 | \usage{ 7 | environment_variables(...) 8 | } 9 | \arguments{ 10 | \item{...}{Named environment variables and their values, with all values 11 | coercible to a character string.. 
For example, \code{environment_variables(foo = "bar")} will set the "foo" environment variable as "bar" during step 12 | execution.} 13 | } 14 | \value{ 15 | A object of class "decorator" 16 | } 17 | \description{ 18 | Decorator that sets environment variables during step execution 19 | } 20 | \examples{ 21 | \dontrun{ 22 | start <- function(self) { 23 | print(paste("The cutest animal is the", Sys.getenv("CUTEST_ANIMAL"))) 24 | print(paste("The", Sys.getenv("ALSO_CUTE"), "is also cute, though")) 25 | } 26 | 27 | metaflow("EnvironmentVariables") \%>\% 28 | step(step="start", 29 | environment_variables(CUTEST_ANIMAL = "corgi", ALSO_CUTE = "penguin"), 30 | r_function=start, 31 | next_step="end") \%>\% 32 | step(step="end") \%>\% 33 | run() 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /R/man/gather_inputs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{gather_inputs} 4 | \alias{gather_inputs} 5 | \title{Helper utility to gather inputs in a join step} 6 | \usage{ 7 | gather_inputs(inputs, input) 8 | } 9 | \arguments{ 10 | \item{inputs}{inputs from parent branches} 11 | 12 | \item{input}{field to extract from inputs from 13 | parent branches into vector} 14 | } 15 | \description{ 16 | Helper utility to gather inputs in a join step 17 | } 18 | \section{usage}{ 19 | 20 | \preformatted{ 21 | gather_inputs(inputs, "alpha") 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /R/man/get_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metadata.R 3 | \name{get_metadata} 4 | \alias{get_metadata} 5 | \title{Returns the current Metadata provider.} 6 | \usage{ 7 | get_metadata() 8 | } 9 | \value{ 10 | String 
type. Information about the Metadata provider currently selected. 11 | This information typically returns provider specific information (like URL for remote 12 | providers or local paths for local providers. 13 | } 14 | \description{ 15 | This call returns the current Metadata being used to return information 16 | about Metaflow objects. If this is not set explicitly using metadata(), the default value is 17 | determined through environment variables. 18 | } 19 | -------------------------------------------------------------------------------- /R/man/get_namespace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/namespace.R 3 | \name{get_namespace} 4 | \alias{get_namespace} 5 | \title{Return the current namespace (tag).} 6 | \usage{ 7 | get_namespace() 8 | } 9 | \description{ 10 | Return the current namespace (tag). 11 | } 12 | -------------------------------------------------------------------------------- /R/man/is_valid_python_identifier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{is_valid_python_identifier} 4 | \alias{is_valid_python_identifier} 5 | \alias{is_valid_python_identifier_py2} 6 | \alias{is_valid_python_identifier_py3} 7 | \title{Determine if the given string is a valid identifier in Python} 8 | \usage{ 9 | is_valid_python_identifier(identifier) 10 | 11 | is_valid_python_identifier_py2(identifier) 12 | 13 | is_valid_python_identifier_py3(identifier) 14 | } 15 | \arguments{ 16 | \item{identifier}{character, or an object that can be coerced to a 17 | character.} 18 | } 19 | \value{ 20 | logical 21 | } 22 | \description{ 23 | Python 2 and Python 3 have different rules for determining if a string is a 24 | valid variable name ("identifier"). 
The \code{is_valid_python_identifier} function 25 | will use the logic that corresponds to the version of Python that 26 | \code{reticulate} is using. 27 | } 28 | \details{ 29 | For Python 2, the rules can be checked with simple regex: a Python variable 30 | name can contain upper- and lower-case letters, underscores, and numbers, 31 | although it cannot begin with a number. Python 3 is more complicated, in that 32 | it allows unicode characters. Fortunately, Python 3 introduces the string 33 | \code{isidentifer} method which handles the logic for us. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /R/man/list_flows.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{list_flows} 4 | \alias{list_flows} 5 | \title{Return a vector of all flow ids.} 6 | \usage{ 7 | list_flows() 8 | } 9 | \description{ 10 | Return a vector of all flow ids. 
11 | } 12 | -------------------------------------------------------------------------------- /R/man/merge_artifacts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{merge_artifacts} 4 | \alias{merge_artifacts} 5 | \title{Helper utility to merge artifacts in a join step} 6 | \usage{ 7 | merge_artifacts(flow, inputs, exclude = list()) 8 | } 9 | \arguments{ 10 | \item{flow}{flow object} 11 | 12 | \item{inputs}{inputs from parent branches} 13 | 14 | \item{exclude}{list of artifact names to exclude from merging} 15 | } 16 | \description{ 17 | Helper utility to merge artifacts in a join step 18 | } 19 | \examples{ 20 | \dontrun{ 21 | merge_artifacts(flow, inputs) 22 | } 23 | \dontrun{ 24 | merge_artifacts(flow, inputs, list("alpha")) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /R/man/metaflow-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \docType{package} 4 | \name{metaflow-package} 5 | \alias{metaflow-package} 6 | \alias{_PACKAGE} 7 | \alias{metaflow-r} 8 | \title{metaflow: Metaflow for R-Lang} 9 | \description{ 10 | R binding for Metaflow. Metaflow is a human-friendly Python/R library 11 | that helps scientists and engineers build and manage real-life data science projects. 12 | Metaflow was originally developed at Netflix to boost productivity of data scientists 13 | who work on a wide variety of projects from classical statistics to state-of-the-art deep learning. 
14 | } 15 | \seealso{ 16 | Useful links: 17 | \itemize{ 18 | \item \url{https://metaflow.org/} 19 | \item \url{https://docs.metaflow.org/} 20 | \item \url{https://github.com/Netflix/metaflow} 21 | \item Report bugs at \url{https://github.com/Netflix/metaflow/issues} 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /R/man/metaflow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \name{metaflow} 4 | \alias{metaflow} 5 | \title{Instantiate a flow} 6 | \usage{ 7 | metaflow(cls, ...) 8 | } 9 | \arguments{ 10 | \item{cls}{flow class name} 11 | 12 | \item{...}{flow decorators} 13 | } 14 | \value{ 15 | flow object 16 | } 17 | \description{ 18 | Instantiate a flow 19 | } 20 | \section{Usage}{ 21 | 22 | \preformatted{ 23 | metaflow("HelloFlow") 24 | } 25 | } 26 | 27 | -------------------------------------------------------------------------------- /R/man/metaflow_location.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{metaflow_location} 4 | \alias{metaflow_location} 5 | \title{Return installation path of metaflow R library} 6 | \usage{ 7 | metaflow_location(flowRDS) 8 | } 9 | \arguments{ 10 | \item{flowRDS}{path of the RDS file containing the flow object} 11 | } 12 | \description{ 13 | Return installation path of metaflow R library 14 | } 15 | -------------------------------------------------------------------------------- /R/man/mf_deserialize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{mf_deserialize} 4 | \alias{mf_deserialize} 5 | \title{Helper utility to deserialize objects from metaflow 6 | data 
format to R object} 7 | \usage{ 8 | mf_deserialize(object) 9 | } 10 | \arguments{ 11 | \item{object}{object to deserialize} 12 | } 13 | \value{ 14 | R object 15 | } 16 | \description{ 17 | Helper utility to deserialize objects from metaflow 18 | data format to R object 19 | } 20 | -------------------------------------------------------------------------------- /R/man/mf_serialize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{mf_serialize} 4 | \alias{mf_serialize} 5 | \title{Helper utility to serialize R object to metaflow 6 | data format} 7 | \usage{ 8 | mf_serialize(object) 9 | } 10 | \arguments{ 11 | \item{object}{object to serialize} 12 | } 13 | \value{ 14 | metaflow data format object 15 | } 16 | \description{ 17 | Helper utility to serialize R object to metaflow 18 | data format 19 | } 20 | -------------------------------------------------------------------------------- /R/man/new_flow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flow_client.R 3 | \name{new_flow} 4 | \alias{new_flow} 5 | \title{Instantiates a new flow object.} 6 | \usage{ 7 | new_flow(flow_id) 8 | } 9 | \arguments{ 10 | \item{flow_id}{Flow identifier.} 11 | } 12 | \value{ 13 | \code{flow} object corresponding to the supplied identifier. 14 | } 15 | \description{ 16 | Instantiates a new flow object. 
17 | } 18 | -------------------------------------------------------------------------------- /R/man/new_run.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_client.R 3 | \name{new_run} 4 | \alias{new_run} 5 | \title{Instantiates a new run object.} 6 | \usage{ 7 | new_run(flow_id, run_id) 8 | } 9 | \arguments{ 10 | \item{flow_id}{Flow identifier.} 11 | 12 | \item{run_id}{Run identifier.} 13 | } 14 | \value{ 15 | \code{run} object corresponding to the supplied identifiers. 16 | } 17 | \description{ 18 | Instantiates a new run object. 19 | } 20 | -------------------------------------------------------------------------------- /R/man/new_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step_client.R 3 | \name{new_step} 4 | \alias{new_step} 5 | \title{Instantiates a new step object.} 6 | \usage{ 7 | new_step(flow_id, run_id, step_id) 8 | } 9 | \arguments{ 10 | \item{flow_id}{Flow identifier.} 11 | 12 | \item{run_id}{Run identifier.} 13 | 14 | \item{step_id}{Step identifier.} 15 | } 16 | \value{ 17 | \code{step} object corresponding to the supplied identifiers. 18 | } 19 | \description{ 20 | Instantiates a new step object. 
21 | } 22 | -------------------------------------------------------------------------------- /R/man/new_task.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/task_client.R 3 | \name{new_task} 4 | \alias{new_task} 5 | \title{Instantiates a new task object.} 6 | \usage{ 7 | new_task(flow_id, run_id, step_id, task_id) 8 | } 9 | \arguments{ 10 | \item{flow_id}{Flow identifier.} 11 | 12 | \item{run_id}{Run identifier.} 13 | 14 | \item{step_id}{Step identifier.} 15 | 16 | \item{task_id}{Task identifier.} 17 | } 18 | \value{ 19 | \code{task} object corresponding to the supplied identifiers. 20 | } 21 | \description{ 22 | Instantiates a new task object. 23 | } 24 | -------------------------------------------------------------------------------- /R/man/parameter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parameter.R 3 | \name{parameter} 4 | \alias{parameter} 5 | \title{Assign parameter to the flow} 6 | \usage{ 7 | parameter( 8 | flow, 9 | parameter, 10 | required = FALSE, 11 | help = NULL, 12 | separator = NULL, 13 | default = NULL, 14 | type = NULL, 15 | is_flag = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{flow}{metaflow object} 20 | 21 | \item{parameter}{name of the parameter} 22 | 23 | \item{required}{logical (defaults to FALSE) denoting if 24 | parameter is required as an argument to \code{run} the flow} 25 | 26 | \item{help}{optional help text} 27 | 28 | \item{separator}{optional separator for string parameters. 
29 | Useful in defining an iterable as a delimited string inside a parameter} 30 | 31 | \item{default}{optional default value of the parameter} 32 | 33 | \item{type}{optional type of the parameter} 34 | 35 | \item{is_flag}{optional logical (defaults to FALSE) flag to denote is_flag} 36 | } 37 | \description{ 38 | \code{parameter} assigns variables to the flow that are 39 | automatically available in all the steps. 40 | } 41 | \section{Usage}{ 42 | 43 | \preformatted{ 44 | parameter("alpha", help = "learning rate", required = TRUE) 45 | parameter("alpha", help = "learning rate", default = 0.05) 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /R/man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/imports.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /R/man/pull_tutorials.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{pull_tutorials} 4 | \alias{pull_tutorials} 5 | \title{Pull the R tutorials to the current folder} 6 | \usage{ 7 | pull_tutorials() 8 | } 9 | \description{ 10 | Pull the R tutorials to the current folder 11 | } 12 | -------------------------------------------------------------------------------- /R/man/py_version.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{py_version} 4 | \alias{py_version} 5 | \title{Return Metaflow python version} 6 | \usage{ 7 | py_version() 8 | } 9 | \description{ 10 | Return Metaflow python version 11 | } 12 | -------------------------------------------------------------------------------- /R/man/r_version.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{r_version} 4 | \alias{r_version} 5 | \title{Return Metaflow R version} 6 | \usage{ 7 | r_version() 8 | } 9 | \description{ 10 | Return Metaflow R version 11 | } 12 | -------------------------------------------------------------------------------- /R/man/remove_metaflow_env.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/install.R 3 | \name{remove_metaflow_env} 4 | \alias{remove_metaflow_env} 5 | \title{Remove Metaflow Python package.} 6 | \usage{ 7 | remove_metaflow_env(prompt = TRUE) 8 | } 9 | \arguments{ 10 | \item{prompt}{\code{bool}, whether to ask for user prompt 
before removal. Default to TRUE.} 11 | } 12 | \description{ 13 | Remove Metaflow Python package. 14 | } 15 | \examples{ 16 | \dontrun{ 17 | # not run because it requires Python 18 | remove_metaflow_env() 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /R/man/reset_default_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metadata.R 3 | \name{reset_default_metadata} 4 | \alias{reset_default_metadata} 5 | \title{Resets the Metadata provider to the default value.} 6 | \usage{ 7 | reset_default_metadata() 8 | } 9 | \value{ 10 | String type. The result of get_metadata() after resetting the provider. 11 | } 12 | \description{ 13 | The default value of the Metadata provider is determined through a 14 | combination of environment variables. 15 | } 16 | -------------------------------------------------------------------------------- /R/man/set_default_namespace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/namespace.R 3 | \name{set_default_namespace} 4 | \alias{set_default_namespace} 5 | \title{Set the default namespace.} 6 | \usage{ 7 | set_default_namespace() 8 | } 9 | \description{ 10 | Set the default namespace. 11 | } 12 | -------------------------------------------------------------------------------- /R/man/set_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/metadata.R 3 | \name{set_metadata} 4 | \alias{set_metadata} 5 | \title{Switch Metadata provider} 6 | \usage{ 7 | set_metadata(ms = NULL) 8 | } 9 | \arguments{ 10 | \item{ms}{string. 
Can be a path (selects local metadata), a URL starting with http (selects 11 | the service metadata) or an explicit specification {metadata_type}@{info}; as an 12 | example, you can specify local@{path} or service@{url}.} 13 | } 14 | \value{ 15 | a string of the description of the metadata selected 16 | } 17 | \description{ 18 | This call has a global effect. 19 | Selecting the local metadata will, for example, not allow access to information 20 | stored in remote metadata providers 21 | } 22 | -------------------------------------------------------------------------------- /R/man/set_namespace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/namespace.R 3 | \name{set_namespace} 4 | \alias{set_namespace} 5 | \title{Switch to a namespace specified by the given tag.} 6 | \usage{ 7 | set_namespace(ns = NULL) 8 | } 9 | \arguments{ 10 | \item{ns}{namespace} 11 | } 12 | \description{ 13 | Switch to a namespace specified by the given tag. 14 | } 15 | \details{ 16 | NULL maps to global namespace. 
17 | } 18 | -------------------------------------------------------------------------------- /R/man/sub-sub-.metaflow.flowspec.FlowSpec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{[[.metaflow.flowspec.FlowSpec} 4 | \alias{[[.metaflow.flowspec.FlowSpec} 5 | \title{Overload getter for self object} 6 | \usage{ 7 | \method{[[}{metaflow.flowspec.FlowSpec}(self, name) 8 | } 9 | \arguments{ 10 | \item{self}{the metaflow self object for each step function} 11 | 12 | \item{name}{attribute name} 13 | } 14 | \description{ 15 | Overload getter for self object 16 | } 17 | \section{Usage}{ 18 | 19 | \preformatted{ 20 | print(self[["var"]]) 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /R/man/sub-subset-.metaflow.flowspec.FlowSpec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{[[<-.metaflow.flowspec.FlowSpec} 4 | \alias{[[<-.metaflow.flowspec.FlowSpec} 5 | \title{Overload setter for self object} 6 | \usage{ 7 | \method{[[}{metaflow.flowspec.FlowSpec}(self, name) <- value 8 | } 9 | \arguments{ 10 | \item{self}{the metaflow self object for each step function} 11 | 12 | \item{name}{attribute name} 13 | 14 | \item{value}{value to assign to the attribute} 15 | } 16 | \description{ 17 | Overload setter for self object 18 | } 19 | \section{Usage}{ 20 | 21 | \preformatted{ 22 | self[["var"]] <- "hello" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/man/test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{test} 4 | \alias{test} 5 | \title{Run a test 
to check if Metaflow R is installed properly} 6 | \usage{ 7 | test() 8 | } 9 | \description{ 10 | Run a test to check if Metaflow R is installed properly 11 | } 12 | -------------------------------------------------------------------------------- /R/man/version_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{version_info} 4 | \alias{version_info} 5 | \title{Print out Metaflow version} 6 | \usage{ 7 | version_info() 8 | } 9 | \description{ 10 | Print out Metaflow version 11 | } 12 | -------------------------------------------------------------------------------- /R/tests/contexts.json: -------------------------------------------------------------------------------- 1 | { 2 | "contexts": [ 3 | { 4 | "name": "all-local", 5 | "disabled": false, 6 | "env": { 7 | "USER": "tester" 8 | }, 9 | "python": "python3", 10 | "top_options": [ 11 | "package_suffixes = c('.R', '.py', '.csv')", 12 | "metadata='local'", 13 | "datastore='local'" 14 | ], 15 | "run_options": [ 16 | "--tag", "\u523a\u8eab means sashimi", 17 | "--tag", "multiple tags should be ok" 18 | ] 19 | }, 20 | { 21 | "name": "batch", 22 | "disabled": true, 23 | "env": { 24 | "USER": "tester" 25 | }, 26 | "python": "python3", 27 | "top_options": [ 28 | "batch = TRUE", 29 | "max_workers = 16", 30 | "package_suffixes = c('.R', '.py', '.csv')", 31 | "metadata='service'", 32 | "datastore='s3'" 33 | ], 34 | "run_options": [ 35 | "--tag", "\u523a\u8eab means sashimi", 36 | "--tag", "multiple tags should be ok" 37 | ] 38 | } 39 | ] 40 | } -------------------------------------------------------------------------------- /R/tests/graphs/branch.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "single-and-branch", 3 | "graph": { 4 | "start": {"branch": ["a", "b"], "quals": ["split-and"]}, 5 | "a": {"linear": "join"}, 6 | "b": 
{"linear": "join"}, 7 | "join": {"linear": "end", "join": true, "quals": ["join-and"]}, 8 | "end": {} 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /R/tests/graphs/foreach.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-foreach", 3 | "graph": { 4 | "start": {"linear": "foreach_split"}, 5 | "foreach_split": { 6 | "foreach": "foreach_inner_first", 7 | "foreach_var": "arr", 8 | "foreach_var_default": "c(1, 2, 3)", 9 | "quals": ["foreach-split"] 10 | }, 11 | "foreach_inner_first": { 12 | "linear": "foreach_inner_second", 13 | "quals": ["foreach-inner"] 14 | }, 15 | "foreach_inner_second": { 16 | "linear": "foreach_join", 17 | "quals": ["foreach-inner"] 18 | }, 19 | "foreach_join": { 20 | "linear": "end", 21 | "join": true, 22 | "quals": ["foreach-join"] 23 | }, 24 | "end": {} 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /R/tests/graphs/linear.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "single-linear-step", 3 | "graph": { 4 | "start": {"linear": "a", "quals": ["singleton-start"]}, 5 | "a": {"linear": "end", "quals": ["singleton"]}, 6 | "end": {"quals": ["singleton-end"]} 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /R/tests/graphs/nested_foreach.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nested-foreach", 3 | "graph": { 4 | "start": {"linear": "foreach_split_x"}, 5 | "foreach_split_x": { 6 | "foreach": "foreach_split_y", 7 | "foreach_var": "x", 8 | "foreach_var_default": "'abc'", 9 | "quals": ["foreach-split"] 10 | }, 11 | "foreach_split_y": { 12 | "foreach": "foreach_split_z", 13 | "foreach_var": "y", 14 | "foreach_var_default": "'de'", 15 | "quals": ["foreach-split"] 16 | }, 17 | "foreach_split_z": { 18 | "foreach": 
"foreach_inner", 19 | "foreach_var": "z", 20 | "foreach_var_default": "'fghijk'", 21 | "quals": ["foreach-nested-split", "foreach-split"] 22 | }, 23 | "foreach_inner": { 24 | "linear": "foreach_join_z", 25 | "quals": ["foreach-nested-inner", "foreach-inner"] 26 | }, 27 | "foreach_join_z": { 28 | "linear": "foreach_join_y", 29 | "join": true, 30 | "quals": ["foreach-nested-join"] 31 | }, 32 | "foreach_join_y": { "linear": "foreach_join_x", "join": true }, 33 | "foreach_join_x": { "linear": "end", "join": true }, 34 | "end": {} 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /R/tests/graphs/small_foreach.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "small-foreach", 3 | "graph": { 4 | "start": {"linear": "foreach_split"}, 5 | "foreach_split": { 6 | "foreach": "foreach_inner", 7 | "foreach_var": "arr", 8 | "foreach_var_default": "c(1, 2, 3)", 9 | "quals": ["foreach-split-small", "foreach-split"] 10 | }, 11 | "foreach_inner": { 12 | "linear": "foreach_join", 13 | "quals": ["foreach-inner-small"] 14 | }, 15 | "foreach_join": { 16 | "linear": "end", 17 | "join": true, 18 | "quals": ["foreach-join-small"] 19 | }, 20 | "end": {} 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /R/tests/run_tests.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | 3 | virtualenv_create("r-metaflow") 4 | virtualenv_install("r-metaflow", c("../..", "pandas", "numpy")) 5 | use_virtualenv("r-metaflow") 6 | 7 | source("testthat.R") 8 | source("run_integration_tests.R") -------------------------------------------------------------------------------- /R/tests/tests/basic_artifacts.R: -------------------------------------------------------------------------------- 1 | test <- new.env() 2 | test$name <- "BasicArtifactsTest" 3 | test$priority <- 0 4 | 5 | test$step_start <- 
decorated_function( 6 | function(self) { 7 | self$data <- "abc" 8 | }, 9 | type = "step", prio = 0, qual = c("start"), required = TRUE 10 | ) 11 | 12 | test$step_join <- decorated_function( 13 | function(self, inputs) { 14 | inputset <- gather_inputs(inputs, "data") 15 | for (item in inputset) { 16 | print(item) 17 | stopifnot(item == "abc") 18 | } 19 | self$data <- inputset[[1]] 20 | }, 21 | type = "step", prio = 1, qual = c("join"), required = TRUE 22 | ) 23 | 24 | 25 | test$step_all <- decorated_function( 26 | function(self) { 27 | }, 28 | type = "step", prio = 2, qual = c("all") 29 | ) 30 | 31 | 32 | test$check_artifact <- decorated_function( 33 | function(checker, test_flow) { 34 | test_run <- test_flow$run(test_flow$latest_run) 35 | for (step_name in test_run$steps) { 36 | stopifnot(fetch_artifact(checker, 37 | step = step_name, 38 | var = "data" 39 | ) == "abc") 40 | } 41 | }, 42 | type = "check" 43 | ) 44 | -------------------------------------------------------------------------------- /R/tests/tests/basic_foreach.R: -------------------------------------------------------------------------------- 1 | test <- new.env() 2 | test$name <- "BasicForeachTest" 3 | test$priority <- 0 4 | 5 | test$split <- decorated_function( 6 | function(self) { 7 | self$my_index <- "None" 8 | self$arr <- 1:10 9 | }, 10 | type = "step", prio = 0, qual = c("foreach-split"), required = TRUE 11 | ) 12 | 13 | test$inner <- decorated_function( 14 | function(self) { 15 | # index must stay constant over multiple steps inside foreach 16 | if (self$my_index == "None") { 17 | self$my_index <- self$index + 1 18 | } 19 | stopifnot(self$my_index == self$index + 1) 20 | stopifnot(self$my_index == self$arr[self$my_index]) 21 | self$my_input <- self$input 22 | }, 23 | type = "step", prio = 0, qual = c("foreach-inner"), required = TRUE 24 | ) 25 | 26 | test$join <- decorated_function( 27 | function(self, inputs) { 28 | got <- sort(unlist(gather_inputs(inputs, "my_input"))) 29 | stopifnot(all(got 
== 1:10)) 30 | }, 31 | type = "step", prio = 0, qual = c("foreach-join"), required = TRUE 32 | ) 33 | 34 | test$all <- decorated_function( 35 | function(self) { 36 | }, 37 | type = "step", prio = 1, qual = c("all") 38 | ) 39 | -------------------------------------------------------------------------------- /R/tests/tests/basic_parameter.R: -------------------------------------------------------------------------------- 1 | test <- new.env() 2 | test$name <- "BasicParameterTest" 3 | test$priority <- 1 4 | test$parameters <- list( 5 | bool_param = list(default = "TRUE"), 6 | int_param = list(default = "123"), 7 | str_param = list(default = '"foobar"') 8 | ) 9 | 10 | test$all <- decorated_function( 11 | function(self) { 12 | source("utils.R") 13 | stopifnot(self$bool_param) 14 | stopifnot(self$int_param == 123) 15 | stopifnot(self$str_param == "foobar") 16 | # parameters should be immutable 17 | assert_exception( 18 | expression(self$int_param <- 5), 19 | "AttributeError" 20 | ) 21 | }, 22 | type = "step", prio = 0, qual = c("all") 23 | ) 24 | 25 | test$check_artifact <- decorated_function( 26 | function(checker, test_flow) { 27 | test_run <- test_flow$run(test_flow$latest_run) 28 | for (step_name in test_run$steps) { 29 | stopifnot(fetch_artifact(checker, 30 | step = step_name, 31 | var = "bool_param" 32 | ) == TRUE) 33 | 34 | stopifnot(fetch_artifact(checker, 35 | step = step_name, 36 | var = "int_param" 37 | ) == 123) 38 | 39 | stopifnot(fetch_artifact(checker, 40 | step = step_name, 41 | var = "str_param" 42 | ) == "foobar") 43 | } 44 | }, 45 | type = "check" 46 | ) 47 | -------------------------------------------------------------------------------- /R/tests/tests/merge_artifacts_propagation.R: -------------------------------------------------------------------------------- 1 | test <- new.env() 2 | test$name <- "MergeArtifactsPropagationTest" 3 | test$priority <- 1 4 | 5 | test$start <- decorated_function( 6 | function(self) { 7 | self$non_modified_passdown <- 
"a" 8 | }, 9 | type = "step", prio = 0, qual = c("start"), required = TRUE 10 | ) 11 | 12 | test$modify_things <- decorated_function( 13 | function(self) { 14 | # Set different names to different things 15 | val <- self$index + 1 16 | self[[sprintf("val%d", val)]] <- val 17 | }, 18 | type = "step", prio = 0, qual = c("foreach-inner-small"), required = TRUE 19 | ) 20 | 21 | 22 | test$merge_things <- decorated_function( 23 | function(self, inputs) { 24 | merge_artifacts(self, inputs) 25 | 26 | stopifnot(self$non_modified_passdown == "a") 27 | for (i in 1:length(inputs)) { 28 | stopifnot(self[[sprintf("val%d", i)]] == i) 29 | } 30 | }, 31 | type = "step", prio = 0, qual = c("join"), required = TRUE 32 | ) 33 | 34 | test$all <- decorated_function( 35 | function(self) { 36 | stopifnot(self$non_modified_passdown == "a") 37 | }, 38 | type = "step", prio = 1, qual = c("all"), required = TRUE 39 | ) 40 | -------------------------------------------------------------------------------- /R/tests/tests/nested_foreach.R: -------------------------------------------------------------------------------- 1 | test <- new.env() 2 | test$name <- "NestedForeachTest" 3 | test$priority <- 1 4 | 5 | test$inner <- decorated_function( 6 | function(self) { 7 | stack <- self$foreach_stack() 8 | x <- stack[[1]] 9 | y <- stack[[2]] 10 | z <- stack[[3]] 11 | 12 | # assert that lengths are correct 13 | stopifnot(length(self$x) == length(x[[2]])) 14 | stopifnot(length(self$y) == length(y[[2]])) 15 | stopifnot(length(self$z) == length(z[[2]])) 16 | 17 | # assert that variables are correct given their indices 18 | stopifnot(x[[3]] == substr(self$x, x[[1]] + 1, x[[1]] + 1)) 19 | stopifnot(y[[3]] == substr(self$y, y[[1]] + 1, y[[1]] + 1)) 20 | stopifnot(z[[3]] == substr(self$z, z[[1]] + 1, z[[1]] + 1)) 21 | }, 22 | type = "step", prio = 0, qual = c("foreach-nested-inner"), required = TRUE 23 | ) 24 | 25 | test$all <- decorated_function( 26 | function(self) { 27 | }, 28 | type = "step", prio = 1, qual = 
c("all") 29 | ) 30 | -------------------------------------------------------------------------------- /R/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(metaflow) 3 | 4 | test_check("metaflow") 5 | -------------------------------------------------------------------------------- /R/tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | skip_if_no_metaflow <- function() { 2 | have_metaflow <- reticulate::py_module_available("metaflow") 3 | if (!have_metaflow) { 4 | skip("metaflow not available for testing") 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /R/tests/testthat/test-command-args.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | library(metaflow) 3 | 4 | flags <- metaflow:::parse_arguments() 5 | saveRDS(flags, "flags.RDS") 6 | -------------------------------------------------------------------------------- /R/tests/testthat/test-decorators-environment.R: -------------------------------------------------------------------------------- 1 | test_that("@environment parses correctly", { 2 | skip_if_no_metaflow() 3 | 4 | actual <- decorator("retry", times = 3)[1] 5 | expected <- "@retry(times=3)" 6 | expect_equal(actual, expected) 7 | }) 8 | 9 | test_that("@environment wrapper parses correctly", { 10 | skip_if_no_metaflow() 11 | 12 | actual <- environment_variables(foo = "red panda")[1] 13 | expected <- "@environment(vars={'foo': 'red panda'})" 14 | expect_equal(actual, expected) 15 | 16 | actual <- environment_variables(foo = "red panda", bar = "corgi")[1] 17 | expected <- "@environment(vars={'foo': 'red panda', 'bar': 'corgi'})" 18 | expect_equal(actual, expected) 19 | 20 | # Note that in this case, "TRUE" does not become Pythonic "True" --- 21 | # each environment variable value is immediately coerced to a 
character. 22 | actual <- environment_variables(foo = "TRUE")[1] 23 | expected <- "@environment(vars={'foo': 'TRUE'})" 24 | expect_equal(actual, expected) 25 | }) -------------------------------------------------------------------------------- /R/tests/testthat/test-decorators-error.R: -------------------------------------------------------------------------------- 1 | test_that("@retry parses correctly", { 2 | skip_if_no_metaflow() 3 | 4 | actual <- decorator("retry", times = 3)[1] 5 | expected <- "@retry(times=3)" 6 | expect_equal(actual, expected) 7 | }) 8 | 9 | test_that("@retry wrapper parses correctly", { 10 | skip_if_no_metaflow() 11 | 12 | actual <- retry(times = 3)[1] 13 | expected <- "@retry(times=3, minutes_between_retries=2)" 14 | expect_equal(actual, expected) 15 | 16 | actual <- retry(times = 3, minutes_between_retries=0)[1] 17 | expected <- "@retry(times=3, minutes_between_retries=0)" 18 | expect_equal(actual, expected) 19 | }) 20 | 21 | test_that("@catch parses correctly", { 22 | skip_if_no_metaflow() 23 | 24 | actual <- decorator("catch", var = "red_panda")[1] 25 | expected <- "@catch(var='red_panda')" 26 | expect_equal(actual, expected) 27 | }) 28 | 29 | test_that("@catch wrapper parses correctly", { 30 | skip_if_no_metaflow() 31 | 32 | actual <- catch(var = "red_panda")[1] 33 | expected <- "@catch(var='red_panda', print_exception=True)" 34 | expect_equal(actual, expected) 35 | 36 | actual <- catch(var = "red_panda", print_exception = FALSE)[1] 37 | expected <- "@catch(var='red_panda', print_exception=False)" 38 | expect_equal(actual, expected) 39 | }) 40 | -------------------------------------------------------------------------------- /R/tests/testthat/test-metaflow.R: -------------------------------------------------------------------------------- 1 | context("test-metaflow.R") 2 | 3 | test_that("metaflow() creates flow object", { 4 | skip_if_no_metaflow() 5 | metaflow("TestFlow") 6 | expect_true(exists("TestFlow")) 7 | }) 8 | 
-------------------------------------------------------------------------------- /R/tests/testthat/test-run-cmd.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | library(metaflow) 3 | 4 | run_cmd <- metaflow:::run_cmd("flow.RDS") 5 | saveRDS(run_cmd, "run_cmd.RDS") 6 | -------------------------------------------------------------------------------- /R/tests/testthat/test-run.R: -------------------------------------------------------------------------------- 1 | context("test-run.R") 2 | 3 | extract_args <- function(x) { 4 | args <- strsplit(x, " ")[[1]][-c(1:2)] 5 | args[args != ""] 6 | } 7 | 8 | test_that("test run_cmd is correctly passing default flags.", { 9 | skip_if_no_metaflow() 10 | expected <- c( 11 | "--flowRDS=flow.RDS", 12 | "--no-pylint", "run" 13 | ) 14 | actual <- run_cmd("flow.RDS") %>% 15 | as.character() %>% 16 | extract_args() 17 | expect_equal(actual, expected) 18 | }) 19 | 20 | test_that("test run_cmd correctly parses --with batch", { 21 | skip_if_no_metaflow() 22 | actual <- run_cmd("flow.RDS", batch = TRUE) %>% 23 | as.character() %>% 24 | extract_args() 25 | expected <- c( 26 | "--flowRDS=flow.RDS", 27 | "--no-pylint", "--with", 28 | "batch", "run" 29 | ) 30 | expect_equal(actual, expected) 31 | }) 32 | 33 | test_that("test run_cmd correctly parses help", { 34 | skip_if_no_metaflow() 35 | actual <- run_cmd("flow.RDS", help = TRUE) %>% 36 | as.character() %>% 37 | extract_args() 38 | expected <- c("--flowRDS=flow.RDS", "--no-pylint", "--help") 39 | expect_equal(actual, expected) 40 | }) 41 | -------------------------------------------------------------------------------- /R/tests/testthat/test-utils-format.R: -------------------------------------------------------------------------------- 1 | context("test-utils-format.R") 2 | 3 | test_that("quotes are properly escaped", { 4 | skip_if_no_metaflow() 5 | actual <- escape_quote("TRUE") 6 | expected <- "True" 7 | expect_equal(actual, 
expected) 8 | actual <- escape_quote("parameter") 9 | expected <- "'parameter'" 10 | expect_equal(actual, expected) 11 | }) 12 | -------------------------------------------------------------------------------- /R/tests/utils.R: -------------------------------------------------------------------------------- 1 | decorated_function <- function(f, type = NULL, prio = NULL, qual = c(), required = FALSE) { 2 | attr(f, "type") <- type 3 | attr(f, "prio") <- prio 4 | attr(f, "quals") <- qual 5 | attr(f, "required") <- required 6 | return(f) 7 | } 8 | 9 | assert_exception <- function(r_expr, expected_error_message, env = parent.frame()) { 10 | has_correct_error_message <- FALSE 11 | tryCatch( 12 | { 13 | eval(r_expr, envir = env) 14 | }, 15 | error = function(e) { 16 | print(e) 17 | has_correct_error_message <<- 18 | (length(grep(expected_error_message, e$message)) > 0) 19 | } 20 | ) 21 | stopifnot(has_correct_error_message) 22 | } 23 | -------------------------------------------------------------------------------- /R/vignettes/metaflow.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "metaflow" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{metaflow} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | Please refer to \url{docs.metaflow.org} for detailed documentation and tutorials. 17 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | We currently accept reports for vulnerabilities on all published versions of the project. 4 | 5 | ## Reporting a Vulnerability 6 | 7 | You can disclose vulnerabilities securely through the [Netflix Bugcrowd](https://bugcrowd.com/netflix) site. 
When reporting a finding, mention the project name or repository in the title and the report will find its way to the correct people. 8 | 9 | Please note that at the moment, the Metaflow project does not offer a bounty for any disclosure. 10 | -------------------------------------------------------------------------------- /docs/lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/docs/lifecycle.png -------------------------------------------------------------------------------- /docs/metaflow_sidecar_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/docs/metaflow_sidecar_arch.png -------------------------------------------------------------------------------- /docs/multicloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/docs/multicloud.png -------------------------------------------------------------------------------- /docs/prototype-to-prod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/docs/prototype-to-prod.png -------------------------------------------------------------------------------- /docs/update_lifecycle_png: -------------------------------------------------------------------------------- 1 | # install graphviz first 2 | dot -Tpng lifecycle.dot -o lifecycle.png -------------------------------------------------------------------------------- /metaflow-complete.sh: -------------------------------------------------------------------------------- 1 | _metaflow_completion() { 2 | local IFS=$' 3 | ' 4 | COMPREPLY=( $( env 
COMP_WORDS="${COMP_WORDS[*]}" \ 5 | COMP_CWORD=$COMP_CWORD \ 6 | _METAFLOW_COMPLETE=complete $1 ) ) 7 | return 0 8 | } 9 | 10 | _metaflow_completionetup() { 11 | local COMPLETION_OPTIONS="" 12 | local BASH_VERSION_ARR=(${BASH_VERSION//./ }) 13 | # Only BASH version 4.4 and later have the nosort option. 14 | if [ ${BASH_VERSION_ARR[0]} -gt 4 ] || ([ ${BASH_VERSION_ARR[0]} -eq 4 ] && [ ${BASH_VERSION_ARR[1]} -ge 4 ]); then 15 | COMPLETION_OPTIONS="-o nosort" 16 | fi 17 | 18 | complete $COMPLETION_OPTIONS -F _metaflow_completion metaflow 19 | } 20 | 21 | _metaflow_completionetup; 22 | -------------------------------------------------------------------------------- /metaflow/_vendor/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | metaflow._vendor is for vendoring dependencies of metaflow. Files 3 | inside of metaflow._vendor should be considered immutable and 4 | should only be updated to versions from upstream. 5 | 6 | This folder is generated by `python vendor.py` 7 | 8 | If you would like to debundle the vendored dependencies, please 9 | reach out to the maintainers at chat.metaflow.org 10 | """ 11 | -------------------------------------------------------------------------------- /metaflow/_vendor/click/_textwrap.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from contextlib import contextmanager 3 | 4 | 5 | class TextWrapper(textwrap.TextWrapper): 6 | def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): 7 | space_left = max(width - cur_len, 1) 8 | 9 | if self.break_long_words: 10 | last = reversed_chunks[-1] 11 | cut = last[:space_left] 12 | res = last[space_left:] 13 | cur_line.append(cut) 14 | reversed_chunks[-1] = res 15 | elif not cur_line: 16 | cur_line.append(reversed_chunks.pop()) 17 | 18 | @contextmanager 19 | def extra_indent(self, indent): 20 | old_initial_indent = self.initial_indent 21 | old_subsequent_indent = 
self.subsequent_indent 22 | self.initial_indent += indent 23 | self.subsequent_indent += indent 24 | try: 25 | yield 26 | finally: 27 | self.initial_indent = old_initial_indent 28 | self.subsequent_indent = old_subsequent_indent 29 | 30 | def indent_only(self, text): 31 | rv = [] 32 | for idx, line in enumerate(text.splitlines()): 33 | indent = self.initial_indent 34 | if idx > 0: 35 | indent = self.subsequent_indent 36 | rv.append(indent + line) 37 | return "\n".join(rv) 38 | -------------------------------------------------------------------------------- /metaflow/_vendor/importlib_metadata.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017-2019 Jason R. Coombs, Barry Warsaw 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /metaflow/_vendor/importlib_metadata/_collections.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | 4 | # from jaraco.collections 3.3 5 | class FreezableDefaultDict(collections.defaultdict): 6 | """ 7 | Often it is desirable to prevent the mutation of 8 | a default dict after its initial construction, such 9 | as to prevent mutation during iteration. 
10 | 11 | >>> dd = FreezableDefaultDict(list) 12 | >>> dd[0].append('1') 13 | >>> dd.freeze() 14 | >>> dd[1] 15 | [] 16 | >>> len(dd) 17 | 1 18 | """ 19 | 20 | def __missing__(self, key): 21 | return getattr(self, '_frozen', super().__missing__)(key) 22 | 23 | def freeze(self): 24 | self._frozen = lambda key: self.default_factory() 25 | 26 | 27 | class Pair(collections.namedtuple('Pair', 'name value')): 28 | @classmethod 29 | def parse(cls, text): 30 | return cls(*map(str.strip, text.split("=", 1))) 31 | -------------------------------------------------------------------------------- /metaflow/_vendor/importlib_metadata/_meta.py: -------------------------------------------------------------------------------- 1 | from ._compat import Protocol 2 | from typing import Any, Dict, Iterator, List, TypeVar, Union 3 | 4 | 5 | _T = TypeVar("_T") 6 | 7 | 8 | class PackageMetadata(Protocol): 9 | def __len__(self) -> int: 10 | ... # pragma: no cover 11 | 12 | def __contains__(self, item: str) -> bool: 13 | ... # pragma: no cover 14 | 15 | def __getitem__(self, key: str) -> str: 16 | ... # pragma: no cover 17 | 18 | def __iter__(self) -> Iterator[str]: 19 | ... # pragma: no cover 20 | 21 | def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: 22 | """ 23 | Return all values associated with a possibly multi-valued key. 24 | """ 25 | 26 | @property 27 | def json(self) -> Dict[str, Union[str, List[str]]]: 28 | """ 29 | A JSON-compatible form of the metadata. 30 | """ 31 | 32 | 33 | class SimplePath(Protocol): 34 | """ 35 | A minimal subset of pathlib.Path required by PathDistribution. 36 | """ 37 | 38 | def joinpath(self) -> 'SimplePath': 39 | ... # pragma: no cover 40 | 41 | def __truediv__(self) -> 'SimplePath': 42 | ... # pragma: no cover 43 | 44 | def parent(self) -> 'SimplePath': 45 | ... # pragma: no cover 46 | 47 | def read_text(self) -> str: 48 | ... 
# pragma: no cover 49 | -------------------------------------------------------------------------------- /metaflow/_vendor/importlib_metadata/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/_vendor/importlib_metadata/py.typed -------------------------------------------------------------------------------- /metaflow/_vendor/packaging/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is dual licensed under the terms of the Apache License, Version 2 | # 2.0, and the BSD License. See the LICENSE file in the root of this repository 3 | # for complete details. 4 | 5 | __title__ = "packaging" 6 | __summary__ = "Core utilities for Python packages" 7 | __uri__ = "https://github.com/pypa/packaging" 8 | 9 | __version__ = "23.0" 10 | 11 | __author__ = "Donald Stufft and individual contributors" 12 | __email__ = "donald@stufft.io" 13 | 14 | __license__ = "BSD-2-Clause or Apache-2.0" 15 | __copyright__ = "2014-2019 %s" % __author__ 16 | -------------------------------------------------------------------------------- /metaflow/_vendor/packaging/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/_vendor/packaging/py.typed -------------------------------------------------------------------------------- /metaflow/_vendor/pip.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-present The pip developers (see AUTHORS.txt file) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, 
copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /metaflow/_vendor/typeguard.LICENSE: -------------------------------------------------------------------------------- 1 | This is the MIT license: http://www.opensource.org/licenses/mit-license.php 2 | 3 | Copyright (c) Alex Grönholm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | software and associated documentation files (the "Software"), to deal in the Software 7 | without restriction, including without limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 9 | to whom the Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or 12 | substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 15 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 16 | PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE 17 | FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /metaflow/_vendor/typeguard/_exceptions.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from typing import Deque 3 | 4 | 5 | class TypeHintWarning(UserWarning): 6 | """ 7 | A warning that is emitted when a type hint in string form could not be resolved to 8 | an actual type. 9 | """ 10 | 11 | 12 | class TypeCheckWarning(UserWarning): 13 | """Emitted by typeguard's type checkers when a type mismatch is detected.""" 14 | 15 | def __init__(self, message: str): 16 | super().__init__(message) 17 | 18 | 19 | class InstrumentationWarning(UserWarning): 20 | """Emitted when there's a problem with instrumenting a function for type checks.""" 21 | 22 | def __init__(self, message: str): 23 | super().__init__(message) 24 | 25 | 26 | class TypeCheckError(Exception): 27 | """ 28 | Raised by typeguard's type checkers when a type mismatch is detected. 
29 | """ 30 | 31 | def __init__(self, message: str): 32 | super().__init__(message) 33 | self._path: Deque[str] = deque() 34 | 35 | def append_path_element(self, element: str) -> None: 36 | self._path.append(element) 37 | 38 | def __str__(self) -> str: 39 | if self._path: 40 | return " of ".join(self._path) + " " + str(self.args[0]) 41 | else: 42 | return str(self.args[0]) 43 | -------------------------------------------------------------------------------- /metaflow/_vendor/typeguard/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/_vendor/typeguard/py.typed -------------------------------------------------------------------------------- /metaflow/_vendor/v3_6/__init__.py: -------------------------------------------------------------------------------- 1 | # Empty file -------------------------------------------------------------------------------- /metaflow/_vendor/v3_6/importlib_metadata.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017-2019 Jason R. Coombs, Barry Warsaw 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_6/importlib_metadata/_collections.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | 4 | # from jaraco.collections 3.3 5 | class FreezableDefaultDict(collections.defaultdict): 6 | """ 7 | Often it is desirable to prevent the mutation of 8 | a default dict after its initial construction, such 9 | as to prevent mutation during iteration. 10 | 11 | >>> dd = FreezableDefaultDict(list) 12 | >>> dd[0].append('1') 13 | >>> dd.freeze() 14 | >>> dd[1] 15 | [] 16 | >>> len(dd) 17 | 1 18 | """ 19 | 20 | def __missing__(self, key): 21 | return getattr(self, '_frozen', super().__missing__)(key) 22 | 23 | def freeze(self): 24 | self._frozen = lambda key: self.default_factory() 25 | 26 | 27 | class Pair(collections.namedtuple('Pair', 'name value')): 28 | @classmethod 29 | def parse(cls, text): 30 | return cls(*map(str.strip, text.split("=", 1))) 31 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_6/importlib_metadata/_meta.py: -------------------------------------------------------------------------------- 1 | from ._compat import Protocol 2 | from typing import Any, Dict, Iterator, List, TypeVar, Union 3 | 4 | 5 | _T = TypeVar("_T") 6 | 7 | 8 | class PackageMetadata(Protocol): 9 | def __len__(self) -> int: 10 | ... # pragma: no cover 11 | 12 | def __contains__(self, item: str) -> bool: 13 | ... # pragma: no cover 14 | 15 | def __getitem__(self, key: str) -> str: 16 | ... # pragma: no cover 17 | 18 | def __iter__(self) -> Iterator[str]: 19 | ... # pragma: no cover 20 | 21 | def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: 22 | """ 23 | Return all values associated with a possibly multi-valued key. 
24 | """ 25 | 26 | @property 27 | def json(self) -> Dict[str, Union[str, List[str]]]: 28 | """ 29 | A JSON-compatible form of the metadata. 30 | """ 31 | 32 | 33 | class SimplePath(Protocol): 34 | """ 35 | A minimal subset of pathlib.Path required by PathDistribution. 36 | """ 37 | 38 | def joinpath(self) -> 'SimplePath': 39 | ... # pragma: no cover 40 | 41 | def __truediv__(self) -> 'SimplePath': 42 | ... # pragma: no cover 43 | 44 | def parent(self) -> 'SimplePath': 45 | ... # pragma: no cover 46 | 47 | def read_text(self) -> str: 48 | ... # pragma: no cover 49 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_6/importlib_metadata/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/_vendor/v3_6/importlib_metadata/py.typed -------------------------------------------------------------------------------- /metaflow/_vendor/v3_6/zipp.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Jason R. Coombs 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/__init__.py: -------------------------------------------------------------------------------- 1 | # Empty file -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/importlib_metadata.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017-2019 Jason R. Coombs, Barry Warsaw 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/importlib_metadata/_collections.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | 4 | # from jaraco.collections 3.3 5 | class FreezableDefaultDict(collections.defaultdict): 6 | """ 7 | Often it is desirable to prevent the mutation of 8 | a default dict after its initial construction, such 9 | as to prevent mutation during iteration. 
10 | 11 | >>> dd = FreezableDefaultDict(list) 12 | >>> dd[0].append('1') 13 | >>> dd.freeze() 14 | >>> dd[1] 15 | [] 16 | >>> len(dd) 17 | 1 18 | """ 19 | 20 | def __missing__(self, key): 21 | return getattr(self, '_frozen', super().__missing__)(key) 22 | 23 | def freeze(self): 24 | self._frozen = lambda key: self.default_factory() 25 | 26 | 27 | class Pair(collections.namedtuple('Pair', 'name value')): 28 | @classmethod 29 | def parse(cls, text): 30 | return cls(*map(str.strip, text.split("=", 1))) 31 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/importlib_metadata/_meta.py: -------------------------------------------------------------------------------- 1 | from ._compat import Protocol 2 | from typing import Any, Dict, Iterator, List, TypeVar, Union 3 | 4 | 5 | _T = TypeVar("_T") 6 | 7 | 8 | class PackageMetadata(Protocol): 9 | def __len__(self) -> int: 10 | ... # pragma: no cover 11 | 12 | def __contains__(self, item: str) -> bool: 13 | ... # pragma: no cover 14 | 15 | def __getitem__(self, key: str) -> str: 16 | ... # pragma: no cover 17 | 18 | def __iter__(self) -> Iterator[str]: 19 | ... # pragma: no cover 20 | 21 | def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: 22 | """ 23 | Return all values associated with a possibly multi-valued key. 24 | """ 25 | 26 | @property 27 | def json(self) -> Dict[str, Union[str, List[str]]]: 28 | """ 29 | A JSON-compatible form of the metadata. 30 | """ 31 | 32 | 33 | class SimplePath(Protocol): 34 | """ 35 | A minimal subset of pathlib.Path required by PathDistribution. 36 | """ 37 | 38 | def joinpath(self) -> 'SimplePath': 39 | ... # pragma: no cover 40 | 41 | def __truediv__(self) -> 'SimplePath': 42 | ... # pragma: no cover 43 | 44 | def parent(self) -> 'SimplePath': 45 | ... # pragma: no cover 46 | 47 | def read_text(self) -> str: 48 | ... 
# pragma: no cover 49 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/importlib_metadata/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/_vendor/v3_7/importlib_metadata/py.typed -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/typeguard.LICENSE: -------------------------------------------------------------------------------- 1 | This is the MIT license: http://www.opensource.org/licenses/mit-license.php 2 | 3 | Copyright (c) Alex Grönholm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | software and associated documentation files (the "Software"), to deal in the Software 7 | without restriction, including without limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 9 | to whom the Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or 12 | substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 15 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 16 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE 17 | FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | DEALINGS IN THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/typeguard/_exceptions.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from typing import Deque 3 | 4 | 5 | class TypeHintWarning(UserWarning): 6 | """ 7 | A warning that is emitted when a type hint in string form could not be resolved to 8 | an actual type. 9 | """ 10 | 11 | 12 | class TypeCheckWarning(UserWarning): 13 | """Emitted by typeguard's type checkers when a type mismatch is detected.""" 14 | 15 | def __init__(self, message: str): 16 | super().__init__(message) 17 | 18 | 19 | class InstrumentationWarning(UserWarning): 20 | """Emitted when there's a problem with instrumenting a function for type checks.""" 21 | 22 | def __init__(self, message: str): 23 | super().__init__(message) 24 | 25 | 26 | class TypeCheckError(Exception): 27 | """ 28 | Raised by typeguard's type checkers when a type mismatch is detected. 29 | """ 30 | 31 | def __init__(self, message: str): 32 | super().__init__(message) 33 | self._path: Deque[str] = deque() 34 | 35 | def append_path_element(self, element: str) -> None: 36 | self._path.append(element) 37 | 38 | def __str__(self) -> str: 39 | if self._path: 40 | return " of ".join(self._path) + " " + str(self.args[0]) 41 | else: 42 | return str(self.args[0]) 43 | -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/typeguard/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/_vendor/v3_7/typeguard/py.typed -------------------------------------------------------------------------------- /metaflow/_vendor/v3_7/zipp.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Jason R. 
Coombs 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /metaflow/_vendor/vendor_any.txt: -------------------------------------------------------------------------------- 1 | click==7.1.2 2 | packaging==23.0 3 | importlib_metadata==4.8.3 4 | typeguard==4.4.0 5 | typing_extensions==4.12.2 6 | zipp==3.6.0 7 | -------------------------------------------------------------------------------- /metaflow/_vendor/vendor_v3_6.txt: -------------------------------------------------------------------------------- 1 | importlib_metadata==4.8.3 2 | typing_extensions==4.1.1 3 | zipp==3.6.0 4 | -------------------------------------------------------------------------------- /metaflow/_vendor/vendor_v3_7.txt: -------------------------------------------------------------------------------- 1 | importlib_metadata==4.8.3 2 | typeguard==4.1.2 3 | typing_extensions==4.7.1 4 | zipp==3.6.0 5 | -------------------------------------------------------------------------------- /metaflow/_vendor/zipp.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Jason R. Coombs 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /metaflow/cards.py: -------------------------------------------------------------------------------- 1 | from metaflow.plugins.cards.card_client import get_cards 2 | from metaflow.plugins.cards.card_modules.card import MetaflowCardComponent, MetaflowCard 3 | from metaflow.plugins.cards.card_modules.components import ( 4 | Artifact, 5 | Table, 6 | Image, 7 | Error, 8 | Markdown, 9 | VegaChart, 10 | ProgressBar, 11 | PythonCode, 12 | ) 13 | from metaflow.plugins.cards.card_modules.basic import ( 14 | DefaultCard, 15 | PageComponent, 16 | ErrorCard, 17 | BlankCard, 18 | ) 19 | -------------------------------------------------------------------------------- /metaflow/cli_components/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/cli_components/__init__.py -------------------------------------------------------------------------------- /metaflow/client/__init__.py: -------------------------------------------------------------------------------- 1 | # core client classes 2 | from .core import ( 3 | namespace, 4 | get_namespace, 5 | default_namespace, 6 | metadata, 7 | get_metadata, 8 | default_metadata, 9 | Metaflow, 10 | Flow, 11 | Run, 12 | Step, 13 | Task, 14 | DataArtifact, 15 | ) 16 | -------------------------------------------------------------------------------- /metaflow/cmd/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- 
/metaflow/cmd/develop/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from metaflow.cli import echo_dev_null, echo_always 4 | from metaflow._vendor import click 5 | 6 | 7 | class CommandObj: 8 | def __init__(self): 9 | pass 10 | 11 | 12 | @click.group() 13 | @click.pass_context 14 | def cli(ctx): 15 | pass 16 | 17 | 18 | @cli.group(help="Metaflow develop commands") 19 | @click.option( 20 | "--quiet/--no-quiet", 21 | show_default=True, 22 | default=False, 23 | help="Suppress unnecessary messages", 24 | ) 25 | @click.pass_context 26 | def develop( 27 | ctx: Any, 28 | quiet: bool, 29 | ): 30 | if quiet: 31 | echo = echo_dev_null 32 | else: 33 | echo = echo_always 34 | 35 | obj = CommandObj() 36 | obj.quiet = quiet 37 | obj.echo = echo 38 | obj.echo_always = echo_always 39 | ctx.obj = obj 40 | 41 | 42 | from . import stubs 43 | -------------------------------------------------------------------------------- /metaflow/cmd/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from metaflow._vendor import click 4 | 5 | 6 | def makedirs(path): 7 | # This is for python2 compatibility. 8 | # Python3 has os.makedirs(exist_ok=True). 
9 | try: 10 | os.makedirs(path) 11 | except OSError as x: 12 | if x.errno == 17: 13 | return 14 | else: 15 | raise 16 | 17 | 18 | def echo_dev_null(*args, **kwargs): 19 | pass 20 | 21 | 22 | def echo_always(line, **kwargs): 23 | click.secho(line, **kwargs) 24 | -------------------------------------------------------------------------------- /metaflow/cmd_with_io.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from .exception import ExternalCommandFailed 3 | 4 | from metaflow.util import to_bytes 5 | 6 | 7 | def cmd(cmdline, input, output): 8 | for path, data in input.items(): 9 | with open(path, "wb") as f: 10 | f.write(to_bytes(data)) 11 | 12 | if subprocess.call(cmdline, shell=True): 13 | raise ExternalCommandFailed( 14 | "Command '%s' returned a non-zero " "exit code." % cmdline 15 | ) 16 | 17 | out = [] 18 | for path in output: 19 | with open(path, "rb") as f: 20 | out.append(f.read()) 21 | 22 | if len(out) == 1: 23 | return out[0] 24 | else: 25 | return out 26 | -------------------------------------------------------------------------------- /metaflow/datastore/__init__.py: -------------------------------------------------------------------------------- 1 | from .inputs import Inputs 2 | from .flow_datastore import FlowDataStore 3 | from .datastore_set import TaskDataStoreSet 4 | from .task_datastore import TaskDataStore 5 | -------------------------------------------------------------------------------- /metaflow/datastore/exceptions.py: -------------------------------------------------------------------------------- 1 | from ..exception import MetaflowException 2 | 3 | 4 | class DataException(MetaflowException): 5 | headline = "Data store error" 6 | 7 | 8 | class UnpicklableArtifactException(MetaflowException): 9 | headline = "Cannot pickle artifact" 10 | 11 | def __init__(self, artifact_name): 12 | msg = 'Cannot pickle dump artifact named "%s"' % artifact_name 13 | super().__init__(msg=msg, 
lineno=None) 14 | -------------------------------------------------------------------------------- /metaflow/datastore/inputs.py: -------------------------------------------------------------------------------- 1 | class Inputs(object): 2 | """ 3 | split: inputs.step_a.x inputs.step_b.x 4 | foreach: inputs[0].x 5 | both: (inp.x for inp in inputs) 6 | """ 7 | 8 | def __init__(self, flows): 9 | # TODO sort by foreach index 10 | self.flows = list(flows) 11 | for flow in self.flows: 12 | setattr(self, flow._current_step, flow) 13 | 14 | def __getitem__(self, idx): 15 | return self.flows[idx] 16 | 17 | def __iter__(self): 18 | return iter(self.flows) 19 | -------------------------------------------------------------------------------- /metaflow/event_logger.py: -------------------------------------------------------------------------------- 1 | from metaflow.sidecar import Message, MessageTypes, Sidecar 2 | 3 | 4 | class NullEventLogger(object): 5 | TYPE = "nullSidecarLogger" 6 | 7 | def __init__(self, *args, **kwargs): 8 | # Currently passed flow and env in kwargs 9 | self._sidecar = Sidecar(self.TYPE) 10 | 11 | def start(self): 12 | return self._sidecar.start() 13 | 14 | def terminate(self): 15 | return self._sidecar.terminate() 16 | 17 | def send(self, msg): 18 | # Arbitrary message sending. Useful if you want to override some different 19 | # types of messages. 20 | self._sidecar.send(msg) 21 | 22 | def log(self, payload): 23 | if self._sidecar.is_active: 24 | msg = Message(MessageTypes.BEST_EFFORT, payload) 25 | self._sidecar.send(msg) 26 | 27 | @classmethod 28 | def get_worker(cls): 29 | return None 30 | -------------------------------------------------------------------------------- /metaflow/extension_support/_empty_file.py: -------------------------------------------------------------------------------- 1 | # This file serves as a __init__.py for metaflow_extensions when it is packaged 2 | # and needs to remain empty. 
3 | -------------------------------------------------------------------------------- /metaflow/info_file.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from os import path 4 | 5 | CURRENT_DIRECTORY = path.dirname(path.abspath(__file__)) 6 | INFO_FILE = path.join(path.dirname(CURRENT_DIRECTORY), "INFO") 7 | 8 | _info_file_content = None 9 | _info_file_present = None 10 | 11 | 12 | def read_info_file(): 13 | global _info_file_content 14 | global _info_file_present 15 | if _info_file_present is None: 16 | _info_file_present = path.exists(INFO_FILE) 17 | if _info_file_present: 18 | try: 19 | with open(INFO_FILE, "r", encoding="utf-8") as contents: 20 | _info_file_content = json.load(contents) 21 | except IOError: 22 | pass 23 | if _info_file_present: 24 | return _info_file_content 25 | return None 26 | -------------------------------------------------------------------------------- /metaflow/metadata_provider/__init__.py: -------------------------------------------------------------------------------- 1 | from .metadata import DataArtifact, MetadataProvider, MetaDatum 2 | -------------------------------------------------------------------------------- /metaflow/metaflow_profile.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from contextlib import contextmanager 4 | 5 | 6 | @contextmanager 7 | def profile(label, stats_dict=None): 8 | if stats_dict is None: 9 | print("PROFILE: %s starting" % label) 10 | start = time.time() 11 | yield 12 | took = int((time.time() - start) * 1000) 13 | if stats_dict is None: 14 | print("PROFILE: %s completed in %dms" % (label, took)) 15 | else: 16 | stats_dict[label] = stats_dict.get(label, 0) + took 17 | -------------------------------------------------------------------------------- /metaflow/mflog/tee.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from .mflog 
from metaflow.exception import MetaflowException


class AirflowException(MetaflowException):
    """Generic error raised by the Airflow integration."""

    # Redundant __init__ removed: it only forwarded `msg` to super(),
    # which is exactly the inherited behavior.
    headline = "Airflow Exception"


class NotSupportedException(MetaflowException):
    """Raised for Metaflow features the Airflow compiler cannot handle yet."""

    headline = "Not yet supported with Airflow"
import os
import json
import sys


def export_parameters(output_file):
    """Write Metaflow parameter values as shell `export` statements.

    Reads the JSON object in the METAFLOW_PARAMETERS environment variable
    (defaulting to an empty mapping) and writes one
    ``export METAFLOW_INIT_<NAME>=<json-value>`` line per parameter to
    *output_file*, which is then made group-executable.
    """
    # Renamed from `input`, which shadowed the builtin.
    params = json.loads(os.environ.get("METAFLOW_PARAMETERS", "{}"))
    with open(output_file, "w") as f:
        for name in params:
            # Replace `-` with `_` in parameter names since `-` isn't an
            # allowed character for environment variables. cli.py will
            # correctly translate the replaced `-`s.
            f.write(
                "export METAFLOW_INIT_%s=%s\n"
                % (name.upper().replace("-", "_"), json.dumps(params[name]))
            )
    # 0o775 (rwxrwxr-x); the original used the equivalent but opaque 509.
    os.chmod(output_file, 0o775)


if __name__ == "__main__":
    export_parameters(sys.argv[1])
import sys
from hashlib import md5


def generate_input_paths(step_name, timestamp, input_paths, split_cardinality):
    """Build the input-paths string for a foreach split.

    Result has the shape ``run_id/step_name/:id1,id2,...`` where each id is
    a deterministic hash derived from the step name, timestamp, and the
    parent input paths.
    """
    # input_paths looks like run_id/step/:foo,bar — the run id is the
    # first path segment.
    run_id, _, _ = input_paths.partition("/")
    base_id = "%s-%s-%s" % (step_name, timestamp, input_paths)
    count = int(split_cardinality)
    task_ids = [_generate_task_id(base_id, idx) for idx in range(count)]
    return "%s/%s/:%s" % (run_id, step_name, ",".join(task_ids))


def _generate_task_id(base, idx):
    """Derive the expected 8-hex-char task id for foreach split index *idx*.

    The trailing newline matches the 'echo'-based id creation on the shell
    side, which appends one.
    """
    digest = md5(("%s-%s\n" % (base, idx)).encode("utf-8")).hexdigest()
    return "t-" + digest[-8:]


if __name__ == "__main__":
    print(generate_input_paths(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]))
from metaflow.exception import MetaflowException


class MetaflowAzureAuthenticationError(MetaflowException):
    """Raised when authenticating with Azure fails."""

    headline = "Failed to authenticate with Azure"


class MetaflowAzureResourceError(MetaflowException):
    """Raised when an Azure resource cannot be accessed."""

    headline = "Failed to access Azure resource"


class MetaflowAzurePackageError(MetaflowException):
    """Raised when the optional Azure client libraries are not installed."""

    headline = "Missing required packages 'azure-identity' and 'azure-storage-blob' and 'azure-keyvault-secrets'"
from .card_datastore import CardDatastore


def _chase_origin(task):
    """Walk a task's resume chain and return the pathspec of the task it
    was ultimately resumed from, or None if it was never resumed."""
    from metaflow.client import Task

    origin_pathspec = None
    current = task
    while current.origin_pathspec is not None:
        origin_pathspec = current.origin_pathspec
        current = Task(origin_pathspec)
    return origin_pathspec


def resumed_info(task):
    """Return the origin pathspec for a resumed task (None otherwise)."""
    return _chase_origin(task)


def resolve_paths_from_task(
    flow_datastore,
    pathspec=None,
    type=None,
    hash=None,
    card_id=None,
):
    """Find card file paths for a task, optionally filtered by card type,
    hash, and card id. Returns (paths_found, card_datastore)."""
    datastore = CardDatastore(flow_datastore, pathspec=pathspec)
    paths_found = datastore.extract_card_paths(
        card_type=type, card_hash=hash, card_id=card_id
    )
    return paths_found, datastore
// TypeScript Project

// ESLint configuration for the cards UI (TypeScript + Svelte sources).
module.exports = {
  root: true,
  extends: [
    "eslint:recommended",
    "plugin:@typescript-eslint/recommended",
    "plugin:svelte/recommended",
  ],
  parser: "@typescript-eslint/parser",
  plugins: ["@typescript-eslint", "prettier"],
  parserOptions: {
    sourceType: "module",
    ecmaVersion: 2020,
    // Lint .svelte files in addition to .ts/.js.
    extraFileExtensions: [".svelte"],
  },
  env: {
    browser: true,
    es2017: true,
    node: true,
  },
  overrides: [
    {
      // Svelte files use svelte-eslint-parser, with the TS parser for
      // the embedded <script lang="ts"> blocks.
      files: ["*.svelte"],
      parser: "svelte-eslint-parser",
      parserOptions: {
        parser: "@typescript-eslint/parser",
      },
    },
  ],
  rules: {
    // NOTE(review): `any` is permitted here — card payloads appear to be
    // untyped JSON; confirm before tightening these rules.
    "@typescript-eslint/no-explicit-any": "off",
    "@typescript-eslint/no-unsafe-assignment": "off",
  },
};
13 | 14 | ## To make changes to be used by metaflow 15 | 16 | - `yarn install` 17 | - Make your changes to the `.svelte` and/or `.css` files 18 | - `yarn lint` to ensure the types are correct 19 | - `yarn build` 20 | 21 | This will put a `main.js` and a `bundle.css` file in a directory that will be picked up by metaflow when it is running a flow. The output directory is specified in `package.json`. 22 | 23 | To run a flow: 24 | 25 | - `python MYCARD.py run --with card` 26 | - `python dummy.py card view //` 27 | 28 | You can get `//` from the output from the first step that runs metaflow. 29 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/cypress.json: -------------------------------------------------------------------------------- 1 | { 2 | "baseUrl": "http://localhost:8080", 3 | "viewportWidth": 1000, 4 | "viewportHeight": 1000, 5 | "video": false, 6 | "screenshot": false 7 | } 8 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/cypress/fixtures/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Using fixtures to represent data", 3 | "email": "hello@cypress.io", 4 | "body": "Fixtures are a great way to mock data for responses to routes" 5 | } 6 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/cypress/plugins/index.js: -------------------------------------------------------------------------------- 1 | /// 2 | // *********************************************************** 3 | // This example plugins/index.js can be used to load plugins 4 | // 5 | // You can change the location of this file or turn off loading 6 | // the plugins file with the 'pluginsFile' configuration option. 
7 | // 8 | // You can read more here: 9 | // https://on.cypress.io/plugins-guide 10 | // *********************************************************** 11 | 12 | // This function is called when a project is opened or re-opened (e.g. due to 13 | // the project's config changing) 14 | 15 | /** 16 | * @type {Cypress.PluginConfig} 17 | */ 18 | // eslint-disable-next-line no-unused-vars 19 | module.exports = (on, config) => {}; 20 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/cypress/support/commands.js: -------------------------------------------------------------------------------- 1 | // *********************************************** 2 | // This example commands.js shows you how to 3 | // create various custom commands and overwrite 4 | // existing commands. 5 | // 6 | // For more comprehensive examples of custom 7 | // commands please read more here: 8 | // https://on.cypress.io/custom-commands 9 | // *********************************************** 10 | // 11 | // 12 | // -- This is a parent command -- 13 | // Cypress.Commands.add('login', (email, password) => { ... }) 14 | // 15 | // 16 | // -- This is a child command -- 17 | // Cypress.Commands.add('drag', { prevSubject: 'element'}, (subject, options) => { ... }) 18 | // 19 | // 20 | // -- This is a dual command -- 21 | // Cypress.Commands.add('dismiss', { prevSubject: 'optional'}, (subject, options) => { ... }) 22 | // 23 | // 24 | // -- This will overwrite an existing command -- 25 | // Cypress.Commands.overwrite('visit', (originalFn, url, options) => { ... }) 26 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/cypress/support/index.js: -------------------------------------------------------------------------------- 1 | // *********************************************************** 2 | // This example support/index.js is processed and 3 | // loaded automatically before your test files. 
4 | // 5 | // This is a great place to put global configuration and 6 | // behavior that modifies Cypress. 7 | // 8 | // You can change the location of this file or turn off 9 | // automatically serving support files with the 10 | // 'supportFile' configuration option. 11 | // 12 | // You can read more here: 13 | // https://on.cypress.io/configuration 14 | // *********************************************************** 15 | 16 | // Import commands.js using ES2015 syntax: 17 | import './commands' 18 | 19 | // Alternatively you can use CommonJS syntax: 20 | // require('./commands') 21 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/demo/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Metaflow Card Example 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | 16 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/aws-exports.cjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | // WARNING: DO NOT EDIT. This file is automatically generated by AWS Amplify. It will be overwritten. 3 | 4 | const awsmobile = { 5 | "aws_project_region": "us-west-2" 6 | }; 7 | 8 | 9 | export default awsmobile; 10 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/components/artifacts.svelte: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 |
23 | 24 | 25 | {#each sortedData as artifact} 26 | 27 | {/each} 28 |
29 |
/* Design tokens for the DAG card component. */
:root {
  --dag-border: #282828;
  --dag-bg-static: var(--lt-grey);
  /* Step-box background per step state. */
  --dag-bg-success: #a5d46a;
  --dag-bg-running: #ffdf80;
  --dag-bg-error: #ffa080;
  /* Color of the connector lines between steps. */
  --dag-connector: #cccccc;
  /* Layout: spacing between steps and the fixed step-box dimensions. */
  --dag-gap: 5rem;
  --dag-step-height: 6.25rem;
  --dag-step-width: 11.25rem;
  --dag-selected: #ffd700;
}
<!-- This component gives us a wrapper for the main section -->
<main>
  <div class="mainContainer">
    <slot />
  </div>
</main>

<style>
  .mainContainer {
    max-width: 110rem;
  }

  main {
    flex: 0 1 auto;
    max-width: 100rem;
    padding: 1.5rem;
  }

  /* On wide viewports, leave room for the fixed-width aside. */
  @media (min-width: 60rem) {
    main {
      margin-left: var(--aside-width);
    }
  }

  /* if the embed class is present, we hide the aside, and we should center the main */
  :global(.embed main) {
    margin: 0 auto;
    min-width: 80%
  }
</style>
<!-- This component will wrap a page. Later we can decide if we want to render only a single page -->
<script lang="ts">
  import type * as types from "../types";
  export let componentData: types.PageComponent;
</script>

<!-- The id lets in-page links target a specific page by its title. -->
<div
  id={`page-${componentData?.title || "No Title"}`}
  class="page"
  data-component="page"
>
  <slot />
</div>

<style>
  .page:last-of-type {
    margin-bottom: var(--component-spacer);
  }

  /* Hide the trailing divider of the last section on the last page. */
  :global(.page:last-of-type section:last-of-type hr) {
    display: none;
  }
</style>
<!-- This is the standard table component. It will attempt to make fixed headers, and
allow you to scroll -->
<script lang="ts">
  import type * as types from "../types";
  import DataRenderer from "./table-data-renderer.svelte";

  export let componentData: types.TableComponent;
  <!-- no comment syntax here: TS below -->
  // Reactive destructure so the table re-renders when componentData changes.
  $: ({ columns, data } = componentData);
</script>

{#if columns && data}
  <div class="tableContainer" data-component="table-horizontal">
    <table>
      <thead>
        <tr>
          {#each columns as column}
            <th>{column}</th>
          {/each}
        </tr>
      </thead>
      <tbody>
        {#each data as row}
          <tr>
            {#each row as col}
              <!-- Each cell may hold an arbitrary nested component. -->
              <td><DataRenderer componentData={col} /></td>
            {/each}
          </tr>
        {/each}
      </tbody>
    </table>
  </div>
{/if}

<style>
  .tableContainer {
    overflow: auto;
  }

  /* Keep the header row pinned while the body scrolls. */
  th {
    position: sticky;
    top: -1px;
    z-index: 2;
    white-space: nowrap;
    background: var(--white);
  }
</style>
<!-- There are cases where we want a vertical table. Its expected to be a very low column
count. We basically pivot the table and style the first column to be headers -->
<script lang="ts">
  import type * as types from "../types";
  import DataRenderer from "./table-data-renderer.svelte";

  export let componentData: types.TableComponent;
  // Reactive destructure so the table re-renders when componentData changes.
  $: ({ columns, data } = componentData);
</script>

{#if columns && data}
  <div class="tableContainer" data-component="table-vertical">
    <table>
      <tbody>
        <!-- here we're pivoting the table treating columns as rows -->
        {#each columns as column, i}
          <tr>
            <td class="labelColumn">{column}</td>
            {#each data as row}
              <td><DataRenderer componentData={row[i]} /></td>
            {/each}
          </tr>
        {/each}
      </tbody>
    </table>
  </div>
{/if}

<style>
  td {
    text-align: left;
  }

  /* First column acts as the header: right-aligned, shaded, bold. */
  td.labelColumn {
    text-align: right;
    background-color: var(--lt-grey);
    font-weight: 700;
    /* note, if you are going to change the default width, please do the same in artifacts */
    width: 12%;
    white-space: nowrap;
  }
</style>
TableVertical : TableHorizontal; 12 | </script> 13 | 14 | {#if columns && data} 15 | <svelte:component this={component} {componentData} /> 16 | {/if} 17 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/components/text.svelte: -------------------------------------------------------------------------------- 1 | <!-- A simple text component to render a paragraph wherever needed.App 2 | If you need more than one paragraph, you should probably use the HTML component instead --> 3 | <script lang="ts"> 4 | import type * as types from "../types"; 5 | export let componentData: types.TextComponent; 6 | </script> 7 | 8 | <p data-component="text">{componentData?.text || ""}</p> 9 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/components/title.svelte: -------------------------------------------------------------------------------- 1 | <!-- This component renders a title for use throughout your page --> 2 | <script lang="ts"> 3 | import type * as types from "../types"; 4 | export let componentData: types.TitleComponent; 5 | </script> 6 | 7 | <h2 class="title" data-component="title"> 8 | {componentData?.text || ""} 9 | </h2> 10 | 11 | <style> 12 | .title { 13 | text-align: left; 14 | } 15 | </style> 16 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/constants.ts: -------------------------------------------------------------------------------- 1 | import type * as types from "./types"; 2 | 3 | // ROUTES 4 | 5 | export const ROUTES: Record<string, types.Route> = { 6 | HOME: ["/", "Home"], 7 | CARD: ["/card", "Card"], 8 | }; 9 | 10 | // COLORS 11 | 12 | export const COLORS: Record<string, string> = { 13 | bg: "white", 14 | black: "#282828", 15 | blue: "rgb(12, 102, 222)", 16 | dkGrey: "#6a6a6a", 17 | dkPrimary: "#ef863b", 18 | dkSecondary: "#13172d0", 19 | dkTertiary: "#0f426e", 20 | 
grey: "#e9e9e9", 21 | highlight: "#f8d9d8", 22 | ltBlue: "rgb(228, 240, 255)", 23 | ltGrey: "#f7f7f7", 24 | ltPrimary: "#ffcb8b", 25 | ltSecondary: "#434d81", 26 | ltTertiary: "#4189c9", 27 | primary: "#faab4a", 28 | quadrary: "#f8d9d8", 29 | secondary: "#2e3454", 30 | tertiary: "#2a679d", 31 | success: "#2e8036", 32 | error: "#e13d3f", 33 | }; 34 | 35 | export const COLORS_LIST: string[] = [ 36 | COLORS.primary, 37 | COLORS.secondary, 38 | COLORS.tertiary, 39 | COLORS.quadrary, 40 | ]; 41 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/global.css: -------------------------------------------------------------------------------- 1 | @import url("https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap"); 2 | 3 | :root { 4 | --bg: #ffffff; 5 | --black: #333; 6 | --blue: #0c66de; 7 | --dk-grey: #767676; 8 | --dk-primary: #ef863b; 9 | --dk-secondary: #13172d; 10 | --dk-tertiary: #0f426e; 11 | --error: #cf483e; 12 | --grey: rgba(0, 0, 0, 0.125); 13 | --highlight: #f8d9d8; 14 | --lt-blue: #4fa7ff; 15 | --lt-grey: #f3f3f3; 16 | --lt-lt-grey: #f9f9f9; 17 | --lt-primary: #ffcb8b; 18 | --lt-secondary: #434d81; 19 | --lt-tertiary: #4189c9; 20 | --primary: #faab4a; 21 | --quadrary: #f8d9d8; 22 | --secondary: #2e3454; 23 | --tertiary: #2a679d; 24 | --white: #ffffff; 25 | 26 | --component-spacer: 3rem; 27 | --aside-width: 20rem; 28 | --embed-card-min-height: 12rem; 29 | 30 | --mono-font: ui-monospace, Menlo, Monaco, "Cascadia Mono", "Segoe UI Mono", 31 | "Roboto Mono", "Oxygen Mono", "Ubuntu Monospace", "Source Code Pro", 32 | "Fira Mono", "Droid Sans Mono", "Courier New", monospace; 33 | } 34 | 35 | html, body { 36 | margin: 0; 37 | min-height: 100vh; 38 | overflow-y: visible; 39 | padding: 0; 40 | width: 100%; 41 | } 42 | 43 | .card_app { 44 | width: 100%; 45 | min-height: 100vh; 46 | } 47 | 48 | .embed .card_app { 49 | min-height: var(--embed-card-min-height); 50 | } 51 | 
-------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/global.d.ts: -------------------------------------------------------------------------------- 1 | /// <reference types="svelte" /> -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/src/main.ts: -------------------------------------------------------------------------------- 1 | // load app 2 | import App from "./App.svelte"; 3 | 4 | let app; 5 | 6 | // wrapping in try/catch to let user know if its missing #app 7 | try { 8 | const cardDataId: string = (window as any).mfCardDataId as string; 9 | const containerId: string = (window as any).mfContainerId as string; 10 | const containedApp = document.querySelector(`[data-container="${containerId}"]`)?.querySelector(".card_app") as Element 11 | app = new App({ 12 | target: containedApp ?? document.querySelector(".card_app") as Element, 13 | props: {cardDataId}, 14 | }); 15 | } catch (err: any) { 16 | throw new Error(err); 17 | } 18 | 19 | export default app; 20 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/svelte.config.js: -------------------------------------------------------------------------------- 1 | import { vitePreprocess } from "@sveltejs/vite-plugin-svelte"; 2 | 3 | export default { 4 | // Consult https://svelte.dev/docs#compile-time-svelte-preprocess 5 | // for more information about preprocessors 6 | preprocess: vitePreprocess(), 7 | }; 8 | -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/svelte/tsconfig.json", 3 | "compilerOptions": { 4 | "target": "ESNext", 5 | "useDefineForClassFields": true, 6 | "module": "ESNext", 7 | "resolveJsonModule": true, 8 | "allowJs": true, 9 | "checkJs": true, 10 | 
"isolatedModules": true 11 | }, 12 | "include": [ 13 | "src/**/*.ts", 14 | "src/**/*.js", 15 | "src/**/*.svelte" 16 | ], 17 | "references": [ 18 | { 19 | "path": "./tsconfig.node.json" 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "skipLibCheck": true, 5 | "module": "ESNext", 6 | "moduleResolution": "bundler" 7 | }, 8 | "include": [ 9 | "vite.config.ts" 10 | ] 11 | } -------------------------------------------------------------------------------- /metaflow/plugins/cards/ui/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "vite"; 2 | import { svelte } from "@sveltejs/vite-plugin-svelte"; 3 | import { resolve } from "node:path"; 4 | 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [svelte()], 8 | build: { 9 | assetsDir: "public", 10 | outDir: "../card_modules", 11 | emptyOutDir: false, 12 | minify: true, 13 | lib: { 14 | entry: resolve(__dirname, "src/main.ts"), 15 | name: "Outerbounds Cards", 16 | // the proper extensions will be added 17 | fileName: "main", 18 | formats: ["umd"], 19 | }, 20 | rollupOptions: { 21 | output: { 22 | assetFileNames: (assetInfo) => { 23 | if (assetInfo.name == "style.css") return "bundle.css"; 24 | return assetInfo.name; 25 | }, 26 | chunkFileNames: "[name].[ext]", 27 | entryFileNames: "[name].js", 28 | }, 29 | }, 30 | }, 31 | }); 32 | -------------------------------------------------------------------------------- /metaflow/plugins/datastores/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/plugins/datastores/__init__.py 
-------------------------------------------------------------------------------- /metaflow/plugins/datatools/s3/__init__.py: -------------------------------------------------------------------------------- 1 | from .s3 import RangeInfo, S3, S3GetObject, S3Object, S3PutObject 2 | from .s3 import ( 3 | MetaflowS3InvalidObject, 4 | MetaflowS3URLException, 5 | MetaflowS3Exception, 6 | MetaflowS3NotFound, 7 | MetaflowS3AccessDenied, 8 | MetaflowS3InvalidRange, 9 | ) 10 | -------------------------------------------------------------------------------- /metaflow/plugins/debug_logger.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from metaflow.event_logger import NullEventLogger 4 | from metaflow.sidecar import Message, MessageTypes 5 | 6 | 7 | class DebugEventLogger(NullEventLogger): 8 | TYPE = "debugLogger" 9 | 10 | @classmethod 11 | def get_worker(cls): 12 | return DebugEventLoggerSidecar 13 | 14 | 15 | class DebugEventLoggerSidecar(object): 16 | def __init__(self): 17 | pass 18 | 19 | def process_message(self, msg): 20 | # type: (Message) -> None 21 | if msg.msg_type == MessageTypes.SHUTDOWN: 22 | print("Debug[shutdown]: got shutdown!", file=sys.stderr) 23 | self._shutdown() 24 | elif msg.msg_type == MessageTypes.BEST_EFFORT: 25 | print("Debug[best_effort]: %s" % str(msg.payload), file=sys.stderr) 26 | elif msg.msg_type == MessageTypes.MUST_SEND: 27 | print("Debug[must_send]: %s" % str(msg.payload), file=sys.stderr) 28 | 29 | def _shutdown(self): 30 | sys.stderr.flush() 31 | -------------------------------------------------------------------------------- /metaflow/plugins/debug_monitor.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from metaflow.sidecar import MessageTypes, Message 4 | from metaflow.monitor import NullMonitor, Metric 5 | 6 | 7 | class DebugMonitor(NullMonitor): 8 | TYPE = "debugMonitor" 9 | 10 | @classmethod 11 | def 
get_worker(cls): 12 | return DebugMonitorSidecar 13 | 14 | 15 | class DebugMonitorSidecar(object): 16 | def __init__(self): 17 | pass 18 | 19 | def process_message(self, msg): 20 | # type: (Message) -> None 21 | if msg.msg_type == MessageTypes.MUST_SEND: 22 | print("DebugMonitor[must_send]: %s" % str(msg.payload), file=sys.stderr) 23 | elif msg.msg_type == MessageTypes.SHUTDOWN: 24 | print("DebugMonitor[shutdown]: got shutdown!", file=sys.stderr) 25 | self._shutdown() 26 | elif msg.msg_type == MessageTypes.BEST_EFFORT: 27 | for v in msg.payload.values(): 28 | metric = Metric.deserialize(v) 29 | print( 30 | "DebugMonitor[metric]: %s for %s: %s" 31 | % (metric.metric_type, metric.name, str(metric.value)), 32 | file=sys.stderr, 33 | ) 34 | 35 | def _shutdown(self): 36 | sys.stderr.flush() 37 | -------------------------------------------------------------------------------- /metaflow/plugins/env_escape/communication/__init__.py: -------------------------------------------------------------------------------- 1 | # env_escape.communication subpackage 2 | -------------------------------------------------------------------------------- /metaflow/plugins/env_escape/communication/utils.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | 4 | def __try_op__(op_name, op, retries, *args): 5 | """ 6 | A helper function to retry an operation that timed out on a socket. After 7 | the retries are expired a `socket.timeout` is raised. 
8 | 9 | Parameters 10 | ---------- 11 | op_name : str 12 | The operations name 13 | op : Callable 14 | The operation to perform 15 | retries : int 16 | The number of retries 17 | args : 18 | Args for the operation 19 | 20 | Returns 21 | ------- 22 | The operations response 23 | 24 | Raises 25 | ------ 26 | socket.timeout 27 | If all retries are exhausted, `socket.timeout` is raised 28 | 29 | """ 30 | for i in range(retries): 31 | try: 32 | result = op(*args) 33 | return result 34 | except socket.timeout: 35 | pass 36 | else: 37 | raise socket.timeout( 38 | "Timeout after {} retries on operation " "'{}'".format(retries, op_name) 39 | ) 40 | -------------------------------------------------------------------------------- /metaflow/plugins/env_escape/configurations/emulate_test_lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/plugins/env_escape/configurations/emulate_test_lib/__init__.py -------------------------------------------------------------------------------- /metaflow/plugins/env_escape/configurations/emulate_test_lib/server_mappings.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import os 3 | import sys 4 | 5 | # HACK to pretend that we installed test_lib 6 | sys.path.append( 7 | os.path.realpath(os.path.join(os.path.dirname(__file__), "..", "test_lib_impl")) 8 | ) 9 | 10 | import test_lib as lib 11 | 12 | EXPORTED_CLASSES = { 13 | ("test_lib", "test_lib.alias"): { 14 | "TestClass1": lib.TestClass1, 15 | "TestClass2": lib.TestClass2, 16 | "BaseClass": lib.BaseClass, 17 | "ChildClass": lib.ChildClass, 18 | "ExceptionAndClass": lib.ExceptionAndClass, 19 | "ExceptionAndClassChild": lib.ExceptionAndClassChild, 20 | } 21 | } 22 | 23 | EXPORTED_EXCEPTIONS = { 24 | ("test_lib", "test_lib.alias"): { 25 | "SomeException": lib.SomeException, 26 | 
"MyBaseException": lib.MyBaseException, 27 | "ExceptionAndClass": lib.ExceptionAndClass, 28 | "ExceptionAndClassChild": lib.ExceptionAndClassChild, 29 | } 30 | } 31 | 32 | PROXIED_CLASSES = [functools.partial] 33 | 34 | EXPORTED_FUNCTIONS = {"test_lib": {"test_func": lib.test_func}} 35 | 36 | EXPORTED_VALUES = {"test_lib": {"test_value": lib.test_value}} 37 | -------------------------------------------------------------------------------- /metaflow/plugins/env_escape/configurations/test_lib_impl/__init__.py: -------------------------------------------------------------------------------- 1 | # Example library that can be used to demonstrate the use of the env_escape 2 | # plugin. 3 | 4 | # See test/env_escape/example.py for an example flow that uses this. 5 | -------------------------------------------------------------------------------- /metaflow/plugins/env_escape/consts.py: -------------------------------------------------------------------------------- 1 | # Type of the message 2 | FIELD_MSGTYPE = "t" 3 | MSG_OP = 1 # This is an operation 4 | MSG_REPLY = 2 # This is a regular reply 5 | MSG_EXCEPTION = 3 # This is an exception 6 | MSG_CONTROL = 4 # This is a control message 7 | MSG_INTERNAL_ERROR = 5 # Some internal error happened 8 | 9 | # Fields for operations/control 10 | FIELD_OPTYPE = "o" 11 | FIELD_TARGET = "o_t" 12 | FIELD_ARGS = "o_a" 13 | FIELD_KWARGS = "o_ka" 14 | 15 | # Fields for reply/exception 16 | FIELD_CONTENT = "c" 17 | 18 | # Fields for values 19 | # Indicates that the object is remote to the receiver (and local to the sender) 20 | VALUE_REMOTE = 1 21 | # Indicates that the object is local to the receiver (and remote to the sender) 22 | VALUE_LOCAL = 2 23 | 24 | # Operations that we support 25 | OP_GETATTR = 1 26 | OP_SETATTR = 2 27 | OP_DELATTR = 3 28 | OP_CALL = 4 29 | OP_CALLATTR = 5 30 | OP_REPR = 6 31 | OP_STR = 7 32 | OP_HASH = 9 33 | OP_PICKLE = 10 34 | OP_DEL = 11 35 | OP_GETMETHODS = 12 36 | OP_DIR = 13 37 | OP_CALLFUNC = 14 38 | 
OP_GETVAL = 15 39 | OP_SETVAL = 16 40 | OP_INIT = 17 41 | OP_CALLONCLASS = 18 42 | OP_SUBCLASSCHECK = 19 43 | 44 | # Control messages 45 | CONTROL_SHUTDOWN = 1 46 | CONTROL_GETEXPORTS = 2 47 | -------------------------------------------------------------------------------- /metaflow/plugins/environment_decorator.py: -------------------------------------------------------------------------------- 1 | from metaflow.decorators import StepDecorator 2 | 3 | 4 | class EnvironmentDecorator(StepDecorator): 5 | """ 6 | Specifies environment variables to be set prior to the execution of a step. 7 | 8 | Parameters 9 | ---------- 10 | vars : Dict[str, str], default {} 11 | Dictionary of environment variables to set. 12 | """ 13 | 14 | name = "environment" 15 | defaults = {"vars": {}} 16 | 17 | def runtime_step_cli( 18 | self, cli_args, retry_count, max_user_code_retries, ubf_context 19 | ): 20 | cli_args.env.update( 21 | {key: str(value) for key, value in self.attributes["vars"].items()} 22 | ) 23 | -------------------------------------------------------------------------------- /metaflow/plugins/frameworks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/plugins/frameworks/__init__.py -------------------------------------------------------------------------------- /metaflow/plugins/gcp/__init__.py: -------------------------------------------------------------------------------- 1 | from .gs_storage_client_factory import get_credentials 2 | -------------------------------------------------------------------------------- /metaflow/plugins/gcp/gs_exceptions.py: -------------------------------------------------------------------------------- 1 | from metaflow.exception import MetaflowException 2 | 3 | 4 | class MetaflowGSPackageError(MetaflowException): 5 | headline = "Missing required packages 'google-cloud-storage' and 'google-auth'" 
6 | -------------------------------------------------------------------------------- /metaflow/plugins/kubernetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/plugins/kubernetes/__init__.py -------------------------------------------------------------------------------- /metaflow/plugins/metadata_providers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /metaflow/plugins/pypi/__init__.py: -------------------------------------------------------------------------------- 1 | from metaflow import metaflow_config 2 | from metaflow.exception import MetaflowException 3 | 4 | MAGIC_FILE = "conda.manifest" 5 | 6 | 7 | # TODO: This can be lifted all the way into metaflow config 8 | def _datastore_packageroot(datastore, echo): 9 | datastore_type = datastore.TYPE 10 | datastore_packageroot = getattr( 11 | metaflow_config, 12 | "CONDA_PACKAGE_{datastore_type}ROOT".format( 13 | datastore_type=datastore_type.upper() 14 | ), 15 | None, 16 | ) 17 | if datastore_packageroot is None: 18 | datastore_sysroot = datastore.get_datastore_root_from_config(echo) 19 | if datastore_sysroot is None: 20 | # TODO: Throw a more evocative error message 21 | raise MetaflowException( 22 | msg="METAFLOW_DATASTORE_SYSROOT_{datastore_type} must be set!".format( 23 | datastore_type=datastore_type.upper() 24 | ) 25 | ) 26 | datastore_packageroot = "{datastore_sysroot}/conda".format( 27 | datastore_sysroot=datastore_sysroot 28 | ) 29 | return datastore_packageroot 30 | -------------------------------------------------------------------------------- /metaflow/plugins/pypi/pypi_environment.py: -------------------------------------------------------------------------------- 1 | from .conda_environment import CondaEnvironment 2 | 
3 | 4 | # To placate people who don't want to see a shred of conda in UX, we symlink 5 | # --environment=pypi to --environment=conda 6 | class PyPIEnvironment(CondaEnvironment): 7 | TYPE = "pypi" 8 | -------------------------------------------------------------------------------- /metaflow/plugins/secrets/__init__.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Dict 3 | 4 | 5 | class SecretsProvider(abc.ABC): 6 | TYPE = None 7 | 8 | @abc.abstractmethod 9 | def get_secret_as_dict(self, secret_id, options={}, role=None) -> Dict[str, str]: 10 | """Retrieve the secret from secrets backend, and return a dictionary of 11 | environment variables.""" 12 | -------------------------------------------------------------------------------- /metaflow/plugins/secrets/inline_secrets_provider.py: -------------------------------------------------------------------------------- 1 | from metaflow.plugins.secrets import SecretsProvider 2 | 3 | 4 | class InlineSecretsProvider(SecretsProvider): 5 | TYPE = "inline" 6 | 7 | def get_secret_as_dict(self, secret_id, options={}, role=None): 8 | """Intended to be used for testing purposes only.""" 9 | return options.get("env_vars", {}) 10 | -------------------------------------------------------------------------------- /metaflow/plugins/uv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/plugins/uv/__init__.py -------------------------------------------------------------------------------- /metaflow/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/py.typed -------------------------------------------------------------------------------- /metaflow/runner/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/runner/__init__.py -------------------------------------------------------------------------------- /metaflow/sidecar/__init__.py: -------------------------------------------------------------------------------- 1 | from .sidecar import Sidecar 2 | from .sidecar_messages import MessageTypes, Message 3 | from .sidecar_subprocess import SidecarSubProcess 4 | -------------------------------------------------------------------------------- /metaflow/sidecar/sidecar_messages.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | # Define message enums 5 | # Unfortunately we can't use enums because they are not supported 6 | # officially in Python2 7 | # INVALID: Not a valid message 8 | # MUST_SEND: Will attempt to send until successful and not send any BEST_EFFORT 9 | # messages until then. 
A newer MUST_SEND message will take precedence on 10 | # any currently unsent one 11 | # BEST_EFFORT: Will try to send once and drop if not possible 12 | # SHUTDOWN: Signal termination; also best effort 13 | class MessageTypes(object): 14 | INVALID, MUST_SEND, BEST_EFFORT, SHUTDOWN = range(1, 5) 15 | 16 | 17 | class Message(object): 18 | def __init__(self, msg_type, payload): 19 | self.msg_type = msg_type 20 | self.payload = payload 21 | 22 | def serialize(self): 23 | msg = { 24 | "msg_type": self.msg_type, 25 | "payload": self.payload, 26 | } 27 | return json.dumps(msg) + "\n" 28 | 29 | @staticmethod 30 | def deserialize(json_msg): 31 | try: 32 | return Message(**json.loads(json_msg)) 33 | except json.decoder.JSONDecodeError: 34 | return Message(MessageTypes.INVALID, None) 35 | -------------------------------------------------------------------------------- /metaflow/system/__init__.py: -------------------------------------------------------------------------------- 1 | from .system_monitor import SystemMonitor 2 | from .system_logger import SystemLogger 3 | 4 | _system_logger = SystemLogger() 5 | _system_monitor = SystemMonitor() 6 | -------------------------------------------------------------------------------- /metaflow/system/system_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | 4 | class DummyFlow(object): 5 | def __init__(self, name="not_a_real_flow"): 6 | self.name = name 7 | 8 | 9 | # This function is used to initialize the environment outside a flow. 
10 | def init_environment_outside_flow( 11 | flow: Union["metaflow.flowspec.FlowSpec", "metaflow.sidecar.DummyFlow"] 12 | ) -> "metaflow.metaflow_environment.MetaflowEnvironment": 13 | from metaflow.plugins import ENVIRONMENTS 14 | from metaflow.metaflow_config import DEFAULT_ENVIRONMENT 15 | from metaflow.metaflow_environment import MetaflowEnvironment 16 | 17 | return [ 18 | e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == DEFAULT_ENVIRONMENT 19 | ][0](flow) 20 | -------------------------------------------------------------------------------- /metaflow/tuple_util.py: -------------------------------------------------------------------------------- 1 | # Keep this file minimum dependency as this will be imported by metaflow at bootup. 2 | def namedtuple_with_defaults(typename, field_descr, defaults=()): 3 | from typing import NamedTuple 4 | 5 | T = NamedTuple(typename, field_descr) 6 | T.__new__.__defaults__ = tuple(defaults) 7 | 8 | # Adding the following to ensure the named tuple can be (un)pickled correctly. 9 | import __main__ 10 | 11 | setattr(__main__, T.__name__, T) 12 | T.__module__ = "__main__" 13 | return T 14 | 15 | 16 | # Define the namedtuple with default here if they need to be accessible in client 17 | # (and w/o a real flow). 18 | foreach_frame_field_list = [ 19 | ("step", str), 20 | ("var", str), 21 | ("num_splits", int), 22 | ("index", int), 23 | ("value", str), 24 | ] 25 | ForeachFrame = namedtuple_with_defaults( 26 | "ForeachFrame", foreach_frame_field_list, (None,) * (len(foreach_frame_field_list)) 27 | ) 28 | -------------------------------------------------------------------------------- /metaflow/tutorials/00-helloworld/README.md: -------------------------------------------------------------------------------- 1 | # Episode 00-helloworld: Metaflow says Hi! 2 | 3 | **This flow is a simple linear workflow that verifies your installation by 4 | printing out 'Metaflow says: Hi!' 
to the terminal.** 5 | 6 | #### Showcasing: 7 | - Basics of Metaflow. 8 | - Step decorator. 9 | 10 | #### To play this episode: 11 | 1. ```cd metaflow-tutorials``` 12 | 2. ```python 00-helloworld/helloworld.py show``` 13 | 3. ```python 00-helloworld/helloworld.py run``` 14 | -------------------------------------------------------------------------------- /metaflow/tutorials/00-helloworld/helloworld.py: -------------------------------------------------------------------------------- 1 | from metaflow import FlowSpec, step 2 | 3 | 4 | class HelloFlow(FlowSpec): 5 | """ 6 | A flow where Metaflow prints 'Hi'. 7 | 8 | Run this flow to validate that Metaflow is installed correctly. 9 | 10 | """ 11 | 12 | @step 13 | def start(self): 14 | """ 15 | This is the 'start' step. All flows must have a step named 'start' that 16 | is the first step in the flow. 17 | 18 | """ 19 | print("HelloFlow is starting.") 20 | self.next(self.hello) 21 | 22 | @step 23 | def hello(self): 24 | """ 25 | A step for metaflow to introduce itself. 26 | 27 | """ 28 | print("Metaflow says: Hi!") 29 | self.next(self.end) 30 | 31 | @step 32 | def end(self): 33 | """ 34 | This is the 'end' step. All flows must have an 'end' step, which is the 35 | last step in the flow. 36 | 37 | """ 38 | print("HelloFlow is all done.") 39 | 40 | 41 | if __name__ == "__main__": 42 | HelloFlow() 43 | -------------------------------------------------------------------------------- /metaflow/tutorials/01-playlist/README.md: -------------------------------------------------------------------------------- 1 | # Episode 01-playlist: Let's build you a movie playlist. 2 | 3 | **This flow loads a movie metadata CSV file and builds a playlist for your 4 | favorite movie genre. Everything in Metaflow is versioned, so you can run it 5 | multiple times and view all the historical playlists with the Metaflow client 6 | in a Notebook.** 7 | 8 | #### Showcasing: 9 | - Including external files with 'IncludeFile'. 
10 | - Basic Metaflow Parameters. 11 | - Running workflow branches in parallel and joining results. 12 | - Using the Metaflow client in a Notebook. 13 | 14 | #### Before playing this episode: 15 | 1. ```python -m pip install notebook``` 16 | 17 | #### To play this episode: 18 | 1. ```cd metaflow-tutorials``` 19 | 2. ```python 01-playlist/playlist.py show``` 20 | 3. ```python 01-playlist/playlist.py run``` 21 | 4. ```python 01-playlist/playlist.py run --genre comedy``` 22 | 5. ```jupyter-notebook 01-playlist/playlist.ipynb``` 23 | -------------------------------------------------------------------------------- /metaflow/tutorials/02-statistics/README.md: -------------------------------------------------------------------------------- 1 | # Episode 02-statistics: Is this Data Science? 2 | 3 | **Use metaflow to load the movie metadata CSV file into a dataframe and 4 | compute some movie genre-specific statistics. These statistics are then used in 5 | later examples to improve our playlist generator. You can optionally use the 6 | Metaflow client to eyeball the results in a Notebook, and make some simple 7 | plots using the Matplotlib library.** 8 | 9 | #### Showcasing: 10 | - Fan-out over a set of parameters using Metaflow foreach. 11 | - Plotting results in a Notebook. 12 | 13 | #### Before playing this episode: 14 | 1. ```python -m pip install notebook``` 15 | 2. ```python -m pip install matplotlib``` 16 | 17 | #### To play this episode: 18 | 1. ```cd metaflow-tutorials``` 19 | 2. ```python 02-statistics/stats.py show``` 20 | 3. ```python 02-statistics/stats.py run``` 21 | 4. ```jupyter-notebook 02-statistics/stats.ipynb``` -------------------------------------------------------------------------------- /metaflow/tutorials/03-playlist-redux/README.md: -------------------------------------------------------------------------------- 1 | # Episode 03-playlist-redux: Follow the Money. 
2 | 3 | **Use Metaflow to load the statistics generated from 'Episode 02' and improve 4 | our playlist generator by only recommending top box office grossing movies.** 5 | 6 | #### Showcasing: 7 | - Using data artifacts generated from other flows. 8 | 9 | #### Before playing this episode: 10 | 1. Run 'Episode 02-statistics: Is this Data Science?' 11 | 12 | #### To play this episode: 13 | 1. ```cd metaflow-tutorials``` 14 | 2. ```python 03-playlist-redux/playlist.py show``` 15 | 3. ```python 03-playlist-redux/playlist.py run``` 16 | -------------------------------------------------------------------------------- /metaflow/tutorials/04-playlist-plus/README.md: -------------------------------------------------------------------------------- 1 | # Episode 04-playlist-plus: The Final Showdown. 2 | 3 | **Now that we've improved our genre based playlist generator. We expose a 'hint' 4 | parameter allowing the user to suggest a better bonus movie. The bonus movie is 5 | chosen from the movie that has the most similar name to the 'hint'. 6 | This is achieved by importing a string edit distance package using Metaflow's 7 | conda based dependency management feature. Dependency management builds 8 | isolated and reproducible environments for individual steps.** 9 | 10 | #### Showcasing: 11 | - Metaflow's conda based dependency management. 12 | 13 | #### Before playing this episode: 14 | 1. This tutorial requires the 'conda' package manager to be installed with the 15 | conda-forge channel added. 16 | 1. Download Miniconda at https://docs.conda.io/en/latest/miniconda.html 17 | 2. ```conda config --add channels conda-forge``` 18 | 19 | #### To play this episode: 20 | 1. ```cd metaflow-tutorials``` 21 | 2. ```python 04-playlist-plus/playlist.py --environment=conda show``` 22 | 3. ```python 04-playlist-plus/playlist.py --environment=conda run``` 23 | 4. 
```python 04-playlist-plus/playlist.py --environment=conda run --hint "Data Science Strikes Back"``` 24 | -------------------------------------------------------------------------------- /metaflow/tutorials/06-statistics-redux/README.md: -------------------------------------------------------------------------------- 1 | # Episode 06-statistics-redux: Computing in the Cloud. 2 | 3 | **This example revisits 'Episode 02-statistics: Is this Data Science?'. With 4 | Metaflow, you don't need to make any code changes to scale-up your flow by 5 | running on remote compute. In this example we re-run the 'stats.py' workflow 6 | adding the '--with kubernetes' command line argument. This instructs Metaflow to run 7 | all your steps on AWS Kubernetes without changing any code. You can control the 8 | behavior with additional arguments, like '--max-workers'. For this example, 9 | 'max-workers' is used to limit the number of parallel genre-specific statistics 10 | computations. 11 | You can then access the data artifacts (even the local CSV file) from anywhere 12 | because the data is being stored in AWS S3.** 13 | 14 | #### Showcasing: 15 | - '--with kubernetes' command line option 16 | - '--max-workers' command line option 17 | - Accessing data locally or remotely 18 | 19 | #### To play this episode: 20 | 1. ```python 02-statistics/stats.py run --with kubernetes --max-workers 4``` 21 | 2. Open ```06-statistics-redux/stats.ipynb``` -------------------------------------------------------------------------------- /metaflow/tutorials/07-worldview/README.md: -------------------------------------------------------------------------------- 1 | # Episode 07-worldview: Way up here. 2 | 3 | **This episode shows how you can use a notebook to setup a simple dashboard to 4 | monitor all of your Metaflow flows.** 5 | 6 | #### Showcasing: 7 | - The Metaflow client API. 8 | 9 | #### Before playing this episode: 10 | 1. 
```python -m pip install notebook``` (only locally, if you don't have it already) 11 | 12 | #### To play this episode: 13 | 1. Open ```07-worldview/worldview.ipynb``` 14 | -------------------------------------------------------------------------------- /metaflow/tutorials/08-autopilot/README.md: -------------------------------------------------------------------------------- 1 | # Episode 08-autopilot: Scheduling Compute in the Cloud. 2 | 3 | **This example revisits 'Episode 06-statistics-redux: Computing in the Cloud'. 4 | With Metaflow, you don't need to make any code changes to schedule your flow 5 | in the cloud. In this example we will schedule the 'stats.py' workflow 6 | using the 'argo-workflows create' command line argument. This instructs 7 | Metaflow to schedule your flow on Argo Workflows without changing any code. 8 | You can execute your flow on Argo Workflows by using the 9 | 'step-functions trigger' command line argument. You can use a notebook to setup 10 | a simple dashboard to monitor all of your Metaflow flows.** 11 | 12 | #### Showcasing: 13 | - 'argo-workflows create' command line option 14 | - 'argo-workflows trigger' command line option 15 | - Accessing data locally or remotely through the Metaflow Client API 16 | 17 | #### To play this episode: 18 | 1. ```python 02-statistics/stats.py --with kubernetes argo-workflows create --max-workers 4``` 19 | 2. ```python 02-statistics/stats.py argo-workflows trigger ``` 20 | 3. Open ```08-autopilot/autopilot.ipynb``` 21 | -------------------------------------------------------------------------------- /metaflow/unbounded_foreach.py: -------------------------------------------------------------------------------- 1 | CONTROL_TASK_TAG = "control_task" 2 | UBF_CONTROL = "ubf_control" 3 | UBF_TASK = "ubf_task" 4 | 5 | 6 | class UnboundedForeachInput(object): 7 | """ 8 | Plugins that wish to support `UnboundedForeach` need their special 9 | input(s) subclass this class. 
10 | This is used by the runtime to detect the difference between bounded 11 | and unbounded foreach, based on the variable passed to `foreach`. 12 | """ 13 | -------------------------------------------------------------------------------- /metaflow/user_configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/metaflow/user_configs/__init__.py -------------------------------------------------------------------------------- /metaflow/version.py: -------------------------------------------------------------------------------- 1 | metaflow_version = "2.15.15" 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_files = LICENSE 3 | -------------------------------------------------------------------------------- /stubs/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include metaflow-stubs/**/*.pyi 2 | include metaflow-stubs/py.typed 3 | include metaflow-stubs/generated_for.txt 4 | include version.py 5 | -------------------------------------------------------------------------------- /stubs/README.md: -------------------------------------------------------------------------------- 1 | # Metaflow Stubs 2 | 3 | This package contains stub files for `metaflow` and thus offers type hints for various editors (such as `VSCode`) and language servers (such as `Pylance`). 
4 | 5 | ## Installation 6 | 7 | To install Metaflow Stubs in your local environment, you can install from [PyPi](https://pypi.org/project/metaflow-stubs/): 8 | 9 | ```sh 10 | pip install metaflow-stubs 11 | ``` 12 | -------------------------------------------------------------------------------- /stubs/test/setup.cfg: -------------------------------------------------------------------------------- 1 | [mypy] 2 | check_untyped_defs = True 3 | ignore_errors = False 4 | strict_optional = True 5 | show_error_context = False 6 | -------------------------------------------------------------------------------- /test/core/graphs/branch.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "single-and-branch", 3 | "graph": { 4 | "start": {"branch": ["a", "b"], "quals": ["split-and"]}, 5 | "a": {"linear": "join", "quals": ["single-branch-split"]}, 6 | "b": {"linear": "join", "quals": ["single-branch-split"]}, 7 | "join": {"linear": "end", "join": true, "quals": ["join-and"]}, 8 | "end": {} 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /test/core/graphs/foreach.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "simple-foreach", 3 | "graph": { 4 | "start": {"linear": "foreach_split"}, 5 | "foreach_split": { 6 | "foreach": "foreach_inner_first", 7 | "foreach_var": "arr", 8 | "foreach_var_default": "[1, 2, 3]", 9 | "quals": ["foreach-split"] 10 | }, 11 | "foreach_inner_first": { 12 | "linear": "foreach_inner_second", 13 | "quals": ["foreach-inner"] 14 | }, 15 | "foreach_inner_second": { 16 | "linear": "foreach_join", 17 | "quals": ["foreach-inner"] 18 | }, 19 | "foreach_join": { 20 | "linear": "end", 21 | "join": true, 22 | "quals": ["foreach-join"] 23 | }, 24 | "end": {} 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /test/core/graphs/linear.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "single-linear-step", 3 | "graph": { 4 | "start": {"linear": "a", "quals": ["singleton-start"]}, 5 | "a": {"linear": "end", "quals": ["singleton"]}, 6 | "end": {"quals": ["singleton-end"]} 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /test/core/graphs/nested_foreach.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nested-foreach", 3 | "graph": { 4 | "start": {"linear": "foreach_split_x"}, 5 | "foreach_split_x": { 6 | "foreach": "foreach_split_y", 7 | "foreach_var": "x", 8 | "foreach_var_default": "'abc'", 9 | "quals": ["foreach-split"] 10 | }, 11 | "foreach_split_y": { 12 | "foreach": "foreach_split_z", 13 | "foreach_var": "y", 14 | "foreach_var_default": "'de'", 15 | "quals": ["foreach-split"] 16 | }, 17 | "foreach_split_z": { 18 | "foreach": "foreach_inner", 19 | "foreach_var": "z", 20 | "foreach_var_default": "'fghijk'", 21 | "quals": ["foreach-nested-split", "foreach-split"] 22 | }, 23 | "foreach_inner": { 24 | "linear": "foreach_join_z", 25 | "quals": ["foreach-nested-inner", "foreach-inner"] 26 | }, 27 | "foreach_join_z": { 28 | "linear": "foreach_join_y", 29 | "join": true, 30 | "quals": ["foreach-nested-join"] 31 | }, 32 | "foreach_join_y": { "linear": "foreach_join_x", "join": true }, 33 | "foreach_join_x": { "linear": "end", "join": true }, 34 | "end": {} 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test/core/graphs/parallel.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "small-parallel", 3 | "graph": { 4 | "start": {"linear": "parallel_split", "quals": ["start"]}, 5 | "parallel_split": { 6 | "num_parallel": 4, 7 | "parallel": "parallel_inner", 8 | "quals": ["parallel-split"] 9 | }, 10 | "parallel_inner": { 11 | "linear": "parallel_join", 
12 | "quals": ["parallel-step"], 13 | "parallel_step": true 14 | }, 15 | "parallel_join": { 16 | "linear": "end", 17 | "join": true, 18 | "quals": ["parallel-join"] 19 | }, 20 | "end": {} 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /test/core/graphs/small_foreach.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "small-foreach", 3 | "graph": { 4 | "start": {"linear": "foreach_split"}, 5 | "foreach_split": { 6 | "foreach": "foreach_inner", 7 | "foreach_var": "arr", 8 | "foreach_var_default": "[1, 2, 3]", 9 | "quals": ["foreach-split-small", "foreach-split"] 10 | }, 11 | "foreach_inner": { 12 | "linear": "foreach_join", 13 | "quals": ["foreach-inner-small"] 14 | }, 15 | "foreach_join": { 16 | "linear": "end", 17 | "join": true, 18 | "quals": ["foreach-join-small"] 19 | }, 20 | "end": {} 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/config/mfextinit_test_org.py: -------------------------------------------------------------------------------- 1 | METAFLOW_ADDITIONAL_VALUE = 42 2 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/exceptions/mfextinit_test_org.py: -------------------------------------------------------------------------------- 1 | from metaflow.exception import MetaflowException 2 | 3 | 4 | class MetaflowTestException(MetaflowException): 5 | headline = "Subservice error" 6 | 7 | def __init__(self, error): 8 | msg = "Test error: '%s'" % error 9 | super(MetaflowTestException, self).__init__(msg) 10 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/plugins/cards/brokencard/__init__.py: -------------------------------------------------------------------------------- 1 | from metaflow.cards import MetaflowCard 2 | 3 
| 4 | class BrokenCard(MetaflowCard): 5 | type = "test_broken_card" 6 | 7 | def render(self, task): 8 | return task.pathspec 9 | 10 | 11 | CARDS = [BrokenCard] 12 | 13 | raise Exception("This module should not be importable") 14 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/plugins/cards/simplecard/__init__.py: -------------------------------------------------------------------------------- 1 | from metaflow.cards import MetaflowCard 2 | from metaflow.plugins.cards.card_modules.test_cards import TestEditableCard 3 | 4 | 5 | class TestNonEditableImportCard(MetaflowCard): 6 | type = "non_editable_import_test_card" 7 | 8 | ALLOW_USER_COMPONENTS = False 9 | 10 | def __init__(self, options={}, components=[], graph=None, flow=None, **kwargs): 11 | self._options, self._components, self._graph = options, components, graph 12 | 13 | def render(self, task): 14 | return task.pathspec 15 | 16 | 17 | class TestEditableImportCard(TestEditableCard): 18 | type = "editable_import_test_card" 19 | 20 | ALLOW_USER_COMPONENTS = True 21 | 22 | 23 | CARDS = [TestEditableImportCard, TestNonEditableImportCard] 24 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/plugins/flow_options.py: -------------------------------------------------------------------------------- 1 | from metaflow.decorators import FlowDecorator 2 | from metaflow import current 3 | 4 | 5 | class FlowDecoratorWithOptions(FlowDecorator): 6 | name = "test_flow_decorator" 7 | 8 | options = {"foobar": dict(default=None, show_default=False, help="Test flag")} 9 | 10 | def flow_init( 11 | self, flow, graph, environment, flow_datastore, metadata, logger, echo, options 12 | ): 13 | current._update_env({"foobar_value": options["foobar"]}) 14 | -------------------------------------------------------------------------------- 
/test/core/metaflow_extensions/test_org/plugins/frameworks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/test/core/metaflow_extensions/test_org/plugins/frameworks/__init__.py -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/plugins/frameworks/pytorch.py: -------------------------------------------------------------------------------- 1 | from metaflow.plugins.frameworks._orig.pytorch import ( 2 | PytorchParallelDecorator, 3 | setup_torch_distributed, 4 | ) 5 | 6 | 7 | class NewPytorchParallelDecorator(PytorchParallelDecorator): 8 | pass 9 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/plugins/mfextinit_test_org.py: -------------------------------------------------------------------------------- 1 | STEP_DECORATORS_DESC = [ 2 | ("test_step_decorator", ".test_step_decorator.TestStepDecorator"), 3 | ] 4 | 5 | FLOW_DECORATORS_DESC = [ 6 | ("test_flow_decorator", ".flow_options.FlowDecoratorWithOptions"), 7 | ] 8 | 9 | __mf_promote_submodules__ = ["nondecoplugin", "frameworks"] 10 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/plugins/nondecoplugin/__init__.py: -------------------------------------------------------------------------------- 1 | my_value = 42 2 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/plugins/test_step_decorator.py: -------------------------------------------------------------------------------- 1 | from metaflow.decorators import StepDecorator 2 | 3 | 4 | class TestStepDecorator(StepDecorator): 5 | name = "test_step_decorator" 6 | 7 | def task_post_step( 8 | self, step_name, flow, graph, retry_count, 
max_user_code_retries 9 | ): 10 | flow.plugin_set_value = step_name 11 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/toplevel/mfextinit_test_org.py: -------------------------------------------------------------------------------- 1 | toplevel = "test_org_toplevel" 2 | -------------------------------------------------------------------------------- /test/core/metaflow_extensions/test_org/toplevel/test_org_toplevel.py: -------------------------------------------------------------------------------- 1 | __mf_extensions__ = "test" 2 | 3 | tl_value = 42 4 | 5 | __version__ = None 6 | -------------------------------------------------------------------------------- /test/core/tests/basic_artifact.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class BasicArtifactTest(MetaflowTest): 5 | """ 6 | Test that an artifact defined in the first step 7 | is available in all steps downstream. 
8 | """ 9 | 10 | PRIORITY = 0 11 | 12 | @steps(0, ["start"]) 13 | def step_start(self): 14 | self.data = "abc" 15 | 16 | @steps(1, ["join"]) 17 | def step_join(self): 18 | import metaflow_test 19 | 20 | inputset = {inp.data for inp in inputs} 21 | assert_equals({"abc"}, inputset) 22 | self.data = list(inputset)[0] 23 | 24 | @steps(2, ["all"]) 25 | def step_all(self): 26 | pass 27 | 28 | def check_results(self, flow, checker): 29 | for step in flow: 30 | checker.assert_artifact(step.name, "data", "abc") 31 | -------------------------------------------------------------------------------- /test/core/tests/basic_config_silly.txt: -------------------------------------------------------------------------------- 1 | baz:amazing 2 | -------------------------------------------------------------------------------- /test/core/tests/basic_foreach.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class BasicForeachTest(MetaflowTest): 5 | PRIORITY = 0 6 | 7 | @steps(0, ["foreach-split"], required=True) 8 | def split(self): 9 | self.my_index = None 10 | self.arr = range(32) 11 | 12 | @steps(0, ["foreach-inner"], required=True) 13 | def inner(self): 14 | # index must stay constant over multiple steps inside foreach 15 | if self.my_index is None: 16 | self.my_index = self.index 17 | assert_equals(self.my_index, self.index) 18 | assert_equals(self.input, self.arr[self.index]) 19 | self.my_input = self.input 20 | 21 | @steps(0, ["foreach-join"], required=True) 22 | def join(self, inputs): 23 | got = sorted([inp.my_input for inp in inputs]) 24 | assert_equals(list(range(32)), got) 25 | 26 | @steps(1, ["all"]) 27 | def step_all(self): 28 | pass 29 | -------------------------------------------------------------------------------- /test/core/tests/basic_parallel.py: -------------------------------------------------------------------------------- 1 | from metaflow_test 
import MetaflowTest, ExpectationFailed, steps, tag 2 | 3 | 4 | class BasicParallelTest(MetaflowTest): 5 | PRIORITY = 1 6 | 7 | @steps(0, ["parallel-split"], required=True) 8 | def split(self): 9 | self.my_node_index = None 10 | 11 | @steps(0, ["parallel-step"], required=True) 12 | def inner(self): 13 | from metaflow import current 14 | 15 | assert_equals(4, current.parallel.num_nodes) 16 | self.my_node_index = current.parallel.node_index 17 | assert_equals(self.my_node_index, self.input) 18 | 19 | @steps(0, ["join"], required=True) 20 | def join(self, inputs): 21 | got = sorted([inp.my_node_index for inp in inputs]) 22 | assert_equals(list(range(4)), got) 23 | 24 | @steps(1, ["all"]) 25 | def step_all(self): 26 | pass 27 | 28 | def check_results(self, flow, checker): 29 | run = checker.get_run() 30 | if type(checker).__name__ == "CliCheck": 31 | # CliCheck doesn't support enlisting of tasks. 32 | assert run is None 33 | else: 34 | assert run is not None 35 | tasks = run["parallel_inner"].tasks() 36 | task_list = list(tasks) 37 | assert_equals(4, len(task_list)) 38 | assert_equals(1, len(list(run["parallel_inner"].control_tasks()))) 39 | -------------------------------------------------------------------------------- /test/core/tests/card_error.py: -------------------------------------------------------------------------------- 1 | # Todo : Write Test case on graceful error handling. 2 | from metaflow_test import MetaflowTest, ExpectationFailed, steps, tag 3 | 4 | 5 | class CardErrorTest(MetaflowTest): 6 | """ 7 | Test that checks if the card decorator handles Errors gracefully. 8 | In the checker assert that the end step finished and has artifacts after failing 9 | to create the card on the start step. 
10 | """ 11 | 12 | PRIORITY = 2 13 | 14 | @tag('card(type="test_error_card")') 15 | @steps(0, ["start"]) 16 | def step_start(self): 17 | self.data = "abc" 18 | 19 | @steps(1, ["all"]) 20 | def step_all(self): 21 | self.data = "end" 22 | 23 | def check_results(self, flow, checker): 24 | checker.assert_artifact("end", "data", "end") 25 | -------------------------------------------------------------------------------- /test/core/tests/custom_decorators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/metaflow/4670f77db0c229c82cc4de9d85d65dbb4c4f1aa3/test/core/tests/custom_decorators.py -------------------------------------------------------------------------------- /test/core/tests/detect_segfault.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class DetectSegFaultTest(MetaflowTest): 5 | """ 6 | Test that segmentation faults produce a message in the logs 7 | """ 8 | 9 | PRIORITY = 2 10 | SHOULD_FAIL = True 11 | 12 | @steps(0, ["singleton-end"], required=True) 13 | def step_end(self): 14 | # cause a segfault 15 | import ctypes 16 | 17 | print("Crash and burn!") 18 | ctypes.string_at(0) 19 | 20 | @steps(1, ["all"]) 21 | def step_all(self): 22 | pass 23 | 24 | def check_results(self, flow, checker): 25 | # CLI logs requires the exact task ID for failed tasks which 26 | # we don't have here. Let's rely on the Metadata checker only. 
27 | run = checker.get_run() 28 | if run: 29 | # loglines prior to the segfault should be persisted 30 | checker.assert_log("end", "stdout", "Crash and burn!", exact_match=False) 31 | # a message should be printed that mentions "segmentation fault" 32 | checker.assert_log("end", "stderr", "segmentation fault", exact_match=False) 33 | -------------------------------------------------------------------------------- /test/core/tests/extensions.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps, tag 2 | 3 | 4 | class ExtensionsTest(MetaflowTest): 5 | """ 6 | Test that the metaflow_extensions module is properly loaded 7 | """ 8 | 9 | PRIORITY = 0 10 | 11 | @tag("test_step_decorator") 12 | @steps(0, ["all"]) 13 | def step_all(self): 14 | from metaflow.metaflow_config import METAFLOW_ADDITIONAL_VALUE 15 | from metaflow import tl_value 16 | from metaflow.plugins.nondecoplugin import my_value 17 | 18 | from metaflow.exception import MetaflowTestException 19 | from metaflow.plugins.frameworks.pytorch import NewPytorchParallelDecorator 20 | 21 | self.plugin_value = my_value 22 | self.tl_value = tl_value 23 | self.additional_value = METAFLOW_ADDITIONAL_VALUE 24 | 25 | def check_results(self, flow, checker): 26 | for step in flow: 27 | checker.assert_artifact(step.name, "additional_value", 42) 28 | checker.assert_artifact(step.name, "tl_value", 42) 29 | checker.assert_artifact(step.name, "plugin_value", 42) 30 | checker.assert_artifact(step.name, "plugin_set_value", step.name) 31 | -------------------------------------------------------------------------------- /test/core/tests/flow_options.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class FlowOptionsTest(MetaflowTest): 5 | """ 6 | Test that the metaflow_extensions module is properly loaded 7 | """ 8 | 9 | 
PRIORITY = 0 10 | HEADER = """ 11 | import os 12 | from metaflow import test_flow_decorator 13 | 14 | os.environ['METAFLOW_FOOBAR'] = 'this_is_foobar' 15 | @test_flow_decorator 16 | """ 17 | 18 | @steps(0, ["all"]) 19 | def step_all(self): 20 | from metaflow import current 21 | 22 | assert_equals(current.foobar_value, "this_is_foobar") 23 | -------------------------------------------------------------------------------- /test/core/tests/large_artifact.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class LargeArtifactTest(MetaflowTest): 5 | """ 6 | Test that you can serialize large objects (over 4GB) 7 | with Python3 - although on OSX, some versions of Python3 fail 8 | to serialize objects over 2GB - https://bugs.python.org/issue24658 9 | so YMMV. 10 | """ 11 | 12 | PRIORITY = 2 13 | 14 | @steps(0, ["singleton"], required=True) 15 | def step_single(self): 16 | import sys 17 | 18 | if sys.version_info[0] > 2: 19 | self.large = b"x" * int(4.1 * 1024**3) 20 | self.noop = False 21 | else: 22 | self.noop = True 23 | 24 | @steps(0, ["end"]) 25 | def step_end(self): 26 | import sys 27 | 28 | if sys.version_info[0] > 2: 29 | assert_equals(self.large, b"x" * int(4.1 * 1024**3)) 30 | 31 | @steps(1, ["all"]) 32 | def step_all(self): 33 | pass 34 | 35 | def check_results(self, flow, checker): 36 | import sys 37 | 38 | noop = next(iter(checker.artifact_dict("end", "noop").values()))["noop"] 39 | if not noop and sys.version_info[0] > 2: 40 | checker.assert_artifact("end", "large", b"x" * int(4.1 * 1024**3)) 41 | -------------------------------------------------------------------------------- /test/core/tests/merge_artifacts_propagation.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class MergeArtifactsPropagationTest(MetaflowTest): 5 | # This 
test simply tests whether things set on a single branch will 6 | # still get propagated down properly. Other merge_artifacts behaviors 7 | # are tested in the main test (merge_artifacts.py). This test basically 8 | # only matches with the small-foreach graph whereas the other test is 9 | # more generic. 10 | PRIORITY = 1 11 | 12 | @steps(0, ["start"]) 13 | def start(self): 14 | self.non_modified_passdown = "a" 15 | 16 | @steps(0, ["foreach-inner-small"], required=True) 17 | def modify_things(self): 18 | # Set different names to different things 19 | val = self.index 20 | setattr(self, "var%d" % (val), val) 21 | 22 | @steps(0, ["foreach-join-small"], required=True) 23 | def merge_things(self, inputs): 24 | self.merge_artifacts( 25 | inputs, 26 | ) 27 | 28 | # Ensure that everything we expect is passed down 29 | assert_equals(self.non_modified_passdown, "a") 30 | for i, _ in enumerate(inputs): 31 | assert_equals(getattr(self, "var%d" % (i)), i) 32 | 33 | @steps(1, ["all"]) 34 | def step_all(self): 35 | assert_equals(self.non_modified_passdown, "a") 36 | -------------------------------------------------------------------------------- /test/core/tests/nested_foreach.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class NestedForeachTest(MetaflowTest): 5 | PRIORITY = 1 6 | 7 | @steps(0, ["foreach-nested-inner"], required=True) 8 | def inner(self): 9 | [x, y, z] = self.foreach_stack() 10 | 11 | # assert that lengths are correct 12 | assert_equals(len(self.x), x[1]) 13 | assert_equals(len(self.y), y[1]) 14 | assert_equals(len(self.z), z[1]) 15 | 16 | # assert that variables are correct given their indices 17 | assert_equals(x[2], self.x[x[0]]) 18 | assert_equals(y[2], self.y[y[0]]) 19 | assert_equals(z[2], self.z[z[0]]) 20 | 21 | self.combo = x[2] + y[2] + z[2] 22 | 23 | @steps(1, ["all"]) 24 | def step_all(self): 25 | pass 26 | 27 | def 
check_results(self, flow, checker): 28 | from itertools import product 29 | 30 | artifacts = checker.artifact_dict("foreach_inner", "combo") 31 | got = sorted(val["combo"] for val in artifacts.values()) 32 | expected = sorted("".join(p) for p in product("abc", "de", "fghijk")) 33 | assert_equals(expected, got) 34 | -------------------------------------------------------------------------------- /test/core/tests/param_names.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, steps 2 | 3 | 4 | class ParameterNameTest(MetaflowTest): 5 | PRIORITY = 1 6 | PARAMETERS = {"foo": {"default": 1}} 7 | 8 | @steps(0, ["all"]) 9 | def step_all(self): 10 | from metaflow import current 11 | 12 | assert_equals(len(current.parameter_names), 1) 13 | assert_equals(current.parameter_names[0], "foo") 14 | -------------------------------------------------------------------------------- /test/core/tests/project_branch.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps, tag 2 | 3 | 4 | class ProjectBranchTest(MetaflowTest): 5 | PRIORITY = 1 6 | 7 | HEADER = """ 8 | import os 9 | 10 | os.environ['METAFLOW_BRANCH'] = 'this_is_a_test_branch' 11 | @project(name='project_branch') 12 | """ 13 | 14 | @steps(0, ["singleton"], required=True) 15 | def step_single(self): 16 | pass 17 | 18 | @steps(1, ["all"]) 19 | def step_all(self): 20 | from metaflow import current 21 | 22 | assert_equals(current.branch_name, "test.this_is_a_test_branch") 23 | assert_equals( 24 | current.project_flow_name, 25 | "project_branch.test.this_is_a_test_branch.ProjectBranchTestFlow", 26 | ) 27 | -------------------------------------------------------------------------------- /test/core/tests/project_production.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, 
ExpectationFailed, steps, tag 2 | 3 | 4 | class ProjectProductionTest(MetaflowTest): 5 | PRIORITY = 1 6 | 7 | HEADER = """ 8 | import os 9 | 10 | os.environ['METAFLOW_PRODUCTION'] = 'True' 11 | @project(name='project_prod') 12 | """ 13 | 14 | @steps(0, ["singleton"], required=True) 15 | def step_single(self): 16 | pass 17 | 18 | @steps(1, ["all"]) 19 | def step_all(self): 20 | from metaflow import current 21 | 22 | assert_equals(current.branch_name, "prod") 23 | assert_equals( 24 | current.project_flow_name, "project_prod.prod.ProjectProductionTestFlow" 25 | ) 26 | -------------------------------------------------------------------------------- /test/core/tests/run_id_file.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class RunIdFileTest(MetaflowTest): 5 | """ 6 | Resuming and initial running of a flow should write run id file early (prior to execution) 7 | """ 8 | 9 | RESUME = True 10 | PRIORITY = 3 11 | 12 | @steps(0, ["singleton-start"], required=True) 13 | def step_start(self): 14 | import os 15 | from metaflow import current 16 | 17 | # Whether we are in "run" or "resume" mode, --run-id-file must be written prior to execution 18 | assert os.path.isfile( 19 | "run-id" 20 | ), "run id file should exist before resume execution" 21 | with open("run-id", "r") as f: 22 | run_id_from_file = f.read() 23 | assert run_id_from_file == current.run_id 24 | 25 | # Test both regular run and resume paths 26 | if not is_resumed(): 27 | raise ResumeFromHere() 28 | 29 | @steps(2, ["all"]) 30 | def step_all(self): 31 | pass 32 | 33 | def check_results(self, flow, checker): 34 | pass 35 | -------------------------------------------------------------------------------- /test/core/tests/task_exception.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | 
class TaskExceptionTest(MetaflowTest): 5 | """ 6 | A test to validate if exceptions are stored and retrieved correctly 7 | """ 8 | 9 | PRIORITY = 1 10 | SHOULD_FAIL = True 11 | 12 | @steps(0, ["singleton-end"], required=True) 13 | def step_start(self): 14 | raise KeyError("Something has gone wrong") 15 | 16 | @steps(2, ["all"]) 17 | def step_all(self): 18 | pass 19 | 20 | def check_results(self, flow, checker): 21 | run = checker.get_run() 22 | if run is not None: 23 | for task in run["end"]: 24 | assert_equals("KeyError" in str(task.exception), True) 25 | assert_equals(task.exception.exception, "'Something has gone wrong'") 26 | -------------------------------------------------------------------------------- /test/core/tests/timeout_decorator.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps, tag 2 | 3 | 4 | class TimeoutDecoratorTest(MetaflowTest): 5 | """ 6 | Test that checks that the timeout decorator works as intended. 7 | """ 8 | 9 | PRIORITY = 2 10 | 11 | @tag('catch(var="ex", print_exception=False)') 12 | @tag("timeout(seconds=1)") 13 | @steps(0, ["singleton-start", "foreach-inner"], required=True) 14 | def step_sleep(self): 15 | self.check = True 16 | import time 17 | 18 | time.sleep(5) 19 | 20 | @steps(1, ["all"]) 21 | def step_all(self): 22 | pass 23 | 24 | def check_results(self, flow, checker): 25 | run = checker.get_run() 26 | if run: 27 | timeout_raised = False 28 | for step in run: 29 | for task in step: 30 | if "check" in task.data: 31 | extype = ( 32 | "metaflow.plugins.timeout_decorator." 
"TimeoutException" 33 | ) 34 | assert_equals(extype, str(task.data.ex.type)) 35 | timeout_raised = True 36 | assert_equals(True, timeout_raised) 37 | -------------------------------------------------------------------------------- /test/core/tests/wide_foreach.py: -------------------------------------------------------------------------------- 1 | from metaflow_test import MetaflowTest, ExpectationFailed, steps 2 | 3 | 4 | class WideForeachTest(MetaflowTest): 5 | PRIORITY = 3 6 | 7 | @steps(0, ["foreach-split-small"], required=True) 8 | def split(self): 9 | self.my_index = None 10 | self.arr = range(1200) 11 | 12 | @steps(0, ["foreach-inner-small"], required=True) 13 | def inner(self): 14 | self.my_input = self.input 15 | 16 | @steps(0, ["foreach-join-small"], required=True) 17 | def join(self, inputs): 18 | got = sorted([inp.my_input for inp in inputs]) 19 | assert_equals(list(range(1200)), got) 20 | 21 | @steps(1, ["all"]) 22 | def step_all(self): 23 | pass 24 | 25 | def check_results(self, flow, checker): 26 | run = checker.get_run() 27 | if run: 28 | # The client API shouldn't choke on many tasks 29 | res = sorted(task.data.my_input for task in run["foreach_inner"]) 30 | assert_equals(list(range(1200)), res) 31 | -------------------------------------------------------------------------------- /test/data/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # Can set a default path here. Note that you can update the path 4 | # if you want a fresh set of data 5 | S3ROOT = os.environ.get("METAFLOW_S3_TEST_ROOT") 6 | 7 | from metaflow.plugins.datatools.s3.s3util import get_s3_client 8 | 9 | s3client, _ = get_s3_client() 10 | 11 | from metaflow import FlowSpec 12 | 13 | 14 | # ast parsing in metaflow.graph doesn't like this class 15 | # to be defined in test_s3.py. Defining it here works. 
16 | class FakeFlow(FlowSpec): 17 | def __init__(self, name="FakeFlow", use_cli=False): 18 | self.name = name 19 | 20 | 21 | DO_TEST_RUN = False 22 | -------------------------------------------------------------------------------- /test/data/s3/__init__.py: -------------------------------------------------------------------------------- 1 | # nothing here 2 | -------------------------------------------------------------------------------- /test/data/s3/test_s3op.py: -------------------------------------------------------------------------------- 1 | from metaflow.plugins.datatools.s3.s3op import convert_to_client_error 2 | 3 | 4 | def test_convert_to_client_error(): 5 | s = "boto3.exceptions.S3UploadFailedError: Failed to upload /a/b/c/d.parquet to e/f/g/h.parquet: An error occurred (SlowDown) when calling the CompleteMultipartUpload operation (reached max retries: 4): Please reduce your request rate." 6 | client_error = convert_to_client_error(s) 7 | assert client_error.response["Error"]["Code"] == "SlowDown" 8 | assert ( 9 | client_error.response["Error"]["Message"] == "Please reduce your request rate." 10 | ) 11 | assert client_error.operation_name == "CompleteMultipartUpload" 12 | -------------------------------------------------------------------------------- /test/extensions/README.md: -------------------------------------------------------------------------------- 1 | # Extensions Testing Framework. 2 | 3 | What does this framework do ? It installs the extensions and then runs the test suite which leverages the extensions. 4 | 5 | Currently installs the cards related packages. 
-------------------------------------------------------------------------------- /test/extensions/install_packages.sh: -------------------------------------------------------------------------------- 1 | pip install ./packages/card_via_extinit 2 | pip install ./packages/card_via_init 3 | pip install ./packages/card_via_ns_subpackage -------------------------------------------------------------------------------- /test/extensions/packages/card_via_extinit/README.md: -------------------------------------------------------------------------------- 1 | # card_via_extinit 2 | 3 | This test will check if card extensions installed with `mfextinit_*.py` work with Metaflow. -------------------------------------------------------------------------------- /test/extensions/packages/card_via_extinit/metaflow_extensions/card_via_extinit/plugins/cards/card_a/__init__.py: -------------------------------------------------------------------------------- 1 | from metaflow.cards import MetaflowCard 2 | 3 | 4 | class TestMockCard(MetaflowCard): 5 | type = "card_ext_init_a" 6 | 7 | def __init__(self, options={"key": "task"}, **kwargs): 8 | self._key = options["key"] if "key" in options else "task" 9 | 10 | def render(self, task): 11 | task_data = task[self._key].data 12 | return "%s" % task_data 13 | 14 | 15 | CARDS = [TestMockCard] 16 | -------------------------------------------------------------------------------- /test/extensions/packages/card_via_extinit/metaflow_extensions/card_via_extinit/plugins/cards/card_b/__init__.py: -------------------------------------------------------------------------------- 1 | from metaflow.cards import MetaflowCard 2 | 3 | 4 | class TestMockCard(MetaflowCard): 5 | type = "card_ext_init_b" 6 | 7 | def __init__(self, options={"key": "task"}, **kwargs): 8 | self._key = options["key"] if "key" in options else "task" 9 | 10 | def render(self, task): 11 | task_data = task[self._key].data 12 | return "%s" % task_data 13 | 14 | 15 | CARDS = [TestMockCard] 16 
| -------------------------------------------------------------------------------- /test/extensions/packages/card_via_extinit/metaflow_extensions/card_via_extinit/plugins/cards/mfextinit_X.py: -------------------------------------------------------------------------------- 1 | from .card_a import CARDS as a 2 | from .card_b import CARDS as b 3 | 4 | CARDS = a + b 5 | -------------------------------------------------------------------------------- /test/extensions/packages/card_via_extinit/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_namespace_packages, setup 2 | 3 | 4 | def get_long_description() -> str: 5 | with open("README.md") as fh: 6 | return fh.read() 7 | 8 | 9 | setup( 10 | name="metaflow-card-via-extinit", 11 | version="1.0.0", 12 | description="A description of your card", 13 | long_description=get_long_description(), 14 | long_description_content_type="text/markdown", 15 | author="Your Name", 16 | author_email="your_name@yourdomain.com", 17 | license="Apache Software License 2.0", 18 | packages=find_namespace_packages(include=["metaflow_extensions.*"]), 19 | include_package_data=True, 20 | zip_safe=False, 21 | ) 22 | -------------------------------------------------------------------------------- /test/extensions/packages/card_via_init/README.md: -------------------------------------------------------------------------------- 1 | # card_via_init 2 | 3 | This test checks if card extensions directly with a `plugins/cards` directory structure work as planned. 
-------------------------------------------------------------------------------- /test/extensions/packages/card_via_init/metaflow_extensions/card_via_init/plugins/cards/__init__.py: -------------------------------------------------------------------------------- 1 | from metaflow.cards import MetaflowCard 2 | 3 | 4 | class TestMockCard(MetaflowCard): 5 | type = "card_init" 6 | 7 | def __init__(self, options={"key": "task"}, **kwargs): 8 | self._key = options["key"] if "key" in options else "task" 9 | 10 | def render(self, task): 11 | task_data = task[self._key].data 12 | return "%s" % task_data 13 | 14 | 15 | CARDS = [TestMockCard] 16 | -------------------------------------------------------------------------------- /test/extensions/packages/card_via_init/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_namespace_packages, setup 2 | 3 | 4 | def get_long_description() -> str: 5 | with open("README.md") as fh: 6 | return fh.read() 7 | 8 | 9 | setup( 10 | name="metaflow-card-via-init", 11 | version="1.0.0", 12 | description="A description of your card", 13 | long_description=get_long_description(), 14 | long_description_content_type="text/markdown", 15 | author="Your Name", 16 | author_email="your_name@yourdomain.com", 17 | license="Apache Software License 2.0", 18 | packages=find_namespace_packages(include=["metaflow_extensions.*"]), 19 | include_package_data=True, 20 | zip_safe=False, 21 | ) 22 | -------------------------------------------------------------------------------- /test/extensions/packages/card_via_ns_subpackage/README.md: -------------------------------------------------------------------------------- 1 | # card_ns_subpackage 2 | 3 | This test will check if card extensions installed subpackages under namespace packages work -------------------------------------------------------------------------------- 
/test/extensions/packages/card_via_ns_subpackage/metaflow_extensions/card_via_ns_subpackage/plugins/cards/nssubpackage/__init__.py: -------------------------------------------------------------------------------- 1 | from metaflow.cards import MetaflowCard 2 | 3 | 4 | class TestMockCard(MetaflowCard): 5 | type = "card_ns_subpackage" 6 | 7 | def __init__(self, options={"key": "task"}, **kwargs): 8 | self._key = options["key"] if "key" in options else "task" 9 | 10 | def render(self, task): 11 | task_data = task[self._key].data 12 | return "%s" % task_data 13 | 14 | 15 | CARDS = [TestMockCard] 16 | -------------------------------------------------------------------------------- /test/extensions/packages/card_via_ns_subpackage/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_namespace_packages, setup 2 | 3 | 4 | def get_long_description() -> str: 5 | with open("README.md") as fh: 6 | return fh.read() 7 | 8 | 9 | setup( 10 | name="metaflow-card-via-nspackage", 11 | version="1.0.0", 12 | description="A description of your card", 13 | long_description=get_long_description(), 14 | long_description_content_type="text/markdown", 15 | author="Your Name", 16 | author_email="your_name@yourdomain.com", 17 | license="Apache Software License 2.0", 18 | packages=find_namespace_packages(include=["metaflow_extensions.*"]), 19 | include_package_data=True, 20 | zip_safe=False, 21 | ) 22 | -------------------------------------------------------------------------------- /test/parallel/parallel_test_flow.py: -------------------------------------------------------------------------------- 1 | from metaflow import FlowSpec, step, batch, current, parallel, Parameter 2 | 3 | 4 | class ParallelTest(FlowSpec): 5 | """ 6 | Test flow to test @parallel. 
7 | """ 8 | 9 | num_parallel = Parameter( 10 | "num_parallel", help="Number of nodes in cluster", default=3 11 | ) 12 | 13 | @step 14 | def start(self): 15 | self.next(self.parallel_step, num_parallel=self.num_parallel) 16 | 17 | @parallel 18 | @step 19 | def parallel_step(self): 20 | self.node_index = current.parallel.node_index 21 | self.num_nodes = current.parallel.num_nodes 22 | print("parallel_step: node {} finishing.".format(self.node_index)) 23 | self.next(self.multinode_end) 24 | 25 | @step 26 | def multinode_end(self, inputs): 27 | j = 0 28 | for input in inputs: 29 | assert input.node_index == j 30 | assert input.num_nodes == self.num_parallel 31 | j += 1 32 | assert j == self.num_parallel 33 | self.next(self.end) 34 | 35 | @step 36 | def end(self): 37 | pass 38 | 39 | 40 | if __name__ == "__main__": 41 | ParallelTest() 42 | -------------------------------------------------------------------------------- /test/test_config/basic_config_silly.txt: -------------------------------------------------------------------------------- 1 | baz:amazing 2 | -------------------------------------------------------------------------------- /test/test_config/card_config.py: -------------------------------------------------------------------------------- 1 | import time 2 | from metaflow import FlowSpec, step, Config, card 3 | 4 | 5 | class CardConfigFlow(FlowSpec): 6 | 7 | config = Config("config", default_value="") 8 | 9 | @card(type=config.type) 10 | @step 11 | def start(self): 12 | print("card type", self.config.type) 13 | self.next(self.end) 14 | 15 | @step 16 | def end(self): 17 | print("full config", self.config) 18 | 19 | 20 | if __name__ == "__main__": 21 | CardConfigFlow() 22 | -------------------------------------------------------------------------------- /test/test_config/config2.json: -------------------------------------------------------------------------------- 1 | { 2 | "default_param": 456, 3 | "default_param2": 789 4 | } 5 | 
-------------------------------------------------------------------------------- /test/test_config/config_card.py: -------------------------------------------------------------------------------- 1 | import time 2 | from metaflow import FlowSpec, step, card, current, Config, Parameter, config_expr 3 | from metaflow.cards import Image 4 | 5 | BASE = "https://picsum.photos/id" 6 | 7 | 8 | class ConfigurablePhotoFlow(FlowSpec): 9 | cfg = Config("config", default="photo_config.json") 10 | id = Parameter("id", default=cfg.id, type=int) 11 | size = Parameter("size", default=cfg.size, type=int) 12 | 13 | @card 14 | @step 15 | def start(self): 16 | import requests 17 | 18 | params = {k: v for k, v in self.cfg.style.items() if v} 19 | self.url = f"{BASE}/{self.id}/{self.size}/{self.size}" 20 | img = requests.get(self.url, params) 21 | current.card.append(Image(img.content)) 22 | self.next(self.end) 23 | 24 | @step 25 | def end(self): 26 | pass 27 | 28 | 29 | if __name__ == "__main__": 30 | ConfigurablePhotoFlow() 31 | -------------------------------------------------------------------------------- /test/test_config/config_parser_requirements.txt: -------------------------------------------------------------------------------- 1 | python==3.10.* 2 | regex==2024.11.6 3 | -------------------------------------------------------------------------------- /test/test_config/config_simple.json: -------------------------------------------------------------------------------- 1 | {"some": {"value": 5}} 2 | -------------------------------------------------------------------------------- /test/test_config/no_default.py: -------------------------------------------------------------------------------- 1 | from metaflow import Config, FlowSpec, card, step 2 | 3 | 4 | class Sample(FlowSpec): 5 | config = Config("config", default=None) 6 | 7 | @card 8 | @step 9 | def start(self): 10 | self.next(self.end) 11 | 12 | @step 13 | def end(self): 14 | pass 15 | 16 | 17 | if __name__ == "__main__": 
18 | Sample() 19 | -------------------------------------------------------------------------------- /test/test_config/photo_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 1084, 3 | "size": 400, 4 | "style": { 5 | "grayscale": true, 6 | "blur": 5 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /test/test_config/runner_flow.py: -------------------------------------------------------------------------------- 1 | from metaflow import FlowSpec, Runner, step 2 | 3 | 4 | class RunnerFlow(FlowSpec): 5 | @step 6 | def start(self): 7 | with Runner("./mutable_flow.py") as r: 8 | r.run() 9 | self.next(self.end) 10 | 11 | @step 12 | def end(self): 13 | print("Done") 14 | 15 | 16 | if __name__ == "__main__": 17 | RunnerFlow() 18 | -------------------------------------------------------------------------------- /test/unit/test_conda_decorator.py: -------------------------------------------------------------------------------- 1 | from metaflow.plugins.pypi.conda_decorator import CondaStepDecorator 2 | 3 | 4 | def test_decorator_custom_attributes(): 5 | deco = CondaStepDecorator(attributes={"python": "3.9"}) 6 | deco.init() 7 | assert deco.is_attribute_user_defined( 8 | "python" 9 | ), "python is supposed to be an user-defined attribute" 10 | assert not deco.is_attribute_user_defined( 11 | "packages" 12 | ), "packages is supposed to be default" 13 | assert not deco.is_attribute_user_defined( 14 | "libraries" 15 | ), "libraries is supposed to be default" 16 | 17 | 18 | def test_decorator_custom_attributes_with_backward_compatibility(): 19 | deco = CondaStepDecorator(attributes={"libraries": {"a": "test"}}) 20 | deco.init() 21 | assert not deco.is_attribute_user_defined( 22 | "python" 23 | ), "python is supposed to be default" 24 | assert deco.is_attribute_user_defined( 25 | "packages" 26 | ), "packages is supposed to be user-defined" 27 | assert 
deco.is_attribute_user_defined( 28 | "libraries" 29 | ), "libraries is supposed to be user-defined" 30 | -------------------------------------------------------------------------------- /test/unit/test_local_metadata_provider.py: -------------------------------------------------------------------------------- 1 | from metaflow.plugins.metadata_providers.local import LocalMetadataProvider 2 | 3 | 4 | def test_deduce_run_id_from_meta_dir(): 5 | test_cases = [ 6 | { 7 | "meta_path": ".metaflow/BasicParameterTestFlow/1652384326805262/start/1/_meta", 8 | "sub_type": "task", 9 | "expected_run_id": "1652384326805262", 10 | }, 11 | { 12 | "meta_path": ".metaflow/BasicParameterTestFlow/1652384326805262/start/_meta", 13 | "sub_type": "step", 14 | "expected_run_id": "1652384326805262", 15 | }, 16 | { 17 | "meta_path": ".metaflow/BasicParameterTestFlow/1652384326805262/_meta", 18 | "sub_type": "run", 19 | "expected_run_id": "1652384326805262", 20 | }, 21 | { 22 | "meta_path": ".metaflow/BasicParameterTestFlow/_meta", 23 | "sub_type": "flow", 24 | "expected_run_id": None, 25 | }, 26 | ] 27 | for case in test_cases: 28 | actual_run_id = LocalMetadataProvider._deduce_run_id_from_meta_dir( 29 | case["meta_path"], case["sub_type"] 30 | ) 31 | assert case["expected_run_id"] == actual_run_id 32 | -------------------------------------------------------------------------------- /test/unit/test_multicore_utils.py: -------------------------------------------------------------------------------- 1 | from metaflow.multicore_utils import parallel_map 2 | 3 | 4 | def test_parallel_map(): 5 | assert parallel_map(lambda s: s.upper(), ["a", "b", "c", "d", "e", "f"]) == [ 6 | "A", 7 | "B", 8 | "C", 9 | "D", 10 | "E", 11 | "F", 12 | ] 13 | -------------------------------------------------------------------------------- /test/unit/test_pypi_decorator.py: -------------------------------------------------------------------------------- 1 | from metaflow.plugins.pypi.pypi_decorator import 
#!/bin/bash

set -o errexit -o nounset -o pipefail

install_deps() {
    # Install the package for every supported interpreter.
    # NOTE(review): the python2 install looks legacy — confirm it is still
    # required before removing it.
    for version in 2 3;
    do
        "python$version" -m pip install .
    done
}

install_extensions() {
    # FIX: run in a subshell so the caller's working directory is never
    # mutated, even if a command fails under errexit (the original cd'd in
    # and back manually).
    (
        cd test/extensions
        sh install_packages.sh
    )
}

run_tests() {
    # FIX: subshell instead of cd/cd-back; $(...) with quoting instead of
    # unquoted backticks.
    (
        cd test/core
        PYTHONPATH="$(pwd)/../../" python3 run_tests.py --num-parallel 8
    )
}

# We run realtime cards tests separately because these tests validate the
# asynchronous updates to the information stored in the datastore. So if there
# are other processes starving resources then these tests will surely fail
# since a lot of checks have timeouts.
run_runtime_card_tests() {
    CARD_GRAPHS="small-foreach,small-parallel,nested-branches,single-linear-step,simple-foreach"
    (
        cd test/core
        PYTHONPATH="$(pwd)/../../" python3 run_tests.py --num-parallel 8 \
            --contexts python3-all-local-cards-realtime --graphs "$CARD_GRAPHS"
    )
}

install_deps && install_extensions && run_tests && run_runtime_card_tests