├── .flake8 ├── .github ├── actions │ └── clear-action-cache │ │ └── action.yml └── workflows │ ├── checks.yml │ ├── monodocs_build.yml │ ├── serialize_example.yml │ └── upgrade_automation.yml ├── .gitignore ├── .goreleaser.yml ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── _example_template ├── Dockerfile ├── README.md ├── _example_template │ ├── __init__.py │ └── example.py └── requirements.in ├── boilerplate ├── flyte │ ├── code_of_conduct │ │ ├── CODE_OF_CONDUCT.md │ │ ├── README.rst │ │ └── update.sh │ └── end2end │ │ ├── Makefile │ │ ├── end2end.sh │ │ ├── functional-test-config.yaml │ │ └── run-tests.py ├── update.cfg └── update.sh ├── dev-requirements.in ├── dev-requirements.txt ├── docs-requirements.in ├── docs-requirements.txt ├── docs ├── Makefile ├── README.md ├── _ext │ └── auto_examples.py ├── _static │ ├── code-example-icon.png │ ├── custom.css │ ├── custom.js │ ├── flyte_circle_gradient_1_4x4.png │ └── sphx_gallery_autogen.css ├── _templates │ └── sidebar │ │ └── brand.html ├── conf.py ├── flyte_lockup_on_dark.png ├── flyte_mark_offset_pink.png ├── index.md ├── integrations │ └── index.md └── tutorials │ ├── bioinformatics │ └── index.md │ ├── feature_engineering │ └── index.md │ ├── flytelab │ ├── index.md │ └── weather_forecasting.md │ ├── index.md │ └── model_training │ └── index.md ├── examples ├── README.md ├── advanced_composition │ ├── Dockerfile │ ├── README.md │ └── advanced_composition │ │ ├── __init__.py │ │ ├── chain_entities.py │ │ ├── checkpoint.py │ │ ├── conditional.py │ │ ├── decorating_tasks.py │ │ ├── decorating_workflows.py │ │ ├── dynamic_workflow.py │ │ ├── eager_workflows.py │ │ ├── map_task.py │ │ ├── subworkflow.py │ │ └── waiting_for_external_inputs.py ├── airflow_agent │ ├── README.md │ ├── airflow_agent │ │ ├── __init__.py │ │ └── airflow_agent_example_usage.py │ └── requirements.in ├── airflow_plugin │ ├── README.md │ └── 
airflow_plugin │ │ ├── __init__.py │ │ └── airflow.py ├── athena_plugin │ ├── Dockerfile │ ├── README.md │ ├── athena_plugin │ │ ├── __init__.py │ │ └── athena.py │ └── requirements.in ├── aws_batch_plugin │ ├── Dockerfile │ ├── README.md │ ├── aws_batch_plugin │ │ ├── __init__.py │ │ └── batch.py │ └── requirements.in ├── basics │ ├── Dockerfile │ ├── README.md │ └── basics │ │ ├── __init__.py │ │ ├── basic_interactive_mode.ipynb │ │ ├── documenting_workflows.py │ │ ├── hello_world.py │ │ ├── imperative_workflow.py │ │ ├── launch_plan.py │ │ ├── named_outputs.py │ │ ├── shell_task.py │ │ ├── task.py │ │ └── workflow.py ├── bigquery_agent │ ├── Dockerfile │ ├── README.md │ ├── bigquery_agent │ │ ├── __init__.py │ │ └── bigquery_agent_example_usage.py │ └── requirements.in ├── bigquery_plugin │ ├── Dockerfile │ ├── README.md │ ├── bigquery_plugin │ │ ├── __init__.py │ │ └── bigquery_plugin_example.py │ └── requirements.in ├── blast │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── blast │ │ ├── __init__.py │ │ └── blastx_example.py │ └── requirements.in ├── chatgpt_agent │ ├── README.md │ ├── chatgpt_agent │ │ ├── __init__.py │ │ └── chatgpt_agent_example_usage.py │ └── requirements.in ├── comet_ml_plugin │ ├── Dockerfile │ ├── README.md │ ├── comet_ml_plugin │ │ ├── __init__.py │ │ └── comet_ml_example.py │ └── requirements.in ├── customizing_dependencies │ ├── Dockerfile │ ├── README.md │ ├── customizing_dependencies │ │ ├── __init__.py │ │ ├── calculate-ellipse-area-new.py │ │ ├── image_spec.py │ │ ├── multi_images.py │ │ └── raw_container.py │ ├── raw-containers-supporting-files │ │ ├── MAINTAINERS.md │ │ └── per-language │ │ │ ├── haskell │ │ │ ├── Dockerfile │ │ │ └── calculate-ellipse-area.hs │ │ │ ├── julia │ │ │ ├── Dockerfile │ │ │ └── calculate-ellipse-area.jl │ │ │ ├── python │ │ │ ├── Dockerfile │ │ │ └── calculate-ellipse-area.py │ │ │ ├── r │ │ │ ├── Dockerfile │ │ │ ├── calculate-ellipse-area.R │ │ │ └── install-readr.R │ │ │ └── shell │ │ │ 
├── Dockerfile │ │ │ └── calculate-ellipse-area.sh │ └── requirements.in ├── data_types_and_io │ ├── Dockerfile │ ├── README.md │ ├── data_types_and_io │ │ ├── __init__.py │ │ ├── attribute_access.py │ │ ├── dataclass.py │ │ ├── dataclass_input.json │ │ ├── enum_type.py │ │ ├── file.py │ │ ├── file_streaming.py │ │ ├── folder.py │ │ ├── pickle_type.py │ │ ├── pydantic_basemodel.py │ │ ├── pytorch_type.py │ │ ├── structured_dataset.py │ │ └── tensorflow_type.py │ ├── requirements.in │ └── test_data │ │ ├── biostats.csv │ │ └── faithful.csv ├── databricks_agent │ ├── Dockerfile │ ├── README.md │ ├── databricks_agent │ │ ├── __init__.py │ │ └── databricks_agent_example_usage.py │ └── requirements.in ├── databricks_plugin │ ├── Dockerfile │ ├── README.md │ ├── databricks_plugin │ │ ├── __init__.py │ │ └── databricks_plugin_example.py │ └── requirements.in ├── dbt_plugin │ ├── Dockerfile │ ├── README.md │ ├── dbt_plugin │ │ ├── __init__.py │ │ └── dbt_example.py │ ├── profiles.yml │ └── requirements.in ├── development_lifecycle │ ├── Dockerfile │ ├── README.md │ ├── development_lifecycle │ │ ├── __init__.py │ │ ├── decks.py │ │ ├── failure_node.py │ │ ├── task_cache.py │ │ └── task_cache_serialize.py │ └── requirements.in ├── dolt_plugin │ ├── Dockerfile │ ├── README.md │ ├── dolt_plugin │ │ ├── __init__.py │ │ ├── dolt_branch_example.py │ │ └── dolt_quickstart_example.py │ └── requirements.in ├── duckdb_plugin │ ├── Dockerfile │ ├── README.md │ ├── duckdb_plugin │ │ ├── __init__.py │ │ └── duckdb_example.py │ └── requirements.in ├── exploratory_data_analysis │ ├── Dockerfile │ ├── README.md │ ├── exploratory_data_analysis │ │ ├── __init__.py │ │ ├── notebook.py │ │ ├── notebook_and_task.py │ │ ├── notebooks_as_tasks.py │ │ ├── supermarket_regression.ipynb │ │ ├── supermarket_regression_1.ipynb │ │ └── supermarket_regression_2.ipynb │ └── requirements.in ├── extending │ ├── README.md │ └── extending │ │ ├── __init__.py │ │ ├── custom_types.py │ │ └── user_container.py 
├── feast_integration │ ├── Dockerfile │ ├── README.md │ ├── feast_integration │ │ ├── __init__.py │ │ ├── feast_flyte_remote.ipynb │ │ ├── feast_workflow.py │ │ └── feature_eng_tasks.py │ └── requirements.in ├── flyteinteractive_plugin │ ├── Dockerfile │ ├── README.md │ ├── flyteinteractive_plugin │ │ ├── __init__.py │ │ ├── jupyter.py │ │ └── vscode.py │ └── requirements.in ├── forecasting_sales │ ├── Dockerfile │ ├── README.md │ ├── forecasting_sales │ │ ├── __init__.py │ │ └── keras_spark_rossmann_estimator.py │ └── requirements.in ├── greatexpectations_plugin │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── data │ │ ├── movies.sqlite │ │ └── yellow_tripdata_sample_2019-01.csv │ ├── great_expectations │ │ ├── expectations │ │ │ ├── sqlite │ │ │ │ └── movies.json │ │ │ └── test │ │ │ │ └── demo.json │ │ ├── great_expectations.yml │ │ └── plugins │ │ │ └── custom_data_docs │ │ │ └── styles │ │ │ └── data_docs_custom_styles.css │ ├── greatexpectations_plugin │ │ ├── __init__.py │ │ ├── task_example.py │ │ └── type_example.py │ └── requirements.in ├── hive_plugin │ ├── README.md │ ├── hive_plugin │ │ ├── __init__.py │ │ └── hive.py │ └── requirements.in ├── house_price_prediction │ ├── Dockerfile │ ├── README.md │ ├── house_price_prediction │ │ ├── __init__.py │ │ ├── house_price_predictor.py │ │ └── multiregion_house_price_predictor.py │ └── requirements.in ├── k8s_dask_plugin │ ├── Dockerfile │ ├── README.md │ ├── k8s_dask_plugin │ │ ├── __init__.py │ │ └── dask_example.py │ └── requirements.in ├── k8s_pod_plugin │ ├── Dockerfile │ ├── README.md │ ├── k8s_pod_plugin │ │ ├── __init__.py │ │ └── pod.py │ └── requirements.in ├── k8s_spark_plugin │ ├── Dockerfile │ ├── README.md │ ├── k8s_spark_plugin │ │ ├── __init__.py │ │ ├── dataframe_passing.py │ │ └── pyspark_pi.py │ └── requirements.in ├── kfmpi_plugin │ ├── Dockerfile │ ├── README.md │ ├── kfmpi_plugin │ │ ├── __init__.py │ │ └── mpi_mnist.py │ └── requirements.in ├── kfpytorch_plugin │ ├── README.md │ 
├── kfpytorch_plugin │ │ ├── __init__.py │ │ ├── pytorch_lightning_mnist_autoencoder.py │ │ └── pytorch_mnist.py │ └── requirements.in ├── kftensorflow_plugin │ ├── .gitignore │ ├── Dockerfile │ ├── README.md │ ├── kftensorflow_plugin │ │ ├── __init__.py │ │ └── tf_mnist.py │ └── requirements.in ├── memray_plugin │ ├── Dockerfile │ ├── README.md │ ├── memray_plugin │ │ ├── __init__.py │ │ └── memray_example.py │ └── requirements.in ├── mlflow_plugin │ ├── Dockerfile │ ├── README.md │ ├── mlflow_plugin │ │ ├── __init__.py │ │ └── mlflow_example.py │ └── requirements.in ├── mmcloud_agent │ ├── Dockerfile │ ├── README.md │ ├── mmcloud_agent │ │ ├── __init__.py │ │ └── mmcloud_agent_example_usage.py │ └── requirements.in ├── mnist_classifier │ ├── Dockerfile │ ├── README.md │ ├── mnist_classifier │ │ ├── __init__.py │ │ ├── pytorch_single_node_and_gpu.py │ │ └── pytorch_single_node_multi_gpu.py │ └── requirements.in ├── modin_plugin │ ├── Dockerfile │ ├── README.md │ ├── modin_plugin │ │ ├── __init__.py │ │ └── knn_classifier.py │ └── requirements.in ├── neptune_plugin │ ├── Dockerfile │ ├── README.md │ ├── neptune_plugin │ │ ├── __init__.py │ │ └── neptune_example.py │ └── requirements.in ├── nim_plugin │ ├── Dockerfile │ ├── README.md │ ├── nim_plugin │ │ ├── __init__.py │ │ └── serve_nim_container.py │ └── requirements.in ├── nlp_processing │ ├── Dockerfile │ ├── README.md │ ├── nlp_processing │ │ ├── __init__.py │ │ └── word2vec_and_lda.py │ └── requirements.in ├── ollama_plugin │ ├── Dockerfile │ ├── README.md │ ├── ollama_plugin │ │ ├── __init__.py │ │ └── serve_llm.py │ └── requirements.in ├── onnx_plugin │ ├── README.md │ └── onnx_plugin │ │ ├── __init__.py │ │ ├── pytorch_onnx.py │ │ ├── scikitlearn_onnx.py │ │ └── tensorflow_onnx.py ├── openai_batch_agent │ ├── Dockerfile │ ├── README.md │ ├── openai_batch_agent │ │ ├── __init__.py │ │ ├── data.jsonl │ │ └── openai_batch_agent_example_usage.py │ └── requirements.in ├── pandera_plugin │ ├── Dockerfile │ ├── 
README.md │ ├── pandera_plugin │ │ ├── __init__.py │ │ ├── basic_schema_example.py │ │ └── validating_and_testing_ml_pipelines.py │ └── requirements.in ├── papermill_plugin │ ├── Dockerfile │ ├── README.md │ ├── papermill_plugin │ │ ├── __init__.py │ │ ├── nb_simple.ipynb │ │ └── simple.py │ └── requirements.in ├── perian_agent │ ├── Dockerfile │ ├── README.md │ ├── perian_agent │ │ ├── __init__.py │ │ └── example.py │ └── requirements.in ├── pima_diabetes │ ├── Dockerfile │ ├── README.md │ ├── pima_diabetes │ │ ├── __init__.py │ │ └── diabetes.py │ └── requirements.in ├── productionizing │ ├── Dockerfile │ ├── README.md │ └── productionizing │ │ ├── __init__.py │ │ ├── customizing_resources.py │ │ ├── lp_notifications.py │ │ ├── lp_schedules.py │ │ ├── reference_launch_plan.py │ │ ├── reference_task.py │ │ └── use_secrets.py ├── ray_plugin │ ├── Dockerfile │ ├── README.md │ ├── ray_plugin │ │ ├── __init__.py │ │ └── ray_example.py │ └── requirements.in ├── sagemaker_inference_agent │ ├── Dockerfile │ ├── README.md │ ├── requirements.in │ └── sagemaker_inference_agent │ │ ├── __init__.py │ │ └── sagemaker_inference_agent_example_usage.py ├── sensor │ ├── README.md │ └── sensor │ │ ├── __init__.py │ │ └── file_sensor_example.py ├── slurm_agent │ ├── Dockerfile │ ├── README.md │ ├── requirements.in │ └── slurm_agent │ │ ├── __init__.py │ │ └── slurm_agent_example_usage.py ├── snowflake_agent │ ├── Dockerfile │ ├── README.md │ ├── requirements.in │ └── snowflake_agent │ │ ├── __init__.py │ │ └── snowflake_agent_example_usage.py ├── snowflake_plugin │ ├── Dockerfile │ ├── README.md │ ├── requirements.in │ ├── snowflake_plugin.md │ └── snowflake_plugin │ │ ├── __init__.py │ │ └── snowflake_plugin_example.py ├── sql_plugin │ ├── Dockerfile │ ├── README.md │ ├── requirements.in │ └── sql_plugin │ │ ├── __init__.py │ │ ├── sql_alchemy.py │ │ └── sqlite3_integration.py ├── testing │ ├── README.md │ └── testing │ │ ├── __init__.py │ │ └── mocking.py ├── wandb_plugin │ ├── 
Dockerfile │ ├── README.md │ ├── requirements.in │ └── wandb_plugin │ │ ├── __init__.py │ │ └── wandb_example.py └── whylogs_plugin │ ├── .gitignore │ ├── README.md │ ├── requirements.in │ └── whylogs_plugin │ ├── __init__.py │ └── whylogs_example.py ├── flyte_tests.txt ├── flyte_tests_manifest.json ├── flyte_tests_validate.py ├── pyproject.toml └── scripts ├── create-example-project.sh ├── pip-compile.sh └── serialize-example.sh /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 180 3 | extend-ignore = E203, E266, E501, W503, E741 4 | exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,venv/*,src/*,.rst,build 5 | max-complexity=16 6 | -------------------------------------------------------------------------------- /.github/actions/clear-action-cache/action.yml: -------------------------------------------------------------------------------- 1 | name: 'Clear action cache' 2 | description: 'As suggested by GitHub to prevent low disk space: https://github.com/actions/runner-images/issues/2840#issuecomment-790492173' 3 | runs: 4 | using: 'composite' 5 | steps: 6 | - shell: bash 7 | run: | 8 | rm -rf /usr/share/dotnet 9 | rm -rf /opt/ghc 10 | rm -rf "/usr/local/share/boost" 11 | rm -rf "$AGENT_TOOLSDIRECTORY" 12 | -------------------------------------------------------------------------------- /.github/workflows/upgrade_automation.yml: -------------------------------------------------------------------------------- 1 | name: Upgrade Automation 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | component: 6 | description: "Name of Flyte component" 7 | required: true 8 | default: "boilerplate" 9 | type: choice 10 | options: 11 | - boilerplate 12 | 13 | jobs: 14 | trigger-upgrade: 15 | name: ${{ github.event.inputs.component }} Upgrade 16 | uses: flyteorg/flytetools/.github/workflows/flyte_automation.yml@master 17 | with: 18 | component: ${{ github.event.inputs.component }} 19 | secrets: 20 | FLYTE_BOT_PAT: 
${{ secrets.FLYTE_BOT_PAT }} 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .pyc 3 | .idea 4 | .jpg 5 | .ipynb_checkpoints/ 6 | *.dat 7 | .DS_Store 8 | gen_modules 9 | *.h5 10 | data/ 11 | *.pt 12 | logs/ 13 | _pb_output/ 14 | _build/ 15 | .vim/ 16 | ._rsts/ 17 | *.zip 18 | .sandbox/bin/ 19 | .sandbox/data/ 20 | .vscode/ 21 | *-out.html 22 | *-out.ipynb 23 | .python-version 24 | release-snacks 25 | .kube/ 26 | .docker/ 27 | .cache/ 28 | download-artifact/ 29 | typescript 30 | .bash_history 31 | .venv/ 32 | docs/_tags/ 33 | docs/_flyte_decks/ 34 | .ipython/ 35 | auto_*/ 36 | docs/auto/* 37 | docs/_rsts/ 38 | .hypothesis 39 | .config/ 40 | test.ipynb 41 | jupyter_execute 42 | _flyte_decks 43 | .keras 44 | **/flyte-package.tgz 45 | .hypothesis 46 | docs/sg_execution_times.rst 47 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: flytesnacks 2 | builds: 3 | - skip: true 4 | release: 5 | github: 6 | owner: flyteorg 7 | name: flytesnacks 8 | draft: false 9 | prerelease: auto 10 | name_template: "{{.ProjectName}} v{{.Version}}" 11 | disable: false 12 | extra_files: 13 | - glob: ./release-snacks/* 14 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | # Ruff version. 4 | rev: v0.1.6 5 | hooks: 6 | # Run the linter. 7 | - id: ruff 8 | args: [--fix] 9 | # Run the formatter. 
10 | - id: ruff-format 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.0.1 13 | hooks: 14 | - id: check-yaml 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | - repo: https://github.com/shellcheck-py/shellcheck-py 18 | rev: v0.7.2.1 19 | hooks: 20 | - id: shellcheck 21 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | build: 8 | os: "ubuntu-22.04" 9 | tools: 10 | python: "3.12" 11 | apt_packages: 12 | - pandoc 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # Optionally set the version of Python and requirements required to build your docs 19 | python: 20 | install: 21 | - requirements: dev-requirements.txt 22 | - requirements: docs-requirements.txt 23 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in 2 | # the repo. Unless a later match takes precedence. 3 | * @ppiegaze 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | This project is governed by LF AI Foundation's [code of conduct](https://lfprojects.org/policies/code-of-conduct/). 2 | All contributors and participants agree to abide by its terms. 
3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export REPOSITORY=flytesnacks 2 | include boilerplate/flyte/end2end/Makefile 3 | .SILENT: 4 | 5 | define PIP_COMPILE 6 | pip-compile $(1) ${PIP_ARGS} --upgrade --verbose --resolver=backtracking 7 | endef 8 | 9 | install-piptools: 10 | pip install pip-tools 11 | 12 | dev-requirements.txt: export CUSTOM_COMPILE_COMMAND := $(MAKE) dev-requirements.txt 13 | dev-requirements.txt: dev-requirements.in install-piptools 14 | $(call PIP_COMPILE,dev-requirements.in) 15 | 16 | .PHONY: dev-requirements 17 | dev-requirements: dev-requirements.txt 18 | 19 | docs-requirements.txt: export CUSTOM_COMPILE_COMMAND := $(MAKE) docs-requirements.txt 20 | docs-requirements.txt: docs-requirements.in install-piptools 21 | $(call PIP_COMPILE,docs-requirements.in) 22 | 23 | .PHONY: docs-requirements 24 | docs-requirements: docs-requirements.txt 25 | 26 | .PHONY: fmt 27 | fmt: ## Format code with ruff 28 | pre-commit run ruff --all-files || true 29 | pre-commit run ruff-format --all-files || true 30 | 31 | .PHONY: update_boilerplate 32 | update_boilerplate: 33 | @boilerplate/update.sh 34 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | flytesnacks 2 | Copyright 2019 Lyft Inc. 3 | 4 | This product includes software developed at Lyft Inc. 
5 | This product includes software derived from https://github.com/kubernetes/sample-controller 6 | -------------------------------------------------------------------------------- /_example_template/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | # This is necessary for opencv to work 11 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl 12 | 13 | WORKDIR /root 14 | 15 | ENV VENV /opt/venv 16 | # Virtual environment 17 | RUN python3 -m venv ${VENV} 18 | ENV PATH="${VENV}/bin:$PATH" 19 | 20 | # Install Python dependencies 21 | COPY requirements.in /root 22 | RUN pip install -r /root/requirements.in 23 | RUN pip freeze 24 | 25 | # Copy the actual code 26 | COPY . /root 27 | 28 | # This tag is supplied by the build script and will be used to determine the version 29 | # when registering tasks, workflows, and launch plans 30 | ARG tag 31 | ENV FLYTE_INTERNAL_IMAGE $tag 32 | -------------------------------------------------------------------------------- /_example_template/README.md: -------------------------------------------------------------------------------- 1 | # Example Template 2 | 3 | Replace this with a description of what the example project is about. 
4 | 5 | ```{auto-examples-toc} 6 | example 7 | ``` 8 | -------------------------------------------------------------------------------- /_example_template/_example_template/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/_example_template/_example_template/__init__.py -------------------------------------------------------------------------------- /_example_template/_example_template/example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/_example_template/_example_template/example.py -------------------------------------------------------------------------------- /_example_template/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=1.7.0 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | -------------------------------------------------------------------------------- /boilerplate/flyte/code_of_conduct/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | This project is governed by LF AI Foundation's [code of conduct](https://lfprojects.org/policies/code-of-conduct/). 2 | All contributors and participants agree to abide by its terms. 
3 | -------------------------------------------------------------------------------- /boilerplate/flyte/code_of_conduct/README.rst: -------------------------------------------------------------------------------- 1 | CODE OF CONDUCT 2 | ~~~~~~~~~~~~~~~ 3 | -------------------------------------------------------------------------------- /boilerplate/flyte/code_of_conduct/update.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # WARNING: THIS FILE IS MANAGED IN THE 'BOILERPLATE' REPO AND COPIED TO OTHER REPOSITORIES. 4 | # ONLY EDIT THIS FILE FROM WITHIN THE 'FLYTEORG/BOILERPLATE' REPOSITORY: 5 | # 6 | # TO OPT OUT OF UPDATES, SEE https://github.com/flyteorg/boilerplate/blob/master/Readme.rst 7 | 8 | set -e 9 | 10 | DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" 11 | 12 | cp "$DIR"/CODE_OF_CONDUCT.md "$DIR"/../../../CODE_OF_CONDUCT.md 13 | -------------------------------------------------------------------------------- /boilerplate/flyte/end2end/Makefile: -------------------------------------------------------------------------------- 1 | # WARNING: THIS FILE IS MANAGED IN THE 'BOILERPLATE' REPO AND COPIED TO OTHER REPOSITORIES. 
2 | # ONLY EDIT THIS FILE FROM WITHIN THE 'FLYTEORG/BOILERPLATE' REPOSITORY: 3 | # 4 | # TO OPT OUT OF UPDATES, SEE https://github.com/flyteorg/boilerplate/blob/master/Readme.rst 5 | 6 | .PHONY: end2end_execute 7 | end2end_execute: export FLYTESNACKS_PRIORITIES ?= P0 8 | end2end_execute: export FLYTESNACKS_VERSION ?= $(shell curl --silent "https://api.github.com/repos/flyteorg/flytesnacks/releases/latest" | jq -r .tag_name) 9 | end2end_execute: 10 | ./boilerplate/flyte/end2end/end2end.sh ./boilerplate/flyte/end2end/functional-test-config.yaml --return_non_zero_on_failure 11 | 12 | .PHONY: k8s_integration_execute 13 | k8s_integration_execute: 14 | echo "pass" 15 | -------------------------------------------------------------------------------- /boilerplate/flyte/end2end/end2end.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # WARNING: THIS FILE IS MANAGED IN THE 'BOILERPLATE' REPO AND COPIED TO OTHER REPOSITORIES. 4 | # ONLY EDIT THIS FILE FROM WITHIN THE 'FLYTEORG/BOILERPLATE' REPOSITORY: 5 | # 6 | # TO OPT OUT OF UPDATES, SEE https://github.com/flyteorg/boilerplate/blob/master/Readme.rst 7 | set -eu 8 | 9 | CONFIG_FILE=$1 10 | shift 11 | EXTRA_FLAGS=("$@") 12 | 13 | python ./boilerplate/flyte/end2end/run-tests.py "$FLYTESNACKS_VERSION" "$FLYTESNACKS_PRIORITIES" "$CONFIG_FILE" "${EXTRA_FLAGS[@]}" 14 | -------------------------------------------------------------------------------- /boilerplate/flyte/end2end/functional-test-config.yaml: -------------------------------------------------------------------------------- 1 | admin: 2 | # For GRPC endpoints you might want to use dns:///flyte.myexample.com 3 | endpoint: dns:///localhost:30080 4 | authType: Pkce 5 | insecure: true 6 | -------------------------------------------------------------------------------- /boilerplate/update.cfg: -------------------------------------------------------------------------------- 1 | flyte/end2end 2 | 
flyte/code_of_conduct 3 | -------------------------------------------------------------------------------- /dev-requirements.in: -------------------------------------------------------------------------------- 1 | autoflake 2 | flytekit 3 | coverage 4 | pre-commit 5 | codespell 6 | mock 7 | pydantic>2 8 | pytest 9 | mypy 10 | mashumaro 11 | xgboost 12 | dask[dataframe] 13 | -------------------------------------------------------------------------------- /docs-requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | pydata-sphinx-theme 3 | fsspec 4 | mashumaro 5 | pillow 6 | psycopg2-binary 7 | flytekitplugins-deck-standard 8 | flytekitplugins-kfpytorch 9 | flytekitplugins-sqlalchemy 10 | jupytext 11 | sphinx 12 | sphinx-gallery 13 | sphinx-prompt 14 | sphinx-code-include 15 | sphinx-autoapi 16 | sphinx-copybutton 17 | sphinx-reredirects 18 | sphinxext-remoteliteralinclude 19 | sphinx_fontawesome 20 | sphinx-panels 21 | sphinxcontrib-mermaid 22 | sphinxcontrib-youtube 23 | sphinx-tabs 24 | astroid 25 | grpcio 26 | grpcio-status 27 | myst-nb 28 | ipython!=8.7.0 29 | sphinx-tags 30 | scikit-learn 31 | torch 32 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: rsts 2 | rsts: 3 | sphinx-apidoc --force --tocfile index --ext-autodoc --output-dir _rsts ../../ 4 | 5 | .PHONY: html 6 | html: 7 | sphinx-build ${SPHINXOPTS} -n -b html . _build 8 | 9 | .PHONY: linkcheck 10 | linkcheck: 11 | sphinx-build -n -b linkcheck . _build 12 | 13 | .PHONY: doctest 14 | doctest: 15 | sphinx-build -n -b doctest . 
_build 16 | 17 | .PHONY: clear-cache 18 | clear-cache: 19 | pyflyte local-cache clear 20 | 21 | .PHONY: clean 22 | clean: clear-cache 23 | rm -rf _build 24 | rm -rf auto_* 25 | rm -rf auto/* 26 | rm -rf auto_examples/* 27 | rm -rf _tags/* 28 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # FlyteSnacks Docs 2 | Welcome to Flytesnacks documentation. To generate the documentation, first 3 | install dev_requirements.txt, then run 4 | 5 | ```bash 6 | make html 7 | ``` 8 | 9 | ## How do the docs work? 10 | Flytesnacks uses the concept of `Literate Programming`_ to generate the documentation from the examples themselves. To achieve this it uses the excellent `Sphinx-Gallery Plugin`_ to render comments and codes inline in the docs. 11 | To make this work, it is essential that the examples are written with comments following the Sphinx-Gallery style of coding. Some important things to note: 12 | - The example directory should have a README.rst. 13 | - The example itself should have a header comment, which should have a heading 14 | as well. 15 | - Docs interspersed in the example should proceed with `# %%` comment and then 16 | multiline comments should not have blank spaces between them. 17 | ```rst 18 | # %% 19 | # my very important comment 20 | # 21 | # some other stuff 22 | def foo(): 23 | ... 24 | ``` 25 | - prompts should use 26 | ```rst 27 | .. 
prompt::bash 28 | 29 | flytectl --version 30 | ``` 31 | -------------------------------------------------------------------------------- /docs/_static/code-example-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/docs/_static/code-example-icon.png -------------------------------------------------------------------------------- /docs/_static/custom.js: -------------------------------------------------------------------------------- 1 | function resizePlotly() { 2 | var plotly = jQuery( ".plotly-graph-div" ) 3 | plotly.attr("style", "height: 100%; width: 100%;") 4 | window.dispatchEvent(new Event('resize')); 5 | plotly.addClass("show"); 6 | } 7 | 8 | jQuery(document).ready(function() { 9 | // make sure that plotly plots only take 100% of the width of the container 10 | resizePlotly() 11 | }) 12 | -------------------------------------------------------------------------------- /docs/_static/flyte_circle_gradient_1_4x4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/docs/_static/flyte_circle_gradient_1_4x4.png -------------------------------------------------------------------------------- /docs/_static/sphx_gallery_autogen.css: -------------------------------------------------------------------------------- 1 | 2 | #sphx-glr-download-auto-core-type-system-flyte-python-types-py, 3 | #sphx-glr-download-auto-core-containerization-private-images-py, 4 | #sphx-glr-download-auto-core-containerization-workflow-labels-annotations-py, 5 | #sphx-glr-download-auto-deployment-configure-logging-links-py, 6 | #sphx-glr-download-auto-deployment-configure-use-gpus-py, 7 | #sphx-glr-download-auto-remote-access-register-project-py, 8 | #sphx-glr-download-auto-remote-access-remote-workflow-py, 9 | 
#sphx-glr-download-auto-remote-access-remote-launchplan-py, 10 | #sphx-glr-download-auto-remote-access-inspecting-executions-py, 11 | #sphx-glr-download-auto-remote-access-debugging-workflows-tasks-py, 12 | #sphx-glr-download-auto-remote-access-remote-task-py, 13 | #sphx-glr-download-auto-core-extend-flyte-backend-plugins-py, 14 | #sphx-glr-download-auto-core-extend-flyte-agent-service-py, 15 | #sphx-glr-download-auto-core-extend-flyte-container-interface-py, 16 | #sphx-glr-download-auto-core-extend-flyte-prebuilt-container-py { 17 | height: 0px; 18 | visibility: hidden; 19 | } 20 | -------------------------------------------------------------------------------- /docs/_templates/sidebar/brand.html: -------------------------------------------------------------------------------- 1 | 19 | -------------------------------------------------------------------------------- /docs/flyte_lockup_on_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/docs/flyte_lockup_on_dark.png -------------------------------------------------------------------------------- /docs/flyte_mark_offset_pink.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/docs/flyte_mark_offset_pink.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | 8 | # override the toc-determined page navigation order 9 | next-page: getting_started/quickstart_guide 10 | next-page-title: Quickstart guide 11 | --- 12 | 13 | (getting_started_index)= 14 | 15 | # Flytesnacks 16 | 17 | Welcome to Flytesnacks. 
**Feature Engineering** is an essential part of Machine Learning. It's the process
of transforming raw data into features that better represent the underlying problem
to the predictive models, resulting in improved model accuracy on unseen data.
8 | 9 | ```{list-table} 10 | :header-rows: 0 11 | :widths: 20 30 12 | 13 | * - {doc}`EDA and Feature Engineering With Papermill ` 14 | - How to use Jupyter notebook within Flyte 15 | * - {doc}`Data Cleaning and Feature Serving With Feast ` 16 | - How to use Feast to serve data in Flyte 17 | ``` 18 | 19 | 20 | ```{toctree} 21 | :maxdepth: -1 22 | :caption: Contents 23 | :hidden: 24 | 25 | /auto_examples/exploratory_data_analysis/index 26 | /auto_examples/feast_integration/index 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/tutorials/flytelab/index.md: -------------------------------------------------------------------------------- 1 | # Flytelab 2 | 3 | This section contains end-to-end ML projects using Flyte. 4 | 5 | 6 | ```{list-table} 7 | :header-rows: 0 8 | :widths: 20 30 9 | 10 | * - {doc}`Weather Forecasting ` 11 | - Build an online weather forecasting application. 12 | ``` 13 | 14 | 15 | ```{toctree} 16 | :maxdepth: -1 17 | :caption: Contents 18 | :hidden: 19 | 20 | weather_forecasting 21 | ``` 22 | -------------------------------------------------------------------------------- /docs/tutorials/flytelab/weather_forecasting.md: -------------------------------------------------------------------------------- 1 | # Weather Forecasting 2 | 3 | ```{image} https://img.shields.io/badge/Blog-Weather%20Forecasting-blue?style=for-the-badge 4 | :target: https://blog.flyte.org/building-a-weather-forecasting-application-with-flyte-pandera-and-streamlit 5 | :alt: Weather Forecasting Blog Post 6 | ``` 7 | 8 | ```{image} https://img.shields.io/badge/Repo-Weather%20Forecasting-blue?style=for-the-badge 9 | :target: https://github.com/flyteorg/flytelab/tree/main/projects/weather_forecasting 10 | :alt: Weather Forecasting Repo 11 | ``` 12 | 13 | Learn how to build an online weather forecasting system that updates a model daily and displays hourly forecasts on a web UI, using Pandera and Streamlit. 
14 | 15 | The video below briefly shows how the Weather Forecasting app is made, a few launch plans, and a Streamlit demo. Find the [complete video](https://youtu.be/c-X1u42uK-g) on YouTube. 16 | 17 | ```{youtube} aovn_01bzwU 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/tutorials/model_training/index.md: -------------------------------------------------------------------------------- 1 | # Model training 2 | 3 | Understand how machine learning models can be trained from within Flyte, with an added advantage of orchestration benefits. 4 | 5 | ```{list-table} 6 | :header-rows: 0 7 | :widths: 20 30 8 | 9 | * - {doc}`Diabetes Classification ` 10 | - Train an XGBoost model on the Pima Indians Diabetes Dataset. 11 | * - {doc}`House Price Regression ` 12 | - Use dynamic workflows to train a multiregion house price prediction model using XGBoost. 13 | * - {doc}`MNIST Classification ` 14 | - Train a neural network on MNIST with PyTorch and W&B 15 | * - {doc}`NLP Processing with Gensim ` 16 | - Word embedding and topic modelling on lee background corpus with Gensim 17 | * - {doc}`Forecast Sales Using Rossmann Store Sales ` 18 | - Forecast sales data with data-parallel distributed training using Horovod on Spark. 19 | ``` 20 | 21 | ```{toctree} 22 | :maxdepth: -1 23 | :caption: Contents 24 | :hidden: 25 | 26 | /auto_examples/pima_diabetes/index 27 | /auto_examples/house_price_prediction/index 28 | /auto_examples/mnist_classifier/index 29 | /auto_examples/nlp_processing/index 30 | /auto_examples/forecasting_sales/index 31 | ``` 32 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Flyte Examples 2 | 3 | This directory contains a series of example projects that demonstrate how to use 4 | Flyte. 
The basic structure of the examples is as follows: 5 | 6 | ```{code-block} bash 7 | example_project 8 | ├── README.md # High-level description of the example project 9 | ├── Dockerfile # Dockerfile for packaging up the project requirements 10 | ├── requirements.in # Minimal python requirements for the project 11 | ├── requirements.txt # Compiled python requirements using pip-compile 12 | └── example_project # Python package containing examples with the same name as the project 13 |    ├── __init__.py 14 |    ├── example_01.py 15 |    ├── example_02.py 16 |    ├── ... 17 |    └── example_n.py 18 | ``` 19 | 20 | These example projects are meant to be stand-alone projects that can be built 21 | and run by themselves. 22 | -------------------------------------------------------------------------------- /examples/advanced_composition/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM python:3.11-slim-buster 5 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | 13 | RUN apt-get update && apt-get install -y build-essential curl 14 | 15 | # Virtual environment 16 | ENV VENV /opt/venv 17 | RUN python3 -m venv ${VENV} 18 | ENV PATH="${VENV}/bin:$PATH" 19 | 20 | RUN pip install flytekit 21 | 22 | # Copy the actual code 23 | COPY . 
from flytekit import LaunchPlan, task, workflow


# Three no-op tasks used to demonstrate explicit ordering below.
@task
def t2():
    print("Running t2")
    return


@task
def t1():
    print("Running t1")
    return


@task
def t0():
    print("Running t0")
    return


# Chaining tasks
# The tasks have no data dependencies, so Flyte would otherwise be free to run
# them in any order. The `>>` operator on their promises imposes an explicit
# execution order: t0 runs before t1, and t1 runs before t2.
@workflow
def chain_tasks_wf():
    t2_promise = t2()
    t1_promise = t1()
    t0_promise = t0()

    t0_promise >> t1_promise
    t1_promise >> t2_promise


# Chaining subworkflows
@workflow
def sub_workflow_1():
    t1()


@workflow
def sub_workflow_0():
    t0()


# Sub-workflow invocations can be ordered the same way:
# sub_workflow_0 completes before sub_workflow_1 starts.
@workflow
def chain_workflows_wf():
    sub_wf1 = sub_workflow_1()
    sub_wf0 = sub_workflow_0()

    sub_wf0 >> sub_wf1
from flytekit import current_context, task, workflow
from flytekit.exceptions.user import FlyteRecoverableException

RETRIES = 3


# Define a task to iterate precisely `n_iterations`, checkpoint its state, and recover from simulated failures.
@task(retries=RETRIES)
def use_checkpoint(n_iterations: int) -> int:
    """Count up to ``n_iterations``, persisting progress via the Flyte checkpoint API.

    The task deliberately raises a recoverable error at regular intervals to
    demonstrate that each retry resumes from the last checkpointed index
    rather than from zero.

    :param n_iterations: total number of iterations to perform.
    :return: the index of the last completed iteration.
    """
    cp = current_context().checkpoint
    prev = cp.read()

    # Resume from the previously checkpointed index, if one exists.
    start = 0
    if prev:
        start = int(prev.decode())

    # Create a failure interval to simulate failures across 'n' iterations and then succeed
    # after the configured retries. Guard with max(1, ...): for n_iterations < RETRIES the
    # floor division yields 0, and `index % 0` below would raise ZeroDivisionError.
    failure_interval = max(1, n_iterations // RETRIES)
    index = 0
    for index in range(start, n_iterations):
        # Simulate a deterministic failure for demonstration. Showcasing how it eventually completes within the given retries
        if index > start and index % failure_interval == 0:
            raise FlyteRecoverableException(f"Failed at iteration {index}, failure_interval {failure_interval}.")
        # Save progress state. It is also entirely possible to save state every few intervals
        cp.write(f"{index + 1}".encode())
    return index


# Create a workflow that invokes the task.
# The task will automatically undergo retries in the event of a FlyteRecoverableException.
@workflow
def checkpointing_example(n_iterations: int) -> int:
    return use_checkpoint(n_iterations=n_iterations)


# The local checkpoint is not utilized here because retries are not supported.
if __name__ == "__main__":
    try:
        checkpointing_example(n_iterations=10)
    except RuntimeError as e:  # noqa : F841
        # Since no retries are performed, an exception is expected when run locally
        pass
import logging
from functools import partial, wraps

from flytekit import task, workflow

# Create a logger to monitor the execution's progress.
logger = logging.getLogger(__file__)


# Using a single decorator
def log_io(fn):
    """Decorator that logs the wrapped callable's inputs and output."""

    @wraps(fn)
    def wrapper(*args, **kwargs):
        logger.info(f"task {fn.__name__} called with args: {args}, kwargs: {kwargs}")
        out = fn(*args, **kwargs)
        logger.info(f"task {fn.__name__} output: {out}")
        return out

    return wrapper


# Create a task named `t1` that is decorated with `log_io`.
@task
@log_io
def t1(x: int) -> int:
    return x + 1


# Stacking multiple decorators
def validate_output(fn=None, *, floor=0):
    """Decorator that rejects any output not strictly greater than ``floor``.

    Usable both bare (``@validate_output``) and with arguments
    (``@validate_output(floor=10)``): in the latter case ``fn`` is None on the
    first call and a partially-applied decorator is returned.
    """

    @wraps(fn)
    def wrapper(*args, **kwargs):
        out = fn(*args, **kwargs)
        if out <= floor:
            # Fixed message: the check is against `floor`, not zero, so saying
            # "must be a positive number" was misleading whenever floor != 0.
            raise ValueError(f"output of task {fn.__name__} must be greater than {floor}, found {out}")
        return out

    if fn is None:
        return partial(validate_output, floor=floor)

    return wrapper


# Define a function that uses both the logging and validator decorators
@task
@log_io
@validate_output(floor=10)
def t2(x: int) -> int:
    return x + 10


# Compose a workflow that calls `t1` and `t2`
@workflow
def decorating_task_wf(x: int) -> int:
    return t2(x=t1(x=x))


if __name__ == "__main__":
    print(f"Running decorating_task_wf(x=10) {decorating_task_wf(x=10)}")
# %% [markdown]
# (airflow_agent_example_usage)=
# # Airflow agent example usage
# [Apache Airflow](https://airflow.apache.org) is a widely used open source
# platform for managing workflows with a robust ecosystem. Flyte provides an
# Airflow plugin that allows you to run Airflow tasks as Flyte tasks.
# This allows you to use the Airflow plugin ecosystem in conjunction with
# Flyte's powerful task execution and orchestration capabilities.
# %%

from airflow.operators.bash import BashOperator
from airflow.sensors.filesystem import FileSensor
from flytekit import task, workflow


@task()
def t1():
    print("success")


# %% [markdown]
# Use the Airflow `FileSensor` to wait for a file to appear before running the task.
# %%
@workflow
def file_sensor():
    # `>>` orders the sensor before t1: t1 only starts once /tmp/1234 exists.
    sensor = FileSensor(task_id="id", filepath="/tmp/1234")
    sensor >> t1()


# %% [markdown]
# Use the Airflow `BashOperator` to run a bash command.
# %%
@workflow
def bash_sensor():
    # The bash command runs first; t1 runs after it completes.
    op = BashOperator(task_id="airflow_bash_operator", bash_command="echo hello")
    op >> t1()
14 | 15 | The primary use case of this provider is to **scale Airflow for machine learning tasks using Flyte**. 16 | With the Flyte Airflow provider, you can construct your ETL pipelines in Airflow and machine learning pipelines in Flyte 17 | and use the provider to trigger machine learning or Flyte pipelines from within Airflow. 18 | 19 | ## Installation 20 | 21 | ``` 22 | pip install airflow-provider-flyte 23 | ``` 24 | 25 | All the configuration options for the provider are available in the provider repo's [README](https://github.com/flyteorg/airflow-provider-flyte#readme). 26 | 27 | ```{auto-examples-toc} 28 | airflow 29 | ``` 30 | -------------------------------------------------------------------------------- /examples/airflow_plugin/airflow_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/airflow_plugin/airflow_plugin/__init__.py -------------------------------------------------------------------------------- /examples/athena_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | # This is necessary for opencv to work 11 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl 12 | 13 | # Install the AWS cli separately to prevent issues with boto being written over 14 | RUN pip3 install awscli 15 | 16 | WORKDIR /opt 17 | RUN curl https://sdk.cloud.google.com > install.sh 18 | RUN bash /opt/install.sh --install-dir=/opt 19 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 20 | WORKDIR /root 21 | 22 | ENV VENV /opt/venv 23 | # Virtual environment 24 | RUN python3 -m venv ${VENV} 25 | ENV 
PATH="${VENV}/bin:$PATH" 26 | 27 | # Install Python dependencies 28 | COPY requirements.in /root 29 | RUN pip install -r /root/requirements.in 30 | 31 | # Copy the actual code 32 | COPY . /root/ 33 | 34 | # This tag is supplied by the build script and will be used to determine the version 35 | # when registering tasks, workflows, and launch plans 36 | ARG tag 37 | ENV FLYTE_INTERNAL_IMAGE $tag 38 | -------------------------------------------------------------------------------- /examples/athena_plugin/README.md: -------------------------------------------------------------------------------- 1 | (aws-athena)= 2 | 3 | # AWS Athena 4 | 5 | ```{eval-rst} 6 | .. tags:: Data, Integration, AWS, Advanced 7 | ``` 8 | 9 | ## Executing Athena Queries 10 | 11 | Flyte backend can be connected with Athena. Once enabled, it allows you to query AWS Athena service (Presto + ANSI SQL Support) and retrieve typed schema (optionally). 12 | This plugin is purely a spec and since SQL is completely portable, it has no need to build a container. Thus this plugin example does not have any Dockerfile. 13 | 14 | ### Installation 15 | 16 | To use the flytekit Athena plugin, simply run the following: 17 | 18 | ```{eval-rst} 19 | .. prompt:: bash 20 | 21 | pip install flytekitplugins-athena 22 | ``` 23 | 24 | Now let's dive into the code. 
25 | 26 | ```{auto-examples-toc} 27 | athena 28 | ``` 29 | -------------------------------------------------------------------------------- /examples/athena_plugin/athena_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/athena_plugin/athena_plugin/__init__.py -------------------------------------------------------------------------------- /examples/athena_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-athena>=0.20.0 6 | -------------------------------------------------------------------------------- /examples/aws_batch_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # This is necessary for opencv to work 10 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl 11 | 12 | # Install the AWS cli separately to prevent issues with boto being written over 13 | RUN pip3 install awscli 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | ENV VENV /opt/venv 22 | # Virtual environment 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root 28 | RUN pip install --no-deps -r /root/requirements.in 29 | 30 | # Copy the actual code 31 | COPY . 
/root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | -------------------------------------------------------------------------------- /examples/aws_batch_plugin/README.md: -------------------------------------------------------------------------------- 1 | # AWS Batch 2 | 3 | ```{eval-rst} 4 | .. tags:: Data, Integration, AWS, Advanced 5 | ``` 6 | 7 | ## Executing Batch Job 8 | 9 | Flyte backend can be connected with batch. Once enabled, it allows you to run regular task on AWS batch. 10 | 11 | This section provides a guide on how to use the AWS Batch Plugin using flytekit python. 12 | 13 | ### Installation 14 | 15 | To use the flytekit batch plugin simply run the following: 16 | 17 | ```{eval-rst} 18 | .. prompt:: bash 19 | 20 | pip install flytekitplugins-awsbatch 21 | ``` 22 | 23 | ### Configuring the backend to get AWS Batch working 24 | 25 | [Follow this guide to setting up the AWS Batch Plugin]() 26 | 27 | ### Quick Start 28 | 29 | This plugin allows you to run batch tasks on AWS and only requires you to change a few lines of code. 30 | We can then move workflow execution from Kubernetes to AWS. 31 | 32 | ```{eval-rst} 33 | .. 
```{eval-rst}
.. testcode:: awsbatch-quickstart

    from flytekitplugins.awsbatch import AWSBatchConfig

    config = AWSBatchConfig(
        parameters={"codec": "mp4"},
        platformCapabilities="EC2",
        propagateTags=True,
        retryStrategy={"attempts": 10},
        tags={"hello": "world"},
        timeout={"attemptDurationSeconds": 60},
    )

    @task(task_config=config)
    def t1(a: int) -> str:
        return str(a)
```
# Configure SubmitJobInput for an AWS Batch job; tasks marked with this config
# execute natively on the AWS Batch service.
config = AWSBatchConfig(
    parameters={"codec": "mp4"},
    platformCapabilities="EC2",
    tags={"name": "flyte-example"},
)


@task(task_config=config)
def t1(a: int) -> int:
    return a + 2


@task(task_config=config)
def t2(b: int) -> int:
    return b * 10


# Chain the two batch tasks: my_wf(a) == (a + 2) * 10.
@workflow
def my_wf(a: int) -> int:
    b = t1(a=a)
    return t2(b=b)


if __name__ == "__main__":
    # Fixed stray quote inside the message: was "Running my_wf(a=3') ...".
    print(f"Running my_wf(a=3) {my_wf(a=3)}")
from flytekit import task, workflow


# A minimal Flyte task: the `@task` decorator promotes this plain Python
# function into a unit of execution that produces the string "Hello, World!".
@task
def say_hello() -> str:
    return "Hello, World!"


# Workflows compose tasks; a task's output can be returned directly,
# just like the result of an ordinary function call.
@workflow
def hello_world_wf() -> str:
    return say_hello()


# Calling the workflow like a normal Python function runs it locally.
if __name__ == "__main__":
    print(f"Running hello_world_wf() {hello_world_wf()}")
from flytekit import Workflow

# Import the `slope` and `intercept` tasks from the workflow.py file
from .workflow import intercept, slope

# Create an imperative workflow: inputs, nodes, and outputs are wired
# explicitly, rather than being inferred from a decorated function body.
imperative_wf = Workflow(name="imperative_workflow")


# Add the workflow inputs to the imperative workflow
imperative_wf.add_workflow_input("x", list[int])
imperative_wf.add_workflow_input("y", list[int])


# Add the tasks that need to be triggered from within the workflow.
# The slope node's output feeds the intercept node's `slope` parameter.
# NOTE(review): "o0" appears to be the default name of a task's first output — confirm.
node_t1 = imperative_wf.add_entity(slope, x=imperative_wf.inputs["x"], y=imperative_wf.inputs["y"])
node_t2 = imperative_wf.add_entity(
    intercept, x=imperative_wf.inputs["x"], y=imperative_wf.inputs["y"], slope=node_t1.outputs["o0"]
)


# Add the workflow output
imperative_wf.add_workflow_output("wf_output", node_t2.outputs["o0"])


# Execute the workflow locally as follows
if __name__ == "__main__":
    print(f"Running imperative_wf() {imperative_wf(x=[-3, 0, 3], y=[7, 4, -2])}")
from flytekit import LaunchPlan, current_context

# Import the workflow from the workflow.py file
# in order to create a launch plan for it
from .workflow import simple_wf

# Create a default launch plan with no inputs during serialization
default_lp = LaunchPlan.get_default_launch_plan(current_context(), simple_wf)

# Run the launch plan locally.
# NOTE: these module-level calls execute at import time when the file is loaded.
default_lp(x=[-3, 0, 3], y=[7, 4, -2])

# Create a launch plan and specify the default inputs
simple_wf_lp = LaunchPlan.create(
    name="simple_wf_lp", workflow=simple_wf, default_inputs={"x": [-3, 0, 3], "y": [7, 4, -2]}
)

# Trigger the launch plan locally; with no arguments the defaults above are used
simple_wf_lp()

# Override the defaults as follows
simple_wf_lp(x=[3, 5, 3], y=[-3, 2, -2])

# It's possible to lock launch plan inputs, preventing them from being overridden during execution
simple_wf_lp_fixed_inputs = LaunchPlan.get_or_create(
    name="fixed_inputs", workflow=simple_wf, fixed_inputs={"x": [-3, 0, 3]}
)
This helps prevent potential 31 | # linting errors in tools like mypy. 32 | slope_and_intercept_values = NamedTuple("slope_and_intercept_values", [("slope", float), ("intercept", float)]) 33 | 34 | 35 | @workflow 36 | def simple_wf_with_named_outputs(x: list[int] = [-3, 0, 3], y: list[int] = [7, 4, -2]) -> slope_and_intercept_values: 37 | slope_value = slope(x=x, y=y) 38 | intercept_value = intercept(x=x, y=y, slope=slope_value.slope) 39 | return slope_and_intercept_values(slope=slope_value.slope, intercept=intercept_value.intercept) 40 | 41 | 42 | # Run the workflow locally 43 | if __name__ == "__main__": 44 | print(f"Running simple_wf_with_named_outputs() {simple_wf_with_named_outputs()}") 45 | -------------------------------------------------------------------------------- /examples/basics/basics/task.py: -------------------------------------------------------------------------------- 1 | from flytekit import task 2 | 3 | 4 | # Create a task that computes the slope of a regression line 5 | @task 6 | def slope(x: list[int], y: list[int]) -> float: 7 | sum_xy = sum([x[i] * y[i] for i in range(len(x))]) 8 | sum_x_squared = sum([x[i] ** 2 for i in range(len(x))]) 9 | n = len(x) 10 | return (n * sum_xy - sum(x) * sum(y)) / (n * sum_x_squared - sum(x) ** 2) 11 | 12 | 13 | # Run the task locally like a Python function 14 | if __name__ == "__main__": 15 | print(slope(x=[-3, 0, 3], y=[7, 4, -2])) 16 | -------------------------------------------------------------------------------- /examples/basics/basics/workflow.py: -------------------------------------------------------------------------------- 1 | from flytekit import task, workflow 2 | 3 | 4 | # Define`slope` and `intercept` tasks to compute the slope and 5 | # intercept of the regression line, respectively 6 | @task 7 | def slope(x: list[int], y: list[int]) -> float: 8 | sum_xy = sum([x[i] * y[i] for i in range(len(x))]) 9 | sum_x_squared = sum([x[i] ** 2 for i in range(len(x))]) 10 | n = len(x) 11 | return (n * 
sum_xy - sum(x) * sum(y)) / (n * sum_x_squared - sum(x) ** 2) 12 | 13 | 14 | @task 15 | def intercept(x: list[int], y: list[int], slope: float) -> float: 16 | mean_x = sum(x) / len(x) 17 | mean_y = sum(y) / len(y) 18 | intercept = mean_y - slope * mean_x 19 | return intercept 20 | 21 | 22 | # Define a workflow to establish the task dependencies. 23 | # Like tasks, workflows are strongly typed. 24 | @workflow 25 | def simple_wf(x: list[int], y: list[int]) -> float: 26 | slope_value = slope(x=x, y=y) 27 | intercept_value = intercept(x=x, y=y, slope=slope_value) 28 | return intercept_value 29 | 30 | 31 | # Run the workflow by calling it as you would with a Python function 32 | # and providing the necessary inputs. 33 | if __name__ == "__main__": 34 | print(f"Running simple_wf() {simple_wf(x=[-3, 0, 3], y=[7, 4, -2])}") 35 | 36 | # ## Use `partial` to provide default arguments to tasks 37 | # Use the functools.partial function to assign default or 38 | # constant values to the parameters of your tasks 39 | import functools 40 | 41 | 42 | @workflow 43 | def simple_wf_with_partial(x: list[int], y: list[int]) -> float: 44 | partial_task = functools.partial(slope, x=x) 45 | return partial_task(y=y) 46 | -------------------------------------------------------------------------------- /examples/bigquery_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | RUN apt-get update && apt-get install -y build-essential curl 11 | 12 | WORKDIR /opt 13 | RUN curl https://sdk.cloud.google.com > install.sh 14 | RUN bash /opt/install.sh --install-dir=/opt 15 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 16 | WORKDIR /root 17 | 18 | ENV VENV /opt/venv 19 | # Virtual environment 20 | RUN python3 -m venv ${VENV} 21 | 
ENV PATH="${VENV}/bin:$PATH" 22 | 23 | # Install Python dependencies 24 | COPY requirements.in /root 25 | RUN pip install -r /root/requirements.in 26 | 27 | # Copy the actual code 28 | COPY . /root/ 29 | 30 | # This tag is supplied by the build script and will be used to determine the version 31 | # when registering tasks, workflows, and launch plans 32 | ARG tag 33 | ENV FLYTE_INTERNAL_IMAGE $tag 34 | -------------------------------------------------------------------------------- /examples/bigquery_agent/README.md: -------------------------------------------------------------------------------- 1 | (bigquery_agent)= 2 | 3 | # BigQuery agent 4 | 5 | ## Installation 6 | 7 | To install the BigQuery agent, run the following command: 8 | 9 | ```{eval-rst} 10 | .. prompt:: bash 11 | 12 | pip install flytekitplugins-bigquery 13 | ``` 14 | 15 | This agent is purely a spec. Since SQL is completely portable, there is no need to build a Docker container. 16 | 17 | ## Example usage 18 | 19 | For an example query, see {doc}`BigQuery agent example usage`. 20 | 21 | ## Local testing 22 | 23 | To test the BigQuery agent locally, create a class for the agent task that inherits from [AsyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L262). This mixin can handle asynchronous tasks and allows flytekit to mimic FlytePropeller's behavior in calling the agent. For more information, see "[Testing agents locally](https://docs.flyte.org/en/latest/flyte_agents/testing_agents_in_a_local_python_environment.html)". 24 | 25 | ```{note} 26 | 27 | In some cases, you will need to store credentials in your local environment when testing locally. 28 | 29 | ``` 30 | 31 | ## Flyte deployment configuration 32 | 33 | ```{note} 34 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 
35 | ``` 36 | 37 | To enable the BigQuery agent in your Flyte deployment, see the {ref}`BigQuery agent deployment guide`. 38 | 39 | 40 | ```{toctree} 41 | :maxdepth: -1 42 | :hidden: 43 | 44 | bigquery_agent_example_usage 45 | 46 | ``` 47 | -------------------------------------------------------------------------------- /examples/bigquery_agent/bigquery_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/bigquery_agent/bigquery_agent/__init__.py -------------------------------------------------------------------------------- /examples/bigquery_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | pandas 5 | flytekitplugins-deck-standard 6 | flytekitplugins-bigquery 7 | -------------------------------------------------------------------------------- /examples/bigquery_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | RUN apt-get update && apt-get install -y build-essential curl 11 | 12 | WORKDIR /opt 13 | RUN curl https://sdk.cloud.google.com > install.sh 14 | RUN bash /opt/install.sh --install-dir=/opt 15 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 16 | WORKDIR /root 17 | 18 | ENV VENV /opt/venv 19 | # Virtual environment 20 | RUN python3 -m venv ${VENV} 21 | ENV PATH="${VENV}/bin:$PATH" 22 | 23 | # Install Python dependencies 24 | COPY requirements.in /root 25 | RUN pip install -r /root/requirements.in 26 | 27 | # Copy the actual code 28 | COPY . 
/root/ 29 | 30 | # This tag is supplied by the build script and will be used to determine the version 31 | # when registering tasks, workflows, and launch plans 32 | ARG tag 33 | ENV FLYTE_INTERNAL_IMAGE $tag 34 | -------------------------------------------------------------------------------- /examples/bigquery_plugin/README.md: -------------------------------------------------------------------------------- 1 | # BigQuery plugin 2 | 3 | ```{warning} 4 | This example code uses the legacy implementation of the BigQuery integration. We recommend using the [BigQuery agent](https://docs.flyte.org/en/latest/flytesnacks/examples/bigquery_agent/index.html) instead. 5 | ``` 6 | 7 | This directory contains example code for the deprecated BigQuery plugin. For documentation on installing and using the plugin, see the [BigQuery plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/bigquery_plugin/index.html) 8 | 9 | ```{toctree} 10 | :maxdepth: -1 11 | :hidden: 12 | 13 | bigquery_plugin_example 14 | ``` 15 | -------------------------------------------------------------------------------- /examples/bigquery_plugin/bigquery_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/bigquery_plugin/bigquery_plugin/__init__.py -------------------------------------------------------------------------------- /examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # (bigquery_plugin_example)= 3 | # # BigQuery plugin example 4 | # 5 | # %% 6 | 7 | try: 8 | from typing import Annotated 9 | except ImportError: 10 | from typing_extensions import Annotated 11 | 12 | import pandas as pd 13 | from flytekit import StructuredDataset, kwtypes, task, workflow 14 | from flytekitplugins.bigquery import 
BigQueryConfig, BigQueryTask 15 | 16 | # Note that in order for registration to work properly, you'll need to give your 17 | # BigQuery task a name that's unique across your project/domain for your Flyte installation. 18 | bigquery_task_no_io = BigQueryTask( 19 | name="sql.bigquery.no_io", 20 | inputs={}, 21 | query_template="SELECT 1", 22 | task_config=BigQueryConfig(ProjectID="flyte"), 23 | ) 24 | 25 | 26 | @workflow 27 | def no_io_wf(): 28 | return bigquery_task_no_io() 29 | 30 | 31 | DogeCoinDataset = Annotated[StructuredDataset, kwtypes(hash=str, size=int, block_number=int)] 32 | 33 | bigquery_task_templatized_query = BigQueryTask( 34 | name="sql.bigquery.w_io", 35 | # Define inputs as well as their types that can be used to customize the query. 36 | inputs=kwtypes(version=int), 37 | output_structured_dataset_type=DogeCoinDataset, 38 | task_config=BigQueryConfig(ProjectID="flyte"), 39 | query_template="SELECT * FROM `bigquery-public-data.crypto_dogecoin.transactions` WHERE version = @version LIMIT 10;", 40 | ) 41 | 42 | 43 | @task 44 | def convert_bq_table_to_pandas_dataframe(sd: DogeCoinDataset) -> pd.DataFrame: 45 | return sd.open(pd.DataFrame).all() 46 | 47 | 48 | @workflow 49 | def full_bigquery_wf(version: int) -> pd.DataFrame: 50 | sd = bigquery_task_templatized_query(version=version) 51 | return convert_bq_table_to_pandas_dataframe(sd=sd) 52 | 53 | 54 | # Check query result on bigquery console: `https://console.cloud.google.com/bigquery` 55 | -------------------------------------------------------------------------------- /examples/bigquery_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | pandas 5 | flytekitplugins-deck-standard 6 | flytekitplugins-bigquery 7 | -------------------------------------------------------------------------------- /examples/blast/.gitignore: -------------------------------------------------------------------------------- 1 | 
stdout.txt 2 | *.png 3 | output/ 4 | -------------------------------------------------------------------------------- /examples/blast/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | ENV VENV /opt/venv 4 | ENV LANG C.UTF-8 5 | ENV LC_ALL C.UTF-8 6 | ENV PYTHONPATH /root 7 | 8 | RUN apt-get update \ 9 | && apt-get install -y software-properties-common \ 10 | && add-apt-repository ppa:deadsnakes/ppa \ 11 | && apt-get install -y \ 12 | && apt-get update \ 13 | && apt-get install -y \ 14 | cmake \ 15 | curl \ 16 | python3.8 \ 17 | python3.8-venv \ 18 | python3.8-dev \ 19 | make \ 20 | build-essential \ 21 | libssl-dev \ 22 | libffi-dev \ 23 | python3-pip \ 24 | zlib1g-dev \ 25 | vim \ 26 | wget 27 | 28 | # Install the AWS cli separately to prevent issues with boto being written over 29 | RUN pip3 install awscli 30 | 31 | WORKDIR /opt 32 | RUN curl https://sdk.cloud.google.com > install.sh 33 | RUN bash /opt/install.sh --install-dir=/opt 34 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 35 | WORKDIR /root 36 | 37 | # Virtual environment 38 | ENV VENV /opt/venv 39 | RUN python3 -m venv ${VENV} 40 | ENV PATH="${VENV}/bin:$PATH" 41 | 42 | # Download BLAST 43 | RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.13.0/ncbi-blast-2.13.0+-x64-linux.tar.gz && \ 44 | tar -xzf ncbi-blast-2.13.0+-x64-linux.tar.gz 45 | 46 | # Set the working directory 47 | WORKDIR /root 48 | 49 | # Install Python dependencies 50 | COPY requirements.in /root 51 | RUN ${VENV}/bin/pip install -r /root/requirements.in 52 | 53 | # Copy data 54 | # COPY blast/kitasatospora /root/kitasatospora 55 | 56 | # Copy the actual code 57 | COPY . 
/root/ 58 | 59 | # Copy over the helper script that the SDK relies on 60 | RUN cp ${VENV}/bin/flytekit_venv /usr/local/bin/ 61 | RUN chmod a+x /usr/local/bin/flytekit_venv 62 | 63 | # Check if BLAST is installed 64 | ENV PATH=$PATH:/root/ncbi-blast-2.13.0+/bin 65 | RUN echo $PATH 66 | RUN output="$(which blastx)" && echo $output 67 | 68 | # This tag is supplied by the build script and will be used to determine the version 69 | # when registering tasks, workflows, and launch plans 70 | ARG tag 71 | ENV FLYTE_INTERNAL_IMAGE $tag 72 | -------------------------------------------------------------------------------- /examples/blast/blast/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/blast/blast/__init__.py -------------------------------------------------------------------------------- /examples/blast/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=0.32.3 2 | wheel 3 | matplotlib 4 | pandas 5 | flytekitplugins-deck-standard 6 | -------------------------------------------------------------------------------- /examples/chatgpt_agent/README.md: -------------------------------------------------------------------------------- 1 | (chatgpt_agent)= 2 | 3 | # ChatGPT agent 4 | 5 | ## Installation 6 | 7 | To install the ChatGPT agent, run the following command: 8 | 9 | ```{eval-rst} 10 | .. prompt:: bash 11 | 12 | pip install flytekitplugins-openai 13 | ``` 14 | 15 | ## Example usage 16 | 17 | For an example job, see {doc}`ChatGPT agent example usage`. 18 | 19 | ## Local testing 20 | 21 | To test the ChatGPT agent locally, create a class for the agent task that inherits from [SyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L225). 
This mixin can handle synchronous tasks and allows flytekit to mimic FlytePropeller's behavior in calling the agent. For more information, see "[Testing agents locally](https://docs.flyte.org/en/latest/flyte_agents/testing_agents_in_a_local_python_environment.html)". 22 | 23 | ```{note} 24 | 25 | In some cases, you will need to store credentials in your local environment when testing locally. 26 | 27 | ``` 28 | 29 | ## Flyte deployment configuration 30 | 31 | ```{note} 32 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 33 | ``` 34 | 35 | To enable the ChatGPT agent in your Flyte deployment, see the {ref}`ChatGPT agent deployment guide`. 36 | 37 | 38 | ```{toctree} 39 | :maxdepth: -1 40 | :hidden: 41 | 42 | chatgpt_agent_example_usage 43 | 44 | ``` 45 | -------------------------------------------------------------------------------- /examples/chatgpt_agent/chatgpt_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/chatgpt_agent/chatgpt_agent/__init__.py -------------------------------------------------------------------------------- /examples/chatgpt_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-openai 2 | -------------------------------------------------------------------------------- /examples/comet_ml_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | WORKDIR /root 11 | 12 | ENV VENV /opt/venv 13 | # Virtual environment 14 | RUN python3 -m venv ${VENV} 15 | 
ENV PATH="${VENV}/bin:$PATH" 16 | 17 | # Install Python dependencies 18 | COPY requirements.in /root 19 | RUN pip install -r /root/requirements.in 20 | 21 | # Copy the actual code 22 | COPY . /root 23 | 24 | # This tag is supplied by the build script and will be used to determine the version 25 | # when registering tasks, workflows, and launch plans 26 | ARG tag 27 | ENV FLYTE_INTERNAL_IMAGE $tag 28 | -------------------------------------------------------------------------------- /examples/comet_ml_plugin/README.md: -------------------------------------------------------------------------------- 1 | (comet_ml)= 2 | 3 | # Comet ML 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, Data, Metrics, Intermediate 7 | ``` 8 | 9 | Comet’s machine learning platform integrates with your existing infrastructure and tools so you can manage, visualize, and optimize models from training runs to production monitoring. This plugin integrates Flyte with Comet by configuring links between the two platforms. 10 | 11 | To install the plugin, run: 12 | 13 | ```bash 14 | pip install flytekitplugins-comet-ml 15 | ``` 16 | 17 | Comet requires an API key to authenticate with their platform. In the above example, a secret is created using 18 | [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html). 
19 | 20 | To enable linking from the Flyte side panel to Comet.ml, add the following to Flyte's configuration: 21 | 22 | ```yaml 23 | plugins: 24 | logs: 25 | dynamic-log-links: 26 | - comet-ml-execution-id: 27 | displayName: Comet 28 | templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .executionName }}{{ .nodeId }}{{ .taskRetryAttempt }}{{ .taskConfig.link_suffix }}" 29 | - comet-ml-custom-id: 30 | displayName: Comet 31 | templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .taskConfig.experiment_key }}" 32 | ``` 33 | 34 | ```{auto-examples-toc} 35 | comet_ml_example 36 | ``` 37 | -------------------------------------------------------------------------------- /examples/comet_ml_plugin/comet_ml_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/comet_ml_plugin/comet_ml_plugin/__init__.py -------------------------------------------------------------------------------- /examples/comet_ml_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-comet-ml 2 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-buster 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | # This is necessary for opencv to work 11 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl 12 | 13 | WORKDIR /root 14 | 15 | ENV VENV /opt/venv 16 | # Virtual environment 17 | RUN python3 -m venv ${VENV} 18 | 
ENV PATH="${VENV}/bin:$PATH" 19 | 20 | # Install Python dependencies 21 | COPY requirements.in /root 22 | RUN pip install -r /root/requirements.in 23 | RUN pip freeze 24 | 25 | # Copy the actual code 26 | COPY . /root 27 | 28 | # This tag is supplied by the build script and will be used to determine the version 29 | # when registering tasks, workflows, and launch plans 30 | ARG tag 31 | ENV FLYTE_INTERNAL_IMAGE $tag 32 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/README.md: -------------------------------------------------------------------------------- 1 | # Customizing Dependencies 2 | 3 | These examples demonstrate how Flyte utilizes Docker images to construct containers under the hood and how you can craft your own images to encompass all the necessary dependencies for your tasks or workflows. You will explore how to execute a raw container with custom commands, indicate multiple container images within a single workflow, 4 | and get familiar with the ins and outs of `ImageSpec`. 
5 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/customizing_dependencies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/customizing_dependencies/customizing_dependencies/__init__.py -------------------------------------------------------------------------------- /examples/customizing_dependencies/customizing_dependencies/calculate-ellipse-area-new.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | 4 | 5 | def write_output(output_dir, output_file, v): 6 | with open(f"{output_dir}/{output_file}", "w") as f: 7 | f.write(str(v)) 8 | 9 | 10 | def calculate_area(a, b): 11 | return math.pi * a * b 12 | 13 | 14 | def main(a, b, output_dir): 15 | a = float(a) 16 | b = float(b) 17 | 18 | area = calculate_area(a, b) 19 | 20 | write_output(output_dir, "area", area) 21 | write_output(output_dir, "metadata", "[from python rawcontainer]") 22 | 23 | 24 | if __name__ == "__main__": 25 | a = sys.argv[1] 26 | b = sys.argv[2] 27 | output_dir = sys.argv[3] 28 | 29 | main(a, b, output_dir) 30 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/customizing_dependencies/image_spec.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | import pandas as pd 4 | from flytekit import ImageSpec, Resources, task, workflow 5 | 6 | pandas_image_spec = ImageSpec( 7 | base_image="ghcr.io/flyteorg/flytekit:py3.8-1.6.2", 8 | packages=["pandas", "numpy"], 9 | python_version="3.9", 10 | apt_packages=["git"], 11 | env={"Debug": "True"}, 12 | registry="ghcr.io/flyteorg", 13 | ) 14 | 15 | sklearn_image_spec = ImageSpec( 16 | base_image="ghcr.io/flyteorg/flytekit:py3.8-1.6.2", 17 | 
packages=["scikit-learn"], 18 | registry="ghcr.io/flyteorg", 19 | ) 20 | 21 | if sklearn_image_spec.is_container(): 22 | from sklearn.linear_model import LogisticRegression 23 | 24 | 25 | # To enable tasks to utilize the images built with `ImageSpec`, 26 | # specify the container_image parameter for those tasks. 27 | @task(container_image=pandas_image_spec) 28 | def get_pandas_dataframe() -> typing.Tuple[pd.DataFrame, pd.Series]: 29 | df = pd.read_csv("https://storage.googleapis.com/download.tensorflow.org/data/heart.csv") 30 | print(df.head()) 31 | return df[["age", "thalach", "trestbps", "chol", "oldpeak"]], df.pop("target") 32 | 33 | 34 | @task(container_image=sklearn_image_spec, requests=Resources(cpu="1", mem="1Gi")) 35 | def get_model(max_iter: int, multi_class: str) -> typing.Any: 36 | return LogisticRegression(max_iter=max_iter, multi_class=multi_class) 37 | 38 | 39 | # Get a basic model to train 40 | @task(container_image=sklearn_image_spec, requests=Resources(cpu="1", mem="1Gi")) 41 | def train_model(model: typing.Any, feature: pd.DataFrame, target: pd.Series) -> typing.Any: 42 | model.fit(feature, target) 43 | return model 44 | 45 | 46 | # Define a workflow to capture dependencies between the tasks 47 | @workflow() 48 | def wf(): 49 | feature, target = get_pandas_dataframe() 50 | model = get_model(max_iter=3000, multi_class="auto") 51 | train_model(model=model, feature=feature, target=target) 52 | 53 | 54 | # Execute the workflow locally 55 | if __name__ == "__main__": 56 | wf() 57 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/customizing_dependencies/multi_images.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from flytekit import task, workflow 3 | 4 | 5 | @task(container_image="{{.image.mindmeld.fqn}}:{{.image.mindmeld.version}}") 6 | def get_data() -> np.ndarray: 7 | # here we're importing scikit learn within the 
Flyte task 8 | from sklearn import datasets 9 | 10 | iris = datasets.load_iris() 11 | X = iris.data[:, :2] 12 | return X 13 | 14 | 15 | @task(container_image="{{.image.borebuster.fqn}}:{{.image.borebuster.version}}") 16 | def normalize(X: np.ndarray) -> np.ndarray: 17 | return (X - X.mean(axis=0)) / X.std(axis=0) 18 | 19 | 20 | @workflow 21 | def multi_images_wf() -> np.ndarray: 22 | X = get_data() 23 | X = normalize(X=X) 24 | return X 25 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | # raw-containers-demo 2 | 3 | This directory holds the Dockerfiles and supporting files needed to run the example described in `raw_container.py`, split by language. 4 | 5 | The actual example points to images present in the GitHub registry (i.e. ghcr.io), so in case we need to update the examples for any reason we should keep in mind to push them to ghcr too. 
6 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/haskell/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM haskell:9 2 | 3 | WORKDIR /root 4 | 5 | COPY calculate-ellipse-area.hs /root 6 | 7 | RUN ghc calculate-ellipse-area.hs 8 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/haskell/calculate-ellipse-area.hs: -------------------------------------------------------------------------------- 1 | import System.IO 2 | import System.Environment 3 | import Text.Read 4 | import Text.Printf 5 | 6 | calculateEllipseArea :: Float -> Float -> Float 7 | calculateEllipseArea a b = pi * a * b 8 | 9 | main = do 10 | args <- getArgs 11 | let a = args!!0 12 | b = args!!1 13 | 14 | let area = calculateEllipseArea (read a::Float) (read b::Float) 15 | 16 | let output_area = args!!2 ++ "/area" 17 | output_metadata = args!!2 ++ "/metadata" 18 | writeFile output_area (show area) 19 | writeFile output_metadata "[from haskell rawcontainer]" 20 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/julia/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM julia:1.6.4-buster 2 | 3 | WORKDIR /root 4 | 5 | COPY calculate-ellipse-area.jl /root 6 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/julia/calculate-ellipse-area.jl: -------------------------------------------------------------------------------- 1 | 2 | using Printf 3 | 4 | function calculate_area(a, b) 5 | π * a * b 6 | end 7 | 8 | function write_output(output_dir, output_file, v) 9 | 
output_path = @sprintf "%s/%s" output_dir output_file 10 | open(output_path, "w") do file 11 | write(file, string(v)) 12 | end 13 | end 14 | 15 | function main(a, b, output_dir) 16 | a = parse.(Float64, a) 17 | b = parse.(Float64, b) 18 | 19 | area = calculate_area(a, b) 20 | 21 | write_output(output_dir, "area", area) 22 | write_output(output_dir, "metadata", "[from julia rawcontainer]") 23 | end 24 | 25 | # the keyword ARGS is a special value that contains the command-line arguments 26 | # julia arrays are 1-indexed 27 | a = ARGS[1] 28 | b = ARGS[2] 29 | output_dir = ARGS[3] 30 | 31 | main(a, b, output_dir) 32 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/python/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim-buster 2 | 3 | WORKDIR /root 4 | 5 | COPY *.py /root/ 6 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/python/calculate-ellipse-area.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | 4 | 5 | def write_output(output_dir, output_file, v): 6 | with open(f"{output_dir}/{output_file}", "w") as f: 7 | f.write(str(v)) 8 | 9 | 10 | def calculate_area(a, b): 11 | return math.pi * a * b 12 | 13 | 14 | def main(a, b, output_dir): 15 | a = float(a) 16 | b = float(b) 17 | 18 | area = calculate_area(a, b) 19 | 20 | write_output(output_dir, "area", area) 21 | write_output(output_dir, "metadata", "[from python rawcontainer]") 22 | 23 | 24 | if __name__ == "__main__": 25 | a = sys.argv[1] 26 | b = sys.argv[2] 27 | output_dir = sys.argv[3] 28 | 29 | main(a, b, output_dir) 30 | -------------------------------------------------------------------------------- 
/examples/customizing_dependencies/raw-containers-supporting-files/per-language/r/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM r-base 2 | 3 | WORKDIR /root 4 | 5 | COPY *.R /root/ 6 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/r/calculate-ellipse-area.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | args = commandArgs(trailingOnly=TRUE) 4 | 5 | a = args[1] 6 | b = args[2] 7 | output_dir = args[3] 8 | 9 | area <- pi * as.double(a) * as.double(b) 10 | print(area) 11 | 12 | writeLines(as.character(area), sprintf("%s/%s", output_dir, 'area')) 13 | writeLines("[from R rawcontainer]", sprintf("%s/%s", output_dir, 'metadata')) 14 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/r/install-readr.R: -------------------------------------------------------------------------------- 1 | install.packages("readr") 2 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/shell/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | WORKDIR /root 4 | 5 | COPY calculate-ellipse-area.sh /root 6 | RUN chmod +x /root/calculate-ellipse-area.sh 7 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/raw-containers-supporting-files/per-language/shell/calculate-ellipse-area.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env sh 2 | 3 | echo "4*a(1) * $1 * $2" | bc -l | tee "$3/area" 4 | 5 | echo "[from shell rawcontainer]" | tee "$3/metadata" 6 | -------------------------------------------------------------------------------- /examples/customizing_dependencies/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | scikit-learn 3 | pandas 4 | -------------------------------------------------------------------------------- /examples/data_types_and_io/Dockerfile: -------------------------------------------------------------------------------- 1 | #syntax=docker/dockerfile:1.8 2 | # ###################### 3 | # NOTE: For CI/CD only # 4 | ######################## 5 | FROM python:3.11-slim-buster 6 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 7 | 8 | WORKDIR /root 9 | ENV VENV /opt/venv 10 | ENV LANG C.UTF-8 11 | ENV LC_ALL C.UTF-8 12 | ENV PYTHONPATH /root 13 | 14 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl 15 | 16 | # Virtual environment 17 | ENV VENV /opt/venv 18 | RUN python3 -m venv ${VENV} 19 | ENV PATH="${VENV}/bin:$PATH" 20 | 21 | RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ 22 | pip install flytekit pandas pyarrow pydantic>2 23 | RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ 24 | pip install torch --index-url https://download.pytorch.org/whl/cpu 25 | 26 | # Copy the actual code 27 | COPY . 
/root 28 | 29 | # This tag is supplied by the build script and will be used to determine the version 30 | # when registering tasks, workflows, and launch plans 31 | ARG tag 32 | ENV FLYTE_INTERNAL_IMAGE $tag 33 | -------------------------------------------------------------------------------- /examples/data_types_and_io/README.md: -------------------------------------------------------------------------------- 1 | # Data Types and IO 2 | 3 | These examples introduce the wide range of data types that Flyte supports. 4 | These types serve a dual purpose by not only validating the data but also enabling seamless 5 | transfer of data between local and cloud storage. 6 | They enable: 7 | 8 | - Data lineage 9 | - Memoization 10 | - Auto parallelization 11 | - Simplifying access to data 12 | - Auto generated CLI and launch UI 13 | 14 | For a more comprehensive understanding of how Flyte manages data, refer to the [Understand How Flyte Handles Data](https://docs.flyte.org/en/latest/concepts/data_management.html#divedeep-data-management) guide. 
15 | -------------------------------------------------------------------------------- /examples/data_types_and_io/data_types_and_io/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/data_types_and_io/data_types_and_io/__init__.py -------------------------------------------------------------------------------- /examples/data_types_and_io/data_types_and_io/dataclass_input.json: -------------------------------------------------------------------------------- 1 | { "x": 5, "y": "5", "z": { "5": "5" } } 2 | -------------------------------------------------------------------------------- /examples/data_types_and_io/data_types_and_io/enum_type.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from flytekit import task, workflow 4 | 5 | 6 | # Define an enum and a simple coffee maker workflow that accepts an order 7 | # and brews coffee ☕️ accordingly. 8 | # The assumption is that the coffee maker only understands enum inputs. 9 | class Coffee(Enum): 10 | ESPRESSO = "espresso" 11 | AMERICANO = "americano" 12 | LATTE = "latte" 13 | CAPPUCCINO = "cappucccino" 14 | 15 | 16 | @task 17 | def take_order(coffee: str) -> Coffee: 18 | return Coffee(coffee) 19 | 20 | 21 | @task 22 | def prep_order(coffee_enum: Coffee) -> str: 23 | return f"Preparing {coffee_enum.value} ..." 
24 | 25 | 26 | @workflow 27 | def coffee_maker(coffee: str) -> str: 28 | coffee_enum = take_order(coffee=coffee) 29 | return prep_order(coffee_enum=coffee_enum) 30 | 31 | 32 | # The workflow can also accept an enum value 33 | @workflow 34 | def coffee_maker_enum(coffee_enum: Coffee) -> str: 35 | return prep_order(coffee_enum=coffee_enum) 36 | 37 | 38 | # You can send a string to the coffee_maker_enum workflow during its execution: 39 | # pyflyte run \ 40 | # https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/data_types_and_io/data_types_and_io/enum_type.py \ 41 | # coffee_maker_enum --coffee_enum="latte" 42 | # 43 | # Run the workflows locally 44 | if __name__ == "__main__": 45 | print(coffee_maker(coffee="latte")) 46 | print(coffee_maker_enum(coffee_enum=Coffee.LATTE)) 47 | -------------------------------------------------------------------------------- /examples/data_types_and_io/data_types_and_io/file_streaming.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | from flytekit import task, workflow 5 | from flytekit.types.directory import FlyteDirectory 6 | from flytekit.types.file import FlyteFile 7 | 8 | 9 | @task() 10 | def remove_some_rows(ff: FlyteFile) -> FlyteFile: 11 | """ 12 | Remove the rows that the value of city is 'Seattle'. 13 | This is an example with streaming support. 
14 | """ 15 | new_file = FlyteFile.new_remote_file("data_without_seattle.csv") 16 | with ff.open("r") as r: 17 | with new_file.open("w") as w: 18 | df = pd.read_csv(r) 19 | df = df[df["City"] != "Seattle"] 20 | df.to_csv(w, index=False) 21 | return new_file 22 | 23 | 24 | @task 25 | def process_folder(fd: FlyteDirectory) -> FlyteDirectory: 26 | out_fd = FlyteDirectory.new_remote("folder-copy") 27 | for base, x in fd.crawl(): 28 | src = str(os.path.join(base, x)) 29 | out_file = out_fd.new_file(x) 30 | with FlyteFile(src).open("rb") as f: 31 | with out_file.open("wb") as o: 32 | o.write(f.read()) 33 | # The output path will be s3://my-s3-bucket/data/77/--0/folder-copy 34 | return out_fd 35 | 36 | 37 | @workflow() 38 | def wf(): 39 | remove_some_rows(ff=FlyteFile("s3://custom-bucket/data.csv")) 40 | process_folder(fd=FlyteDirectory("s3://my-s3-bucket/folder")) 41 | return 42 | 43 | 44 | if __name__ == "__main__": 45 | print(f"Running wf() {wf()}") 46 | -------------------------------------------------------------------------------- /examples/data_types_and_io/data_types_and_io/tensorflow_type.py: -------------------------------------------------------------------------------- 1 | # Import necessary libraries and modules 2 | 3 | from flytekit import ImageSpec, task, workflow 4 | from flytekit.types.directory import TFRecordsDirectory 5 | from flytekit.types.file import TFRecordFile 6 | 7 | custom_image = ImageSpec( 8 | packages=["tensorflow", "tensorflow-datasets", "flytekitplugins-kftensorflow"], 9 | registry="ghcr.io/flyteorg", 10 | ) 11 | 12 | import tensorflow as tf 13 | 14 | 15 | # TensorFlow Model 16 | @task 17 | def train_model() -> tf.keras.Model: 18 | model = tf.keras.Sequential( 19 | [tf.keras.layers.Dense(128, activation="relu"), tf.keras.layers.Dense(10, activation="softmax")] 20 | ) 21 | model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) 22 | return model 23 | 24 | 25 | @task 26 | def evaluate_model(model: 
tf.keras.Model, x: tf.Tensor, y: tf.Tensor) -> float: 27 | loss, accuracy = model.evaluate(x, y) 28 | return accuracy 29 | 30 | 31 | @workflow 32 | def training_workflow(x: tf.Tensor, y: tf.Tensor) -> float: 33 | model = train_model() 34 | return evaluate_model(model=model, x=x, y=y) 35 | 36 | 37 | # TFRecord Files 38 | @task 39 | def process_tfrecord(file: TFRecordFile) -> int: 40 | count = 0 41 | for record in tf.data.TFRecordDataset(file): 42 | count += 1 43 | return count 44 | 45 | 46 | @workflow 47 | def tfrecord_workflow(file: TFRecordFile) -> int: 48 | return process_tfrecord(file=file) 49 | 50 | 51 | # TFRecord Directories 52 | @task 53 | def process_tfrecords_dir(dir: TFRecordsDirectory) -> int: 54 | count = 0 55 | for record in tf.data.TFRecordDataset(dir.path): 56 | count += 1 57 | return count 58 | 59 | 60 | @workflow 61 | def tfrecords_dir_workflow(dir: TFRecordsDirectory) -> int: 62 | return process_tfrecords_dir(dir=dir) 63 | -------------------------------------------------------------------------------- /examples/data_types_and_io/requirements.in: -------------------------------------------------------------------------------- 1 | pandas 2 | torch 3 | tabulate 4 | tensorflow 5 | pyarrow 6 | pydantic>2 7 | -------------------------------------------------------------------------------- /examples/data_types_and_io/test_data/biostats.csv: -------------------------------------------------------------------------------- 1 | "Name", "Sex", "Age", "Height (in)", "Weight (lbs)" 2 | "Alex", "M", 41, 74, 170 3 | "Bert", "M", 42, 68, 166 4 | "Carl", "M", 32, 70, 155 5 | "Dave", "M", 39, 72, 167 6 | "Elly", "F", 30, 66, 124 7 | "Fran", "F", 33, 66, 115 8 | "Gwen", "F", 26, 64, 121 9 | "Hank", "M", 30, 71, 158 10 | "Ivan", "M", 53, 72, 175 11 | "Jake", "M", 32, 69, 143 12 | "Kate", "F", 47, 69, 139 13 | "Luke", "M", 34, 72, 163 14 | "Myra", "F", 23, 62, 98 15 | "Neil", "M", 36, 75, 160 16 | "Omar", "M", 38, 70, 145 17 | "Page", "F", 31, 67, 135 18 | "Quin", 
"M", 29, 71, 176 19 | "Ruth", "F", 28, 65, 131 20 | -------------------------------------------------------------------------------- /examples/databricks_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM databricksruntime/standard:14.3-LTS 2 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /databricks/driver 8 | ENV PATH="/databricks/python3/bin:$PATH" 9 | USER 0 10 | 11 | RUN sudo apt-get update && sudo apt-get install -y make build-essential libssl-dev git 12 | 13 | # Install Python dependencies 14 | COPY ./requirements.in /databricks/driver/requirements.in 15 | RUN /databricks/python3/bin/pip install -r /databricks/driver/requirements.in 16 | 17 | WORKDIR /databricks/driver 18 | 19 | # Copy the actual code 20 | COPY . /databricks/driver/ 21 | 22 | # This tag is supplied by the build script and will be used to determine the version 23 | # when registering tasks, workflows and launch plans. 24 | ARG tag 25 | ENV FLYTE_INTERNAL_IMAGE $tag 26 | -------------------------------------------------------------------------------- /examples/databricks_agent/README.md: -------------------------------------------------------------------------------- 1 | (databricks_agent)= 2 | 3 | # Databricks agent 4 | 5 | ```{eval-rst} 6 | .. tags:: Spark, Integration, DistributedComputing, Data, Advanced 7 | ``` 8 | 9 | Flyte can be integrated with the [Databricks](https://www.databricks.com/) service, 10 | enabling you to submit Spark jobs to the Databricks platform. 11 | 12 | ## Installation 13 | 14 | The Databricks agent comes bundled with the Spark plugin. To install the Spark plugin, run the following command: 15 | 16 | ``` 17 | pip install flytekitplugins-spark 18 | 19 | ``` 20 | 21 | ## Example usage 22 | 23 | For a usage example, see {doc}`Databricks agent example usage`. 
24 | 25 | ## Local testing 26 | 27 | To test the Databricks agent locally, create a class for the agent task that inherits from [AsyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L262). This mixin can handle asynchronous tasks and allows flytekit to mimic FlytePropeller's behavior in calling the agent. For more information, see "[Testing agents locally](https://docs.flyte.org/en/latest/flyte_agents/testing_agents_in_a_local_python_environment.html)". 28 | 29 | ```{note} 30 | 31 | In some cases, you will need to store credentials in your local environment when testing locally. 32 | 33 | ``` 34 | 35 | ## Flyte deployment configuration 36 | 37 | ```{note} 38 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 39 | ``` 40 | 41 | To enable the Databricks agent in your Flyte deployment, see the {ref}`Databricks agent setup guide `. 42 | 43 | 44 | ```{toctree} 45 | :maxdepth: -1 46 | :hidden: 47 | 48 | databricks_agent_example_usage 49 | ``` 50 | -------------------------------------------------------------------------------- /examples/databricks_agent/databricks_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/databricks_agent/databricks_agent/__init__.py -------------------------------------------------------------------------------- /examples/databricks_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-spark 2 | -------------------------------------------------------------------------------- /examples/databricks_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM databricksruntime/standard:14.3-LTS 2 | LABEL 
org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /databricks/driver 8 | ENV PATH="/databricks/python3/bin:$PATH" 9 | USER 0 10 | 11 | RUN sudo apt-get update && sudo apt-get install -y make build-essential libssl-dev git 12 | 13 | # Install Python dependencies 14 | COPY ./requirements.in /databricks/driver/requirements.in 15 | RUN /databricks/python3/bin/pip install -r /databricks/driver/requirements.in 16 | 17 | WORKDIR /databricks/driver 18 | 19 | # Copy the actual code 20 | COPY . /databricks/driver/ 21 | 22 | # This tag is supplied by the build script and will be used to determine the version 23 | # when registering tasks, workflows and launch plans. 24 | ARG tag 25 | ENV FLYTE_INTERNAL_IMAGE $tag 26 | -------------------------------------------------------------------------------- /examples/databricks_plugin/README.md: -------------------------------------------------------------------------------- 1 | # Databricks plugin 2 | 3 | ```{warning} 4 | This example code uses a legacy implementation of the Databricks integration. We recommend using the [Databricks agent](https://docs.flyte.org/en/latest/flytesnacks/examples/databricks_agent/index.html) instead. 5 | ``` 6 | 7 | This directory contains example code for the deprecated Databricks plugin. 
For documentation on installing and using the plugin, see the [Databricks plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/databricks_plugin/index.html) 8 | 9 | ```{toctree} 10 | :maxdepth: -1 11 | :hidden: 12 | 13 | databricks_plugin_example 14 | ``` 15 | -------------------------------------------------------------------------------- /examples/databricks_plugin/databricks_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/databricks_plugin/databricks_plugin/__init__.py -------------------------------------------------------------------------------- /examples/databricks_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-spark 2 | -------------------------------------------------------------------------------- /examples/dbt_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | RUN apt-get update && apt-get install -y build-essential git postgresql-client libpq-dev 10 | 11 | # Install the AWS cli separately to prevent issues with boto being written over 12 | RUN pip3 install awscli 13 | 14 | ENV VENV /opt/venv 15 | # Virtual environment 16 | RUN python3 -m venv ${VENV} 17 | ENV PATH="${VENV}/bin:$PATH" 18 | 19 | # Install Python dependencies 20 | COPY requirements.in /root/ 21 | RUN pip install -r /root/requirements.in 22 | # psycopg2-binary is a dependency of the dbt-postgres adapter, but that doesn't work on mac M1s. 23 | # As per https://github.com/psycopg/psycopg2/issues/1360, we install psycopg to circumvent this. 
24 | RUN pip uninstall -y psycopg2-binary && pip install psycopg2 25 | 26 | # Copy the actual code 27 | COPY . /root/ 28 | 29 | # Copy dbt-specific files 30 | COPY profiles.yml /root/dbt-profiles/ 31 | RUN git clone https://github.com/dbt-labs/jaffle_shop.git 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | 38 | ENV FLYTE_SDK_LOGGING_LEVEL 10 39 | -------------------------------------------------------------------------------- /examples/dbt_plugin/dbt_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/dbt_plugin/dbt_plugin/__init__.py -------------------------------------------------------------------------------- /examples/dbt_plugin/profiles.yml: -------------------------------------------------------------------------------- 1 | jaffle_shop: # this needs to match the profile in your dbt_project.yml file 2 | target: dev 3 | outputs: 4 | dev: 5 | type: postgres 6 | host: sandbox-postgresql.flyte.svc.cluster.local 7 | user: postgres 8 | password: postgres 9 | port: 5432 10 | dbname: jaffle_shop 11 | schema: dbt_demo 12 | threads: 4 13 | -------------------------------------------------------------------------------- /examples/dbt_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=1.8.1 2 | flytekitplugins-dbt 3 | dbt-postgres 4 | -------------------------------------------------------------------------------- /examples/development_lifecycle/Dockerfile: -------------------------------------------------------------------------------- 1 | #syntax=docker/dockerfile:1.8 2 | # ###################### 3 | # NOTE: For CI/CD only # 4 | ######################## 5 | FROM python:3.11-slim-buster 6 | 
LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 7 | 8 | WORKDIR /root 9 | ENV VENV /opt/venv 10 | ENV LANG C.UTF-8 11 | ENV LC_ALL C.UTF-8 12 | ENV PYTHONPATH /root 13 | WORKDIR /root 14 | 15 | # This is necessary for opencv to work 16 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl 17 | 18 | # Virtual environment 19 | ENV VENV /opt/venv 20 | RUN python3 -m venv ${VENV} 21 | ENV PATH="${VENV}/bin:$PATH" 22 | 23 | # Install Python dependencies 24 | COPY requirements.in /root 25 | RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ 26 | pip install -r /root/requirements.in 27 | 28 | # Copy the actual code 29 | COPY . /root 30 | 31 | # This tag is supplied by the build script and will be used to determine the version 32 | # when registering tasks, workflows, and launch plans 33 | ARG tag 34 | ENV FLYTE_INTERNAL_IMAGE $tag 35 | -------------------------------------------------------------------------------- /examples/development_lifecycle/README.md: -------------------------------------------------------------------------------- 1 | # Development Lifecycle 2 | 3 | These examples demonstrate Flyte's features that aid in local workflow development, showcasing concepts like caching, the Flyte remote API, Agents, Decks and more. 
4 | -------------------------------------------------------------------------------- /examples/development_lifecycle/development_lifecycle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/development_lifecycle/development_lifecycle/__init__.py -------------------------------------------------------------------------------- /examples/development_lifecycle/development_lifecycle/task_cache_serialize.py: -------------------------------------------------------------------------------- 1 | from flytekit import task 2 | 3 | 4 | # Task cache serializing is disabled by default to avoid unexpected behavior 5 | # for task executions. To enable use the `cache_serialize` parameter. 6 | # `cache_serialize` is a switch to enable or disable serialization of the task 7 | # This operation is only useful for cacheable tasks, where one may reuse output 8 | # from a previous execution. Flyte requires implicitly enabling the `cache` 9 | # parameter on all cache serializable tasks. 10 | # Cache key definitions follow the same rules as non-serialized cache tasks. 11 | # It is important to understand the implications of the task signature and 12 | # `cache_version` parameter in defining cached results. 13 | @task(cache=True, cache_serialize=True, cache_version="1.0") 14 | def square(n: int) -> int: 15 | """ 16 | Parameters: 17 | n (int): name of the parameter for the task will be derived from the name of the input variable. 
18 | The type will be automatically deduced to Types.Integer 19 | 20 | Return: 21 | int: The label for the output will be automatically assigned, and the type will be deduced from the annotation 22 | 23 | """ 24 | return n * n 25 | -------------------------------------------------------------------------------- /examples/development_lifecycle/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | flytekitplugins-deck-standard 3 | plotly 4 | scikit-learn 5 | tabulate 6 | pandas 7 | pyarrow 8 | -------------------------------------------------------------------------------- /examples/dolt_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | RUN apt-get update && \ 10 | apt-get -y install sudo curl 11 | 12 | # Install dolt 13 | RUN sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | sudo bash' &&\ 14 | dolt config --global --add user.email bojack@horseman.com &&\ 15 | dolt config --global --add user.name "Bojack Horseman" 16 | 17 | # Install the AWS cli separately to prevent issues with boto being written over 18 | RUN pip3 install awscli 19 | 20 | WORKDIR /opt 21 | RUN curl https://sdk.cloud.google.com > install.sh 22 | RUN bash /opt/install.sh --install-dir=/opt 23 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 24 | WORKDIR /root 25 | 26 | ENV VENV /opt/venv 27 | # Virtual environment 28 | RUN python3 -m venv ${VENV} 29 | ENV PATH="${VENV}/bin:$PATH" 30 | 31 | # Install Python dependencies 32 | COPY requirements.in /root/. 33 | RUN pip install -r /root/requirements.in 34 | 35 | # Copy the actual code 36 | COPY . 
/root/ 37 | 38 | RUN mkdir -p /root/foo &&\ 39 | cd /root/foo &&\ 40 | dolt init 41 | 42 | # This tag is supplied by the build script and will be used to determine the version 43 | # when registering tasks, workflows, and launch plans 44 | ARG tag 45 | ENV FLYTE_INTERNAL_IMAGE $tag 46 | -------------------------------------------------------------------------------- /examples/dolt_plugin/README.md: -------------------------------------------------------------------------------- 1 | # Dolt 2 | 3 | ```{eval-rst} 4 | .. tags:: Integration, Data, SQL, Intermediate 5 | ``` 6 | 7 | ```{image} https://img.shields.io/badge/Blog-Dolt-blue?style=for-the-badge 8 | :target: https://blog.flyte.org/upleveling-flyte-data-lineage-using-dolt 9 | :alt: Dolt Blog Post 10 | ``` 11 | 12 | The `DoltTable` plugin is a wrapper that uses [Dolt](https://github.com/dolthub/dolt) to move data between 13 | `pandas.DataFrame`'s at execution time and database tables at rest. 14 | 15 | ## Installation 16 | 17 | The dolt plugin and dolt command line tool are required to run these examples: 18 | 19 | ```bash 20 | pip install flytekitplugins.dolt 21 | sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | sudo bash' 22 | ``` 23 | 24 | Dolt requires a user configuration to run `init`: 25 | 26 | ```bash 27 | dolt config --global --add user.email 28 | dolt config --global --add user.name 29 | ``` 30 | 31 | These demos assume a `foo` database has been created locally: 32 | 33 | ```bash 34 | mkdir foo 35 | cd foo 36 | dolt init 37 | ``` 38 | 39 | ```{auto-examples-toc} 40 | dolt_quickstart_example 41 | dolt_branch_example 42 | ``` 43 | -------------------------------------------------------------------------------- /examples/dolt_plugin/dolt_plugin/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/dolt_plugin/dolt_plugin/__init__.py -------------------------------------------------------------------------------- /examples/dolt_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins.dolt>=0.18.0 6 | great-expectations>=0.13.31 7 | -------------------------------------------------------------------------------- /examples/duckdb_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | #syntax=docker/dockerfile:1.8 2 | FROM python:3.8-buster 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | # Install the AWS cli separately to prevent issues with boto being written over 11 | RUN pip3 install awscli 12 | 13 | # Install gcloud for GCP 14 | RUN apt-get update && apt-get install -y curl 15 | 16 | WORKDIR /opt 17 | RUN curl https://sdk.cloud.google.com > install.sh 18 | RUN bash /opt/install.sh --install-dir=/opt 19 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 20 | WORKDIR /root 21 | 22 | ENV VENV /opt/venv 23 | # Virtual environment 24 | RUN python3 -m venv ${VENV} 25 | ENV PATH="${VENV}/bin:$PATH" 26 | 27 | # Install Python dependencies 28 | COPY requirements.in /root/ 29 | RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ 30 | pip install -r /root/requirements.in 31 | 32 | # Copy the actual code 33 | COPY . 
/root/ 34 | 35 | # This tag is supplied by the build script and will be used to determine the version 36 | # when registering tasks, workflows, and launch plans 37 | ARG tag 38 | ENV FLYTE_INTERNAL_IMAGE $tag 39 | -------------------------------------------------------------------------------- /examples/duckdb_plugin/README.md: -------------------------------------------------------------------------------- 1 | (duckdb)= 2 | 3 | # DuckDB 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, Data, Analytics, Beginner 7 | ``` 8 | 9 | [DuckDB](https://duckdb.org/) is an in-process SQL OLAP database management system that is explicitly designed to achieve high performance in analytics. 10 | 11 | The Flytekit DuckDB plugin facilitates the efficient execution of intricate analytical queries within your workflow. 12 | 13 | To install the Flytekit DuckDB plugin, run the following command: 14 | 15 | ``` 16 | pip install flytekitplugins-duckdb 17 | ``` 18 | 19 | The Flytekit DuckDB plugin includes the {py:class}`~flytekitplugins:flytekitplugins.duckdb.DuckDBQuery` task, which allows you to specify the following parameters: 20 | 21 | - `query`: The DuckDB query to execute. 22 | - `inputs`: The query parameters to be used during query execution. This can be a StructuredDataset, a string or a list. 
23 | 24 | ```{auto-examples-toc} 25 | duckdb_example 26 | ``` 27 | -------------------------------------------------------------------------------- /examples/duckdb_plugin/duckdb_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/duckdb_plugin/duckdb_plugin/__init__.py -------------------------------------------------------------------------------- /examples/duckdb_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-duckdb 6 | pyarrow 7 | -------------------------------------------------------------------------------- /examples/exploratory_data_analysis/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | RUN : \ 10 | && apt-get update \ 11 | && apt install -y software-properties-common \ 12 | && add-apt-repository ppa:deadsnakes/ppa 13 | 14 | RUN : \ 15 | && apt-get update \ 16 | && apt-get install -y python3.8 python3-pip python3-venv make build-essential libssl-dev curl vim 17 | 18 | # This is necessary for opencv to work 19 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg 20 | 21 | # Install the AWS cli separately to prevent issues with boto being written over 22 | RUN pip3 install awscli 23 | 24 | WORKDIR /opt 25 | RUN curl https://sdk.cloud.google.com > install.sh 26 | RUN bash /opt/install.sh --install-dir=/opt 27 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 28 | WORKDIR /root 29 | 30 | # Virtual environment 31 | ENV VENV /opt/venv 32 | RUN python3 -m venv ${VENV} 33 | ENV PATH="${VENV}/bin:$PATH" 34 | 35 | # Install Python dependencies 36 | COPY 
requirements.in /root 37 | RUN ${VENV}/bin/pip install -r /root/requirements.in 38 | 39 | # Copy the actual code 40 | COPY . /root/ 41 | 42 | # Copy over the helper script that the SDK relies on 43 | RUN cp ${VENV}/bin/flytekit_venv /usr/local/bin/ 44 | RUN chmod a+x /usr/local/bin/flytekit_venv 45 | 46 | # This tag is supplied by the build script and will be used to determine the version 47 | # when registering tasks, workflows, and launch plans 48 | ARG tag 49 | ENV FLYTE_INTERNAL_IMAGE $tag 50 | -------------------------------------------------------------------------------- /examples/exploratory_data_analysis/exploratory_data_analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/exploratory_data_analysis/exploratory_data_analysis/__init__.py -------------------------------------------------------------------------------- /examples/exploratory_data_analysis/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=0.32.3 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-papermill 6 | matplotlib 7 | seaborn 8 | scikit-learn 9 | pysocks 10 | -------------------------------------------------------------------------------- /examples/extending/extending/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/extending/extending/__init__.py -------------------------------------------------------------------------------- /examples/feast_integration/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH 
"$PYTHONPATH:/root/feast_integration" 8 | 9 | # Install the AWS cli separately to prevent issues with boto being written over 10 | RUN pip3 install awscli 11 | 12 | # Install gcloud for GCP 13 | RUN apt-get update && apt-get install -y curl 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | # Virtual environment 22 | ENV VENV /opt/venv 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root/. 28 | RUN ${VENV}/bin/pip install -r /root/requirements.in 29 | 30 | # Copy the actual co 31 | COPY . /root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | ENV DEMO 1 38 | 39 | # Copy over the helper script that the SDK relies on 40 | RUN cp ${VENV}/bin/flytekit_venv /usr/local/bin/ 41 | RUN chmod a+x /usr/local/bin/flytekit_venv 42 | -------------------------------------------------------------------------------- /examples/feast_integration/feast_integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/feast_integration/feast_integration/__init__.py -------------------------------------------------------------------------------- /examples/feast_integration/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | scikit-learn 6 | numpy 7 | boto3 8 | feast[aws] 9 | -------------------------------------------------------------------------------- /examples/flyteinteractive_plugin/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM python:3.10-slim-buster 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV PYTHONPATH /root 6 | 7 | RUN apt-get update && apt-get install build-essential -y \ 8 | && apt-get clean autoclean \ 9 | && apt-get autoremove --yes \ 10 | && rm -rf /var/lib/{apt,dpkg,cache,log}/ \ 11 | && useradd -u 1000 flytekit \ 12 | && chown flytekit: /root \ 13 | && chown flytekit: /home \ 14 | && : 15 | 16 | # Install Python dependencies 17 | COPY requirements.in /root 18 | RUN pip install -r /root/requirements.in 19 | RUN pip freeze 20 | 21 | # Copy the actual code 22 | COPY . /root 23 | 24 | # This tag is supplied by the build script and will be used to determine the version 25 | # when registering tasks, workflows, and launch plans 26 | ARG tag 27 | ENV FLYTE_INTERNAL_IMAGE $tag 28 | -------------------------------------------------------------------------------- /examples/flyteinteractive_plugin/README.md: -------------------------------------------------------------------------------- 1 | (flyte-interactive)= 2 | 3 | # FlyteInteractive 4 | 5 | ```{eval-rst} 6 | .. tags:: Advanced 7 | ``` 8 | 9 | 10 | FlyteInteractive provides interactive task development in a remote environment. This allows developers to leverage remote environment capabilities while accessing features like debugging, code inspection, and Jupyter Notebook, traditionally available in local IDEs. 11 | 12 | 13 | Flyte tasks, designed as one-off jobs, require users to wait until completion to view results. These tasks are developed locally in a virtual environment before being deployed remotely. However, differences in data access, GPU availability, and dependencies between local and remote environments often lead to discrepancies, making local success an unreliable indicator of remote success. This results in frequent, tedious debugging cycles. 
14 | 15 | 16 | 17 | ## Installation 18 | 19 | To use the Flyte interactive plugin, run the following command: 20 | 21 | ```{eval-rst} 22 | .. prompt:: bash 23 | 24 | pip install flytekitplugins-flyteinteractive 25 | ``` 26 | 27 | 28 | ## Acknowledgement 29 | 30 | This feature was created at LinkedIn and later donated to Flyte. 31 | 32 | ```{auto-examples-toc} 33 | vscode 34 | jupyter 35 | ``` 36 | -------------------------------------------------------------------------------- /examples/flyteinteractive_plugin/flyteinteractive_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/flyteinteractive_plugin/flyteinteractive_plugin/__init__.py -------------------------------------------------------------------------------- /examples/flyteinteractive_plugin/flyteinteractive_plugin/jupyter.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # # FlyteInteractive Jupyter Decorator 3 | # 4 | # The `@jupyter` task decorator launches and monitors a Jupyter notebook server. 5 | # ## Usage 6 | # ### 1. 
Add the `@jupyter` decorator to a task function definition 7 | # The `@jupyter` decorator takes the following optional parameters: 8 | # * **max_idle_seconds:** Optional[int] (default 36000) 9 | # * **port:** Optional[int] (default 8888) 10 | # * **enable:** Optional[bool] (default True) 11 | # * **notebook_dir:** Optional[str] (default "/root") 12 | # * **pre_execute:** Optional[Callable] (default None) 13 | # * **post_execute:** Optional[Callable], (default None) 14 | # %% 15 | 16 | from flytekit import task, workflow 17 | from flytekitplugins.flyteinteractive import jupyter 18 | 19 | 20 | @task 21 | @jupyter 22 | def jupyter_task(): 23 | print("opened notebook") 24 | 25 | 26 | @workflow 27 | def wf(): 28 | jupyter_task() 29 | 30 | 31 | if __name__ == "__main__": 32 | print(wf()) 33 | 34 | # %% [markdown] 35 | # ### 2. Connect to the Jupyter notebook server 36 | # You can connect in two ways: 37 | # * **(Recommended) Expose a URL on the Flyte console.** Set up ingress on the Flyte backend to expose a URL on the Flyte console. Details are to be determined (TBD). 38 | 39 | # * **Use port forwarding.** To use port forwarding, execute the following command: 40 | # ``` 41 | # $ kubectl port-forward <pod-name> <port>:<port> 42 | # ``` 43 | # Then, open a browser and navigate to `localhost:<port>`, replacing `<port>` with the port number configured above. You should be presented with the Jupyter notebook interface. 
44 | # %% 45 | -------------------------------------------------------------------------------- /examples/flyteinteractive_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=1.10.7 2 | flytekitplugins-flyteinteractive>=1.10.7 3 | -------------------------------------------------------------------------------- /examples/forecasting_sales/forecasting_sales/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/forecasting_sales/forecasting_sales/__init__.py -------------------------------------------------------------------------------- /examples/forecasting_sales/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=0.32.3 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-spark>=0.16.0 6 | pyspark==3.0.1 7 | s3fs 8 | -------------------------------------------------------------------------------- /examples/greatexpectations_plugin/.gitignore: -------------------------------------------------------------------------------- 1 | !data/ 2 | !*.csv 3 | uncommitted/ 4 | .ge_store_backend_id 5 | -------------------------------------------------------------------------------- /examples/greatexpectations_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Install the AWS cli separately to prevent issues with boto being written over 10 | RUN pip3 install awscli 11 | 12 | # Install gcloud for GCP 13 | RUN apt-get update && apt-get install -y curl 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH 
$PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | ENV VENV /opt/venv 22 | # Virtual environment 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root/. 28 | RUN pip install -r /root/requirements.in 29 | 30 | # Copy the actual code 31 | COPY . /root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | -------------------------------------------------------------------------------- /examples/greatexpectations_plugin/data/movies.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/greatexpectations_plugin/data/movies.sqlite -------------------------------------------------------------------------------- /examples/greatexpectations_plugin/great_expectations/plugins/custom_data_docs/styles/data_docs_custom_styles.css: -------------------------------------------------------------------------------- 1 | /*index page*/ 2 | .ge-index-page-site-name-title {} 3 | .ge-index-page-table-container {} 4 | .ge-index-page-table {} 5 | .ge-index-page-table-profiling-links-header {} 6 | .ge-index-page-table-expectations-links-header {} 7 | .ge-index-page-table-validations-links-header {} 8 | .ge-index-page-table-profiling-links-list {} 9 | .ge-index-page-table-profiling-links-item {} 10 | .ge-index-page-table-expectation-suite-link {} 11 | .ge-index-page-table-validation-links-list {} 12 | .ge-index-page-table-validation-links-item {} 13 | 14 | /*breadcrumbs*/ 15 | .ge-breadcrumbs {} 16 | .ge-breadcrumbs-item {} 17 | 18 | /*navigation sidebar*/ 19 | .ge-navigation-sidebar-container {} 20 | .ge-navigation-sidebar-content {} 21 | .ge-navigation-sidebar-title {} 22 | .ge-navigation-sidebar-link {} 23 | 
-------------------------------------------------------------------------------- /examples/greatexpectations_plugin/greatexpectations_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/greatexpectations_plugin/greatexpectations_plugin/__init__.py -------------------------------------------------------------------------------- /examples/greatexpectations_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-great_expectations>=0.22.0 6 | -------------------------------------------------------------------------------- /examples/hive_plugin/README.md: -------------------------------------------------------------------------------- 1 | # Hive 2 | 3 | ```{eval-rst} 4 | .. tags:: Integration, Data, Advanced 5 | ``` 6 | 7 | Flyte backend can be connected with various hive services. Once enabled it can allow you to query a hive service (e.g. Qubole) and retrieve typed schema (optionally). 8 | This section will provide how to use the Hive Query Plugin using flytekit python 9 | 10 | ## Installation 11 | 12 | To use the flytekit hive plugin simply run the following: 13 | 14 | ```{eval-rst} 15 | .. prompt:: bash 16 | 17 | pip install flytekitplugins-hive 18 | ``` 19 | 20 | ## No Need of a dockerfile 21 | 22 | This plugin is purely a spec. Since SQL is completely portable there is no need to build a Docker container. 
23 | 24 | % TODO: write a subsection for "Configuring the backend to get hive working" 25 | 26 | ```{auto-examples-toc} 27 | hive 28 | ``` 29 | -------------------------------------------------------------------------------- /examples/hive_plugin/hive_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/hive_plugin/hive_plugin/__init__.py -------------------------------------------------------------------------------- /examples/hive_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-hive 2 | -------------------------------------------------------------------------------- /examples/house_price_prediction/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | RUN : \ 10 | && apt-get update \ 11 | && apt install -y software-properties-common \ 12 | && add-apt-repository ppa:deadsnakes/ppa 13 | 14 | RUN : \ 15 | && apt-get update \ 16 | && apt-get install -y python3.8 python3-pip python3-venv make build-essential libssl-dev curl vim 17 | 18 | # This is necessary for opencv to work 19 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg 20 | 21 | # Install the AWS cli separately to prevent issues with boto being written over 22 | RUN pip3 install awscli 23 | 24 | WORKDIR /opt 25 | RUN curl https://sdk.cloud.google.com > install.sh 26 | RUN bash /opt/install.sh --install-dir=/opt 27 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 28 | WORKDIR /root 29 | 30 | # Virtual environment 31 | ENV VENV /opt/venv 32 | RUN python3 -m venv ${VENV} 33 | ENV PATH="${VENV}/bin:$PATH" 34 | 35 | # Install Python dependencies 36 | COPY requirements.in /root 37 | 
RUN ${VENV}/bin/pip install -r /root/requirements.in 38 | 39 | # Copy the actual code 40 | COPY . /root/ 41 | 42 | # Copy over the helper script that the SDK relies on 43 | RUN cp ${VENV}/bin/flytekit_venv /usr/local/bin/ 44 | RUN chmod a+x /usr/local/bin/flytekit_venv 45 | 46 | # This tag is supplied by the build script and will be used to determine the version 47 | # when registering tasks, workflows, and launch plans 48 | ARG tag 49 | ENV FLYTE_INTERNAL_IMAGE $tag 50 | -------------------------------------------------------------------------------- /examples/house_price_prediction/README.md: -------------------------------------------------------------------------------- 1 | # House Price Regression 2 | 3 | ```{eval-rst} 4 | .. tags:: Data, MachineLearning, DataFrame, Intermediate 5 | ``` 6 | 7 | House Price Regression refers to the prediction of house prices based on various factors, using the XGBoost Regression model (in our case). 8 | In this example, we will train our data on the XGBoost model to predict house prices in multiple regions. 9 | 10 | (flyte's-role)= 11 | 12 | ## Where Does Flyte Fit In? 13 | 14 | - Orchestrates the machine learning pipeline. 15 | - Helps cache the output state between {py:func}`tasks `. 16 | - Easier backtracking to the error source. 17 | - Provides a Rich UI to view and manage the pipeline. 18 | 19 | House price prediction pipeline for one region doesn't require a {py:func}`~flytekit:flytekit.dynamic` workflow. When multiple regions are involved, to iterate through the regions at run-time and thereby build the DAG, Flyte workflow has to be {py:func}`~flytekit:flytekit.dynamic`. 20 | 21 | ```{tip} 22 | Refer to {ref}`dynamic_workflow` section to learn more about dynamic workflows. 
23 | ``` 24 | 25 | ## Dataset 26 | 27 | We will create a custom dataset to build our model by referring to the [SageMaker example](https://github.com/aws/amazon-sagemaker-examples/blob/master/advanced_functionality/multi_model_xgboost_home_value/xgboost_multi_model_endpoint_home_value.ipynb). 28 | 29 | The dataset will have the following columns: 30 | 31 | - Price 32 | - House Size 33 | - Number of Bedrooms 34 | - Year Built 35 | - Number of Bathrooms 36 | - Number of Garage Spaces 37 | - Lot Size 38 | 39 | ## Takeaways 40 | 41 | - An in-depth dive into dynamic workflows 42 | - How the Flyte type-system works 43 | 44 | ## Examples 45 | 46 | ```{auto-examples-toc} 47 | house_price_predictor 48 | multiregion_house_price_predictor 49 | ``` 50 | -------------------------------------------------------------------------------- /examples/house_price_prediction/house_price_prediction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/house_price_prediction/house_price_prediction/__init__.py -------------------------------------------------------------------------------- /examples/house_price_prediction/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=0.32.3 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | xgboost<2.1.0 6 | joblib 7 | scikit-learn 8 | tabulate 9 | matplotlib 10 | pandas 11 | -------------------------------------------------------------------------------- /examples/k8s_dask_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM ubuntu:latest 5 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV 
LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | ENV DEBIAN_FRONTEND=noninteractive 13 | 14 | # Install Python3 and other basics 15 | RUN apt-get update \ 16 | && apt-get install -y software-properties-common \ 17 | && add-apt-repository ppa:ubuntu-toolchain-r/test \ 18 | && add-apt-repository -y ppa:deadsnakes/ppa \ 19 | && apt-get install -y \ 20 | build-essential \ 21 | curl \ 22 | git \ 23 | libssl-dev \ 24 | make \ 25 | python3-pip \ 26 | python3.11 \ 27 | python3.11-venv \ 28 | && rm -rf /var/lib/apt/lists/* \ 29 | && : 30 | 31 | ENV VENV /opt/venv 32 | # Virtual environment 33 | RUN python3.11 -m venv ${VENV} 34 | ENV PATH="${VENV}/bin:$PATH" 35 | 36 | # Install Python dependencies 37 | COPY requirements.in /root 38 | RUN pip install -r /root/requirements.in 39 | 40 | # Copy the actual code 41 | COPY . /root/ 42 | 43 | # This tag is supplied by the build script and will be used to determine the version 44 | # when registering tasks, workflows, and launch plans 45 | ARG tag 46 | ENV FLYTE_INTERNAL_IMAGE $tag 47 | -------------------------------------------------------------------------------- /examples/k8s_dask_plugin/k8s_dask_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/k8s_dask_plugin/k8s_dask_plugin/__init__.py -------------------------------------------------------------------------------- /examples/k8s_dask_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-dask 2 | -------------------------------------------------------------------------------- /examples/k8s_pod_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-buster 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG 
C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | # Install the AWS cli separately to prevent issues with boto being written over 11 | RUN pip3 install awscli 12 | 13 | # Install gcloud for GCP 14 | RUN apt-get update && apt-get install -y curl 15 | 16 | WORKDIR /opt 17 | RUN curl https://sdk.cloud.google.com > install.sh 18 | RUN bash /opt/install.sh --install-dir=/opt 19 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 20 | WORKDIR /root 21 | 22 | ENV VENV /opt/venv 23 | # Virtual environment 24 | RUN python3 -m venv ${VENV} 25 | ENV PATH="${VENV}/bin:$PATH" 26 | 27 | # Install Python dependencies 28 | COPY requirements.in /root/. 29 | RUN pip install -r /root/requirements.in 30 | 31 | # Copy the actual code 32 | COPY . /root/ 33 | 34 | # This tag is supplied by the build script and will be used to determine the version 35 | # when registering tasks, workflows, and launch plans 36 | ARG tag 37 | ENV FLYTE_INTERNAL_IMAGE $tag 38 | -------------------------------------------------------------------------------- /examples/k8s_pod_plugin/README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Pods 2 | 3 | ```{eval-rst} 4 | .. tags:: Integration, Kubernetes, Advanced 5 | ``` 6 | 7 | ```{important} 8 | This plugin is no longer needed and is here only for backwards compatibility. No new versions will be published after v1.13.x Please use the `pod_template` and `pod_template_name` arguments to `@task` as described in the {ref}`Kubernetes task pod configuration guide ` instead. 9 | ``` 10 | 11 | Flyte tasks, represented by the {py:func}`@task ` decorator, are essentially single functions that run in one container. 
12 | However, there may be situations where you need to run a job with more than one container or require additional capabilities, such as: 13 | 14 | - Running a hyper-parameter optimizer that stores state in a Redis database 15 | - Simulating a service locally 16 | - Running a sidecar container for logging and monitoring purposes 17 | - Running a pod with additional capabilities, such as mounting volumes 18 | 19 | To support these use cases, Flyte provides a Pod configuration that allows you to customize the pod specification used to run the task. 20 | This simplifies the process of implementing the Kubernetes pod abstraction for running multiple containers. 21 | 22 | :::{note} 23 | A Kubernetes pod will not exit if it contains any sidecar containers (containers that do not exit automatically). 24 | You do not need to write any additional code to handle this, as Flyte automatically manages pod tasks. 25 | ::: 26 | 27 | ## Installation 28 | 29 | To use the Flytekit pod plugin, run the following command: 30 | 31 | ```{eval-rst} 32 | .. 
prompt:: bash 33 | 34 | pip install flytekitplugins-pod 35 | ``` 36 | 37 | ```{auto-examples-toc} 38 | pod 39 | ``` 40 | -------------------------------------------------------------------------------- /examples/k8s_pod_plugin/k8s_pod_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/k8s_pod_plugin/k8s_pod_plugin/__init__.py -------------------------------------------------------------------------------- /examples/k8s_pod_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-pod 6 | -------------------------------------------------------------------------------- /examples/k8s_spark_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM apache/spark-py:v3.4.0 5 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | ENV DEBIAN_FRONTEND=noninteractive 13 | ARG spark_uid=1001 14 | 15 | ## Install Python3 and other basics 16 | USER 0 17 | RUN apt-get update && apt-get install -y python3 python3-venv make build-essential libssl-dev python3-pip curl wget 18 | 19 | # Virtual environment 20 | ENV VENV /opt/venv 21 | RUN python3 -m venv ${VENV} 22 | ENV PATH="${VENV}/bin:$PATH" 23 | RUN pip3 install wheel 24 | 25 | # Install Python dependencies 26 | COPY requirements.in /root 27 | RUN pip install -r /root/requirements.in 28 | 29 | RUN wget https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.2.4/hadoop-aws-3.2.4.jar -P /opt/spark/jars && \ 30 | wget 
https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.901/aws-java-sdk-bundle-1.11.901.jar -P /opt/spark/jars 31 | 32 | # Copy the actual code 33 | COPY . /root/ 34 | 35 | # This tag is supplied by the build script and will be used to determine the version 36 | # when registering tasks, workflows, and launch plans 37 | ARG tag 38 | ENV FLYTE_INTERNAL_IMAGE $tag 39 | 40 | # Set /root user and group 41 | RUN chown -R ${spark_uid}:${spark_uid} /root 42 | 43 | # For spark we want to use the default entrypoint which is part of the 44 | # distribution, also enable the virtualenv for this image. 45 | ENTRYPOINT ["/opt/entrypoint.sh"] 46 | 47 | ENV HOME /root 48 | USER ${spark_uid} 49 | -------------------------------------------------------------------------------- /examples/k8s_spark_plugin/k8s_spark_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/k8s_spark_plugin/k8s_spark_plugin/__init__.py -------------------------------------------------------------------------------- /examples/k8s_spark_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-spark 2 | pandas 3 | -------------------------------------------------------------------------------- /examples/kfmpi_plugin/kfmpi_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/kfmpi_plugin/kfmpi_plugin/__init__.py -------------------------------------------------------------------------------- /examples/kfmpi_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-kfmpi 2 | tensorflow 3 | 
-------------------------------------------------------------------------------- /examples/kfpytorch_plugin/README.md: -------------------------------------------------------------------------------- 1 | (kf-pytorch-op)= 2 | 3 | # PyTorch Distributed 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, DistributedComputing, MachineLearning, KubernetesOperator, Advanced 7 | ``` 8 | 9 | The Kubeflow PyTorch plugin leverages the [Kubeflow training operator](https://github.com/kubeflow/training-operator) 10 | to offer a highly streamlined interface for conducting distributed training using different PyTorch backends. 11 | 12 | ## Install the plugin 13 | 14 | To use the PyTorch plugin, run the following command: 15 | 16 | ``` 17 | pip install flytekitplugins-kfpytorch 18 | ``` 19 | 20 | To enable the plugin in the backend, follow instructions outlined in the {ref}`deployment-plugin-setup-k8s` guide. 21 | 22 | ## Run the example on the Flyte cluster 23 | 24 | To run the provided examples on the Flyte cluster, use the following commands: 25 | 26 | Distributed pytorch training: 27 | 28 | ``` 29 | pyflyte run --remote pytorch_mnist.py pytorch_training_wf 30 | ``` 31 | 32 | Pytorch lightning training: 33 | 34 | ``` 35 | pyflyte run --remote pytorch_lightning_mnist_autoencoder.py train_workflow 36 | ``` 37 | 38 | ```{auto-examples-toc} 39 | pytorch_mnist 40 | pytorch_lightning_mnist_autoencoder 41 | ``` 42 | -------------------------------------------------------------------------------- /examples/kfpytorch_plugin/kfpytorch_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/kfpytorch_plugin/kfpytorch_plugin/__init__.py -------------------------------------------------------------------------------- /examples/kfpytorch_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | 
flytekit 2 | flytekitplugins-kfpytorch 3 | kubernetes 4 | lightning 5 | matplotlib 6 | torch 7 | tensorboardX 8 | torchvision 9 | lightning 10 | -------------------------------------------------------------------------------- /examples/kftensorflow_plugin/.gitignore: -------------------------------------------------------------------------------- 1 | training_checkpoints/ 2 | saved_model/ 3 | -------------------------------------------------------------------------------- /examples/kftensorflow_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM tensorflow/tensorflow:latest 5 | # You can enable GPU support by replacing the above line with: 6 | # FROM tensorflow/tensorflow:latest-gpu 7 | 8 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 9 | 10 | WORKDIR /root 11 | ENV LANG C.UTF-8 12 | ENV LC_ALL C.UTF-8 13 | ENV PYTHONPATH /root 14 | ENV DEBIAN_FRONTEND noninteractive 15 | ENV TERM linux 16 | 17 | # Install Python3.10 and other libraries 18 | RUN apt-get update \ 19 | && apt-get install -y software-properties-common \ 20 | && apt-get install -y \ 21 | build-essential \ 22 | git \ 23 | wget \ 24 | python3.10 \ 25 | python3.10-venv \ 26 | python3.10-dev \ 27 | libssl-dev \ 28 | python3-pip \ 29 | python3-wheel \ 30 | libuv1 31 | 32 | # Virtual environment 33 | ENV VENV /opt/venv 34 | RUN python3.10 -m venv ${VENV} 35 | ENV PATH="${VENV}/bin:$PATH" 36 | 37 | # Install wheel after venv is activated 38 | RUN pip3 install wheel 39 | 40 | # Install Python dependencies 41 | COPY requirements.in /root 42 | RUN pip install -r /root/requirements.in 43 | 44 | # Install TensorFlow 45 | RUN pip install tensorflow 46 | 47 | # Copy the actual code 48 | COPY . 
/root/ 49 | 50 | # This tag is supplied by the build script and will be used to determine the version 51 | # when registering tasks, workflows, and launch plans 52 | ARG tag 53 | ENV FLYTE_INTERNAL_IMAGE $tag 54 | -------------------------------------------------------------------------------- /examples/kftensorflow_plugin/README.md: -------------------------------------------------------------------------------- 1 | (kftensorflow-plugin)= 2 | 3 | # TensorFlow Distributed 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, DistributedComputing, MachineLearning, KubernetesOperator, Advanced 7 | ``` 8 | 9 | TensorFlow operator is useful to natively run distributed TensorFlow training jobs on Flyte. 10 | It leverages the [Kubeflow training operator](https://github.com/kubeflow/training-operator). 11 | 12 | ## Install the plugin 13 | 14 | To install the Kubeflow TensorFlow plugin, run the following command: 15 | 16 | ``` 17 | pip install flytekitplugins-kftensorflow 18 | ``` 19 | 20 | To enable the plugin in the backend, follow instructions outlined in the {ref}`deployment-plugin-setup-k8s` guide. 
21 | 22 | ## Run the example on the Flyte cluster 23 | 24 | To run the provided example on the Flyte cluster, use the following command: 25 | 26 | ``` 27 | pyflyte run --remote tf_mnist.py \ 28 | mnist_tensorflow_workflow 29 | ``` 30 | 31 | ```{auto-examples-toc} 32 | tf_mnist 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/kftensorflow_plugin/kftensorflow_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/kftensorflow_plugin/kftensorflow_plugin/__init__.py -------------------------------------------------------------------------------- /examples/kftensorflow_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | tensorflow-datasets 2 | flytekitplugins-kftensorflow 3 | tensorflow 4 | -------------------------------------------------------------------------------- /examples/memray_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | WORKDIR /root 11 | 12 | ENV VENV /opt/venv 13 | # Virtual environment 14 | RUN python3 -m venv ${VENV} 15 | ENV PATH="${VENV}/bin:$PATH" 16 | 17 | # Install Python dependencies 18 | COPY requirements.in /root 19 | RUN pip install -r /root/requirements.in 20 | 21 | # Copy the actual code 22 | COPY . 
/root 23 | 24 | # This tag is supplied by the build script and will be used to determine the version 25 | # when registering tasks, workflows, and launch plans 26 | ARG tag 27 | ENV FLYTE_INTERNAL_IMAGE $tag 28 | -------------------------------------------------------------------------------- /examples/memray_plugin/README.md: -------------------------------------------------------------------------------- 1 | (memray_plugin)= 2 | 3 | # Memray Profiling 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, Profiling, Observability 7 | ``` 8 | 9 | Memray tracks and reports memory allocations, both in python code and in compiled extension modules. 10 | This Memray Profiling plugin enables memory tracking on the Flyte task level and renders a memgraph profiling graph on Flyte Deck. 11 | 12 | First, install the Memray plugin: 13 | 14 | ```bash 15 | pip install flytekitplugins-memray 16 | ``` 17 | 18 | ```{auto-examples-toc} 19 | memray_example 20 | ``` 21 | -------------------------------------------------------------------------------- /examples/memray_plugin/memray_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/memray_plugin/memray_plugin/__init__.py -------------------------------------------------------------------------------- /examples/memray_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-memray 2 | -------------------------------------------------------------------------------- /examples/mlflow_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Install the AWS cli separately to prevent issues with boto being written over 10 | RUN pip3 
install awscli 11 | 12 | # Install gcloud for GCP 13 | RUN apt-get update && apt-get install -y curl 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | ENV VENV /opt/venv 22 | # Virtual environment 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root 28 | RUN pip install -r /root/requirements.in 29 | 30 | # Copy the actual code 31 | COPY . /root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | -------------------------------------------------------------------------------- /examples/mlflow_plugin/README.md: -------------------------------------------------------------------------------- 1 | (mlflow)= 2 | 3 | # MLFlow 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, Data, Metrics, Intermediate 7 | ``` 8 | 9 | The MLflow Tracking component is an API and UI for logging parameters, 10 | code versions, metrics, and output files when running your machine learning code and for later visualizing the results 11 | 12 | First, install the Flyte MLflow plugin: 13 | 14 | ```{eval-rst} 15 | .. prompt:: bash $ 16 | 17 | pip install flytekitplugins-mlflow 18 | ``` 19 | 20 | To log the metrics and parameters to Flyte deck, add {py:func}`@mlflow_autolog ` to the task. For example 21 | 22 | ```python 23 | @task(enable_deck=True) 24 | @mlflow_autolog(framework=mlflow.keras) 25 | def train_model(epochs: int): 26 | ... 27 | ``` 28 | 29 | To log the metric and parameters to a remote mlflow server, add default environment variable [MLFLOW_TRACKING_URI](https://mlflow.org/docs/latest/tracking.html#logging-to-a-tracking-server) to the flytepropeller config map. 30 | 31 | ```{eval-rst} 32 | .. 
prompt:: bash $ 33 | 34 | kubectl edit cm flyte-propeller-config 35 | ``` 36 | 37 | ```yaml 38 | plugins: 39 | k8s: 40 | default-cpus: 100m 41 | default-env-vars: 42 | - MLFLOW_TRACKING_URI: postgresql+psycopg2://postgres:@postgres.flyte.svc.cluster.local:5432/flyteadmin 43 | ``` 44 | 45 | :::{figure} https://raw.githubusercontent.com/flyteorg/static-resources/f4b53a550bed70d9d7722d523e0b7568b781fc7d/flytesnacks/integrations/mlflow/server.png 46 | :alt: MLflow UI 47 | :class: with-shadow 48 | ::: 49 | 50 | ```{auto-examples-toc} 51 | mlflow_example 52 | ``` 53 | -------------------------------------------------------------------------------- /examples/mlflow_plugin/mlflow_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/mlflow_plugin/mlflow_plugin/__init__.py -------------------------------------------------------------------------------- /examples/mlflow_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-mlflow 2 | tensorflow 3 | -------------------------------------------------------------------------------- /examples/mmcloud_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | WORKDIR /root 11 | 12 | ENV VENV /opt/venv 13 | # Virtual environment 14 | RUN python3 -m venv ${VENV} 15 | ENV PATH="${VENV}/bin:$PATH" 16 | 17 | # Install Python dependencies 18 | COPY requirements.in /root 19 | RUN pip install -r /root/requirements.in 20 | 21 | # Copy the actual code 22 | COPY . 
/root 23 | 24 | # This tag is supplied by the build script and will be used to determine the version 25 | # when registering tasks, workflows, and launch plans 26 | ARG tag 27 | ENV FLYTE_INTERNAL_IMAGE $tag 28 | -------------------------------------------------------------------------------- /examples/mmcloud_agent/mmcloud_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/mmcloud_agent/mmcloud_agent/__init__.py -------------------------------------------------------------------------------- /examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # (mmcloud_agent_example_usage)= 3 | # # Memory Machine Cloud agent example usage 4 | # 5 | # This example shows how to use the MMCloud agent to execute tasks on MemVerge Memory Machine Cloud. 6 | 7 | # %% 8 | from flytekit import Resources, task, workflow 9 | from flytekitplugins.mmcloud import MMCloudConfig 10 | 11 | # %% [markdown] 12 | # `MMCloudConfig` configures `MMCloudTask`. Tasks specified with `MMCloudConfig` will be executed using MMCloud. Tasks will be executed with requests `cpu="1"` and `mem="1Gi"` by default. 
13 | 14 | 15 | # %% 16 | @task(task_config=MMCloudConfig()) 17 | def to_str(i: int) -> str: 18 | return str(i) 19 | 20 | 21 | @task(task_config=MMCloudConfig()) 22 | def to_int(s: str) -> int: 23 | return int(s) 24 | 25 | 26 | # %% [markdown] 27 | # [Resource](https://docs.flyte.org/en/latest/user_guide/productionizing/customizing_task_resources.html) (cpu and mem) requests and limits, [container](https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/index.html#customizing-dependencies) images, and [environment](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html) variable specifications are supported. 28 | 29 | 30 | # %% 31 | @task( 32 | task_config=MMCloudConfig(submit_extra="--migratePolicy [enable=true]"), 33 | requests=Resources(cpu="1", mem="1Gi"), 34 | limits=Resources(cpu="2", mem="4Gi"), 35 | environment={"KEY": "value"}, 36 | ) 37 | def concatenate_str(s1: str, s2: str) -> str: 38 | return s1 + s2 39 | 40 | 41 | @workflow 42 | def concatenate_int_wf(i1: int, i2: int) -> int: 43 | i1_str = to_str(i=i1) 44 | i2_str = to_str(i=i2) 45 | return to_int(s=concatenate_str(s1=i1_str, s2=i2_str)) 46 | -------------------------------------------------------------------------------- /examples/mmcloud_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-mmcloud 2 | -------------------------------------------------------------------------------- /examples/mnist_classifier/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Set your wandb API key and user name. Get the API key from https://wandb.ai/authorize. 
10 | # ENV WANDB_API_KEY 11 | # ENV WANDB_USERNAME 12 | 13 | # Install the AWS cli for AWS support 14 | RUN pip install awscli 15 | 16 | # Install gcloud for GCP 17 | RUN apt-get update && apt-get install -y make build-essential libssl-dev curl 18 | 19 | # Virtual environment 20 | ENV VENV /opt/venv 21 | RUN python3 -m venv ${VENV} 22 | ENV PATH="${VENV}/bin:$PATH" 23 | 24 | # Install Python dependencies 25 | COPY requirements.in /root 26 | RUN pip install -r /root/requirements.in 27 | 28 | # Copy the actual code 29 | COPY . /root/ 30 | 31 | # This tag is supplied by the build script and will be used to determine the version 32 | # when registering tasks, workflows, and launch plans 33 | ARG tag 34 | ENV FLYTE_INTERNAL_IMAGE $tag 35 | -------------------------------------------------------------------------------- /examples/mnist_classifier/mnist_classifier/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/mnist_classifier/mnist_classifier/__init__.py -------------------------------------------------------------------------------- /examples/mnist_classifier/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=0.32.3 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | torch 6 | torchvision 7 | wandb 8 | numpy<1.22.0 9 | pandas<=1.4.3 10 | -------------------------------------------------------------------------------- /examples/modin_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Install the AWS cli separately to prevent issues with boto being written over 10 | RUN pip3 install awscli 11 | 12 | # Install gcloud for GCP 13 | RUN apt-get update && 
apt-get install -y curl 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | ENV VENV /opt/venv 22 | # Virtual environment 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root/. 28 | RUN pip install -r /root/requirements.in 29 | 30 | # Copy the actual code 31 | COPY . /root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | -------------------------------------------------------------------------------- /examples/modin_plugin/README.md: -------------------------------------------------------------------------------- 1 | (modin-integration)= 2 | 3 | # Modin 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, DataFrame, MachineLearning, Intermediate 7 | ``` 8 | 9 | Modin is a pandas-accelerator that helps handle large datasets. 10 | Pandas works gracefully with small datasets since it is inherently single-threaded, and designed to work on a single CPU core. 11 | With large datasets, the performance of pandas drops (becomes slow or runs out of memory) due to single core usage. 12 | This is where Modin can be helpful. 13 | 14 | Instead of optimizing pandas workflows for a specific setup, we can speed up pandas workflows by utilizing all the resources (cores) available in the system using the concept of `parallelism`, which is possible through modin. [Here](https://modin.readthedocs.io/en/stable/getting_started/why_modin/pandas.html#scalablity-of-implementation) is a visual representation of how the cores are utilized in case of Pandas and Modin. 15 | 16 | ## Installation 17 | 18 | ```bash 19 | pip install flytekitplugins-modin 20 | ``` 21 | 22 | ## How is Modin different? 
23 | 24 | Modin **scales** the Pandas workflows by changing only a **single line of code**. 25 | 26 | The plugin supports the usage of Modin DataFrame as an input to and output of a task/workflow, similar to how a pandas DataFrame can be used. 27 | 28 | ```{auto-examples-toc} 29 | knn_classifier 30 | ``` 31 | -------------------------------------------------------------------------------- /examples/modin_plugin/modin_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/modin_plugin/modin_plugin/__init__.py -------------------------------------------------------------------------------- /examples/modin_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-modin 6 | scikit-learn 7 | modin 8 | ray 9 | pandas 10 | -------------------------------------------------------------------------------- /examples/neptune_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Install Python dependencies 10 | COPY requirements.in /root 11 | RUN pip install uv && uv pip install --system --no-cache-dir -r /root/requirements.in 12 | 13 | # Copy the actual code 14 | COPY . 
/root 15 | 16 | # This tag is supplied by the build script and will be used to determine the version 17 | # when registering tasks, workflows, and launch plans 18 | ARG tag 19 | ENV FLYTE_INTERNAL_IMAGE $tag 20 | -------------------------------------------------------------------------------- /examples/neptune_plugin/README.md: -------------------------------------------------------------------------------- 1 | (neptune_plugin)= 2 | 3 | # Neptune plugin 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, Data, Metrics, Intermediate 7 | ``` 8 | 9 | [Neptune](https://neptune.ai/) is an experiment tracker for large-scale model training. It allows AI researchers to monitor their model training in real time, visualize and compare experiments, and collaborate on them with a team. This plugin enables seamless use of Neptune within Flyte by configuring links between the two platforms. You can find more information about how to use Neptune in their [documentation](https://docs.neptune.ai/). 10 | 11 | ## Installation 12 | 13 | To install the Flyte Neptune plugin, run the following command: 14 | 15 | ```bash 16 | pip install flytekitplugins-neptune 17 | ``` 18 | 19 | ## Example usage 20 | 21 | For a usage example, see the {doc}`Neptune example `. 22 | 23 | ## Local testing 24 | 25 | To run {doc}`Neptune example ` locally: 26 | 27 | 1. Create an account on [Neptune](https://neptune.ai/). 28 | 2. Create a project on Neptune. 29 | 3. In the example, set `NEPTUNE_PROJECT` to your project name. 30 | 4. Add a secret using [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html) with `key="neptune-api-token"` and `group="neptune-api-group"` 31 | 5. If you want to see the dynamic log links in the UI, then add the configuration in the next section. 
32 | 33 | ## Flyte deployment configuration 34 | 35 | To enable dynamic log links, add the plugin to Flyte's configuration file: 36 | ```yaml 37 | plugins: 38 | logs: 39 | dynamic-log-links: 40 | - neptune-run-id: 41 | displayName: Neptune 42 | templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.project }}?query=(%60flyte%2Fexecution_id%60%3Astring%20%3D%20%22{{ .executionName }}-{{ .nodeId }}-{{ .taskRetryAttempt }}%22)&lbViewUnpacked=true" 43 | ``` 44 | 45 | ```{auto-examples-toc} 46 | neptune_example 47 | ``` 48 | -------------------------------------------------------------------------------- /examples/neptune_plugin/neptune_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/neptune_plugin/neptune_plugin/__init__.py -------------------------------------------------------------------------------- /examples/neptune_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-neptune 2 | xgboost 3 | neptune 4 | neptune-xgboost 5 | scikit-learn==1.5.1 6 | numpy==1.26.1 7 | matplotlib==3.9.2 8 | -------------------------------------------------------------------------------- /examples/nim_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM python:3.11-slim-buster 5 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | 13 | # Install Python dependencies 14 | COPY requirements.in /root 15 | RUN pip install -r /root/requirements.in 16 | 17 | # Copy the actual code 18 | COPY . 
/root/ 19 | 20 | # This tag is supplied by the build script and will be used to determine the version 21 | # when registering tasks, workflows, and launch plans 22 | ARG tag 23 | ENV FLYTE_INTERNAL_IMAGE $tag 24 | -------------------------------------------------------------------------------- /examples/nim_plugin/README.md: -------------------------------------------------------------------------------- 1 | (nim_plugin)= 2 | 3 | # NIM 4 | 5 | ```{eval-rst} 6 | .. tags:: Inference, NVIDIA 7 | ``` 8 | 9 | Serve optimized model containers with NIM in a Flyte task. 10 | 11 | [NVIDIA NIM](https://www.nvidia.com/en-in/ai/), part of NVIDIA AI Enterprise, provides a streamlined path 12 | for developing AI-powered enterprise applications and deploying AI models in production. 13 | It includes an out-of-the-box optimization suite, enabling AI model deployment across any cloud, 14 | data center, or workstation. Since NIM can be self-hosted, there is greater control over cost, data privacy, 15 | and more visibility into behind-the-scenes operations. 16 | 17 | With NIM, you can invoke the model's endpoint as if it is hosted locally, minimizing network overhead. 18 | 19 | ## Installation 20 | 21 | To use the NIM plugin, run the following command: 22 | 23 | ``` 24 | pip install flytekitplugins-inference 25 | ``` 26 | 27 | ## Example usage 28 | 29 | For a usage example, see {doc}`NIM example usage `. 30 | 31 | ```{note} 32 | NIM can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod. 
33 | ``` 34 | 35 | ```{toctree} 36 | :maxdepth: -1 37 | :hidden: 38 | 39 | serve_nim_container 40 | ``` 41 | -------------------------------------------------------------------------------- /examples/nim_plugin/nim_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/nim_plugin/nim_plugin/__init__.py -------------------------------------------------------------------------------- /examples/nim_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-inference>=1.13.1a5 2 | -------------------------------------------------------------------------------- /examples/nlp_processing/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | RUN : \ 10 | && apt-get update \ 11 | && apt install -y software-properties-common \ 12 | && add-apt-repository ppa:deadsnakes/ppa 13 | 14 | RUN : \ 15 | && apt-get update \ 16 | && apt-get install -y python3.8 python3-pip python3-venv make build-essential libssl-dev curl vim 17 | 18 | # This is necessary for opencv to work 19 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg 20 | 21 | # Install the AWS cli separately to prevent issues with boto being written over 22 | RUN pip3 install awscli 23 | 24 | WORKDIR /opt 25 | RUN curl https://sdk.cloud.google.com > install.sh 26 | RUN bash /opt/install.sh --install-dir=/opt 27 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 28 | WORKDIR /root 29 | 30 | # Virtual environment 31 | ENV VENV /opt/venv 32 | RUN python3 -m venv ${VENV} 33 | ENV PATH="${VENV}/bin:$PATH" 34 | 35 | # Install Python dependencies 36 | COPY requirements.in /root 37 | RUN ${VENV}/bin/pip install -r 
/root/requirements.in 38 | 39 | # Copy the actual code 40 | COPY . /root/ 41 | 42 | # Copy over the helper script that the SDK relies on 43 | RUN cp ${VENV}/bin/flytekit_venv /usr/local/bin/ 44 | RUN chmod a+x /usr/local/bin/flytekit_venv 45 | 46 | # This tag is supplied by the build script and will be used to determine the version 47 | # when registering tasks, workflows, and launch plans 48 | ARG tag 49 | ENV FLYTE_INTERNAL_IMAGE $tag 50 | -------------------------------------------------------------------------------- /examples/nlp_processing/README.md: -------------------------------------------------------------------------------- 1 | # NLP Processing 2 | 3 | ```{eval-rst} 4 | .. tags:: MachineLearning, UI, Intermediate 5 | ``` 6 | 7 | This tutorial will demonstrate how to process text data and generate word embeddings and visualizations 8 | as part of a Flyte workflow. It's an adaptation of the official Gensim [Word2Vec tutorial](https://radimrehurek.com/gensim/auto_examples/tutorials/run_word2vec.html). 9 | 10 | ## About Gensim 11 | 12 | Gensim is a popular open-source natural language processing (NLP) library used to process 13 | large corpora (can be larger than RAM). 14 | It has efficient multicore implementations of a number of algorithms such as [Latent Semantic Analysis](http://lsa.colorado.edu/papers/dp1.LSAintro.pdf), [Latent Dirichlet Allocation (LDA)](https://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf), 15 | [Word2Vec deep learning](https://arxiv.org/pdf/1301.3781.pdf) to perform complex tasks including understanding 16 | document relationships, topic modeling, learning word embeddings, and more. 17 | 18 | You can read more about Gensim [here](https://radimrehurek.com/gensim/). 19 | 20 | ## Data 21 | 22 | The dataset used for this tutorial is the open-source [Lee Background Corpus](https://github.com/RaRe-Technologies/gensim/blob/develop/gensim/test/test_data/lee_background.cor) 23 | that comes with the Gensim library. 
24 | 25 | ## Step-by-Step Process 26 | 27 | The following points outline the modelling process: 28 | 29 | - Returns a preprocessed (tokenized, stop words excluded, lemmatized) corpus from the custom iterator. 30 | - Trains the Word2vec model on the preprocessed corpus. 31 | - Generates a bag of words from the corpus and trains the LDA model. 32 | - Saves the LDA and Word2Vec models to disk. 33 | - Deserializes the Word2Vec model, runs word similarity and computes word movers distance. 34 | - Reduces the dimensionality (using tsne) and plots the word embeddings. 35 | 36 | Let's dive into the code! 37 | 38 | ## Examples 39 | 40 | ```{auto-examples-toc} 41 | word2vec_and_lda 42 | ``` 43 | -------------------------------------------------------------------------------- /examples/nlp_processing/nlp_processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/nlp_processing/nlp_processing/__init__.py -------------------------------------------------------------------------------- /examples/nlp_processing/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | numpy 6 | gensim 7 | nltk 8 | plotly 9 | pyemd 10 | scikit-learn 11 | scipy==1.10.1 12 | -------------------------------------------------------------------------------- /examples/ollama_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM python:3.11-slim-buster 5 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | 13 | # Install Python dependencies 14 | COPY 
requirements.in /root 15 | RUN pip install -r /root/requirements.in 16 | 17 | # Copy the actual code 18 | COPY . /root/ 19 | 20 | # This tag is supplied by the build script and will be used to determine the version 21 | # when registering tasks, workflows, and launch plans 22 | ARG tag 23 | ENV FLYTE_INTERNAL_IMAGE $tag 24 | -------------------------------------------------------------------------------- /examples/ollama_plugin/README.md: -------------------------------------------------------------------------------- 1 | (ollama_plugin)= 2 | 3 | # Ollama 4 | 5 | ```{eval-rst} 6 | .. tags:: Inference, LLM 7 | ``` 8 | 9 | Serve large language models (LLMs) in a Flyte task. 10 | 11 | [Ollama](https://ollama.com/) simplifies the process of serving fine-tuned LLMs. 12 | Whether you're generating predictions from a customized model or deploying it across different hardware setups, 13 | Ollama enables you to encapsulate the entire workflow in a single pipeline. 14 | 15 | ## Installation 16 | 17 | To use the Ollama plugin, run the following command: 18 | 19 | ``` 20 | pip install flytekitplugins-inference 21 | ``` 22 | 23 | ## Example usage 24 | 25 | For a usage example, see {doc}`Ollama example usage `. 26 | 27 | ```{note} 28 | Ollama can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod. 
29 | ``` 30 | 31 | ```{toctree} 32 | :maxdepth: -1 33 | :hidden: 34 | 35 | serve_llm 36 | ``` 37 | -------------------------------------------------------------------------------- /examples/ollama_plugin/ollama_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/ollama_plugin/ollama_plugin/__init__.py -------------------------------------------------------------------------------- /examples/ollama_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-inference>=1.13.6b1 2 | -------------------------------------------------------------------------------- /examples/onnx_plugin/README.md: -------------------------------------------------------------------------------- 1 | (onnx)= 2 | 3 | # ONNX 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, MachineLearning, Intermediate 7 | ``` 8 | 9 | Open Neural Network Exchange ([ONNX](https://github.com/onnx/onnx)) is an open standard format for representing machine learning 10 | and deep learning models. It enables interoperability between different frameworks and streamlines the path from research to production. 11 | 12 | The flytekit onnx type plugin comes in three flavors: 13 | 14 | ::::{tab-set} 15 | 16 | :::{tab-item} ScikitLearn 17 | 18 | ```{code-block} 19 | pip install flytekitplugins-onnxscikitlearn 20 | ``` 21 | 22 | This plugin enables the conversion from scikitlearn models to ONNX models. 23 | ::: 24 | 25 | :::{tab-item} TensorFlow 26 | 27 | ```{code-block} 28 | pip install flytekitplugins-onnxtensorflow 29 | ``` 30 | 31 | This plugin enables the conversion from tensorflow models to ONNX models. 32 | ::: 33 | 34 | :::{tab-item} PyTorch 35 | 36 | ```{code-block} 37 | pip install flytekitplugins-onnxpytorch 38 | ``` 39 | 40 | This plugin enables the conversion from pytorch models to ONNX models.
41 | ::: 42 | 43 | :::: 44 | 45 | :::{note} 46 | If you'd like to add support for a new framework, please create an issue and submit a pull request to the flytekit repo. 47 | You can find the ONNX plugin source code [here](https://github.com/flyteorg/flytekit/tree/master/plugins). 48 | ::: 49 | 50 | ```{auto-examples-toc} 51 | pytorch_onnx 52 | scikitlearn_onnx 53 | tensorflow_onnx 54 | ``` 55 | -------------------------------------------------------------------------------- /examples/onnx_plugin/onnx_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/onnx_plugin/onnx_plugin/__init__.py -------------------------------------------------------------------------------- /examples/openai_batch_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM python:3.11-slim-buster 5 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | 13 | # Install Python dependencies 14 | COPY requirements.in /root 15 | RUN pip install -r /root/requirements.in 16 | 17 | # Copy the actual code 18 | COPY . /root/ 19 | 20 | # This tag is supplied by the build script and will be used to determine the version 21 | # when registering tasks, workflows, and launch plans 22 | ARG tag 23 | ENV FLYTE_INTERNAL_IMAGE $tag 24 | -------------------------------------------------------------------------------- /examples/openai_batch_agent/README.md: -------------------------------------------------------------------------------- 1 | (openai_batch_agent)= 2 | 3 | # OpenAI Batch Agent 4 | 5 | ```{eval-rst} 6 | .. 
tags:: Integration, Intermediate, OpenAI 7 | ``` 8 | 9 | The Batch API agent allows you to submit requests for asynchronous batch processing on OpenAI. 10 | You can provide either a JSONL file or a JSON iterator, and the agent handles the upload to OpenAI, 11 | creation of the batch, and downloading of the output and error files. 12 | 13 | ## Installation 14 | 15 | To use the OpenAI Batch agent, run the following command: 16 | 17 | ``` 18 | pip install flytekitplugins-openai 19 | ``` 20 | 21 | ## Example usage 22 | 23 | For a usage example, see {doc}`OpenAI Batch agent example usage `. 24 | 25 | ## Local testing 26 | 27 | To test an agent locally, create a class for the agent task that inherits from 28 | [SyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L222-L256) 29 | or [AsyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L259-L354). 30 | These mixins can handle synchronous and asynchronous tasks, respectively, 31 | and allow flytekit to mimic FlytePropeller's behavior in calling the agent. 32 | For more information, see "[Testing agents locally](https://docs.flyte.org/en/latest/flyte_agents/testing_agents_locally.html)". 33 | 34 | ## Flyte deployment configuration 35 | 36 | ```{note} 37 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 38 | ``` 39 | 40 | To enable the OpenAI Batch agent in your Flyte deployment, refer to the 41 | {ref}`OpenAI Batch agent setup guide `.
42 | 43 | ```{toctree} 44 | :maxdepth: -1 45 | :hidden: 46 | 47 | openai_batch_agent_example_usage 48 | ``` 49 | -------------------------------------------------------------------------------- /examples/openai_batch_agent/openai_batch_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/openai_batch_agent/openai_batch_agent/__init__.py -------------------------------------------------------------------------------- /examples/openai_batch_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-openai>=1.12.1b2 2 | -------------------------------------------------------------------------------- /examples/pandera_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Install the AWS cli separately to prevent issues with boto being written over 10 | RUN pip3 install awscli 11 | 12 | # Install gcloud for GCP 13 | RUN apt-get update && apt-get install -y curl 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | ENV VENV /opt/venv 22 | # Virtual environment 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root/. 28 | RUN pip install -r /root/requirements.in 29 | 30 | # Copy the actual code 31 | COPY . 
/root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | -------------------------------------------------------------------------------- /examples/pandera_plugin/pandera_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/pandera_plugin/pandera_plugin/__init__.py -------------------------------------------------------------------------------- /examples/pandera_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-pandera>=0.16.0 6 | hypothesis 7 | joblib 8 | pandas 9 | pandera>=0.20.0 10 | scikit-learn 11 | -------------------------------------------------------------------------------- /examples/papermill_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Install the AWS cli separately to prevent issues with boto being written over 10 | RUN pip3 install awscli 11 | 12 | # Install gcloud for GCP 13 | RUN apt-get update && apt-get install -y curl 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | ENV VENV /opt/venv 22 | # Virtual environment 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root/. 28 | RUN pip install -r /root/requirements.in 29 | 30 | # Copy the actual code 31 | COPY . 
/root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | -------------------------------------------------------------------------------- /examples/papermill_plugin/README.md: -------------------------------------------------------------------------------- 1 | # Papermill 2 | 3 | ```{eval-rst} 4 | .. tags:: Integration, Jupyter, Intermediate 5 | ``` 6 | 7 | It is possible to run a Jupyter notebook as a Flyte task using [papermill](https://github.com/nteract/papermill). 8 | Papermill executes the notebook as a whole, so before using this plugin, it is essential to construct your notebook as 9 | recommended by papermill. When using this plugin, there are a few important things to keep in mind: 10 | 11 | 1. This plugin can be used for any task - type. 12 | : - It can be python code, which can be a tensorflow model, a data transformation, etc - but things that run in a container 13 | and you would typically write in a `@task`. 14 | - It can be a {py:func}`~flytekit.dynamic` workflow. 15 | - It can be a any other plugin like `Spark`, `SageMaker` etc, **ensure that the plugin is installed as well** 16 | 2. Flytekit will execute the notebook and capture the output notebook as an *.ipynb* file and an HTML rendered notebook as well 17 | 3. Flytekit will pass the inputs into the notebook as long as you have the first cell annotated as `parameters` and inputs are specified 18 | 4. Flytekit will read the outputs from the notebook, as long as you use annotate the notebook with `outputs` and outputs are specified 19 | 20 | ## Installation 21 | 22 | To use the flytekit papermill plugin simply run the following: 23 | 24 | ```{eval-rst} 25 | .. 
prompt:: bash 26 | 27 | pip install flytekitplugins-papermill 28 | ``` 29 | 30 | ```{auto-examples-toc} 31 | simple 32 | nb_simple 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/papermill_plugin/papermill_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/papermill_plugin/papermill_plugin/__init__.py -------------------------------------------------------------------------------- /examples/papermill_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-papermill>=0.16.0 6 | -------------------------------------------------------------------------------- /examples/perian_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | # This is necessary for opencv to work 11 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl 12 | 13 | WORKDIR /root 14 | 15 | ENV VENV /opt/venv 16 | # Virtual environment 17 | RUN python3 -m venv ${VENV} 18 | ENV PATH="${VENV}/bin:$PATH" 19 | 20 | # Install Python dependencies 21 | COPY requirements.in /root 22 | RUN pip install -r /root/requirements.in 23 | RUN pip freeze 24 | 25 | # Copy the actual code 26 | COPY . 
/root 27 | 28 | # This tag is supplied by the build script and will be used to determine the version 29 | # when registering tasks, workflows, and launch plans 30 | ARG tag 31 | ENV FLYTE_INTERNAL_IMAGE $tag 32 | -------------------------------------------------------------------------------- /examples/perian_agent/README.md: -------------------------------------------------------------------------------- 1 | ```{eval-rst} 2 | .. tags:: Cloud, GPU, Integration, Advanced 3 | ``` 4 | 5 | (perian_agent)= 6 | 7 | # PERIAN Job Platform Agent 8 | 9 | The PERIAN Flyte Agent enables you to execute Flyte tasks on the [PERIAN Sky Platform](https://perian.io/). PERIAN allows the execution of any task on servers aggregated from multiple cloud providers. 10 | 11 | Example usage: 12 | 13 | ```{auto-examples-toc} 14 | example 15 | ``` 16 | 17 | To get started with PERIAN, see the [PERIAN documentation](https://perian.io/docs/overview) and the [PERIAN Flyte Agent documentation](https://perian.io/docs/flyte-getting-started). 18 | 19 | ## Agent setup 20 | 21 | Consult the [PERIAN Flyte Agent setup guide](https://perian.io/docs/flyte-setup-guide). 22 | -------------------------------------------------------------------------------- /examples/perian_agent/perian_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/perian_agent/perian_agent/__init__.py -------------------------------------------------------------------------------- /examples/perian_agent/perian_agent/example.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # (example)= 3 | # # PERIAN agent example usage 4 | # 5 | # This example shows how to use the PERIAN agent to execute tasks on PERIAN Job Platform. 
6 | 7 | # %% 8 | from flytekit import ImageSpec, task, workflow 9 | from flytekitplugins.perian_job import PerianConfig 10 | 11 | image_spec = ImageSpec( 12 | name="flyte-test", 13 | registry="my-registry", 14 | python_version="3.11", 15 | apt_packages=["wget", "curl", "git"], 16 | packages=[ 17 | "flytekitplugins-perian-job", 18 | ], 19 | ) 20 | 21 | 22 | # %% [markdown] 23 | # `PerianConfig` configures `PerianTask`. Tasks specified with `PerianConfig` will be executed on PERIAN Job Platform. 24 | 25 | 26 | # %% 27 | @task( 28 | container_image=image_spec, 29 | task_config=PerianConfig( 30 | accelerators=1, 31 | accelerator_type="A100", 32 | ), 33 | ) 34 | def perian_hello(name: str) -> str: 35 | return f"hello {name}!" 36 | 37 | 38 | @workflow 39 | def my_wf(name: str = "world") -> str: 40 | return perian_hello(name=name) 41 | -------------------------------------------------------------------------------- /examples/perian_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=1.7.0 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-perian-job 6 | -------------------------------------------------------------------------------- /examples/pima_diabetes/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:focal 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | RUN : \ 10 | && apt-get update \ 11 | && apt install -y software-properties-common \ 12 | && add-apt-repository ppa:deadsnakes/ppa 13 | 14 | RUN : \ 15 | && apt-get update \ 16 | && apt-get install -y python3.8 python3-pip python3-venv make build-essential libssl-dev curl vim 17 | 18 | # This is necessary for opencv to work 19 | RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg 20 | 21 | # Install the AWS cli separately to prevent issues with boto being written over 
22 | RUN pip3 install awscli 23 | 24 | WORKDIR /opt 25 | RUN curl https://sdk.cloud.google.com > install.sh 26 | RUN bash /opt/install.sh --install-dir=/opt 27 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 28 | WORKDIR /root 29 | 30 | # Virtual environment 31 | ENV VENV /opt/venv 32 | RUN python3 -m venv ${VENV} 33 | ENV PATH="${VENV}/bin:$PATH" 34 | 35 | # Install Python dependencies 36 | COPY requirements.in /root 37 | RUN ${VENV}/bin/pip install -r /root/requirements.in 38 | 39 | # Copy the actual code 40 | COPY . /root/ 41 | 42 | # Copy over the helper script that the SDK relies on 43 | RUN cp ${VENV}/bin/flytekit_venv /usr/local/bin/ 44 | RUN chmod a+x /usr/local/bin/flytekit_venv 45 | 46 | # This tag is supplied by the build script and will be used to determine the version 47 | # when registering tasks, workflows, and launch plans 48 | ARG tag 49 | ENV FLYTE_INTERNAL_IMAGE $tag 50 | -------------------------------------------------------------------------------- /examples/pima_diabetes/pima_diabetes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/pima_diabetes/pima_diabetes/__init__.py -------------------------------------------------------------------------------- /examples/pima_diabetes/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit>=0.32.3 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | xgboost<2.1.0 6 | joblib 7 | scikit-learn 8 | tabulate 9 | matplotlib 10 | pandas 11 | -------------------------------------------------------------------------------- /examples/productionizing/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-buster 2 | LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV 
LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | WORKDIR /root 10 | 11 | RUN apt-get update && apt-get install -y build-essential 12 | 13 | # Virtual environment 14 | ENV VENV /opt/venv 15 | RUN python3 -m venv ${VENV} 16 | ENV PATH="${VENV}/bin:$PATH" 17 | 18 | # Install Python dependencies 19 | RUN pip install flytekit 20 | 21 | # Copy the actual code 22 | COPY . /root 23 | 24 | # This tag is supplied by the build script and will be used to determine the version 25 | # when registering tasks, workflows, and launch plans 26 | ARG tag 27 | ENV FLYTE_INTERNAL_IMAGE $tag 28 | -------------------------------------------------------------------------------- /examples/productionizing/README.md: -------------------------------------------------------------------------------- 1 | # Productionizing 2 | 3 | These examples demonstrate how to take Flyte pipelines into production, modeling concepts such as customizing resources, notifications, scheduling, GPU configuration, secrets, spot instances and more. 
4 | -------------------------------------------------------------------------------- /examples/productionizing/productionizing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/productionizing/productionizing/__init__.py -------------------------------------------------------------------------------- /examples/productionizing/productionizing/customizing_resources.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from flytekit import Resources, task, workflow 4 | 5 | 6 | # Define a task and configure the resources to be allocated to it 7 | @task( 8 | requests=Resources(cpu="1", mem="100Mi", ephemeral_storage="200Mi"), 9 | limits=Resources(cpu="2", mem="150Mi", ephemeral_storage="500Mi"), 10 | ) 11 | def count_unique_numbers(x: typing.List[int]) -> int: 12 | s = set() 13 | for i in x: 14 | s.add(i) 15 | return len(s) 16 | 17 | 18 | # Define a task that computes the square of a number 19 | @task 20 | def square(x: int) -> int: 21 | return x * x 22 | 23 | 24 | # Use the tasks decorated with memory and storage hints 25 | # like regular tasks in a workflow 26 | @workflow 27 | def my_workflow(x: typing.List[int]) -> int: 28 | return square(x=count_unique_numbers(x=x)) 29 | 30 | 31 | # Run the workflow locally 32 | if __name__ == "__main__": 33 | print(count_unique_numbers(x=[1, 1, 2])) 34 | print(my_workflow(x=[1, 1, 2])) 35 | 36 | # In the example below, we sse the `with_overrides` method 37 | # to override the resources allocated to the tasks dynamically. 
38 | import typing # noqa: E402 39 | 40 | from flytekit import Resources, task, workflow # noqa: E402 41 | 42 | 43 | # Define a task and configure the resources to be allocated to it 44 | @task(requests=Resources(cpu="1", mem="200Mi"), limits=Resources(cpu="2", mem="350Mi")) 45 | def count_unique_numbers_1(x: typing.List[int]) -> int: 46 | s = set() 47 | for i in x: 48 | s.add(i) 49 | return len(s) 50 | 51 | 52 | # Define a task that computes the square of a number 53 | @task 54 | def square_1(x: int) -> int: 55 | return x * x 56 | 57 | 58 | # The `with_overrides` method overrides the old resource allocations. 59 | @workflow 60 | def my_pipeline(x: typing.List[int]) -> int: 61 | return square_1(x=count_unique_numbers_1(x=x)).with_overrides(limits=Resources(cpu="6", mem="500Mi")) 62 | 63 | 64 | # Run the workflow locally 65 | if __name__ == "__main__": 66 | print(count_unique_numbers_1(x=[1, 1, 2])) 67 | print(my_pipeline(x=[1, 1, 2])) 68 | -------------------------------------------------------------------------------- /examples/productionizing/productionizing/lp_schedules.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from flytekit import task, workflow 4 | 5 | 6 | @task 7 | def format_date(run_date: datetime) -> str: 8 | return run_date.strftime("%Y-%m-%d %H:%M") 9 | 10 | 11 | @workflow 12 | def date_formatter_wf(kickoff_time: datetime): 13 | formatted_kickoff_time = format_date(run_date=kickoff_time) 14 | print(formatted_kickoff_time) 15 | 16 | 17 | from flytekit import CronSchedule, LaunchPlan # noqa: E402 18 | 19 | # creates a launch plan that runs every minute. 
20 | cron_lp = LaunchPlan.get_or_create( 21 | name="my_cron_scheduled_lp", 22 | workflow=date_formatter_wf, 23 | schedule=CronSchedule( 24 | # Note that the ``kickoff_time_input_arg`` matches the workflow input we defined above: kickoff_time 25 | # But in case you are using the AWS scheme of schedules and not using the native scheduler then switch over the schedule parameter with cron_expression 26 | schedule="*/1 * * * *", # Following schedule runs every min 27 | kickoff_time_input_arg="kickoff_time", 28 | ), 29 | ) 30 | 31 | # If you prefer to use an interval rather than a cron scheduler to schedule 32 | # your workflows, you can use the fixed-rate scheduler. 33 | # A fixed-rate scheduler runs at the specified interval. 34 | from datetime import timedelta # noqa: E402 35 | 36 | from flytekit import FixedRate, LaunchPlan # noqa: E402 37 | 38 | 39 | @task 40 | def be_positive(name: str) -> str: 41 | return f"You're awesome, {name}" 42 | 43 | 44 | @workflow 45 | def positive_wf(name: str): 46 | reminder = be_positive(name=name) 47 | print(f"{reminder}") 48 | 49 | 50 | fixed_rate_lp = LaunchPlan.get_or_create( 51 | name="my_fixed_rate_lp", 52 | workflow=positive_wf, 53 | # Note that the workflow above doesn't accept any kickoff time arguments. 54 | # We just omit the ``kickoff_time_input_arg`` from the FixedRate schedule invocation 55 | schedule=FixedRate(duration=timedelta(minutes=10)), 56 | fixed_inputs={"name": "you"}, 57 | ) 58 | -------------------------------------------------------------------------------- /examples/productionizing/productionizing/reference_launch_plan.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from flytekit import reference_launch_plan, workflow 4 | from flytekit.types.file import FlyteFile 5 | 6 | # A `flytekit.reference_launch_plan` references previously defined, serialized, 7 | # and registered Flyte launch plans. 
8 | # You can reference launch plans from other projects and create workflows 9 | # that use launch plans declared by others. 10 | 11 | # The following example illustrates how to use reference launch plans 12 | 13 | 14 | @reference_launch_plan( 15 | project="flytesnacks", 16 | domain="development", 17 | name="data_types_and_io.file.normalize_csv_file", 18 | version="{{ registration.version }}", 19 | ) 20 | def normalize_csv_file( 21 | csv_url: FlyteFile, 22 | column_names: List[str], 23 | columns_to_normalize: List[str], 24 | output_location: str, 25 | ) -> FlyteFile: 26 | ... 27 | 28 | 29 | @workflow 30 | def reference_lp_wf() -> FlyteFile: 31 | return normalize_csv_file( 32 | csv_url="https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", 33 | column_names=["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"], 34 | columns_to_normalize=["Age"], 35 | output_location="", 36 | ) 37 | -------------------------------------------------------------------------------- /examples/productionizing/productionizing/reference_task.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from flytekit import reference_task, workflow 4 | from flytekit.types.file import FlyteFile 5 | 6 | # A `flytekit.reference_task` references the Flyte tasks that have already been defined, serialized, and registered. 7 | # You can reference tasks from other projects and create workflows that use tasks declared by others. 8 | # These tasks can be in their own containers, python runtimes, flytekit versions, and even different languages. 9 | 10 | # The following example illustrates how to use reference tasks 11 | # Note that reference tasks cannot be run locally. 
You must mock them out 12 | 13 | 14 | @reference_task( 15 | project="flytesnacks", 16 | domain="development", 17 | name="data_types_and_io.file.normalize_columns", 18 | version="{{ registration.version }}", 19 | ) 20 | def normalize_columns( 21 | csv_url: FlyteFile, 22 | column_names: List[str], 23 | columns_to_normalize: List[str], 24 | output_location: str, 25 | ) -> FlyteFile: 26 | ... 27 | 28 | 29 | @reference_task( 30 | project="flytesnacks", 31 | domain="development", 32 | name="sql.bigquery.no_io", 33 | version="{{ registration.version }}", 34 | ) 35 | def bigquery_task(): 36 | ... 37 | 38 | 39 | @workflow 40 | def wf() -> FlyteFile: 41 | bigquery_task() 42 | return normalize_columns( 43 | csv_url="https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", 44 | column_names=["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"], 45 | columns_to_normalize=["Age"], 46 | output_location="", 47 | ) 48 | -------------------------------------------------------------------------------- /examples/ray_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM rayproject/ray:2.5.1-py310-cpu 5 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | USER root 13 | 14 | # Install Python dependencies 15 | COPY requirements.in /root 16 | RUN pip install -r /root/requirements.in 17 | 18 | # Copy the actual code 19 | COPY . 
/root/ 20 | 21 | # This tag is supplied by the build script and will be used to determine the version 22 | # when registering tasks, workflows, and launch plans 23 | ARG tag 24 | ENV FLYTE_INTERNAL_IMAGE $tag 25 | -------------------------------------------------------------------------------- /examples/ray_plugin/ray_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/ray_plugin/ray_plugin/__init__.py -------------------------------------------------------------------------------- /examples/ray_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-ray 2 | -------------------------------------------------------------------------------- /examples/sagemaker_inference_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM python:3.11-slim-buster 5 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | 13 | # Install Python dependencies 14 | COPY requirements.in /root 15 | RUN pip install -r /root/requirements.in 16 | 17 | # Copy the actual code 18 | COPY . 
/root/ 19 | 20 | # This tag is supplied by the build script and will be used to determine the version 21 | # when registering tasks, workflows, and launch plans 22 | ARG tag 23 | ENV FLYTE_INTERNAL_IMAGE $tag 24 | -------------------------------------------------------------------------------- /examples/sagemaker_inference_agent/README.md: -------------------------------------------------------------------------------- 1 | (aws_sagemaker_inference_agent)= 2 | 3 | # AWS SageMaker Inference Agent 4 | 5 | ```{eval-rst} 6 | .. tags:: AWS, Integration, Advanced 7 | ``` 8 | 9 | The AWS SageMaker inference agent allows you to deploy models, and create and trigger inference endpoints. 10 | You can also fully remove the SageMaker deployment. 11 | 12 | ## Installation 13 | 14 | To use the AWS SageMaker inference agent, run the following command: 15 | 16 | ``` 17 | pip install flytekitplugins-awssagemaker 18 | ``` 19 | 20 | ## Example usage 21 | 22 | For a usage example, see {doc}`AWS SageMaker inference agent example usage `. 23 | 24 | ## Local testing 25 | 26 | To test an agent locally, create a class for the agent task that inherits from 27 | [SyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L222-L256) 28 | or [AsyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L259-L354). 29 | These mixins can handle synchronous and synchronous tasks, respectively, 30 | and allow flytekit to mimic FlytePropeller's behavior in calling the agent. 31 | For more information, see "[Testing agents locally](https://docs.flyte.org/en/latest/flyte_agents/testing_agents_locally.html)". 32 | 33 | ## Flyte deployment configuration 34 | 35 | ```{note} 36 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 
37 | ``` 38 | 39 | To enable the AWS SageMaker inference agent in your Flyte deployment, refer to the 40 | {ref}`AWS SageMaker inference agent setup guide `. 41 | 42 | ```{toctree} 43 | :maxdepth: -1 44 | :hidden: 45 | 46 | sagemaker_inference_agent_example_usage 47 | ``` 48 | -------------------------------------------------------------------------------- /examples/sagemaker_inference_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-awssagemaker 2 | xgboost 3 | fastapi 4 | uvicorn 5 | scikit-learn 6 | flytekit 7 | flyteidl 8 | -------------------------------------------------------------------------------- /examples/sagemaker_inference_agent/sagemaker_inference_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/sagemaker_inference_agent/sagemaker_inference_agent/__init__.py -------------------------------------------------------------------------------- /examples/sensor/README.md: -------------------------------------------------------------------------------- 1 | (sensor)= 2 | 3 | # Sensor 4 | 5 | ```{eval-rst} 6 | .. tags:: Data, Basic 7 | ``` 8 | 9 | ## Usage 10 | 11 | For an example of detecting a file with the `FileSensor`, see the {doc}`file sensor example `. 12 | 13 | ### Run the file senseor example on a Flyte cluster 14 | 15 | To run the provided example on a Flyte cluster, use the following command: 16 | 17 | ``` 18 | pyflyte run --remote \ 19 | https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/sensor/sensor/file_sensor_example.py wf 20 | ``` 21 | 22 | ## Deployment configuration 23 | 24 | ```{note} 25 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 
26 | ``` 27 | 28 | To enable the sensor agent in your Flyte deployment, see the {ref}`sensor agent deployment guide`. 29 | 30 | ```{toctree} 31 | :maxdepth: -1 32 | :hidden: 33 | file_sensor_example 34 | ``` 35 | -------------------------------------------------------------------------------- /examples/sensor/sensor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/sensor/sensor/__init__.py -------------------------------------------------------------------------------- /examples/sensor/sensor/file_sensor_example.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # # File Sensor 3 | # 4 | # This example shows how to use the `FileSensor` to detect files appearing in your local or remote filesystem. 5 | # 6 | # First, import the required libraries. 7 | 8 | # %% 9 | from flytekit import task, workflow 10 | from flytekit.sensor.file_sensor import FileSensor 11 | 12 | # %% [markdown] 13 | # Next, create a FileSensor task. 14 | 15 | # %% 16 | sensor = FileSensor(name="test_file_sensor") 17 | 18 | # %% [markdown] 19 | # To use the FileSensor created in the previous step, you must specify the path parameter. In the sandbox, you can use the S3 path. 20 | 21 | 22 | # %% 23 | @task() 24 | def t1(): 25 | print("SUCCEEDED") 26 | 27 | 28 | @workflow() 29 | def wf(): 30 | sensor(path="s3://my-s3-bucket/file.txt") >> t1() 31 | 32 | 33 | if __name__ == "__main__": 34 | wf() 35 | 36 | # %% [markdown] 37 | # You can also use the S3 or GCS file system. 38 | # We have already set the minio credentials in the agent by default. If you test the sandbox example locally, you will need to set the AWS credentials in your environment variables. 
39 | # 40 | # ```{prompt} bash 41 | # export FLYTE_AWS_ENDPOINT="http://localhost:30002" 42 | # export FLYTE_AWS_ACCESS_KEY_ID="minio" 43 | # export FLYTE_AWS_SECRET_ACCESS_KEY="miniostorage" 44 | # ``` 45 | -------------------------------------------------------------------------------- /examples/slurm_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | # ###################### 2 | # NOTE: For CI/CD only # 3 | ######################## 4 | FROM python:3.11-slim-buster 5 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 6 | 7 | WORKDIR /root 8 | ENV VENV /opt/venv 9 | ENV LANG C.UTF-8 10 | ENV LC_ALL C.UTF-8 11 | ENV PYTHONPATH /root 12 | 13 | # Install Python dependencies 14 | COPY requirements.in /root 15 | RUN pip install -r /root/requirements.in 16 | 17 | # Copy the actual code 18 | COPY . /root/ 19 | 20 | # This tag is supplied by the build script and will be used to determine the version 21 | # when registering tasks, workflows, and launch plans 22 | ARG tag 23 | ENV FLYTE_INTERNAL_IMAGE $tag 24 | -------------------------------------------------------------------------------- /examples/slurm_agent/README.md: -------------------------------------------------------------------------------- 1 | (slurm_agent)= 2 | 3 | # Slurm agent 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, HighPerformanceComputing, Advanced 7 | ``` 8 | 9 | ## Installation 10 | 11 | To install the Slurm agent, run the following command: 12 | 13 | ```{eval-rst} 14 | .. prompt:: bash 15 | 16 | pip install flytekitplugins-slurm 17 | ``` 18 | 19 | ## Example usage 20 | 21 | For the example usage of different Slurm task types, please see {doc}`Slurm agent example usage`. 
22 | 23 | ## Local testing 24 | 25 | To test the Slurm agent locally, create a class for the agent task that inherits from [AsyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/cd6bd01ad0ba6688afc71a33a59ece53f90e841a/flytekit/extend/backend/base_agent.py#L3). This mixin can handle asynchronous tasks and allows flytekit to mimic FlytePropeller's behavior in calling the agent. For more information, see "[Testing agents locally](https://docs.flyte.org/en/latest/flyte_agents/testing_agents_in_a_local_python_environment.html)". 26 | 27 | ```{note} 28 | In some cases, you will need to store credentials in your local environment when testing locally. 29 | ``` 30 | 31 | ## Flyte deployment configuration 32 | 33 | ```{note} 34 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 35 | ``` 36 | 37 | To enable the Slurm agent in your Flyte deployment, see the {ref}`Slurm agent deployment guide`. 
38 | 39 | 40 | ```{toctree} 41 | :maxdepth: -1 42 | :hidden: 43 | 44 | slurm_agent_example_usage 45 | ``` 46 | -------------------------------------------------------------------------------- /examples/slurm_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-slurm 2 | torch 3 | torchvision 4 | tqdm 5 | -------------------------------------------------------------------------------- /examples/slurm_agent/slurm_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/slurm_agent/slurm_agent/__init__.py -------------------------------------------------------------------------------- /examples/snowflake_agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | 11 | # Install Python dependencies 12 | COPY requirements.in /root 13 | RUN pip install -r /root/requirements.in 14 | RUN pip freeze 15 | 16 | # Copy the actual code 17 | COPY . /root/ 18 | 19 | 20 | # This tag is supplied by the build script and will be used to determine the version 21 | # when registering tasks, workflows, and launch plans 22 | ARG tag 23 | ENV FLYTE_INTERNAL_IMAGE $tag 24 | -------------------------------------------------------------------------------- /examples/snowflake_agent/README.md: -------------------------------------------------------------------------------- 1 | (snowflake_agent)= 2 | 3 | # Snowflake agent 4 | 5 | ```{eval-rst} 6 | .. 
tags:: AWS, GCP, AliCloud, Integration, Advanced 7 | ``` 8 | Flyte can be seamlessly integrated with the [Snowflake](https://www.snowflake.com) service, 9 | providing you with a straightforward means to query data in Snowflake. 10 | 11 | ## Installation 12 | 13 | To use the Snowflake agent, run the following command: 14 | 15 | ``` 16 | pip install flytekitplugins-snowflake 17 | ``` 18 | 19 | ## Example usage 20 | 21 | For a usage example, see {doc}`Snowflake agent example usage <snowflake_agent_example_usage>`. 22 | 23 | ## Local testing 24 | 25 | To test the Snowflake agent locally, create a class for the agent task that inherits from [AsyncAgentExecutorMixin](https://github.com/flyteorg/flytekit/blob/master/flytekit/extend/backend/base_agent.py#L262). This mixin can handle asynchronous tasks and allows flytekit to mimic FlytePropeller's behavior in calling the agent. For more information, see "[Testing agents locally](https://docs.flyte.org/en/latest/flyte_agents/testing_agents_in_a_local_python_environment.html)". 26 | 27 | ```{note} 28 | 29 | In some cases, you will need to store credentials in your local environment when testing locally. 30 | 31 | ``` 32 | 33 | ## Flyte deployment configuration 34 | 35 | ```{note} 36 | If you are using a managed deployment of Flyte, you will need to contact your deployment administrator to configure agents in your deployment. 37 | ``` 38 | 39 | To enable the Snowflake agent in your Flyte deployment, see the {ref}`Snowflake agent setup guide <deployment-agent-setup-snowflake>`. 
40 | 41 | 42 | ```{toctree} 43 | :maxdepth: -1 44 | :hidden: 45 | 46 | snowflake_agent_example_usage 47 | ``` 48 | -------------------------------------------------------------------------------- /examples/snowflake_agent/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-snowflake 2 | flytekit 3 | pandas 4 | pyarrow 5 | -------------------------------------------------------------------------------- /examples/snowflake_agent/snowflake_agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/snowflake_agent/snowflake_agent/__init__.py -------------------------------------------------------------------------------- /examples/snowflake_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-buster 2 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | 11 | # Install Python dependencies 12 | COPY requirements.in /root 13 | RUN pip install -r /root/requirements.in 14 | RUN pip freeze 15 | 16 | # Copy the actual code 17 | COPY . /root/ 18 | 19 | 20 | # This tag is supplied by the build script and will be used to determine the version 21 | # when registering tasks, workflows, and launch plans 22 | ARG tag 23 | ENV FLYTE_INTERNAL_IMAGE $tag 24 | -------------------------------------------------------------------------------- /examples/snowflake_plugin/README.md: -------------------------------------------------------------------------------- 1 | # Snowflake plugin 2 | 3 | ```{warning} 4 | This example code uses a legacy implementation of the Snowflake integration. 
We recommend using the [Snowflake agent](https://docs.flyte.org/en/latest/flytesnacks/examples/snowflake_agent/index.html) instead. 5 | ``` 6 | 7 | This directory contains example code for the deprecated Snowflake plugin. For documentation on installing and using the plugin, see the [Snowflake plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/snowflake_plugin/index.html). 8 | 9 | ```{toctree} 10 | :maxdepth: -1 11 | :hidden: 12 | 13 | snowflake_plugin_example 14 | ``` 15 | -------------------------------------------------------------------------------- /examples/snowflake_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-snowflake==1.7.0 2 | flytekit==1.7.1b1 3 | marshmallow_enum 4 | -------------------------------------------------------------------------------- /examples/snowflake_plugin/snowflake_plugin.md: -------------------------------------------------------------------------------- 1 | (snowflake_plugin)= 2 | 3 | # Snowflake plugin 4 | 5 | ```{note} 6 | 7 | This is a legacy implementation of the Snowflake integration. We recommend using the {ref}`Snowflake agent <snowflake_agent>` instead. 8 | 9 | ``` 10 | 11 | ## Installation 12 | 13 | To use the Snowflake plugin, run the following command: 14 | 15 | ``` 16 | pip install flytekitplugins-snowflake 17 | ``` 18 | 19 | ## Flyte deployment configuration 20 | 21 | If you intend to run the plugin on the Flyte cluster, you must first set it up on the backend. 22 | Please refer to the 23 | {ref}`Snowflake plugin setup guide ` 24 | for detailed instructions. 
25 | 26 | ## Run the example on the Flyte cluster 27 | 28 | To run the provided example on the Flyte cluster, use the following command: 29 | 30 | ``` 31 | pyflyte run --remote \ 32 | https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/snowflake_plugin/snowflake_plugin/snowflake.py \ 33 | snowflake_wf --nation_key 10 34 | ``` 35 | -------------------------------------------------------------------------------- /examples/snowflake_plugin/snowflake_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/snowflake_plugin/snowflake_plugin/__init__.py -------------------------------------------------------------------------------- /examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # # Snowflake plugin example 3 | # 4 | # %% 5 | 6 | from flytekit import kwtypes, workflow 7 | from flytekitplugins.snowflake import SnowflakeConfig, SnowflakeTask 8 | 9 | snowflake_task_no_io = SnowflakeTask( 10 | name="sql.snowflake.no_io", 11 | inputs={}, 12 | query_template="SELECT 1", 13 | output_schema_type=None, 14 | task_config=SnowflakeConfig( 15 | account="", 16 | database="SNOWFLAKE_SAMPLE_DATA", 17 | schema="TPCH_SF1000", 18 | warehouse="COMPUTE_WH", 19 | ), 20 | ) 21 | 22 | 23 | snowflake_task_templatized_query = SnowflakeTask( 24 | name="sql.snowflake.w_io", 25 | # Define inputs as well as their types that can be used to customize the query. 
26 | inputs=kwtypes(nation_key=int), 27 | task_config=SnowflakeConfig( 28 | account="", 29 | database="SNOWFLAKE_SAMPLE_DATA", 30 | schema="TPCH_SF1000", 31 | warehouse="COMPUTE_WH", 32 | ), 33 | query_template="SELECT * from CUSTOMER where C_NATIONKEY = {{ .inputs.nation_key }} limit 100", 34 | ) 35 | 36 | 37 | @workflow 38 | def snowflake_wf(nation_key: int): 39 | return snowflake_task_templatized_query(nation_key=nation_key) 40 | 41 | 42 | # To review the query results, access the Snowflake console at: 43 | # `https://.snowflakecomputing.com/console#/monitoring/queries/detail`. 44 | # 45 | # You can also execute the task and workflow locally. 46 | if __name__ == "__main__": 47 | print(snowflake_task_no_io()) 48 | print(snowflake_wf(nation_key=10)) 49 | -------------------------------------------------------------------------------- /examples/sql_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-buster 2 | 3 | WORKDIR /root 4 | ENV VENV /opt/venv 5 | ENV LANG C.UTF-8 6 | ENV LC_ALL C.UTF-8 7 | ENV PYTHONPATH /root 8 | 9 | # Install the AWS cli separately to prevent issues with boto being written over 10 | RUN pip3 install awscli 11 | 12 | # Install gcloud for GCP 13 | RUN apt-get update && apt-get install -y curl 14 | 15 | WORKDIR /opt 16 | RUN curl https://sdk.cloud.google.com > install.sh 17 | RUN bash /opt/install.sh --install-dir=/opt 18 | ENV PATH $PATH:/opt/google-cloud-sdk/bin 19 | WORKDIR /root 20 | 21 | ENV VENV /opt/venv 22 | # Virtual environment 23 | RUN python3 -m venv ${VENV} 24 | ENV PATH="${VENV}/bin:$PATH" 25 | 26 | # Install Python dependencies 27 | COPY requirements.in /root/. 28 | RUN pip install -r /root/requirements.in 29 | 30 | # Copy the actual code 31 | COPY . 
/root/ 32 | 33 | # This tag is supplied by the build script and will be used to determine the version 34 | # when registering tasks, workflows, and launch plans 35 | ARG tag 36 | ENV FLYTE_INTERNAL_IMAGE $tag 37 | -------------------------------------------------------------------------------- /examples/sql_plugin/README.md: -------------------------------------------------------------------------------- 1 | # SQL 2 | 3 | ```{eval-rst} 4 | .. tags:: Integration, Data, SQL, Intermediate 5 | ``` 6 | 7 | Flyte tasks are not always restricted to running user-supplied containers, nor even containers at all. Indeed, this is 8 | one of the most important design decisions in Flyte. Non-container tasks can have arbitrary targets for execution -- 9 | an API that executes SQL queries like SnowFlake, BigQuery, a synchronous WebAPI, etc. 10 | 11 | ```{auto-examples-toc} 12 | sqlite3_integration 13 | sql_alchemy 14 | ``` 15 | -------------------------------------------------------------------------------- /examples/sql_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-sqlalchemy>=0.20.1 6 | psycopg2-binary 7 | -------------------------------------------------------------------------------- /examples/sql_plugin/sql_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/sql_plugin/sql_plugin/__init__.py -------------------------------------------------------------------------------- /examples/testing/README.md: -------------------------------------------------------------------------------- 1 | # Testing Flyte tasks and workflows 2 | 3 | The `flytekit` python SDK provides a few utilities for making it easier to test 4 | your tasks and workflows in your test suite. 
For more details, you can also refer 5 | to the [`flytekit.testing`](https://docs.flyte.org/en/latest/api/flytekit/testing.html) module in the API reference. 6 | -------------------------------------------------------------------------------- /examples/testing/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/testing/testing/__init__.py -------------------------------------------------------------------------------- /examples/testing/testing/mocking.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | import pandas 4 | from flytekit import SQLTask, TaskMetadata, kwtypes, task, workflow 5 | from flytekit.testing import patch, task_mock 6 | from flytekit.types.schema import FlyteSchema 7 | 8 | # This is a generic SQL task (and is by default not hooked up to any datastore 9 | # nor handled by any plugin), and must be mocked. 10 | sql = SQLTask( 11 | "my-query", 12 | query_template="SELECT * FROM hive.city.fact_airport_sessions WHERE ds = '{{ .Inputs.ds }}' LIMIT 10", 13 | inputs=kwtypes(ds=datetime.datetime), 14 | outputs=kwtypes(results=FlyteSchema), 15 | metadata=TaskMetadata(retries=2), 16 | ) 17 | 18 | 19 | # This is a task that can run locally 20 | @task 21 | def t1() -> datetime.datetime: 22 | return datetime.datetime.now() 23 | 24 | 25 | # Declare a workflow that chains these two tasks together 26 | @workflow 27 | def my_wf() -> FlyteSchema: 28 | dt = t1() 29 | return sql(ds=dt) 30 | 31 | 32 | # Without a mock, calling the workflow would typically raise an exception, 33 | # but with the `task_mock` construct, which returns a `MagicMock` object, 34 | # we can override the return value. 
35 | def main_1(): 36 | with task_mock(sql) as mock: 37 | mock.return_value = pandas.DataFrame(data={"x": [1, 2], "y": ["3", "4"]}) 38 | assert (my_wf().open().all() == pandas.DataFrame(data={"x": [1, 2], "y": ["3", "4"]})).all().all() 39 | 40 | 41 | # %% [markdown] 42 | # There is another utility as well called `patch` which offers the same 43 | # functionality, but in the traditional Python 44 | # patching style, where the first argument is the `MagicMock` object. 45 | def main_2(): 46 | @patch(sql) 47 | def test_user_demo_test(mock_sql): 48 | mock_sql.return_value = pandas.DataFrame(data={"x": [1, 2], "y": ["3", "4"]}) 49 | assert (my_wf().open().all() == pandas.DataFrame(data={"x": [1, 2], "y": ["3", "4"]})).all().all() 50 | 51 | test_user_demo_test() 52 | 53 | 54 | if __name__ == "__main__": 55 | main_1() 56 | main_2() 57 | -------------------------------------------------------------------------------- /examples/wandb_plugin/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bookworm 2 | LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks 3 | 4 | WORKDIR /root 5 | ENV VENV /opt/venv 6 | ENV LANG C.UTF-8 7 | ENV LC_ALL C.UTF-8 8 | ENV PYTHONPATH /root 9 | 10 | WORKDIR /root 11 | 12 | ENV VENV /opt/venv 13 | # Virtual environment 14 | RUN python3 -m venv ${VENV} 15 | ENV PATH="${VENV}/bin:$PATH" 16 | 17 | # Install Python dependencies 18 | COPY requirements.in /root 19 | RUN pip install -r /root/requirements.in 20 | 21 | # Copy the actual code 22 | COPY . 
/root 23 | 24 | # This tag is supplied by the build script and will be used to determine the version 25 | # when registering tasks, workflows, and launch plans 26 | ARG tag 27 | ENV FLYTE_INTERNAL_IMAGE $tag 28 | -------------------------------------------------------------------------------- /examples/wandb_plugin/README.md: -------------------------------------------------------------------------------- 1 | (wandb_plugin)= 2 | 3 | # Weights and Biases 4 | 5 | ```{eval-rst} 6 | .. tags:: Integration, Data, Metrics, Intermediate 7 | ``` 8 | 9 | The Weights and Biases MLOps platform helps AI developers streamline their ML workflows from end to end. This plugin 10 | enables seamless use of Weights & Biases within Flyte by configuring links between the two platforms. 11 | 12 | First, install the Flyte Weights & Biases plugin: 13 | 14 | ```bash 15 | pip install flytekitplugins-wandb 16 | ``` 17 | 18 | ```{auto-examples-toc} 19 | wandb_example 20 | ``` 21 | -------------------------------------------------------------------------------- /examples/wandb_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekitplugins-wandb 2 | xgboost 3 | scikit-learn 4 | wandb 5 | -------------------------------------------------------------------------------- /examples/wandb_plugin/wandb_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/wandb_plugin/wandb_plugin/__init__.py -------------------------------------------------------------------------------- /examples/whylogs_plugin/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/whylogs_plugin/.gitignore 
-------------------------------------------------------------------------------- /examples/whylogs_plugin/requirements.in: -------------------------------------------------------------------------------- 1 | flytekit 2 | wheel 3 | matplotlib 4 | flytekitplugins-deck-standard 5 | flytekitplugins-whylogs>=1.1.1b0 6 | scikit-learn 7 | whylogs[s3] 8 | whylogs[mlflow] 9 | whylogs[whylabs] 10 | pandas 11 | -------------------------------------------------------------------------------- /examples/whylogs_plugin/whylogs_plugin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flyteorg/flytesnacks/c9b2c44dc5d0ce42e482be1ffb16c8b19e72c23c/examples/whylogs_plugin/whylogs_plugin/__init__.py -------------------------------------------------------------------------------- /flyte_tests.txt: -------------------------------------------------------------------------------- 1 | examples/advanced_composition/advanced_composition/chain_entities.py 2 | examples/advanced_composition/advanced_composition/conditional.py 3 | examples/advanced_composition/advanced_composition/decorating_tasks.py 4 | examples/advanced_composition/advanced_composition/decorating_workflows.py 5 | examples/advanced_composition/advanced_composition/dynamic_workflow.py 6 | examples/advanced_composition/advanced_composition/map_task.py 7 | examples/advanced_composition/advanced_composition/waiting_for_external_inputs.py 8 | examples/basics/basics/documenting_workflows.py 9 | examples/basics/basics/hello_world.py 10 | examples/basics/basics/named_outputs.py 11 | examples/basics/basics/shell_task.py 12 | examples/basics/basics/workflow.py 13 | examples/data_types_and_io/data_types_and_io/dataclass.py 14 | examples/data_types_and_io/data_types_and_io/enum_type.py 15 | examples/data_types_and_io/data_types_and_io/file.py 16 | examples/data_types_and_io/data_types_and_io/folder.py 17 | 
examples/data_types_and_io/data_types_and_io/structured_dataset.py 18 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | line-length = 120 3 | select = ["E", "W", "F", "I"] 4 | ignore = [ 5 | # Whitespace before '{symbol}' 6 | "E203", 7 | # Too many leading # before block comment 8 | "E266", 9 | # Line too long ({width} > {limit}) 10 | "E501", 11 | # Ambiguous variable name: {name} 12 | "E741", 13 | # Undefined name {name} 14 | "F821", 15 | ] 16 | 17 | [tool.ruff.extend-per-file-ignores] 18 | "*/__init__.py" = [ 19 | # unused-import 20 | "F401", 21 | ] 22 | "examples/**/*.py" = [ 23 | # Module level import not at top of cell 24 | "E402", 25 | ] 26 | -------------------------------------------------------------------------------- /scripts/create-example-project.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Usage for default project: 4 | # 5 | # ./scripts/create-example-project.sh 6 | # 7 | # For project that doesn't require dependencies: 8 | # 9 | # ./scripts/create-example-project.sh 0 10 | 11 | project_name="$1" 12 | requires_deps="$2" 13 | 14 | if [ -z "$project_name" ] 15 | then 16 | echo "Please provide a project name" 17 | exit 1 18 | fi 19 | 20 | if [ -z "$requires_deps" ] 21 | then 22 | requires_deps="1" 23 | fi 24 | 25 | cp -R _example_template ./examples/"$project_name" 26 | mv "./examples/$project_name/_example_template" "./examples/$project_name/$project_name" 27 | 28 | echo Creating a new project "./examples/$project_name" 29 | 30 | if [ "$requires_deps" = "0" ] 31 | then 32 | echo Removing dependency files 33 | rm "./examples/$project_name/Dockerfile" ./examples/"$project_name"/requirements.* 34 | fi 35 | -------------------------------------------------------------------------------- /scripts/pip-compile.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Usage for simple directory: 4 | # 5 | # ./scripts/pip-compile.sh 6 | # 7 | # Compile all examples: 8 | # 9 | # ./scripts/pip-compile.sh 10 | 11 | examples=$1 12 | 13 | if [ -z "$examples" ] 14 | then 15 | examples=$(find examples -type d -d 1) 16 | fi 17 | 18 | build_requirements() { 19 | pip-compile requirements.in --upgrade --verbose --resolver=backtracking 20 | } 21 | 22 | for dir in $examples 23 | do 24 | (cd "$dir" && build_requirements) 25 | done 26 | -------------------------------------------------------------------------------- /scripts/serialize-example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Usage: ./scripts/serialize-example.sh 4 | 5 | dir="$1" 6 | version="$2" 7 | 8 | build() { 9 | docker build . -t "$1" 10 | } 11 | 12 | # NOTE: the additional images are for the multi-image containerization examples 13 | serialize() { 14 | docker run -i --rm -v "$(pwd)":/root "$2" \ 15 | pyflyte --pkgs "$1" \ 16 | package \ 17 | --image "$2" \ 18 | --image mindmeld="ghcr.io/flyteorg/flytecookbook:core-latest" \ 19 | --image borebuster="ghcr.io/flyteorg/flytekit:py3.9-latest" \ 20 | --output /root/flyte-package.tgz \ 21 | --force 22 | } 23 | 24 | if [ -z "$version" ] 25 | then 26 | version="latest" 27 | fi 28 | 29 | example_name=$(basename -- "$dir") 30 | image_uri=ghcr.io/flyteorg/flytecookbook:"$example_name"-"$version" 31 | (cd "$dir" && build "$image_uri" && serialize "$example_name" "$image_uri") 32 | echo "$image_uri" 33 | --------------------------------------------------------------------------------