├── examples ├── simple_example_with_requirements │ ├── requirements.txt │ ├── run.py │ └── fuego_run.py ├── github_runner_app │ ├── style.css │ ├── requirements.txt │ └── app.py ├── README.md ├── run_all.sh ├── simple_example │ ├── fuego_run.py │ └── run.py ├── pytorch_vae_github │ └── fuego_run.py ├── transformers_github │ └── fuego_run.py └── fuego_demo.ipynb ├── src └── fuego │ ├── __init__.py │ ├── runtime.py │ └── run_on_spaces.py ├── Makefile ├── pyproject.toml ├── .github └── workflows │ ├── sync-with-huggingface.yml │ └── python-publish.yml ├── setup.py ├── README.md ├── .gitignore └── LICENSE /examples/simple_example_with_requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | fire==0.5.0 -------------------------------------------------------------------------------- /examples/github_runner_app/style.css: -------------------------------------------------------------------------------- 1 | h1 { 2 | text-align: center; 3 | } -------------------------------------------------------------------------------- /examples/github_runner_app/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio 2 | PyYAML 3 | fuego==0.0.8 4 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | See each example subdirectory's `fuego_run.py` for details. -------------------------------------------------------------------------------- /src/fuego/__init__.py: -------------------------------------------------------------------------------- 1 | from .run_on_spaces import github_run, run 2 | 3 | 4 | __version__ = "0.0.9.dev0" 5 | -------------------------------------------------------------------------------- /examples/run_all.sh: -------------------------------------------------------------------------------- 1 | python simple_example/fuego_run.py 2 | python simple_example_with_requirements/fuego_run.py 3 | python transformers_github/fuego_run.py 4 | python pytorch_vae_github/fuego_run.py -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: quality style 2 | 3 | # Check that source code meets quality standards 4 | quality: 5 | black --check --diff . 6 | ruff . 7 | 8 | # Format source code automatically 9 | style: 10 | black . 11 | ruff . --fix 12 | -------------------------------------------------------------------------------- /examples/simple_example/fuego_run.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fuego 4 | 5 | 6 | space_url, dataset_url = fuego.run( 7 | script=str(Path(__file__).parent / "run.py"), 8 | delete_space_on_completion=True, # When debugging, set this to False 9 | # Kwargs 10 | message="Howdy, world!!!", 11 | ) 12 | print(f"space_url: {space_url}") 13 | print(f"dataset_url: {dataset_url}") 14 | -------------------------------------------------------------------------------- /examples/simple_example_with_requirements/run.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fire 4 | 5 | 6 | def main(message: str = "Hello, world!", output_dir="./outputs"): 7 | logdir = Path(output_dir) 8 | logdir.mkdir(exist_ok=True, parents=True) 9 | outfile_path = logdir / "message.txt" 10 | outfile_path.write_text(message) 11 | 12 | 13 | if __name__ == "__main__": 14 | fire.Fire(main) 15 | -------------------------------------------------------------------------------- /examples/pytorch_vae_github/fuego_run.py: -------------------------------------------------------------------------------- 1 | import fuego 2 | 3 | 4 | space_url, dataset_url = fuego.github_run( 5 | github_repo_id="pytorch/examples", 6 | script="vae/main.py", 7 | requirements_file="vae/requirements.txt", 8 | space_output_dirs=["./results"], 9 | # Kwargs, passed as argparse args to the script 10 | epochs=3, 11 | ) 12 | print(f"Space: {space_url}") 13 | print(f"Dataset: {dataset_url}") 14 | -------------------------------------------------------------------------------- /examples/simple_example_with_requirements/fuego_run.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import fuego 4 | 5 | 6 | space_url, dataset_url = fuego.run( 7 | script=str(Path(__file__).parent / "run.py"), 8 | requirements_file=str(Path(__file__).parent / "requirements.txt"), 9 | delete_space_on_completion=True, # When debugging, set this to False 10 | # Kwargs 11 | message="Howdy, world!", 12 | ) 13 | print(f"space_url: {space_url}") 14 | print(f"dataset_url: {dataset_url}") 15 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target_version = ['py37'] 4 | 5 | [tool.ruff] 6 | # Never enforce `E501` (line length violations). 7 | ignore = ["C901", "E501", "E741", "W605"] 8 | select = ["C", "E", "F", "I", "W"] 9 | line-length = 119 10 | 11 | # Ignore import violations in all `__init__.py` files. 12 | [tool.ruff.per-file-ignores] 13 | "__init__.py" = ["E402", "F401", "F403", "F811"] 14 | 15 | [tool.ruff.isort] 16 | known-first-party = ["fuego"] 17 | lines-after-imports = 2 18 | -------------------------------------------------------------------------------- /.github/workflows/sync-with-huggingface.yml: -------------------------------------------------------------------------------- 1 | name: Sync with Hugging Face Hub 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - .github/workflows/sync-with-huggingface.yml 9 | - examples/github_runner_app/** 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Sync with Hugging Face 16 | uses: nateraw/huggingface-sync-action@v0.0.4 17 | with: 18 | github_repo_id: huggingface/fuego 19 | huggingface_repo_id: nateraw/fuego 20 | repo_type: space 21 | space_sdk: gradio 22 | subdirectory: examples/github_runner_app 23 | hf_token: ${{ secrets.HF_TOKEN }} 24 | -------------------------------------------------------------------------------- /examples/simple_example/run.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from pathlib import Path 3 | 4 | 5 | def main(message: str = "Hello, world!", output_dir="./outputs"): 6 | logdir = Path(output_dir) 7 | logdir.mkdir(exist_ok=True, parents=True) 8 | outfile_path = logdir / "message.txt" 9 | outfile_path.write_text(message) 10 | 11 | 12 | def parse_args(args=None): 13 | parser = ArgumentParser() 14 | parser.add_argument("--message", type=str, default="Hello, world!") 15 | parser.add_argument("--output_dir", type=str, default="./outputs") 16 | return parser.parse_args(args=args) 17 | 18 | 19 | if __name__ == "__main__": 20 | main(**vars(parse_args())) 21 | -------------------------------------------------------------------------------- /examples/transformers_github/fuego_run.py: -------------------------------------------------------------------------------- 1 | import fuego 2 | 3 | 4 | space_url, dataset_url = fuego.github_run( 5 | github_repo_id="huggingface/transformers", 6 | script="examples/pytorch/text-classification/run_glue.py", 7 | requirements_file="examples/pytorch/text-classification/requirements.txt", 8 | space_hardware="t4-small", 9 | # Adding additional pip requirements to the requirements.txt file 10 | extra_requirements=["tensorboard", "git+https://github.com/huggingface/transformers@ea55bd8#egg=transformers"], 11 | # Kwargs, passed as argparse args to the script 12 | model_name_or_path="bert-base-cased", 13 | task_name="mrpc", 14 | do_train=True, 15 | do_eval=True, 16 | max_seq_length=128, 17 | per_device_train_batch_size=32, 18 | learning_rate=2e-5, 19 | num_train_epochs=3, 20 | output_dir="./outputs", 21 | logging_dir="./logs", 22 | logging_steps=20, 23 | report_to="tensorboard", 24 | ) 25 | print(f"Space: {space_url}") 26 | print(f"Dataset: {dataset_url}") 27 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | push: 8 | tags: 9 | - v* 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: "3.x" 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | 4 | def get_version() -> str: 5 | rel_path = "src/fuego/__init__.py" 6 | with open(rel_path, "r") as fp: 7 | for line in fp.read().splitlines(): 8 | if line.startswith("__version__"): 9 | delim = '"' if '"' in line else "'" 10 | return line.split(delim)[1] 11 | raise RuntimeError("Unable to find version string.") 12 | 13 | 14 | requirements = [ 15 | "fire", 16 | "huggingface_hub>=0.12.0", 17 | "GitPython", 18 | ] 19 | 20 | extras = {} 21 | extras["quality"] = ["black~=23.1", "ruff>=0.0.241"] 22 | 23 | setup( 24 | name="fuego", 25 | description="Fuego", 26 | long_description=open("README.md", "r", encoding="utf-8").read(), 27 | long_description_content_type="text/markdown", 28 | url="https://github.com/huggingface/fuego", 29 | version=get_version(), 30 | author="Nathan Raw", 31 | author_email="nate@huggingface.com", 32 | license="Apache", 33 | install_requires=requirements, 34 | extras_require=extras, 35 | package_dir={"": "src"}, 36 | packages=find_packages("src"), 37 | entry_points={"console_scripts": ["fuego=fuego.run_on_spaces:cli_run"]}, 38 | ) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fuego 2 | 3 | A 🔥 tool for running code in the cloud 4 | 5 | 🔥[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/fuego/blob/main/examples/fuego_demo.ipynb)🔥 6 | 7 | ### Note 8 | 9 | ❗ **This project is a WIP and just an _idea_ right now. Under active development.** ❗ 10 | 11 | 🤗 Suggestions/ideas/feedback from community are welcome! Please feel free to submit a [new idea](https://github.com/huggingface/fuego/discussions/new?category=ideas) in the discussions tab. 12 | 13 | ## The idea 14 | 15 | A nice interface for running scripts on Hugging Face Spaces 16 | 17 | ## Installation 18 | 19 | For now, you can install from source: 20 | 21 | ```bash 22 | git clone https://github.com/huggingface/fuego.git 23 | cd fuego 24 | pip install -e "." 25 | ``` 26 | 27 | ## WIP API 28 | 29 | The Python API and CLI should have very similar experiences so folks can use whichever they prefer. 30 | 31 | See the examples folder for more details. 32 | 33 | #### Python 34 | 35 | 36 | ```python 37 | import fuego 38 | 39 | fuego.run( 40 | script='run.py', 41 | requirements_file='requirements.txt', 42 | # Kwargs 43 | message='hello world', 44 | ) 45 | ``` 46 | 47 | #### CLI 48 | 49 | ```bash 50 | fuego run --script run.py --requirements_file requirements.txt --message "hello world" 51 | ``` 52 | -------------------------------------------------------------------------------- /src/fuego/runtime.py: -------------------------------------------------------------------------------- 1 | """Check presence of installed packages at runtime.""" 2 | import sys 3 | 4 | import packaging.version 5 | 6 | 7 | _PY_VERSION: str = sys.version.split()[0].rstrip("+") 8 | 9 | if packaging.version.Version(_PY_VERSION) < packaging.version.Version("3.8.0"): 10 | import importlib_metadata # type: ignore 11 | else: 12 | import importlib.metadata as importlib_metadata # type: ignore 13 | 14 | 15 | _package_versions = {} 16 | 17 | _CANDIDATES = { 18 | "huggingface_hub": {"huggingface_hub"}, 19 | } 20 | 21 | # Check once at runtime 22 | for candidate_name, package_names in _CANDIDATES.items(): 23 | _package_versions[candidate_name] = "N/A" 24 | for name in package_names: 25 | try: 26 | _package_versions[candidate_name] = importlib_metadata.version(name) 27 | break 28 | except importlib_metadata.PackageNotFoundError: 29 | pass 30 | 31 | 32 | def _get_version(package_name: str) -> str: 33 | return _package_versions.get(package_name, "N/A") 34 | 35 | 36 | def _is_available(package_name: str) -> bool: 37 | return _get_version(package_name) != "N/A" 38 | 39 | 40 | # Python 41 | def get_python_version() -> str: 42 | return _PY_VERSION 43 | 44 | 45 | # AzureML SDKv1 (azureml-core) 46 | def is_huggingface_hub_available() -> bool: 47 | return _is_available("huggingface_hub") 48 | 49 | 50 | def get_huggingface_hub_version() -> str: 51 | return _get_version("huggingface_hub") 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .DS_Store 132 | 133 | config.json 134 | examples/config.json 135 | 136 | run.py 137 | 138 | # Ruff cache 139 | .ruff_cache/ 140 | 141 | examples/lora* -------------------------------------------------------------------------------- /examples/fuego_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyNSlh6WA3l/3mdm12G17FG0", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "Install `fuego` from GitHub" 33 | ], 34 | "metadata": { 35 | "id": "Le73xlSdSA8j" 36 | } 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "id": "2__BOdt-Roq-" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "%%capture\n", 47 | "! pip install git+https://github.com/huggingface/fuego" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "source": [ 53 | "Log in using a token with write access" 54 | ], 55 | "metadata": { 56 | "id": "5HhZ1aWDR-Cm" 57 | } 58 | }, 59 | { 60 | "cell_type": "code", 61 | "source": [ 62 | "from huggingface_hub import login\n", 63 | "\n", 64 | "login()" 65 | ], 66 | "metadata": { 67 | "id": "3lPgvaFBRvKM" 68 | }, 69 | "execution_count": null, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "source": [ 75 | "# Run From GitHub\n", 76 | "\n", 77 | "Note - if either of these lead to \"no application file\" in the resulting space, just factory reset it to get it building/running. It happens from time to time." 78 | ], 79 | "metadata": { 80 | "id": "LXqo_XaISewV" 81 | } 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "source": [ 86 | "### PyTorch Example - CPU\n", 87 | "\n", 88 | "In the script, they save outputs to ./results, so we'll upload that instead of the default './outputs' and './logs' directories." 89 | ], 90 | "metadata": { 91 | "id": "yaLBmTRCSL_g" 92 | } 93 | }, 94 | { 95 | "cell_type": "code", 96 | "source": [ 97 | "import fuego\n", 98 | "\n", 99 | "\n", 100 | "space_url, dataset_url = fuego.github_run(\n", 101 | " github_repo_id=\"pytorch/examples\",\n", 102 | " script=\"vae/main.py\",\n", 103 | " requirements_file=\"vae/requirements.txt\",\n", 104 | " space_output_dirs=['./results'],\n", 105 | " # Kwargs, passed as argparse args to the script\n", 106 | " epochs=3,\n", 107 | ")\n", 108 | "print(f\"Space: {space_url}\")\n", 109 | "print(f\"Dataset: {dataset_url}\")" 110 | ], 111 | "metadata": { 112 | "id": "1_ZXvfX7SOFZ" 113 | }, 114 | "execution_count": null, 115 | "outputs": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "source": [ 120 | "### Transformers Example - GPU\n", 121 | "\n", 122 | "In this example, we see how we can add additional pip requirements if the supplied requirements.txt file doesn't have all the deps you need/want. \n", 123 | "\n", 124 | "In the case of transformers, `transformers` is not listed in the examples' deps, so we'll want to add that. Additionally, we'll add `tensorboard` so we can use it to log metrics.\n", 125 | "\n", 126 | "We'll run this one on a `t4-small` instance on Spaces." 127 | ], 128 | "metadata": { 129 | "id": "Az9vc9TSR6sk" 130 | } 131 | }, 132 | { 133 | "cell_type": "code", 134 | "source": [ 135 | "import fuego\n", 136 | "\n", 137 | "\n", 138 | "space_url, dataset_url = fuego.github_run(\n", 139 | " github_repo_id=\"huggingface/transformers\",\n", 140 | " script=\"examples/pytorch/text-classification/run_glue.py\",\n", 141 | " requirements_file=\"examples/pytorch/text-classification/requirements.txt\",\n", 142 | " space_hardware=\"t4-small\",\n", 143 | " # Adding additional pip requirements to the requirements.txt file\n", 144 | " extra_requirements=[\"tensorboard\", \"git+https://github.com/huggingface/transformers@main#egg=transformers\"],\n", 145 | " # Kwargs, passed as argparse args to the script\n", 146 | " model_name_or_path=\"bert-base-cased\",\n", 147 | " task_name=\"mrpc\",\n", 148 | " do_train=True,\n", 149 | " do_eval=True,\n", 150 | " max_seq_length=128,\n", 151 | " per_device_train_batch_size=32,\n", 152 | " learning_rate=2e-5,\n", 153 | " num_train_epochs=3,\n", 154 | " output_dir=\"./outputs\",\n", 155 | " logging_dir=\"./logs\",\n", 156 | " logging_steps=20,\n", 157 | " report_to=\"tensorboard\",\n", 158 | ")\n", 159 | "print(f\"Space: {space_url}\")\n", 160 | "print(f\"Dataset: {dataset_url}\")" 161 | ], 162 | "metadata": { 163 | "id": "GExAOpA5SLio" 164 | }, 165 | "execution_count": null, 166 | "outputs": [] 167 | } 168 | ] 169 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/github_runner_app/app.py: -------------------------------------------------------------------------------- 1 | # Gradio app to run fuego.github_run() on Hugging Face Spaces 2 | # Hosted at https://hf.co/nateraw/fuego 3 | import gradio as gr 4 | import yaml 5 | 6 | import fuego 7 | 8 | 9 | def fuego_github_run_wrapper( 10 | token, 11 | github_repo_id, 12 | github_repo_branch, 13 | script, 14 | requirements_file, 15 | extra_requirements, 16 | script_args, 17 | output_dirs, 18 | private, 19 | delete_space_on_completion, 20 | downgrade_hardware_on_completion, 21 | space_hardware, 22 | ): 23 | if not token.strip(): 24 | return gr.update( 25 | value="""## token with write access is required. Get one from here""", 26 | visible=True, 27 | ) 28 | 29 | if script_args.strip(): 30 | script_args = yaml.safe_load(script_args) 31 | 32 | if not requirements_file.strip(): 33 | requirements_file = None 34 | 35 | if extra_requirements.strip(): 36 | extra_requirements = [x.strip() for x in extra_requirements.split("\n")] 37 | else: 38 | extra_requirements = None 39 | 40 | if output_dirs.strip(): 41 | output_dirs = [x.strip() for x in output_dirs.split(",")] 42 | 43 | github_repo_id = github_repo_id.strip() 44 | if not github_repo_id: 45 | return gr.update(value="## GitHub repo ID is required", visible=True) 46 | 47 | script = script.strip() 48 | if not script: 49 | return gr.update(value="## script is required", visible=True) 50 | 51 | github_repo_branch = github_repo_branch.strip() 52 | if not github_repo_branch: 53 | return gr.update("## github repo branch is required", visible=True) 54 | 55 | space_url, dataset_url = fuego.github_run( 56 | github_repo_id.strip(), 57 | script.strip(), 58 | requirements_file, 59 | github_repo_branch, 60 | space_hardware=space_hardware, 61 | private=private, 62 | delete_space_on_completion=delete_space_on_completion, 63 | downgrade_hardware_on_completion=downgrade_hardware_on_completion, 64 | space_output_dirs=output_dirs, 65 | extra_requirements=extra_requirements, 66 | token=token, 67 | **script_args, 68 | ) 69 | output_message = f""" 70 | ## Job launched successfully! 🚀 71 | - Link to Space 72 | - Link to Dataset 73 | """ 74 | return gr.update(value=output_message, visible=True) 75 | 76 | 77 | description = """ 78 | This app lets you run scripts from GitHub on Spaces, using any hardware you'd like. Just point to a repo, the script you'd like to run, the dependencies to install, and any args to pass to your script, and watch it go. 😎 79 | 80 | It uses 🔥[fuego](https://github.com/huggingface/fuego)🔥 under the hood to launch your script in one line of Python code. Give the repo a ⭐️ if you think its 🔥. 81 | 82 | **Note: You'll need a Hugging Face token with write access, which you can get from [here](https://hf.co/settings/tokens)** 83 | """ 84 | 85 | additional_info = """ 86 | ## Pricing 87 | 88 | Runs using this tool are **free** as long as you use `cpu-basic` hardware. 🔥 89 | 90 | **See pricing for accelerated hardware (anything other than `cpu-basic`) [here](https://hf.co/pricing#spaces)** 91 | 92 | ## What this space does: 93 | 1. Spins up 2 new HF repos for you: a "runner" space repo and an "output" dataset repo. 94 | 2. Uploads your code to the space, as well as some wrapper code that invokes your script. 95 | 3. Runs your code on the space via the wrapper. Logs should show up in the space. 96 | 4. When the script is done, it takes anything saved to the `output_dirs` and uploads the files within to the output dataset repo 97 | 5. Deletes the space (or downgrades, or just leaves on). Depends on your choice of `delete_space_on_completion` and `downgrade_hardware_on_completion`. 98 | 99 | ## FAQ 100 | 101 | - If your space ends up having a "no application file" issue, you may need to "factory reset" the space. You can do this from the settings page of the space. 102 | """ 103 | 104 | output_message = gr.Markdown("", visible=False) 105 | 106 | with gr.Blocks(css="style.css") as demo: 107 | gr.Markdown("# 🔥Fuego🔥 GitHub Script Runner") 108 | gr.Markdown(description) 109 | with gr.Accordion("👀 More Details (Hardware Pricing, How it Works, and FAQ)", open=False): 110 | gr.Markdown(additional_info) 111 | 112 | with gr.Row(): 113 | token = gr.Textbox(lines=1, label="Hugging Face token with write access", type="password") 114 | 115 | with gr.Row(): 116 | with gr.Column(): 117 | with gr.Box(): 118 | gr.Markdown("What script would you like to run? Also, what are its dependencies?") 119 | github_repo_id = gr.Textbox(lines=1, label="GitHub repo ID (ex. huggingface/fuego)") 120 | github_repo_branch = gr.Textbox( 121 | lines=1, label="Branch of GitHub repo (ex. main)", value="main", interactive=True 122 | ) 123 | script = gr.Textbox(lines=1, label="Path to python script in the GitHub repo") 124 | requirements_file = gr.Textbox(lines=1, label="Path to pip requirements file in the repo") 125 | extra_requirements = gr.Textbox( 126 | lines=5, 127 | label="Any extra pip requirements to your script, just as you would write them in requirements.txt", 128 | ) 129 | with gr.Column(): 130 | with gr.Box(): 131 | gr.Markdown("How should we run your script?") 132 | script_args = gr.Textbox(lines=10, label="Script args to your python file. Input here as YAML.") 133 | spaces_output_dirs = gr.Textbox( 134 | lines=1, 135 | label="Name of output directory to save assets to from within your script. Use commas if you have multiple.", 136 | value="./outputs, ./logs", 137 | ) 138 | private = gr.Checkbox(False, label="Should space/dataset be made as private repos?") 139 | delete_space_on_completion = gr.Checkbox(True, label="Delete the space on completion?") 140 | downgrade_hardware_on_completion = gr.Checkbox( 141 | True, 142 | label="Downgrade hardware of the space on completion? Only applicable if not deleting on completion.", 143 | ) 144 | with gr.Row(): 145 | with gr.Column(): 146 | spaces_hardware = gr.Dropdown( 147 | ["cpu-basic", "cpu-upgrade", "t4-small", "t4-medium", "a10g-small", "a10g-large", "a100-large"], 148 | label="Spaces Hardware", 149 | value="cpu-basic", 150 | interactive=True, 151 | ) 152 | spaces_hardware_msg = gr.Markdown( 153 | """ 154 | 🔴 **The hardware you chose is not free, and you will be charged for it** 🔴 155 | 156 | If you want to run your script for free, please choose `cpu-basic` as your hardware. 157 | """, 158 | visible=False, 159 | ) 160 | spaces_hardware.change( 161 | lambda x: gr.update(visible=True) if x != "cpu-basic" else gr.update(visible=False), 162 | inputs=[spaces_hardware], 163 | outputs=[spaces_hardware_msg], 164 | ) 165 | 166 | with gr.Row(): 167 | with gr.Accordion("👀 Examples", open=False): 168 | gr.Examples( 169 | [ 170 | [ 171 | "pytorch/examples", 172 | "main", 173 | "vae/main.py", 174 | "vae/requirements.txt", 175 | "", 176 | "epochs: 3", 177 | "./results", 178 | False, 179 | True, 180 | True, 181 | "cpu-basic", 182 | ], 183 | [ 184 | "huggingface/transformers", 185 | "main", 186 | "examples/pytorch/text-classification/run_glue.py", 187 | "examples/pytorch/text-classification/requirements.txt", 188 | "tensorboard\ngit+https://github.com/huggingface/transformers@main#egg=transformers", 189 | "model_name_or_path: bert-base-cased\ntask_name: mrpc\ndo_train: True\ndo_eval: True\nmax_seq_length: 128\nper_device_train_batch_size: 32\nlearning_rate: 2e-5\nnum_train_epochs: 3\noutput_dir: ./outputs\nlogging_dir: ./logs\nlogging_steps: 20\nreport_to: tensorboard", 190 | "./outputs,./logs", 191 | False, 192 | True, 193 | True, 194 | "cpu-basic", 195 | ], 196 | ], 197 | inputs=[ 198 | github_repo_id, 199 | github_repo_branch, 200 | script, 201 | requirements_file, 202 | extra_requirements, 203 | script_args, 204 | spaces_output_dirs, 205 | private, 206 | delete_space_on_completion, 207 | downgrade_hardware_on_completion, 208 | spaces_hardware, 209 | ], 210 | outputs=[ 211 | github_repo_id, 212 | github_repo_branch, 213 | script, 214 | requirements_file, 215 | extra_requirements, 216 | script_args, 217 | spaces_output_dirs, 218 | private, 219 | delete_space_on_completion, 220 | downgrade_hardware_on_completion, 221 | spaces_hardware, 222 | ], 223 | cache_examples=False, 224 | ) 225 | 226 | with gr.Row(): 227 | submit = gr.Button("Submit") 228 | reset_btn = gr.Button("Reset fields") 229 | 230 | with gr.Row(): 231 | output_message.render() 232 | 233 | submit.click( 234 | fuego_github_run_wrapper, 235 | inputs=[ 236 | token, 237 | github_repo_id, 238 | github_repo_branch, 239 | script, 240 | requirements_file, 241 | extra_requirements, 242 | script_args, 243 | spaces_output_dirs, 244 | private, 245 | delete_space_on_completion, 246 | downgrade_hardware_on_completion, 247 | spaces_hardware, 248 | ], 249 | outputs=[output_message], 250 | ) 251 | 252 | def reset_fields(): 253 | return { 254 | output_message: gr.update(value="", visible=False), 255 | github_repo_id: gr.update(value=""), 256 | github_repo_branch: gr.update(value="main"), 257 | script: gr.update(value=""), 258 | requirements_file: gr.update(value=""), 259 | extra_requirements: gr.update(value=""), 260 | script_args: gr.update(value=""), 261 | spaces_output_dirs: gr.update(value="./outputs, ./logs"), 262 | private: gr.update(value=False), 263 | delete_space_on_completion: gr.update(value=True), 264 | downgrade_hardware_on_completion: gr.update(value=True), 265 | spaces_hardware: gr.update(value="cpu-basic"), 266 | } 267 | 268 | reset_btn.click( 269 | reset_fields, 270 | outputs=[ 271 | output_message, 272 | github_repo_id, 273 | github_repo_branch, 274 | script, 275 | requirements_file, 276 | extra_requirements, 277 | script_args, 278 | spaces_output_dirs, 279 | private, 280 | delete_space_on_completion, 281 | downgrade_hardware_on_completion, 282 | spaces_hardware, 283 | ], 284 | ) 285 | 286 | if __name__ == "__main__": 287 | demo.launch(debug=True) 288 | -------------------------------------------------------------------------------- /src/fuego/run_on_spaces.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import uuid 3 | from datetime import datetime 4 | from pathlib import Path 5 | from typing import List, Optional 6 | 7 | import fire 8 | import git 9 | from huggingface_hub import ( 10 | DatasetCard, 11 | HfFolder, 12 | SpaceHardware, 13 | add_space_secret, 14 | create_repo, 15 | upload_file, 16 | upload_folder, 17 | ) 18 | from huggingface_hub.utils import logging 19 | 20 | 21 | logger = logging.get_logger(__name__) 22 | 23 | 24 | SPACES_HARDWARE_TYPES = [x.value for x in SpaceHardware] 25 | 26 | 27 | _status_checker_content = """import os 28 | import subprocess 29 | import time 30 | from pathlib import Path 31 | from threading import Thread 32 | from typing import List, Union 33 | 34 | import gradio as gr 35 | from huggingface_hub import HfFolder, delete_repo, upload_folder, get_space_runtime, request_space_hardware, DatasetCard 36 | 37 | 38 | def process_is_complete(process_pid): 39 | '''Checks if the process with the given PID is still running''' 40 | p = subprocess.Popen(["ps", "-p", process_pid], stdout=subprocess.PIPE) 41 | out = p.communicate()[0].decode("utf-8").strip().split("\\n") 42 | return len(out) == 1 43 | 44 | def get_task_status(output_dataset_id): 45 | '''Gets the task status from the output dataset repo''' 46 | card = DatasetCard.load(output_dataset_id) 47 | return card.data.fuego['status'] 48 | 49 | def set_task_status(output_dataset_id, status="done"): 50 | '''Sets the task status in the output dataset repo''' 51 | card = DatasetCard.load(output_dataset_id) 52 | card.data.fuego['status'] = status 53 | card.push_to_hub(output_dataset_id) 54 | 55 | def check_for_status( 56 | process_pid, this_space_id, output_dataset_id, output_dirs, delete_on_completion, downgrade_hardware_on_completion 57 | ): 58 | task_status = get_task_status(output_dataset_id) 59 | print("Task status (found in dataset repo)", task_status) 60 | if task_status == "done": 61 | print("Task was already done, exiting...") 62 | return 63 | elif task_status == "preparing": 64 | print("Setting task status to running...") 65 | set_task_status(output_dataset_id, "running") 66 | 67 | print("Watching PID of script to see if it is done running") 68 | while True: 69 | if process_is_complete(process_pid): 70 | print("Process is complete! Uploading assets to output dataset repo") 71 | for output_dir in output_dirs: 72 | if Path(output_dir).exists(): 73 | print("Uploading folder", output_dir) 74 | upload_folder( 75 | repo_id=output_dataset_id, 76 | folder_path=str(output_dir), 77 | path_in_repo=str(Path('.outputs') / output_dir), 78 | repo_type="dataset", 79 | ) 80 | else: 81 | print("Folder", output_dir, "does not exist, skipping") 82 | 83 | print("Finished uploading outputs to dataset repo...Finishing up...") 84 | if delete_on_completion: 85 | print("Deleting space...") 86 | delete_repo(repo_id=this_space_id, repo_type="space") 87 | elif downgrade_hardware_on_completion: 88 | runtime = get_space_runtime(this_space_id) 89 | if runtime.hardware not in [None, "cpu-basic"]: 90 | print("Requesting downgrade to CPU Basic...") 91 | request_space_hardware(repo_id=this_space_id, hardware="cpu-basic") 92 | else: 93 | print("Space is already on cpu-basic, not downgrading.") 94 | print("Done! Setting task status to done in dataset repo") 95 | set_task_status(output_dataset_id, "done") 96 | return 97 | time.sleep(5) 98 | 99 | 100 | def main( 101 | this_space_repo_id: str, 102 | output_dataset_id: str, 103 | output_dirs: Union[str, List[str]] = "./outputs", 104 | delete_on_completion: bool = True, 105 | downgrade_hardware_on_completion: bool = True, 106 | ): 107 | token_env_var = os.getenv("HF_TOKEN") 108 | if token_env_var is None: 109 | raise ValueError( 110 | "Please set HF_TOKEN environment variable to your Hugging Face token. You can do this in the settings tab of your space." 111 | ) 112 | 113 | if isinstance(output_dirs, str): 114 | output_dirs = [output_dirs] 115 | 116 | HfFolder().save_token(token_env_var) 117 | 118 | # Watch python script's process to see when it's done running 119 | process_pid = os.getenv("USER_SCRIPT_PID", None) 120 | 121 | with gr.Blocks() as demo: 122 | gr.Markdown(Path("about.md").read_text()) 123 | 124 | thread = Thread( 125 | target=check_for_status, 126 | daemon=True, 127 | args=( 128 | process_pid, 129 | this_space_repo_id, 130 | output_dataset_id, 131 | output_dirs, 132 | delete_on_completion, 133 | downgrade_hardware_on_completion, 134 | ), 135 | ) 136 | thread.start() 137 | demo.launch() 138 | 139 | 140 | if __name__ == "__main__": 141 | import fire 142 | 143 | fire.Fire(main) 144 | """ 145 | 146 | # TODO - align with the GPU Dockerfile a bit more 147 | _dockerfile_cpu_content = """FROM python:3.9 148 | 149 | WORKDIR /code 150 | 151 | COPY ./requirements.txt /code/requirements.txt 152 | 153 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 154 | RUN pip install --no-cache-dir fire gradio datasets huggingface_hub 155 | 156 | # Set up a new user named "user" with user ID 1000 157 | RUN useradd -m -u 1000 user 158 | 159 | # Switch to the "user" user 160 | USER user 161 | 162 | # Set home to the user's home directory 163 | ENV HOME=/home/user \ 164 | PATH=/home/user/.local/bin:$PATH \ 165 | PYTHONPATH=$HOME/app \ 166 | PYTHONUNBUFFERED=1 \ 167 | GRADIO_ALLOW_FLAGGING=never \ 168 | GRADIO_NUM_PORTS=1 \ 169 | GRADIO_SERVER_NAME=0.0.0.0 \ 170 | GRADIO_THEME=huggingface \ 171 | SYSTEM=spaces 172 | 173 | # Set the working directory to the user's home directory 174 | WORKDIR $HOME/app 175 | 176 | # Copy the current directory contents into the container at $HOME/app setting the owner to the user 177 | COPY --chown=user . $HOME/app 178 | 179 | RUN chmod +x start_server.sh 180 | 181 | CMD ["./start_server.sh"] 182 | """ 183 | 184 | _dockerfile_gpu_content = """FROM nvidia/cuda:11.3.1-base-ubuntu20.04 185 | 186 | # Remove any third-party apt sources to avoid issues with expiring keys. 187 | RUN rm -f /etc/apt/sources.list.d/*.list 188 | 189 | # Install some basic utilities 190 | RUN apt-get update && apt-get install -y \ 191 | curl \ 192 | ca-certificates \ 193 | sudo \ 194 | git \ 195 | bzip2 \ 196 | libx11-6 \ 197 | && rm -rf /var/lib/apt/lists/* 198 | 199 | # Create a working directory 200 | RUN mkdir /app 201 | WORKDIR /app 202 | 203 | # Create a non-root user and switch to it 204 | RUN adduser --disabled-password --gecos '' --shell /bin/bash user \ 205 | && chown -R user:user /app 206 | RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user 207 | USER user 208 | 209 | # All users can use /home/user as their home directory 210 | ENV HOME=/home/user 211 | RUN mkdir $HOME/.cache $HOME/.config \ 212 | && chmod -R 777 $HOME 213 | 214 | # Set up the Conda environment 215 | ENV CONDA_AUTO_UPDATE_CONDA=false \ 216 | PATH=$HOME/miniconda/bin:$PATH 217 | RUN curl -sLo ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \ 218 | && chmod +x ~/miniconda.sh \ 219 | && ~/miniconda.sh -b -p ~/miniconda \ 220 | && rm ~/miniconda.sh \ 221 | && conda clean -ya 222 | 223 | 224 | ENV PYTHONUNBUFFERED=1 \ 225 | GRADIO_ALLOW_FLAGGING=never \ 226 | GRADIO_NUM_PORTS=1 \ 227 | GRADIO_SERVER_NAME=0.0.0.0 \ 228 | GRADIO_THEME=huggingface \ 229 | SYSTEM=spaces 230 | 231 | RUN pip install --no-cache-dir fire gradio datasets huggingface_hub 232 | 233 | # Install user requirements 234 | COPY ./requirements.txt /app/requirements.txt 235 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt 236 | 237 | WORKDIR $HOME/app 238 | 239 | # Copy the current directory contents into the container at $HOME/app setting the owner to the user 240 | COPY --chown=user . $HOME/app 241 | 242 | RUN chmod +x start_server.sh 243 | 244 | CMD ["./start_server.sh"] 245 | """ 246 | 247 | _start_server_template = """#!/bin/bash 248 | 249 | # Start the python script in the background asynchronously 250 | nohup {command} & 251 | 252 | # Save the PID of the python script so we can reference it in the status checker 253 | export USER_SCRIPT_PID=$! 254 | 255 | # Start a simple web server to watch the status of the python script 256 | python status_checker.py {status_checker_args} 257 | """ 258 | 259 | _about_md_template = """ 260 | # Fuego Runner 261 | 262 | This space is running some job thanks to [fuego](https://github.com/huggingface/fuego)! 🔥 263 | 264 | - Check out the associated [output repo]({output_repo_url}) 265 | """ 266 | 267 | 268 | def convert_dict_to_args_str(args_dict: dict) -> str: 269 | """Convert a dictionary of arguments to a string of arguments that can be passed to a command line script""" 270 | args_str = "" 271 | for arg_name, arg_value in args_dict.items(): 272 | if isinstance(arg_value, (list, dict, tuple)) or (isinstance(arg_value, str) and " " in arg_value): 273 | args_str += f' --{arg_name} "{repr(arg_value)}"' 274 | else: 275 | args_str += f" --{arg_name} {arg_value}" 276 | return args_str.strip() 277 | 278 | 279 | def resolve_requirements_txt(file_path: str = None, requirements: List[str] = None): 280 | # If both the file path and the requirements list are provided 281 | if file_path and requirements: 282 | # Read the contents of the requirements file 283 | file_contents = Path(file_path).read_text() 284 | 285 | # Combine the contents of the file and the list of requirements 286 | combined_contents = file_contents + "\n" + "\n".join(requirements) 287 | return combined_contents 288 | 289 | # If only the file path is provided 290 | elif file_path: 291 | # Read the contents of the requirements file 292 | file_contents = Path(file_path).read_text() 293 | return file_contents 294 | 295 | # If only the list of requirements is provided 296 | elif requirements: 297 | return "\n".join(requirements) 298 | 299 | # If neither the file path nor the list of requirements is provided 300 | else: 301 | return "" 302 | 303 | 304 | def run( 305 | script: str, 306 | requirements_file: Optional[str] = None, 307 | space_id: str = None, 308 | space_hardware: str = "cpu-basic", 309 | dataset_id: Optional[str] = None, 310 | private: bool = False, 311 | allow_patterns: Optional[List[str]] = None, 312 | ignore_patterns: Optional[List[str]] = None, 313 | save_code_snapshot_in_dataset_repo: bool = False, 314 | delete_space_on_completion: bool = True, 315 | downgrade_hardware_on_completion: bool = True, 316 | space_output_dirs: Optional[List[str]] = None, 317 | token: Optional[str] = None, 318 | extra_run_metadata: Optional[dict] = None, 319 | extra_requirements: Optional[List[str]] = None, 320 | **kwargs, 321 | ): 322 | """Create a Hugging Face Space and run a script in it. When finished, the outputs will be saved to a Hugging Face Dataset Repo. 323 | 324 | Args: 325 | script (`str`): 326 | Path to the script to run. 327 | requirements_file (`str`, optional): 328 | Path to requirements file for the job. Defaults to None. 329 | space_id (`str`, optional): 330 | ID of the Hugging Face Space. Defaults to None. 331 | space_hardware (`str`, optional): 332 | Hardware for the Hugging Face Space. Defaults to "cpu". 333 | dataset_id (`str`, optional): 334 | ID of the Hugging Face Dataset Repo. Defaults to None. 335 | private (bool, optional): 336 | If True, both the Hugging Face Space and Dataset Repo will be private. Defaults to False. 337 | allow_patterns (`List[str]`, optional): 338 | List of file patterns to include in the parent directory of `script`. Defaults to None. 339 | ignore_patterns (`List[str]`, optional): 340 | List of file patterns to exclude in the parent directory of `script`. Defaults to None. 341 | save_code_snapshot_in_dataset_repo (`bool`, optional): 342 | If True, a code snapshot will be saved in the Hugging Face Dataset Repo. Defaults to False. 343 | delete_space_on_completion (`bool`, optional): 344 | If True, the Hugging Face Space will be deleted after the job completes. Defaults to True. 345 | downgrade_hardware_on_completion (`bool`, optional): 346 | If True, and `delete_space_on_completion` is False, the Hugging Face Space hardware will be 347 | downgraded to "cpu-basic" after the job completes. Defaults to True. 348 | space_output_dirs (`str`, optional): 349 | Dirs in the space that will be uploaded to output dataset on run completion. If unspecified, 350 | will default to ["outputs", "logs"]. 351 | token (`str`, optional): 352 | Hugging Face token. Uses your cached token (if available) by default. Defaults to None. 353 | extra_run_metadata (`dict`, optional): 354 | Extra metadata to add to the run metadata json file that gets added to the output dataset. Defaults to None. 355 | extra_requirements (`List[str]`, optional): 356 | List of pip requirements to install in the Hugging Face Space. If requirements_file is also provided, 357 | the requirements in the file will be installed in addition to the requirements in this list. Defaults to None. 358 | **kwargs: 359 | Keyword arguments are passed to the script as argparse args or unpacked to the main function. 360 | 361 | Raises: 362 | ValueError: When `space_hardware` is not a valid Hugging Face Space hardware type. 363 | 364 | Returns: 365 | Tuple[str, str]: Tuple of the Hugging Face Space URL and Hugging Face Dataset Repo URL. 366 | """ 367 | if space_hardware not in SPACES_HARDWARE_TYPES: 368 | raise ValueError(f"Invalid instance type: {space_hardware}. Should be one of {SPACES_HARDWARE_TYPES}") 369 | 370 | if space_output_dirs is None: 371 | space_output_dirs = ["outputs", "logs"] 372 | 373 | # The command to run in the space 374 | # Ex. python train.py --learning_rate 0.1 375 | command = f"python {Path(script).name} {convert_dict_to_args_str(kwargs)}" 376 | 377 | task_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid.uuid4().hex[:6]}" 378 | space_id = space_id or f"fuego-{task_id}" 379 | dataset_id = dataset_id or f"fuego-{task_id}" 380 | 381 | # Create 2 new repos. One space for running code, one dataset for storing artifacts 382 | space_repo_url = create_repo( 383 | space_id, 384 | exist_ok=True, 385 | repo_type="space", 386 | space_sdk="docker", 387 | space_hardware=space_hardware, 388 | private=private, 389 | token=token, 390 | ) 391 | space_id = space_repo_url.repo_id 392 | 393 | dataset_repo_url = create_repo(dataset_id, exist_ok=True, repo_type="dataset", private=private, token=token) 394 | dataset_id = dataset_repo_url.repo_id 395 | 396 | logger.info(f"Created Repo at: {space_repo_url}") 397 | logger.info(f"Created Dataset at: {dataset_repo_url}") 398 | 399 | # Add current HF token to the new space, so it has ability to push to output dataset 400 | add_space_secret(space_id, "HF_TOKEN", token or HfFolder().get_token(), token=token) 401 | 402 | # We want to ignore at the very least README.md and .git folder of the cloned 403 | # GitHub repo, but you can include more filters if you want. 404 | if ignore_patterns is None: 405 | ignore_patterns = [] 406 | elif isinstance(ignore_patterns, str): 407 | ignore_patterns = [ignore_patterns] 408 | ignore_patterns += [".git*", "README.md"] 409 | 410 | source_dir = Path(script).parent 411 | 412 | # We push the source up to the Space 413 | upload_folder( 414 | repo_id=space_id, 415 | folder_path=str(source_dir), 416 | path_in_repo=".", 417 | repo_type="space", 418 | allow_patterns=allow_patterns, 419 | ignore_patterns=ignore_patterns, 420 | token=token, 421 | ) 422 | 423 | requirements_file_content = resolve_requirements_txt(requirements_file, extra_requirements) 424 | upload_file( 425 | repo_id=space_id, 426 | path_or_fileobj=requirements_file_content.encode(), 427 | path_in_repo="requirements.txt", 428 | repo_type="space", 429 | token=token, 430 | ) 431 | 432 | # Optionally, you can also push the source to the output dataset 433 | if save_code_snapshot_in_dataset_repo: 434 | upload_folder( 435 | repo_id=dataset_id, 436 | folder_path=str(source_dir), 437 | path_in_repo=".snapshot", 438 | repo_type="dataset", 439 | allow_patterns=allow_patterns, 440 | ignore_patterns=ignore_patterns, 441 | token=token, 442 | ) 443 | 444 | # We put together some metadata here about the task and push that to the dataset 445 | # for safekeeping. 446 | logger.info("Uploaded run metadata to dataset repo for tracking!") 447 | card = DatasetCard("") 448 | card.data.tags = ["fuego"] 449 | card.data.fuego = dict( 450 | id=task_id, 451 | status="preparing", 452 | script=Path(script).name, 453 | requirements_file=Path(requirements_file).name if requirements_file else None, 454 | space_id=space_id, 455 | space_hardware=space_hardware, 456 | **extra_run_metadata or {}, 457 | ) 458 | card.push_to_hub(dataset_id, token=token) 459 | 460 | # about.md 461 | upload_file( 462 | repo_id=space_id, 463 | path_or_fileobj=_about_md_template.format(output_repo_url=dataset_repo_url).encode(), 464 | path_in_repo="about.md", 465 | repo_type="space", 466 | token=token, 467 | ) 468 | 469 | # start_server.sh 470 | upload_file( 471 | repo_id=space_id, 472 | path_or_fileobj=_start_server_template.format( 473 | command=command, 474 | status_checker_args=convert_dict_to_args_str( 475 | { 476 | "this_space_repo_id": space_id, 477 | "output_dataset_id": dataset_id, 478 | "output_dirs": space_output_dirs, 479 | "delete_on_completion": delete_space_on_completion, 480 | "downgrade_hardware_on_completion": downgrade_hardware_on_completion, 481 | } 482 | ), 483 | ).encode(), 484 | path_in_repo="start_server.sh", 485 | repo_type="space", 486 | token=token, 487 | ) 488 | 489 | # status_checker.py 490 | upload_file( 491 | repo_id=space_id, 492 | path_or_fileobj=_status_checker_content.encode(), 493 | path_in_repo="status_checker.py", 494 | repo_type="space", 495 | token=token, 496 | ) 497 | 498 | # Dockerfile 499 | dockerfile_content = ( 500 | _dockerfile_cpu_content if space_hardware in ["cpu-basic", "cpu-upgrade"] else _dockerfile_gpu_content 501 | ) 502 | upload_file( 503 | repo_id=space_id, 504 | path_or_fileobj=dockerfile_content.encode(), 505 | path_in_repo="Dockerfile", 506 | repo_type="space", 507 | token=token, 508 | ) 509 | 510 | return space_repo_url, dataset_repo_url 511 | 512 | 513 | def github_run( 514 | github_repo_id: str, 515 | script: str, 516 | requirements_file: Optional[str] = None, 517 | github_repo_branch: str = "main", 518 | space_id: str = None, 519 | space_hardware: str = "cpu-basic", 520 | dataset_id: Optional[str] = None, 521 | private: bool = False, 522 | allow_patterns: Optional[List[str]] = None, 523 | ignore_patterns: Optional[List[str]] = None, 524 | save_code_snapshot_in_dataset_repo: bool = False, 525 | delete_space_on_completion: bool = True, 526 | downgrade_hardware_on_completion: bool = True, 527 | space_output_dirs: Optional[List[str]] = None, 528 | token: Optional[str] = None, 529 | extra_run_metadata: Optional[dict] = None, 530 | extra_requirements: Optional[List[str]] = None, 531 | **kwargs, 532 | ): 533 | """Create a run from code within a GitHub repo. See `run` for more details.""" 534 | # We clone the GitHub repo into a temporary directory 535 | with tempfile.TemporaryDirectory() as tmp: 536 | repo_url = f"https://github.com/{github_repo_id}" 537 | repo = git.Repo.clone_from(repo_url, tmp, branch=github_repo_branch) 538 | tempdir = Path(tmp) 539 | 540 | script_path = tempdir / script 541 | if not script_path.exists(): 542 | raise ValueError(f"Could not find script {script} in repo {repo_url}") 543 | script = str(script_path) 544 | 545 | if requirements_file is not None: 546 | requirements_path = tempdir / requirements_file 547 | if not requirements_path.exists(): 548 | raise ValueError(f"Could not find requirements file {requirements_file} in repo {repo_url}") 549 | requirements_file = str(requirements_path) 550 | 551 | return run( 552 | script=str(script_path), 553 | requirements_file=requirements_file, 554 | space_id=space_id, 555 | space_hardware=space_hardware, 556 | dataset_id=dataset_id, 557 | private=private, 558 | allow_patterns=allow_patterns, 559 | ignore_patterns=ignore_patterns, 560 | save_code_snapshot_in_dataset_repo=save_code_snapshot_in_dataset_repo, 561 | delete_space_on_completion=delete_space_on_completion, 562 | downgrade_hardware_on_completion=downgrade_hardware_on_completion, 563 | space_output_dirs=space_output_dirs, 564 | token=token, 565 | extra_run_metadata=dict( 566 | github_repo_id=github_repo_id, 567 | github_repo_branch=github_repo_branch, 568 | github_repo_sha=repo.head.object.hexsha, 569 | **extra_run_metadata or {}, 570 | ), 571 | extra_requirements=extra_requirements, 572 | **kwargs, 573 | ) 574 | 575 | 576 | def cli_run(): 577 | fire.Fire( 578 | { 579 | "run": run, 580 | "github_run": github_run, 581 | } 582 | ) 583 | --------------------------------------------------------------------------------