├── examples
├── simple_example_with_requirements
│ ├── requirements.txt
│ ├── run.py
│ └── fuego_run.py
├── github_runner_app
│ ├── style.css
│ ├── requirements.txt
│ └── app.py
├── README.md
├── run_all.sh
├── simple_example
│ ├── fuego_run.py
│ └── run.py
├── pytorch_vae_github
│ └── fuego_run.py
├── transformers_github
│ └── fuego_run.py
└── fuego_demo.ipynb
├── src
└── fuego
│ ├── __init__.py
│ ├── runtime.py
│ └── run_on_spaces.py
├── Makefile
├── pyproject.toml
├── .github
└── workflows
│ ├── sync-with-huggingface.yml
│ └── python-publish.yml
├── setup.py
├── README.md
├── .gitignore
└── LICENSE
/examples/simple_example_with_requirements/requirements.txt:
--------------------------------------------------------------------------------
1 | fire==0.5.0
--------------------------------------------------------------------------------
/examples/github_runner_app/style.css:
--------------------------------------------------------------------------------
1 | h1 {
2 | text-align: center;
3 | }
--------------------------------------------------------------------------------
/examples/github_runner_app/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio
2 | PyYAML
3 | fuego==0.0.8
4 |
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | See each example subdirectory for details. Most examples include a `fuego_run.py` that launches the example with fuego; `github_runner_app` is instead a Gradio app (`app.py`) hosted on Spaces.
--------------------------------------------------------------------------------
/src/fuego/__init__.py:
--------------------------------------------------------------------------------
1 | from .run_on_spaces import github_run, run
2 |
3 |
4 | __version__ = "0.0.9.dev0"
5 |
--------------------------------------------------------------------------------
/examples/run_all.sh:
--------------------------------------------------------------------------------
1 | python simple_example/fuego_run.py
2 | python simple_example_with_requirements/fuego_run.py
3 | python transformers_github/fuego_run.py
4 | python pytorch_vae_github/fuego_run.py
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: quality style
2 |
3 | # Check that source code meets quality standards
4 | quality:
5 | black --check --diff .
6 | ruff .
7 |
8 | # Format source code automatically
9 | style:
10 | black .
11 | ruff . --fix
12 |
--------------------------------------------------------------------------------
/examples/simple_example/fuego_run.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import fuego
4 |
5 |
6 | space_url, dataset_url = fuego.run(
7 | script=str(Path(__file__).parent / "run.py"),
8 | delete_space_on_completion=True, # When debugging, set this to False
9 | # Kwargs
10 | message="Howdy, world!!!",
11 | )
12 | print(f"space_url: {space_url}")
13 | print(f"dataset_url: {dataset_url}")
14 |
--------------------------------------------------------------------------------
/examples/simple_example_with_requirements/run.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import fire
4 |
5 |
6 | def main(message: str = "Hello, world!", output_dir="./outputs"):
7 | logdir = Path(output_dir)
8 | logdir.mkdir(exist_ok=True, parents=True)
9 | outfile_path = logdir / "message.txt"
10 | outfile_path.write_text(message)
11 |
12 |
13 | if __name__ == "__main__":
14 | fire.Fire(main)
15 |
--------------------------------------------------------------------------------
/examples/pytorch_vae_github/fuego_run.py:
--------------------------------------------------------------------------------
1 | import fuego
2 |
3 |
4 | space_url, dataset_url = fuego.github_run(
5 | github_repo_id="pytorch/examples",
6 | script="vae/main.py",
7 | requirements_file="vae/requirements.txt",
8 | space_output_dirs=["./results"],
9 | # Kwargs, passed as argparse args to the script
10 | epochs=3,
11 | )
12 | print(f"Space: {space_url}")
13 | print(f"Dataset: {dataset_url}")
14 |
--------------------------------------------------------------------------------
/examples/simple_example_with_requirements/fuego_run.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import fuego
4 |
5 |
6 | space_url, dataset_url = fuego.run(
7 | script=str(Path(__file__).parent / "run.py"),
8 | requirements_file=str(Path(__file__).parent / "requirements.txt"),
9 | delete_space_on_completion=True, # When debugging, set this to False
10 | # Kwargs
11 | message="Howdy, world!",
12 | )
13 | print(f"space_url: {space_url}")
14 | print(f"dataset_url: {dataset_url}")
15 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 119
3 | target_version = ['py37']
4 |
5 | [tool.ruff]
6 | # Never enforce `E501` (line length violations).
7 | ignore = ["C901", "E501", "E741", "W605"]
8 | select = ["C", "E", "F", "I", "W"]
9 | line-length = 119
10 |
11 | # Ignore import violations in all `__init__.py` files.
12 | [tool.ruff.per-file-ignores]
13 | "__init__.py" = ["E402", "F401", "F403", "F811"]
14 |
15 | [tool.ruff.isort]
16 | known-first-party = ["fuego"]
17 | lines-after-imports = 2
18 |
--------------------------------------------------------------------------------
/.github/workflows/sync-with-huggingface.yml:
--------------------------------------------------------------------------------
1 | name: Sync with Hugging Face Hub
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | - .github/workflows/sync-with-huggingface.yml
9 | - examples/github_runner_app/**
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Sync with Hugging Face
16 | uses: nateraw/huggingface-sync-action@v0.0.4
17 | with:
18 | github_repo_id: huggingface/fuego
19 | huggingface_repo_id: nateraw/fuego
20 | repo_type: space
21 | space_sdk: gradio
22 | subdirectory: examples/github_runner_app
23 | hf_token: ${{ secrets.HF_TOKEN }}
24 |
--------------------------------------------------------------------------------
/examples/simple_example/run.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | from pathlib import Path
3 |
4 |
5 | def main(message: str = "Hello, world!", output_dir="./outputs"):
6 | logdir = Path(output_dir)
7 | logdir.mkdir(exist_ok=True, parents=True)
8 | outfile_path = logdir / "message.txt"
9 | outfile_path.write_text(message)
10 |
11 |
12 | def parse_args(args=None):
13 | parser = ArgumentParser()
14 | parser.add_argument("--message", type=str, default="Hello, world!")
15 | parser.add_argument("--output_dir", type=str, default="./outputs")
16 | return parser.parse_args(args=args)
17 |
18 |
19 | if __name__ == "__main__":
20 | main(**vars(parse_args()))
21 |
--------------------------------------------------------------------------------
/examples/transformers_github/fuego_run.py:
--------------------------------------------------------------------------------
1 | import fuego
2 |
3 |
4 | space_url, dataset_url = fuego.github_run(
5 | github_repo_id="huggingface/transformers",
6 | script="examples/pytorch/text-classification/run_glue.py",
7 | requirements_file="examples/pytorch/text-classification/requirements.txt",
8 | space_hardware="t4-small",
9 | # Adding additional pip requirements to the requirements.txt file
10 | extra_requirements=["tensorboard", "git+https://github.com/huggingface/transformers@ea55bd8#egg=transformers"],
11 | # Kwargs, passed as argparse args to the script
12 | model_name_or_path="bert-base-cased",
13 | task_name="mrpc",
14 | do_train=True,
15 | do_eval=True,
16 | max_seq_length=128,
17 | per_device_train_batch_size=32,
18 | learning_rate=2e-5,
19 | num_train_epochs=3,
20 | output_dir="./outputs",
21 | logging_dir="./logs",
22 | logging_steps=20,
23 | report_to="tensorboard",
24 | )
25 | print(f"Space: {space_url}")
26 | print(f"Dataset: {dataset_url}")
27 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | push:
8 | tags:
9 | - v*
10 |
11 | jobs:
12 | deploy:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: "3.x"
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | run: |
30 | python setup.py sdist bdist_wheel
31 | twine upload dist/*
32 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 |
4 | def get_version() -> str:
5 | rel_path = "src/fuego/__init__.py"
6 | with open(rel_path, "r") as fp:
7 | for line in fp.read().splitlines():
8 | if line.startswith("__version__"):
9 | delim = '"' if '"' in line else "'"
10 | return line.split(delim)[1]
11 | raise RuntimeError("Unable to find version string.")
12 |
13 |
14 | requirements = [
15 | "fire",
16 | "huggingface_hub>=0.12.0",
17 | "GitPython",
18 | ]
19 |
20 | extras = {}
21 | extras["quality"] = ["black~=23.1", "ruff>=0.0.241"]
22 |
23 | setup(
24 | name="fuego",
25 | description="Fuego",
26 | long_description=open("README.md", "r", encoding="utf-8").read(),
27 | long_description_content_type="text/markdown",
28 | url="https://github.com/huggingface/fuego",
29 | version=get_version(),
30 | author="Nathan Raw",
31 | author_email="nate@huggingface.com",
32 | license="Apache",
33 | install_requires=requirements,
34 | extras_require=extras,
35 | package_dir={"": "src"},
36 | packages=find_packages("src"),
37 | entry_points={"console_scripts": ["fuego=fuego.run_on_spaces:cli_run"]},
38 | )
39 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # fuego
2 |
3 | A 🔥 tool for running code in the cloud
4 |
5 | 🔥[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/fuego/blob/main/examples/fuego_demo.ipynb)🔥
6 |
7 | ### Note
8 |
9 | ❗ **This project is a WIP and just an _idea_ right now. Under active development.** ❗
10 |
11 | 🤗 Suggestions/ideas/feedback from community are welcome! Please feel free to submit a [new idea](https://github.com/huggingface/fuego/discussions/new?category=ideas) in the discussions tab.
12 |
13 | ## The idea
14 |
15 | A nice interface for running scripts on Hugging Face Spaces
16 |
17 | ## Installation
18 |
19 | For now, you can install from source:
20 |
21 | ```bash
22 | git clone https://github.com/huggingface/fuego.git
23 | cd fuego
24 | pip install -e "."
25 | ```
26 |
27 | ## WIP API
28 |
29 | The Python API and CLI should have very similar experiences so folks can use whichever they prefer.
30 |
31 | See the examples folder for more details.
32 |
33 | #### Python
34 |
35 |
36 | ```python
37 | import fuego
38 |
39 | fuego.run(
40 | script='run.py',
41 | requirements_file='requirements.txt',
42 | # Kwargs
43 | message='hello world',
44 | )
45 | ```
46 |
47 | #### CLI
48 |
49 | ```bash
50 | fuego run --script run.py --requirements_file requirements.txt --message "hello world"
51 | ```
52 |
--------------------------------------------------------------------------------
/src/fuego/runtime.py:
--------------------------------------------------------------------------------
1 | """Check presence of installed packages at runtime."""
2 | import sys
3 |
4 | import packaging.version
5 |
6 |
7 | _PY_VERSION: str = sys.version.split()[0].rstrip("+")
8 |
9 | if packaging.version.Version(_PY_VERSION) < packaging.version.Version("3.8.0"):
10 | import importlib_metadata # type: ignore
11 | else:
12 | import importlib.metadata as importlib_metadata # type: ignore
13 |
14 |
15 | _package_versions = {}
16 |
17 | _CANDIDATES = {
18 | "huggingface_hub": {"huggingface_hub"},
19 | }
20 |
21 | # Check once at runtime
22 | for candidate_name, package_names in _CANDIDATES.items():
23 | _package_versions[candidate_name] = "N/A"
24 | for name in package_names:
25 | try:
26 | _package_versions[candidate_name] = importlib_metadata.version(name)
27 | break
28 | except importlib_metadata.PackageNotFoundError:
29 | pass
30 |
31 |
32 | def _get_version(package_name: str) -> str:
33 | return _package_versions.get(package_name, "N/A")
34 |
35 |
36 | def _is_available(package_name: str) -> bool:
37 | return _get_version(package_name) != "N/A"
38 |
39 |
40 | # Python
41 | def get_python_version() -> str:
42 | return _PY_VERSION
43 |
44 |
45 | # huggingface_hub
46 | def is_huggingface_hub_available() -> bool:
47 | return _is_available("huggingface_hub")
48 |
49 |
50 | def get_huggingface_hub_version() -> str:
51 | return _get_version("huggingface_hub")
52 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | .DS_Store
132 |
133 | config.json
134 | examples/config.json
135 |
136 | /run.py
137 |
138 | # Ruff cache
139 | .ruff_cache/
140 |
141 | examples/lora*
--------------------------------------------------------------------------------
/examples/fuego_demo.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": [],
7 | "authorship_tag": "ABX9TyNSlh6WA3l/3mdm12G17FG0",
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | },
14 | "language_info": {
15 | "name": "python"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {
22 | "id": "view-in-github",
23 | "colab_type": "text"
24 | },
25 | "source": [
26 | "<a href=\"https://colab.research.google.com/github/huggingface/fuego/blob/main/examples/fuego_demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "source": [
32 | "Install `fuego` from GitHub"
33 | ],
34 | "metadata": {
35 | "id": "Le73xlSdSA8j"
36 | }
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {
42 | "id": "2__BOdt-Roq-"
43 | },
44 | "outputs": [],
45 | "source": [
46 | "%%capture\n",
47 | "! pip install git+https://github.com/huggingface/fuego"
48 | ]
49 | },
50 | {
51 | "cell_type": "markdown",
52 | "source": [
53 | "Log in using a token with write access"
54 | ],
55 | "metadata": {
56 | "id": "5HhZ1aWDR-Cm"
57 | }
58 | },
59 | {
60 | "cell_type": "code",
61 | "source": [
62 | "from huggingface_hub import login\n",
63 | "\n",
64 | "login()"
65 | ],
66 | "metadata": {
67 | "id": "3lPgvaFBRvKM"
68 | },
69 | "execution_count": null,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "source": [
75 | "# Run From GitHub\n",
76 | "\n",
77 | "Note - if either of these lead to \"no application file\" in the resulting space, just factory reset it to get it building/running. It happens from time to time."
78 | ],
79 | "metadata": {
80 | "id": "LXqo_XaISewV"
81 | }
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "source": [
86 | "### PyTorch Example - CPU\n",
87 | "\n",
88 | "In the script, they save outputs to ./results, so we'll upload that instead of the default './outputs' and './logs' directories."
89 | ],
90 | "metadata": {
91 | "id": "yaLBmTRCSL_g"
92 | }
93 | },
94 | {
95 | "cell_type": "code",
96 | "source": [
97 | "import fuego\n",
98 | "\n",
99 | "\n",
100 | "space_url, dataset_url = fuego.github_run(\n",
101 | " github_repo_id=\"pytorch/examples\",\n",
102 | " script=\"vae/main.py\",\n",
103 | " requirements_file=\"vae/requirements.txt\",\n",
104 | " space_output_dirs=['./results'],\n",
105 | " # Kwargs, passed as argparse args to the script\n",
106 | " epochs=3,\n",
107 | ")\n",
108 | "print(f\"Space: {space_url}\")\n",
109 | "print(f\"Dataset: {dataset_url}\")"
110 | ],
111 | "metadata": {
112 | "id": "1_ZXvfX7SOFZ"
113 | },
114 | "execution_count": null,
115 | "outputs": []
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "source": [
120 | "### Transformers Example - GPU\n",
121 | "\n",
122 | "In this example, we see how we can add additional pip requirements if the supplied requirements.txt file doesn't have all the deps you need/want. \n",
123 | "\n",
124 | "In the case of transformers, `transformers` is not listed in the examples' deps, so we'll want to add that. Additionally, we'll add `tensorboard` so we can use it to log metrics.\n",
125 | "\n",
126 | "We'll run this one on a `t4-small` instance on Spaces."
127 | ],
128 | "metadata": {
129 | "id": "Az9vc9TSR6sk"
130 | }
131 | },
132 | {
133 | "cell_type": "code",
134 | "source": [
135 | "import fuego\n",
136 | "\n",
137 | "\n",
138 | "space_url, dataset_url = fuego.github_run(\n",
139 | " github_repo_id=\"huggingface/transformers\",\n",
140 | " script=\"examples/pytorch/text-classification/run_glue.py\",\n",
141 | " requirements_file=\"examples/pytorch/text-classification/requirements.txt\",\n",
142 | " space_hardware=\"t4-small\",\n",
143 | " # Adding additional pip requirements to the requirements.txt file\n",
144 | " extra_requirements=[\"tensorboard\", \"git+https://github.com/huggingface/transformers@main#egg=transformers\"],\n",
145 | " # Kwargs, passed as argparse args to the script\n",
146 | " model_name_or_path=\"bert-base-cased\",\n",
147 | " task_name=\"mrpc\",\n",
148 | " do_train=True,\n",
149 | " do_eval=True,\n",
150 | " max_seq_length=128,\n",
151 | " per_device_train_batch_size=32,\n",
152 | " learning_rate=2e-5,\n",
153 | " num_train_epochs=3,\n",
154 | " output_dir=\"./outputs\",\n",
155 | " logging_dir=\"./logs\",\n",
156 | " logging_steps=20,\n",
157 | " report_to=\"tensorboard\",\n",
158 | ")\n",
159 | "print(f\"Space: {space_url}\")\n",
160 | "print(f\"Dataset: {dataset_url}\")"
161 | ],
162 | "metadata": {
163 | "id": "GExAOpA5SLio"
164 | },
165 | "execution_count": null,
166 | "outputs": []
167 | }
168 | ]
169 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/examples/github_runner_app/app.py:
--------------------------------------------------------------------------------
1 | # Gradio app to run fuego.github_run() on Hugging Face Spaces
2 | # Hosted at https://hf.co/nateraw/fuego
3 | import gradio as gr
4 | import yaml
5 |
6 | import fuego
7 |
8 |
def fuego_github_run_wrapper(
    token,
    github_repo_id,
    github_repo_branch,
    script,
    requirements_file,
    extra_requirements,
    script_args,
    output_dirs,
    private,
    delete_space_on_completion,
    downgrade_hardware_on_completion,
    space_hardware,
):
    """Validate the form inputs and launch a run via fuego.github_run.

    All inputs arrive as raw strings/bools from the Gradio components.
    Returns a gr.update(...) for the output Markdown component: either an
    error message or a success message linking the created Space/Dataset.
    """
    if not token.strip():
        return gr.update(
            value="""## token with write access is required. Get one from here""",
            visible=True,
        )

    # Script args arrive as YAML text; empty box means "no args".
    # yaml.safe_load can return a non-dict (e.g. a bare string), which would
    # break the **script_args unpacking below, so guard against that.
    if script_args.strip():
        script_args = yaml.safe_load(script_args)
        if not isinstance(script_args, dict):
            return gr.update(value="## script args must be YAML key/value pairs", visible=True)
    else:
        # BUG FIX: previously left as "" which cannot be **-unpacked.
        script_args = {}

    if not requirements_file.strip():
        requirements_file = None

    if extra_requirements.strip():
        extra_requirements = [x.strip() for x in extra_requirements.split("\n")]
    else:
        extra_requirements = None

    # Comma separated list of dirs; fall back to fuego's default when empty
    # instead of passing an empty string through.
    if output_dirs.strip():
        output_dirs = [x.strip() for x in output_dirs.split(",")]
    else:
        output_dirs = None

    github_repo_id = github_repo_id.strip()
    if not github_repo_id:
        return gr.update(value="## GitHub repo ID is required", visible=True)

    script = script.strip()
    if not script:
        return gr.update(value="## script is required", visible=True)

    github_repo_branch = github_repo_branch.strip()
    if not github_repo_branch:
        # BUG FIX: gr.update only accepts keyword arguments; the message was
        # previously passed positionally, which raises a TypeError.
        return gr.update(value="## github repo branch is required", visible=True)

    space_url, dataset_url = fuego.github_run(
        github_repo_id,
        script,
        requirements_file,
        github_repo_branch,
        space_hardware=space_hardware,
        private=private,
        delete_space_on_completion=delete_space_on_completion,
        downgrade_hardware_on_completion=downgrade_hardware_on_completion,
        space_output_dirs=output_dirs,
        extra_requirements=extra_requirements,
        token=token,
        **script_args,
    )
    # BUG FIX: the markdown links previously had no URLs even though the
    # f-string had space_url/dataset_url available.
    output_message = f"""
    ## Job launched successfully! 🚀
    - [Link to Space]({space_url})
    - [Link to Dataset]({dataset_url})
    """
    return gr.update(value=output_message, visible=True)
75 |
76 |
77 | description = """
78 | This app lets you run scripts from GitHub on Spaces, using any hardware you'd like. Just point to a repo, the script you'd like to run, the dependencies to install, and any args to pass to your script, and watch it go. 😎
79 |
80 | It uses 🔥[fuego](https://github.com/huggingface/fuego)🔥 under the hood to launch your script in one line of Python code. Give the repo a ⭐️ if you think its 🔥.
81 |
82 | **Note: You'll need a Hugging Face token with write access, which you can get from [here](https://hf.co/settings/tokens)**
83 | """
84 |
85 | additional_info = """
86 | ## Pricing
87 |
88 | Runs using this tool are **free** as long as you use `cpu-basic` hardware. 🔥
89 |
90 | **See pricing for accelerated hardware (anything other than `cpu-basic`) [here](https://hf.co/pricing#spaces)**
91 |
92 | ## What this space does:
93 | 1. Spins up 2 new HF repos for you: a "runner" space repo and an "output" dataset repo.
94 | 2. Uploads your code to the space, as well as some wrapper code that invokes your script.
95 | 3. Runs your code on the space via the wrapper. Logs should show up in the space.
96 | 4. When the script is done, it takes anything saved to the `output_dirs` and uploads the files within to the output dataset repo
97 | 5. Deletes the space (or downgrades, or just leaves on). Depends on your choice of `delete_space_on_completion` and `downgrade_hardware_on_completion`.
98 |
99 | ## FAQ
100 |
101 | - If your space ends up having a "no application file" issue, you may need to "factory reset" the space. You can do this from the settings page of the space.
102 | """
103 |
104 | output_message = gr.Markdown("", visible=False)
105 |
106 | with gr.Blocks(css="style.css") as demo:
107 | gr.Markdown("# 🔥Fuego🔥 GitHub Script Runner")
108 | gr.Markdown(description)
109 | with gr.Accordion("👀 More Details (Hardware Pricing, How it Works, and FAQ)", open=False):
110 | gr.Markdown(additional_info)
111 |
112 | with gr.Row():
113 | token = gr.Textbox(lines=1, label="Hugging Face token with write access", type="password")
114 |
115 | with gr.Row():
116 | with gr.Column():
117 | with gr.Box():
118 | gr.Markdown("What script would you like to run? Also, what are its dependencies?")
119 | github_repo_id = gr.Textbox(lines=1, label="GitHub repo ID (ex. huggingface/fuego)")
120 | github_repo_branch = gr.Textbox(
121 | lines=1, label="Branch of GitHub repo (ex. main)", value="main", interactive=True
122 | )
123 | script = gr.Textbox(lines=1, label="Path to python script in the GitHub repo")
124 | requirements_file = gr.Textbox(lines=1, label="Path to pip requirements file in the repo")
125 | extra_requirements = gr.Textbox(
126 | lines=5,
127 | label="Any extra pip requirements to your script, just as you would write them in requirements.txt",
128 | )
129 | with gr.Column():
130 | with gr.Box():
131 | gr.Markdown("How should we run your script?")
132 | script_args = gr.Textbox(lines=10, label="Script args to your python file. Input here as YAML.")
133 | spaces_output_dirs = gr.Textbox(
134 | lines=1,
135 | label="Name of output directory to save assets to from within your script. Use commas if you have multiple.",
136 | value="./outputs, ./logs",
137 | )
138 | private = gr.Checkbox(False, label="Should space/dataset be made as private repos?")
139 | delete_space_on_completion = gr.Checkbox(True, label="Delete the space on completion?")
140 | downgrade_hardware_on_completion = gr.Checkbox(
141 | True,
142 | label="Downgrade hardware of the space on completion? Only applicable if not deleting on completion.",
143 | )
144 | with gr.Row():
145 | with gr.Column():
146 | spaces_hardware = gr.Dropdown(
147 | ["cpu-basic", "cpu-upgrade", "t4-small", "t4-medium", "a10g-small", "a10g-large", "a100-large"],
148 | label="Spaces Hardware",
149 | value="cpu-basic",
150 | interactive=True,
151 | )
152 | spaces_hardware_msg = gr.Markdown(
153 | """
154 | 🔴 **The hardware you chose is not free, and you will be charged for it** 🔴
155 |
156 | If you want to run your script for free, please choose `cpu-basic` as your hardware.
157 | """,
158 | visible=False,
159 | )
160 | spaces_hardware.change(
161 | lambda x: gr.update(visible=True) if x != "cpu-basic" else gr.update(visible=False),
162 | inputs=[spaces_hardware],
163 | outputs=[spaces_hardware_msg],
164 | )
165 |
166 | with gr.Row():
167 | with gr.Accordion("👀 Examples", open=False):
168 | gr.Examples(
169 | [
170 | [
171 | "pytorch/examples",
172 | "main",
173 | "vae/main.py",
174 | "vae/requirements.txt",
175 | "",
176 | "epochs: 3",
177 | "./results",
178 | False,
179 | True,
180 | True,
181 | "cpu-basic",
182 | ],
183 | [
184 | "huggingface/transformers",
185 | "main",
186 | "examples/pytorch/text-classification/run_glue.py",
187 | "examples/pytorch/text-classification/requirements.txt",
188 | "tensorboard\ngit+https://github.com/huggingface/transformers@main#egg=transformers",
189 | "model_name_or_path: bert-base-cased\ntask_name: mrpc\ndo_train: True\ndo_eval: True\nmax_seq_length: 128\nper_device_train_batch_size: 32\nlearning_rate: 2e-5\nnum_train_epochs: 3\noutput_dir: ./outputs\nlogging_dir: ./logs\nlogging_steps: 20\nreport_to: tensorboard",
190 | "./outputs,./logs",
191 | False,
192 | True,
193 | True,
194 | "cpu-basic",
195 | ],
196 | ],
197 | inputs=[
198 | github_repo_id,
199 | github_repo_branch,
200 | script,
201 | requirements_file,
202 | extra_requirements,
203 | script_args,
204 | spaces_output_dirs,
205 | private,
206 | delete_space_on_completion,
207 | downgrade_hardware_on_completion,
208 | spaces_hardware,
209 | ],
210 | outputs=[
211 | github_repo_id,
212 | github_repo_branch,
213 | script,
214 | requirements_file,
215 | extra_requirements,
216 | script_args,
217 | spaces_output_dirs,
218 | private,
219 | delete_space_on_completion,
220 | downgrade_hardware_on_completion,
221 | spaces_hardware,
222 | ],
223 | cache_examples=False,
224 | )
225 |
226 | with gr.Row():
227 | submit = gr.Button("Submit")
228 | reset_btn = gr.Button("Reset fields")
229 |
230 | with gr.Row():
231 | output_message.render()
232 |
233 | submit.click(
234 | fuego_github_run_wrapper,
235 | inputs=[
236 | token,
237 | github_repo_id,
238 | github_repo_branch,
239 | script,
240 | requirements_file,
241 | extra_requirements,
242 | script_args,
243 | spaces_output_dirs,
244 | private,
245 | delete_space_on_completion,
246 | downgrade_hardware_on_completion,
247 | spaces_hardware,
248 | ],
249 | outputs=[output_message],
250 | )
251 |
252 | def reset_fields():
253 | return {
254 | output_message: gr.update(value="", visible=False),
255 | github_repo_id: gr.update(value=""),
256 | github_repo_branch: gr.update(value="main"),
257 | script: gr.update(value=""),
258 | requirements_file: gr.update(value=""),
259 | extra_requirements: gr.update(value=""),
260 | script_args: gr.update(value=""),
261 | spaces_output_dirs: gr.update(value="./outputs, ./logs"),
262 | private: gr.update(value=False),
263 | delete_space_on_completion: gr.update(value=True),
264 | downgrade_hardware_on_completion: gr.update(value=True),
265 | spaces_hardware: gr.update(value="cpu-basic"),
266 | }
267 |
268 | reset_btn.click(
269 | reset_fields,
270 | outputs=[
271 | output_message,
272 | github_repo_id,
273 | github_repo_branch,
274 | script,
275 | requirements_file,
276 | extra_requirements,
277 | script_args,
278 | spaces_output_dirs,
279 | private,
280 | delete_space_on_completion,
281 | downgrade_hardware_on_completion,
282 | spaces_hardware,
283 | ],
284 | )
285 |
286 | if __name__ == "__main__":
287 | demo.launch(debug=True)
288 |
--------------------------------------------------------------------------------
/src/fuego/run_on_spaces.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | import uuid
3 | from datetime import datetime
4 | from pathlib import Path
5 | from typing import List, Optional
6 |
7 | import fire
8 | import git
9 | from huggingface_hub import (
10 | DatasetCard,
11 | HfFolder,
12 | SpaceHardware,
13 | add_space_secret,
14 | create_repo,
15 | upload_file,
16 | upload_folder,
17 | )
18 | from huggingface_hub.utils import logging
19 |
20 |
# Module-level logger using huggingface_hub's logging helper.
logger = logging.get_logger(__name__)


# All valid Spaces hardware identifiers (e.g. "cpu-basic", "t4-small"),
# derived from huggingface_hub's SpaceHardware enum; used to validate input.
SPACES_HARDWARE_TYPES = [x.value for x in SpaceHardware]
25 |
26 |
27 | _status_checker_content = """import os
28 | import subprocess
29 | import time
30 | from pathlib import Path
31 | from threading import Thread
32 | from typing import List, Union
33 |
34 | import gradio as gr
35 | from huggingface_hub import HfFolder, delete_repo, upload_folder, get_space_runtime, request_space_hardware, DatasetCard
36 |
37 |
38 | def process_is_complete(process_pid):
39 | '''Checks if the process with the given PID is still running'''
40 | p = subprocess.Popen(["ps", "-p", process_pid], stdout=subprocess.PIPE)
41 | out = p.communicate()[0].decode("utf-8").strip().split("\\n")
42 | return len(out) == 1
43 |
44 | def get_task_status(output_dataset_id):
45 | '''Gets the task status from the output dataset repo'''
46 | card = DatasetCard.load(output_dataset_id)
47 | return card.data.fuego['status']
48 |
49 | def set_task_status(output_dataset_id, status="done"):
50 | '''Sets the task status in the output dataset repo'''
51 | card = DatasetCard.load(output_dataset_id)
52 | card.data.fuego['status'] = status
53 | card.push_to_hub(output_dataset_id)
54 |
55 | def check_for_status(
56 | process_pid, this_space_id, output_dataset_id, output_dirs, delete_on_completion, downgrade_hardware_on_completion
57 | ):
58 | task_status = get_task_status(output_dataset_id)
59 | print("Task status (found in dataset repo)", task_status)
60 | if task_status == "done":
61 | print("Task was already done, exiting...")
62 | return
63 | elif task_status == "preparing":
64 | print("Setting task status to running...")
65 | set_task_status(output_dataset_id, "running")
66 |
67 | print("Watching PID of script to see if it is done running")
68 | while True:
69 | if process_is_complete(process_pid):
70 | print("Process is complete! Uploading assets to output dataset repo")
71 | for output_dir in output_dirs:
72 | if Path(output_dir).exists():
73 | print("Uploading folder", output_dir)
74 | upload_folder(
75 | repo_id=output_dataset_id,
76 | folder_path=str(output_dir),
77 | path_in_repo=str(Path('.outputs') / output_dir),
78 | repo_type="dataset",
79 | )
80 | else:
81 | print("Folder", output_dir, "does not exist, skipping")
82 |
83 | print("Finished uploading outputs to dataset repo...Finishing up...")
84 | if delete_on_completion:
85 | print("Deleting space...")
86 | delete_repo(repo_id=this_space_id, repo_type="space")
87 | elif downgrade_hardware_on_completion:
88 | runtime = get_space_runtime(this_space_id)
89 | if runtime.hardware not in [None, "cpu-basic"]:
90 | print("Requesting downgrade to CPU Basic...")
91 | request_space_hardware(repo_id=this_space_id, hardware="cpu-basic")
92 | else:
93 | print("Space is already on cpu-basic, not downgrading.")
94 | print("Done! Setting task status to done in dataset repo")
95 | set_task_status(output_dataset_id, "done")
96 | return
97 | time.sleep(5)
98 |
99 |
100 | def main(
101 | this_space_repo_id: str,
102 | output_dataset_id: str,
103 | output_dirs: Union[str, List[str]] = "./outputs",
104 | delete_on_completion: bool = True,
105 | downgrade_hardware_on_completion: bool = True,
106 | ):
107 | token_env_var = os.getenv("HF_TOKEN")
108 | if token_env_var is None:
109 | raise ValueError(
110 | "Please set HF_TOKEN environment variable to your Hugging Face token. You can do this in the settings tab of your space."
111 | )
112 |
113 | if isinstance(output_dirs, str):
114 | output_dirs = [output_dirs]
115 |
116 | HfFolder().save_token(token_env_var)
117 |
118 | # Watch python script's process to see when it's done running
119 | process_pid = os.getenv("USER_SCRIPT_PID", None)
120 |
121 | with gr.Blocks() as demo:
122 | gr.Markdown(Path("about.md").read_text())
123 |
124 | thread = Thread(
125 | target=check_for_status,
126 | daemon=True,
127 | args=(
128 | process_pid,
129 | this_space_repo_id,
130 | output_dataset_id,
131 | output_dirs,
132 | delete_on_completion,
133 | downgrade_hardware_on_completion,
134 | ),
135 | )
136 | thread.start()
137 | demo.launch()
138 |
139 |
140 | if __name__ == "__main__":
141 | import fire
142 |
143 | fire.Fire(main)
144 | """
145 |
# TODO - align with the GPU Dockerfile a bit more
# Dockerfile used when `space_hardware` is a CPU tier. Installs the user's
# requirements plus the runner's own deps, then hands off to start_server.sh.
_dockerfile_cpu_content = """FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
RUN pip install --no-cache-dir fire gradio datasets huggingface_hub

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

RUN chmod +x start_server.sh

CMD ["./start_server.sh"]
"""
183 |
184 | _dockerfile_gpu_content = """FROM nvidia/cuda:11.3.1-base-ubuntu20.04
185 |
186 | # Remove any third-party apt sources to avoid issues with expiring keys.
187 | RUN rm -f /etc/apt/sources.list.d/*.list
188 |
189 | # Install some basic utilities
190 | RUN apt-get update && apt-get install -y \
191 | curl \
192 | ca-certificates \
193 | sudo \
194 | git \
195 | bzip2 \
196 | libx11-6 \
197 | && rm -rf /var/lib/apt/lists/*
198 |
199 | # Create a working directory
200 | RUN mkdir /app
201 | WORKDIR /app
202 |
203 | # Create a non-root user and switch to it
204 | RUN adduser --disabled-password --gecos '' --shell /bin/bash user \
205 | && chown -R user:user /app
206 | RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user
207 | USER user
208 |
209 | # All users can use /home/user as their home directory
210 | ENV HOME=/home/user
211 | RUN mkdir $HOME/.cache $HOME/.config \
212 | && chmod -R 777 $HOME
213 |
214 | # Set up the Conda environment
215 | ENV CONDA_AUTO_UPDATE_CONDA=false \
216 | PATH=$HOME/miniconda/bin:$PATH
217 | RUN curl -sLo ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \
218 | && chmod +x ~/miniconda.sh \
219 | && ~/miniconda.sh -b -p ~/miniconda \
220 | && rm ~/miniconda.sh \
221 | && conda clean -ya
222 |
223 |
224 | ENV PYTHONUNBUFFERED=1 \
225 | GRADIO_ALLOW_FLAGGING=never \
226 | GRADIO_NUM_PORTS=1 \
227 | GRADIO_SERVER_NAME=0.0.0.0 \
228 | GRADIO_THEME=huggingface \
229 | SYSTEM=spaces
230 |
231 | RUN pip install --no-cache-dir fire gradio datasets huggingface_hub
232 |
233 | # Install user requirements
234 | COPY ./requirements.txt /app/requirements.txt
235 | RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
236 |
237 | WORKDIR $HOME/app
238 |
239 | # Copy the current directory contents into the container at $HOME/app setting the owner to the user
240 | COPY --chown=user . $HOME/app
241 |
242 | RUN chmod +x start_server.sh
243 |
244 | CMD ["./start_server.sh"]
245 | """
246 |
247 | _start_server_template = """#!/bin/bash
248 |
249 | # Start the python script in the background asynchronously
250 | nohup {command} &
251 |
252 | # Save the PID of the python script so we can reference it in the status checker
253 | export USER_SCRIPT_PID=$!
254 |
255 | # Start a simple web server to watch the status of the python script
256 | python status_checker.py {status_checker_args}
257 | """
258 |
259 | _about_md_template = """
260 | # Fuego Runner
261 |
262 | This space is running some job thanks to [fuego](https://github.com/huggingface/fuego)! 🔥
263 |
264 | - Check out the associated [output repo]({output_repo_url})
265 | """
266 |
267 |
def convert_dict_to_args_str(args_dict: dict) -> str:
    """Render a dict as a CLI flag string, e.g. {"epochs": 3} -> "--epochs 3".

    Container values (list/dict/tuple) and strings containing spaces are
    emitted as their repr wrapped in double quotes so they survive shell
    word-splitting as a single argument.
    """
    pieces = []
    for key, value in args_dict.items():
        needs_quoting = isinstance(value, (list, dict, tuple)) or (isinstance(value, str) and " " in value)
        if needs_quoting:
            pieces.append(f'--{key} "{value!r}"')
        else:
            pieces.append(f"--{key} {value}")
    return " ".join(pieces)
277 |
278 |
def resolve_requirements_txt(file_path: str = None, requirements: List[str] = None):
    """Build requirements.txt content from a file path, a list of pins, or both.

    Returns the file's contents, the newline-joined list, both concatenated
    with a newline in between, or an empty string when neither is provided.
    """
    file_part = Path(file_path).read_text() if file_path else None
    list_part = "\n".join(requirements) if requirements else None

    # Both sources present: file contents first, extra pins appended.
    if file_part is not None and list_part is not None:
        return file_part + "\n" + list_part
    if file_part is not None:
        return file_part
    if list_part is not None:
        return list_part
    return ""
302 |
303 |
def run(
    script: str,
    requirements_file: Optional[str] = None,
    space_id: Optional[str] = None,
    space_hardware: str = "cpu-basic",
    dataset_id: Optional[str] = None,
    private: bool = False,
    allow_patterns: Optional[List[str]] = None,
    ignore_patterns: Optional[List[str]] = None,
    save_code_snapshot_in_dataset_repo: bool = False,
    delete_space_on_completion: bool = True,
    downgrade_hardware_on_completion: bool = True,
    space_output_dirs: Optional[List[str]] = None,
    token: Optional[str] = None,
    extra_run_metadata: Optional[dict] = None,
    extra_requirements: Optional[List[str]] = None,
    **kwargs,
):
    """Create a Hugging Face Space and run a script in it. When finished, the outputs will be saved to a Hugging Face Dataset Repo.

    Args:
        script (`str`):
            Path to the script to run.
        requirements_file (`str`, optional):
            Path to requirements file for the job. Defaults to None.
        space_id (`str`, optional):
            ID of the Hugging Face Space. Defaults to None.
        space_hardware (`str`, optional):
            Hardware for the Hugging Face Space. Defaults to "cpu-basic".
        dataset_id (`str`, optional):
            ID of the Hugging Face Dataset Repo. Defaults to None.
        private (bool, optional):
            If True, both the Hugging Face Space and Dataset Repo will be private. Defaults to False.
        allow_patterns (`List[str]`, optional):
            List of file patterns to include in the parent directory of `script`. Defaults to None.
        ignore_patterns (`List[str]`, optional):
            List of file patterns to exclude in the parent directory of `script`. Defaults to None.
        save_code_snapshot_in_dataset_repo (`bool`, optional):
            If True, a code snapshot will be saved in the Hugging Face Dataset Repo. Defaults to False.
        delete_space_on_completion (`bool`, optional):
            If True, the Hugging Face Space will be deleted after the job completes. Defaults to True.
        downgrade_hardware_on_completion (`bool`, optional):
            If True, and `delete_space_on_completion` is False, the Hugging Face Space hardware will be
            downgraded to "cpu-basic" after the job completes. Defaults to True.
        space_output_dirs (`List[str]`, optional):
            Dirs in the space that will be uploaded to output dataset on run completion. If unspecified,
            will default to ["outputs", "logs"].
        token (`str`, optional):
            Hugging Face token. Uses your cached token (if available) by default. Defaults to None.
        extra_run_metadata (`dict`, optional):
            Extra metadata to add to the run metadata json file that gets added to the output dataset. Defaults to None.
        extra_requirements (`List[str]`, optional):
            List of pip requirements to install in the Hugging Face Space. If requirements_file is also provided,
            the requirements in the file will be installed in addition to the requirements in this list. Defaults to None.
        **kwargs:
            Keyword arguments are passed to the script as argparse args or unpacked to the main function.

    Raises:
        ValueError: When `space_hardware` is not a valid Hugging Face Space hardware type.

    Returns:
        Tuple[str, str]: Tuple of the Hugging Face Space URL and Hugging Face Dataset Repo URL.
    """
    if space_hardware not in SPACES_HARDWARE_TYPES:
        raise ValueError(f"Invalid instance type: {space_hardware}. Should be one of {SPACES_HARDWARE_TYPES}")

    if space_output_dirs is None:
        space_output_dirs = ["outputs", "logs"]

    # The command to run in the space
    # Ex. python train.py --learning_rate 0.1
    command = f"python {Path(script).name} {convert_dict_to_args_str(kwargs)}"

    # Timestamp + short random suffix used to name both default repos, so a
    # run's space and dataset are easy to associate with each other.
    task_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid.uuid4().hex[:6]}"
    space_id = space_id or f"fuego-{task_id}"
    dataset_id = dataset_id or f"fuego-{task_id}"

    # Create 2 new repos. One space for running code, one dataset for storing artifacts
    space_repo_url = create_repo(
        space_id,
        exist_ok=True,
        repo_type="space",
        space_sdk="docker",
        space_hardware=space_hardware,
        private=private,
        token=token,
    )
    # create_repo resolves the full namespaced id (e.g. "user/fuego-...").
    space_id = space_repo_url.repo_id

    dataset_repo_url = create_repo(dataset_id, exist_ok=True, repo_type="dataset", private=private, token=token)
    dataset_id = dataset_repo_url.repo_id

    logger.info(f"Created Repo at: {space_repo_url}")
    logger.info(f"Created Dataset at: {dataset_repo_url}")

    # Add current HF token to the new space, so it has ability to push to output dataset
    add_space_secret(space_id, "HF_TOKEN", token or HfFolder().get_token(), token=token)

    # We want to ignore at the very least README.md and .git folder of the cloned
    # GitHub repo, but you can include more filters if you want.
    if ignore_patterns is None:
        ignore_patterns = []
    elif isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]
    ignore_patterns += [".git*", "README.md"]

    # Everything alongside the script is uploaded, not just the script itself.
    source_dir = Path(script).parent

    # We push the source up to the Space
    upload_folder(
        repo_id=space_id,
        folder_path=str(source_dir),
        path_in_repo=".",
        repo_type="space",
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        token=token,
    )

    # Merge the requirements file (if any) with extra_requirements (if any)
    # into the space's requirements.txt, consumed by the Dockerfile.
    requirements_file_content = resolve_requirements_txt(requirements_file, extra_requirements)
    upload_file(
        repo_id=space_id,
        path_or_fileobj=requirements_file_content.encode(),
        path_in_repo="requirements.txt",
        repo_type="space",
        token=token,
    )

    # Optionally, you can also push the source to the output dataset
    if save_code_snapshot_in_dataset_repo:
        upload_folder(
            repo_id=dataset_id,
            folder_path=str(source_dir),
            path_in_repo=".snapshot",
            repo_type="dataset",
            allow_patterns=allow_patterns,
            ignore_patterns=ignore_patterns,
            token=token,
        )

    # We put together some metadata here about the task and push that to the dataset
    # for safekeeping.
    # The status field ("preparing" here) is later advanced to "running"/"done"
    # by the status checker running inside the space.
    logger.info("Uploaded run metadata to dataset repo for tracking!")
    card = DatasetCard("")
    card.data.tags = ["fuego"]
    card.data.fuego = dict(
        id=task_id,
        status="preparing",
        script=Path(script).name,
        requirements_file=Path(requirements_file).name if requirements_file else None,
        space_id=space_id,
        space_hardware=space_hardware,
        **extra_run_metadata or {},
    )
    card.push_to_hub(dataset_id, token=token)

    # about.md
    upload_file(
        repo_id=space_id,
        path_or_fileobj=_about_md_template.format(output_repo_url=dataset_repo_url).encode(),
        path_in_repo="about.md",
        repo_type="space",
        token=token,
    )

    # start_server.sh
    upload_file(
        repo_id=space_id,
        path_or_fileobj=_start_server_template.format(
            command=command,
            status_checker_args=convert_dict_to_args_str(
                {
                    "this_space_repo_id": space_id,
                    "output_dataset_id": dataset_id,
                    "output_dirs": space_output_dirs,
                    "delete_on_completion": delete_space_on_completion,
                    "downgrade_hardware_on_completion": downgrade_hardware_on_completion,
                }
            ),
        ).encode(),
        path_in_repo="start_server.sh",
        repo_type="space",
        token=token,
    )

    # status_checker.py
    upload_file(
        repo_id=space_id,
        path_or_fileobj=_status_checker_content.encode(),
        path_in_repo="status_checker.py",
        repo_type="space",
        token=token,
    )

    # Dockerfile
    # CPU tiers get the slim python:3.9 image; anything else gets the CUDA image.
    dockerfile_content = (
        _dockerfile_cpu_content if space_hardware in ["cpu-basic", "cpu-upgrade"] else _dockerfile_gpu_content
    )
    upload_file(
        repo_id=space_id,
        path_or_fileobj=dockerfile_content.encode(),
        path_in_repo="Dockerfile",
        repo_type="space",
        token=token,
    )

    return space_repo_url, dataset_repo_url
511 |
512 |
def github_run(
    github_repo_id: str,
    script: str,
    requirements_file: Optional[str] = None,
    github_repo_branch: str = "main",
    space_id: Optional[str] = None,
    space_hardware: str = "cpu-basic",
    dataset_id: Optional[str] = None,
    private: bool = False,
    allow_patterns: Optional[List[str]] = None,
    ignore_patterns: Optional[List[str]] = None,
    save_code_snapshot_in_dataset_repo: bool = False,
    delete_space_on_completion: bool = True,
    downgrade_hardware_on_completion: bool = True,
    space_output_dirs: Optional[List[str]] = None,
    token: Optional[str] = None,
    extra_run_metadata: Optional[dict] = None,
    extra_requirements: Optional[List[str]] = None,
    **kwargs,
):
    """Create a run from code within a GitHub repo. See `run` for more details.

    `github_repo_id` is "owner/repo"; `script` and `requirements_file` are
    paths relative to the repo root. The repo's id, branch, and HEAD commit
    sha are recorded in the output dataset's run metadata.
    """
    # We clone the GitHub repo into a temporary directory
    with tempfile.TemporaryDirectory() as tmp:
        repo_url = f"https://github.com/{github_repo_id}"
        repo = git.Repo.clone_from(repo_url, tmp, branch=github_repo_branch)
        tempdir = Path(tmp)

        # Resolve the repo-relative paths against the clone; fail fast if the
        # requested files do not exist on this branch.
        script_path = tempdir / script
        if not script_path.exists():
            raise ValueError(f"Could not find script {script} in repo {repo_url}")
        script = str(script_path)

        if requirements_file is not None:
            requirements_path = tempdir / requirements_file
            if not requirements_path.exists():
                raise ValueError(f"Could not find requirements file {requirements_file} in repo {repo_url}")
            requirements_file = str(requirements_path)

        # Must happen inside the `with` block: `run` uploads from the clone
        # before the temporary directory is cleaned up.
        return run(
            script=str(script_path),
            requirements_file=requirements_file,
            space_id=space_id,
            space_hardware=space_hardware,
            dataset_id=dataset_id,
            private=private,
            allow_patterns=allow_patterns,
            ignore_patterns=ignore_patterns,
            save_code_snapshot_in_dataset_repo=save_code_snapshot_in_dataset_repo,
            delete_space_on_completion=delete_space_on_completion,
            downgrade_hardware_on_completion=downgrade_hardware_on_completion,
            space_output_dirs=space_output_dirs,
            token=token,
            extra_run_metadata=dict(
                github_repo_id=github_repo_id,
                github_repo_branch=github_repo_branch,
                github_repo_sha=repo.head.object.hexsha,
                **extra_run_metadata or {},
            ),
            extra_requirements=extra_requirements,
            **kwargs,
        )
574 |
575 |
def cli_run():
    """Console entry point: expose `run` and `github_run` as CLI subcommands via fire."""
    commands = {"run": run, "github_run": github_run}
    fire.Fire(commands)
583 |
--------------------------------------------------------------------------------