├── distributask
│   ├── __init__.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── worker.py
│   │   └── tests.py
│   ├── example
│   │   ├── __init__.py
│   │   ├── worker.py
│   │   ├── shared.py
│   │   ├── distributed.py
│   │   └── local.py
│   └── distributask.py
├── MANIFEST.in
├── docs
│   ├── assets
│   │   ├── DeepAI.png
│   │   ├── banner.png
│   │   ├── logo.png
│   │   ├── diagram.png
│   │   └── favicon.ico
│   ├── distributask.md
│   ├── more_info.md
│   ├── index.md
│   └── getting_started.md
├── requirements.txt
├── .env.default
├── mkdocs.yml
├── Dockerfile
├── scripts
│   └── kill_redis_connections.sh
├── .github
│   └── workflows
│       ├── mkdocs.yml
│       ├── publish.yml
│       ├── test.yml
│       └── dockerhub.yml
├── LICENSE
├── setup.py
├── .gitignore
└── README.md
/distributask/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributask import *
2 |
--------------------------------------------------------------------------------
/distributask/tests/__init__.py:
--------------------------------------------------------------------------------
1 | from .tests import *
2 | from .worker import *
3 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include version.txt
2 | include README.md
3 | include requirements.txt
4 |
--------------------------------------------------------------------------------
/docs/assets/DeepAI.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepAI-Research/Distributask/HEAD/docs/assets/DeepAI.png
--------------------------------------------------------------------------------
/docs/assets/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepAI-Research/Distributask/HEAD/docs/assets/banner.png
--------------------------------------------------------------------------------
/docs/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepAI-Research/Distributask/HEAD/docs/assets/logo.png
--------------------------------------------------------------------------------
/docs/assets/diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepAI-Research/Distributask/HEAD/docs/assets/diagram.png
--------------------------------------------------------------------------------
/docs/assets/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepAI-Research/Distributask/HEAD/docs/assets/favicon.ico
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | fsspec
3 | celery
4 | redis
5 | huggingface_hub
6 | python-dotenv
7 | omegaconf
8 | tqdm
--------------------------------------------------------------------------------
/distributask/example/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed import *
2 | from .local import *
3 | from .worker import *
4 | from .shared import *
5 |
--------------------------------------------------------------------------------
/.env.default:
--------------------------------------------------------------------------------
1 | REDIS_HOST=localhost
2 | REDIS_PORT=6379
3 | REDIS_USER=default
4 | REDIS_PASSWORD=
5 | VAST_API_KEY=
6 | HF_TOKEN=hf_***
7 | HF_REPO_ID=RaccoonResearch/test_dataset
8 |
--------------------------------------------------------------------------------
/docs/distributask.md:
--------------------------------------------------------------------------------
1 | # Distributask Class
2 |
3 | ::: distributask.Distributask
4 | options:
5 | members: true
6 | show_root_heading: true
7 | show_source: true
--------------------------------------------------------------------------------
/distributask/example/worker.py:
--------------------------------------------------------------------------------
1 | from .shared import distributask, example_function
2 |
3 | # Register function to worker using distributask instance
4 | distributask.register_function(example_function)
5 |
6 | # Create Celery worker
7 | celery = distributask.app
8 |
--------------------------------------------------------------------------------
/distributask/tests/worker.py:
--------------------------------------------------------------------------------
1 | from ..distributask import create_from_config
2 |
3 | distributaur = create_from_config()
4 |
5 |
6 | # Define and register the test_function
7 | def example_test_function(arg1, arg2):
8 | return f"Result: arg1+arg2={arg1+arg2}"
9 |
10 |
11 | celery = distributaur.app
12 |
13 |
14 | if __name__ == "__main__":
15 | distributaur.register_function(example_test_function)
16 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: Distributask
2 | theme:
3 | name: material
4 | logo: assets/logo.png
5 | favicon: assets/favicon.ico
6 | plugins:
7 | - search
8 | - autorefs
9 | - mkdocstrings:
10 | enabled: true
11 | default_handler: python
12 | nav:
13 | - Home: index.md
14 | - Getting Started: getting_started.md
15 | - More Information: more_info.md
16 | - Distributask Class: distributask.md
17 |
18 |
19 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM --platform=linux/x86_64 ubuntu:24.04
2 |
3 | RUN apt-get update && \
4 | apt-get install -y \
5 | wget \
6 | xz-utils \
7 | bzip2 \
8 | git \
9 | git-lfs \
10 | python3-pip \
11 | python3 \
12 | && apt-get install -y software-properties-common \
13 | && apt-get clean \
14 | && rm -rf /var/lib/apt/lists/*
15 |
16 | COPY requirements.txt .
17 |
18 | RUN pip install -r requirements.txt --break-system-packages
19 |
20 | COPY distributask/ ./distributask/
21 |
22 | CMD ["celery", "-A", "distributask.example.worker", "worker", "--loglevel=info", "--concurrency=1"]
--------------------------------------------------------------------------------
/scripts/kill_redis_connections.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 |
4 | # Load environment variables from .env file
5 | source .env
6 |
7 | # Check if REDIS_PORT is set in the .env file
8 | if [ -z "$REDIS_PORT" ]; then
9 | echo "REDIS_PORT not found in .env file. Please set it and try again."
10 | exit 1
11 | fi
12 |
13 | # Use lsof to find all PIDs for the given port and store them in an array
14 | PIDS=($(lsof -i TCP:$REDIS_PORT -t))
15 |
16 | # Check if there are any PIDs to kill
17 | if [ ${#PIDS[@]} -eq 0 ]; then
18 | echo "No processes found using port $REDIS_PORT."
19 | exit 0
20 | fi
21 |
22 | # Loop through each PID and kill it
23 | for PID in "${PIDS[@]}"; do
24 | echo "Killing process $PID"
25 | sudo kill -9 $PID
26 | done
27 |
28 | echo "All processes using port $REDIS_PORT have been killed."
--------------------------------------------------------------------------------
/.github/workflows/mkdocs.yml:
--------------------------------------------------------------------------------
1 | name: mkdocs
2 | on:
3 | push:
4 | branches:
5 | - main
6 | permissions:
7 | contents: write
8 | jobs:
9 | deploy:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v4
13 | - name: Configure Git Credentials
14 | run: |
15 | git config user.name github-actions[bot]
16 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com
17 | - uses: actions/setup-python@v5
18 | with:
19 | python-version: 3.x
20 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV # (3)!
21 | - uses: actions/cache@v4
22 | with:
23 | key: mkdocs-material-${{ env.cache_id }}
24 | path: .cache
25 | restore-keys: |
26 | mkdocs-material-
27 | - run: pip install mkdocs-material mkdocstrings mkdocstrings-python
28 | - run: mkdocs gh-deploy --force
29 |
--------------------------------------------------------------------------------
/distributask/example/shared.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import time
4 |
5 | from ..distributask import create_from_config
6 |
7 | # Create distributask instance
8 | distributask = create_from_config()
9 |
10 | # This is the function that will be executed on the nodes
11 | # You can make your own function and pass in whatever arguments you want
12 | def example_function(index, arg1, arg2):
13 |
14 | result = arg1 + arg2
15 |
16 | time.sleep(random.randint(1, 6))
17 |
18 | # Save the result to a file
19 | with open(f"result_{index}.txt", "w") as f:
20 | f.write(f"{str(arg1)} plus {str(arg2)} is {str(result)}")
21 |
22 | # Write the file to huggingface
23 | distributask.upload_file(f"result_{index}.txt")
24 |
25 | # Delete local file
26 | os.remove(f"result_{index}.txt")
27 |
28 | # Return the result - you can get this value from the task object
29 | return f"Task {index} completed. Result ({str(arg1)} + {str(arg2)}): {str(result)}"
30 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v3
17 | - name: Set up Python
18 | uses: actions/setup-python@v3
19 | with:
20 | python-version: '3.x'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install build
25 | - name: Extract package version
26 | id: extract_version
27 | run: echo "package_version=$(echo $GITHUB_REF | cut -d / -f 3)" >> $GITHUB_ENV
28 | - name: Write package version to file
29 | run: echo "${{ env.package_version }}" > version.txt
30 | - name: Build package
31 | run: python -m build
32 | - name: Publish package
33 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
34 | with:
35 | user: ${{ secrets.PYPI_USERNAME }}
36 | password: ${{ secrets.PYPI_PASSWORD }}
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 M̵̞̗̝̼̅̏̎͝Ȯ̴̝̻̊̃̋̀Õ̷̼͋N̸̩̿͜ ̶̜̠̹̼̩͒
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Lint and Test
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | matrix:
10 | python-version: ["3.11"]
11 | env:
12 | REDIS_HOST: ${{ secrets.REDIS_HOST }}
13 | REDIS_PORT: ${{ secrets.REDIS_PORT }}
14 | REDIS_USER: ${{ secrets.REDIS_USER }}
15 | REDIS_PASSWORD: ${{ secrets.REDIS_PASSWORD }}
16 | VAST_API_KEY: ${{ secrets.VAST_API_KEY }}
17 | HF_TOKEN: ${{ secrets.HF_TOKEN }}
18 | HF_REPO_ID: ${{ secrets.HF_REPO_ID }}
19 | steps:
20 | - uses: actions/checkout@v3
21 | - name: Set up Python ${{ matrix.python-version }}
22 | uses: actions/setup-python@v3
23 | with:
24 | python-version: ${{ matrix.python-version }}
25 | - name: Install dependencies
26 | run: |
27 | python -m pip install --upgrade pip
28 | pip install pytest
29 | pip install -r requirements.txt
30 | - name: Write package version
31 |         run: echo "$(echo $GITHUB_REF | cut -d / -f 3)" > version.txt
32 | - name: Running tests
33 | run: |
34 | pytest distributask/tests/tests.py
35 |
--------------------------------------------------------------------------------
/.github/workflows/dockerhub.yml:
--------------------------------------------------------------------------------
1 | name: Publish Docker image
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | env:
8 | REGISTRY: index.docker.io
9 | IMAGE_NAME: antbaez/distributask-test-worker
10 |
11 | jobs:
12 | push_to_registry:
13 | name: Push Docker image to Docker Hub
14 | runs-on: ubuntu-latest
15 | permissions:
16 | packages: write
17 | contents: read
18 | attestations: write
19 | id-token: write
20 | steps:
21 | - name: Check out the repo
22 | uses: actions/checkout@v4
23 |
24 | - name: Log in to Docker Hub
25 | uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
26 | with:
27 | username: ${{ secrets.DOCKER_USERNAME }}
28 | password: ${{ secrets.DOCKER_PASSWORD }}
29 |
30 | - name: Extract metadata (tags, labels) for Docker
31 | id: meta
32 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
33 | with:
34 | images: ${{ env.IMAGE_NAME }}
35 |
36 | - name: Build and push Docker image
37 | id: push
38 | uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
39 | with:
40 | context: .
41 | file: ./Dockerfile
42 | push: true
43 | tags: ${{ steps.meta.outputs.tags }}
44 | labels: ${{ steps.meta.outputs.labels }}
45 |
46 |
47 | - name: Generate artifact attestation
48 | uses: actions/attest-build-provenance@v1
49 | with:
50 | subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
51 | subject-digest: ${{ steps.push.outputs.digest }}
52 | push-to-registry: true
53 |
54 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | import os
3 |
4 | # get the cwd where the setup.py file is located
5 | file_path = os.path.dirname(os.path.realpath(__file__))
6 |
7 | long_description = ""
8 | with open(os.path.join(file_path, "README.md"), "r") as fh:
9 | long_description = fh.read()
10 | long_description = long_description.split("\n")
11 |     long_description = [line for line in long_description if not "<img" in line]
12 |     long_description = "\n".join(long_description)
13 |
14 | # read the package version written out by the release workflow
15 | with open(os.path.join(file_path, "version.txt"), "r") as fh:
16 |     version = fh.read().strip()
17 |
18 | # read requirements.txt and strip any version specifiers
19 | with open(os.path.join(file_path, "requirements.txt"), "r") as fh:
20 |     install_requires = [line for line in fh.read().splitlines() if line.strip()]
21 | install_requires = [
22 |     line.split(">")[0].split("<")[0] for line in install_requires
23 | ]
24 |
25 | setup(
26 | name="distributask",
27 | version=version,
28 | description="Simple task manager and job queue for distributed rendering. Built on celery and redis.",
29 | long_description=long_description,
30 | long_description_content_type="text/markdown",
31 | url="https://github.com/DeepAI-Research/Distributask",
32 | author="DeepAIResearch",
33 | author_email="team@deepai.org",
34 | license="MIT",
35 | packages=find_packages(),
36 | install_requires=install_requires,
37 | classifiers=[
38 | "Development Status :: 4 - Beta",
39 | "Intended Audience :: Science/Research",
40 | "License :: OSI Approved :: MIT License",
41 | "Operating System :: POSIX :: Linux",
42 | "Programming Language :: Python :: 3",
43 | "Operating System :: MacOS :: MacOS X",
44 | "Operating System :: Microsoft :: Windows",
45 | ],
46 | )
47 |
--------------------------------------------------------------------------------
/docs/more_info.md:
--------------------------------------------------------------------------------
1 | # Summary of most relevant functions
2 |
3 | #### Settings, Environment, and Help
4 |
5 | - `create_from_config()` - creates a Distributask instance using environment variables
6 | - `get_env(key)` - gets a value from the `.env` file or environment
7 | - `get_settings(key)` - gets a value from the settings dictionary
8 |
9 | #### Celery tasks
10 |
11 | - `register_function(func)` - registers a function to be run as a task by workers
12 | - `execute_function(func_name, args)` - creates a Celery task from a registered function
13 |
14 | #### Redis server
15 |
16 | - `get_redis_url()` - gets the Redis host URL
17 | - `get_redis_connection()` - gets a Redis connection instance
18 |
19 | #### Worker management via Vast.ai API
20 |
21 | - `search_offers(max_price)` - searches for available instances on Vast.ai
22 | - `rent_nodes(max_price, max_nodes, image, module_name, command)` - rents Vast.ai instances to use as workers
23 | - `terminate_nodes(node_id_lists)` - terminates the given Vast.ai instances
24 |
25 |
26 | #### HuggingFace repositories and uploading
27 |
28 | - `initialize_dataset()` - initializes the dataset repo on Hugging Face
29 | - `upload_file(path_to_file)` - uploads a file to the Hugging Face repo
30 | - `upload_directory(path_to_directory)` - uploads a folder to the Hugging Face repo
31 | - `delete_file(path_to_file)` - deletes a file on the Hugging Face repo
32 |
33 | #### Visit the [Distributask Class](distributask.md) page for full, detailed documentation of the Distributask class. A short usage sketch of these functions follows below.
34 |
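A rough sketch of how the functions above fit together in a driver script is shown below. The function name, prices, and image are illustrative placeholders, and the `rent_nodes` keyword names follow the summary above; see `distributask/example/distributed.py` for the complete, working version.

```python
from distributask import create_from_config

# Build a Distributask instance from your .env file / environment variables
distributask = create_from_config()

# Any function you register can be queued as a Celery task.
# The worker module must register the same function (see distributask/example/worker.py).
def add_numbers(index, arg1, arg2):
    return f"Task {index}: {arg1 + arg2}"

distributask.register_function(add_numbers)

# Rent Vast.ai instances that run your Docker image as Celery workers
nodes = distributask.rent_nodes(
    max_price=0.20,
    max_nodes=1,
    image="antbaez/distributask-test-worker",
    module_name="distributask.example.worker",
)

# Queue tasks, watch their progress, then release the rented instances
tasks = [
    distributask.execute_function("add_numbers", {"index": i, "arg1": 1, "arg2": 2})
    for i in range(5)
]
distributask.monitor_tasks(tasks)
distributask.terminate_nodes(nodes)
```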
35 | # Docker Setup
36 |
37 | Distributask uses a Docker image to transfer the environment and necessary files to the Vast.ai nodes. You can use the Dockerfile in the Distributask repository as a base for your own image. If you do, be sure to add Distributask to the list of packages installed in your Dockerfile.
38 |
39 | # Important Packages
40 |
41 | Visit the websites of these wonderful packages to learn more about how they work and how to use them.
42 |
43 | Celery: `https://docs.celeryq.dev/en/stable/`
44 | Redis: `https://redis.io/docs/latest/`
45 | Hugging Face: `https://huggingface.co/docs/huggingface_hub/en/guides/upload`
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | Distributask is a simple way to distribute rendering tasks across multiple machines.
2 |
3 | This documentation is intended to help you understand the structure of the Distributask API and codebase and how to use it to distribute rendering tasks across multiple machines for your own projects.
4 |
5 | ## Core Use Cases
6 | Distributask can be used for any task that is parallelizable. Some specific use cases include:
7 |
8 | - Rendering videos
9 | - Running simulations
10 | - Generating or processing large datasets
11 |
12 | ## Getting Started
13 |
14 | Visit the [Getting Started](getting_started.md) page to learn how to set up your environment and get started with distributing with Distributask.
15 |
16 | ## Overview
17 |
18 | Distributed rendering using Distributask can be broken into four steps:
19 |
20 | #### Creating the task queue
21 |
22 | Distributask uses Celery, an asynchronous distributed task processing package, to create the task queue on your local machine. Each task on the queue is a function that tells the remote machines, or workers, what to do. For example, if we wanted to render videos, each task would be a function containing the code to render a different video.
23 |
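For example, a task can be as simple as a registered Python function; `render_video` and its arguments below are placeholder names used only as a sketch:

```python
from distributask import create_from_config

distributask = create_from_config()

# Each queued task calls this function on a worker with the parameters
# passed to execute_function.
def render_video(index, scene_path):
    # ... your rendering code would go here ...
    return f"Rendered {scene_path} (task {index})"

distributask.register_function(render_video)
```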
24 | #### Passing the tasks to workers
25 |
26 | Distributask uses Redis, an in-memory data store that can act as a database, as its message broker. Redis transfers tasks that have yet to be done from the task queue to the workers so that the work can be carried out.
27 |
28 | #### Executing the tasks
29 |
30 | Distributask uses Vast.ai, a decentralized GPU marketplace, to create the workers that execute the tasks. Each task is given to a worker and executed, and the completed task status is passed back to the central machine via Redis.
31 |
32 | #### Storing results of the tasks
33 |
34 | Distributask uses Hugging Face, a platform for sharing AI models and datasets, to store the results of the tasks. The results are uploaded to Hugging Face using API calls within Distributask. For example, our rendered videos would be uploaded as a dataset on Hugging Face.
35 |
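For instance, a task can write its output locally, upload it to the Hugging Face repository named by `HF_REPO_ID`, and then delete the local copy. The snippet below is a sketch that mirrors `distributask/example/shared.py`; the file name and contents are placeholders.

```python
import os

from distributask import create_from_config

distributask = create_from_config()

# Make sure the dataset repo named by HF_REPO_ID exists
distributask.initialize_dataset()

# Write a result locally, upload it, then remove the local copy
with open("result_0.txt", "w") as f:
    f.write("1 plus 2 is 3")
distributask.upload_file("result_0.txt")
os.remove("result_0.txt")
```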
36 | ## Flowchart of Distributask process
37 |
38 |
39 |
--------------------------------------------------------------------------------
/docs/getting_started.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | Below are instructions to get Distributask running on your machine. Please read through the rest of the documentation for more detailed information.
4 |
5 | ## Installation
6 |
7 | ```bash
8 | pip install distributask
9 | ```
10 |
11 | ## Development
12 |
13 | ### Prerequisites
14 |
15 | - Python 3.8 or newer (tested on Python 3.11)
16 | - Redis server
17 | - Vast.ai API key
18 | - HuggingFace API key
19 |
20 |
21 | ### Setup
22 |
23 | Clone the repository and navigate to the project directory:
24 |
25 | ```bash
26 | git clone https://github.com/RaccoonResearch/Distributask.git
27 | cd Distributask
28 | ```
29 |
30 | Install the required packages:
31 |
32 | ```bash
33 | pip install -r requirements.txt
34 | ```
35 |
36 | Install the distributask package:
37 |
38 | ```bash
39 | python setup.py install
40 | ```
41 |
42 | ### Configuration
43 |
44 | Create a `.env` file in the root directory of your project or set environment variables to match your setup:
45 |
46 | ```plaintext
47 | REDIS_HOST=redis_host
48 | REDIS_PORT=redis_port
49 | REDIS_USER=redis_user
50 | REDIS_PASSWORD=redis_password
51 | VAST_API_KEY=your_vastai_api_key
52 | HF_TOKEN=your_huggingface_token
53 | HF_REPO_ID=your_huggingface_repo
54 | BROKER_POOL_LIMIT=broker_pool_limit
55 | ```
56 |
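To confirm the configuration is picked up, you can create a Distributask instance and read a couple of values back. This is only a quick sanity check and assumes `create_from_config` is importable from the top-level package, as in the bundled examples:

```python
from distributask import create_from_config

# Reads the environment variables / .env file described above
distributask = create_from_config()

print(distributask.get_env("HF_REPO_ID"))
print(distributask.get_redis_url())
```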
57 | ### Running an Example Task
58 |
59 | To run an example task and see Distributask in action, you can execute the example scripts provided in the project:
60 |
61 | ```bash
62 | # Run an example task locally
63 | python -m distributask.example.local
64 |
65 | # Run an example task on Vast.ai ("kitchen sink" example)
66 | python -m distributask.example.distributed
67 | ```
68 |
69 | ### Command Options
70 |
71 | Below are the options you can pass to the Distributask example run.
72 |
73 | - `--max_price` is the max price (in $/hour) a node can be rented for.
74 | - `--max_nodes` is the max number of Vast.ai nodes that can be rented.
75 | - `--docker_image` is the name of the Docker image to load onto the Vast.ai node.
76 | - `--module_name` is the module name of the Celery worker.
77 | - `--number_of_tasks` is the number of example tasks that will be added to the queue and run by the workers.
--------------------------------------------------------------------------------
/distributask/example/distributed.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import argparse
4 | import atexit
5 |
6 | from .shared import distributask, example_function
7 |
8 | if __name__ == "__main__":
9 | # Create an ArgumentParser object
10 | parser = argparse.ArgumentParser(description="Distributask example script")
11 |
12 | # Add arguments with default values
13 | parser.add_argument(
14 | "--max_price",
15 | type=float,
16 | default=0.20,
17 | help="Max price per node, in dollars (default: 0.20)",
18 | )
19 | parser.add_argument(
20 | "--max_nodes",
21 | type=int,
22 | default=1,
23 | help="Max number of nodes to rent (default: 1)",
24 | )
25 | parser.add_argument(
26 | "--docker_image",
27 | type=str,
28 | default="antbaez/distributask-test-worker",
29 | help="Docker image to use for the worker (default: antbaez/distributask-test-worker)",
30 | )
31 | parser.add_argument(
32 | "--module_name",
33 | type=str,
34 | default="distributask.example.worker",
35 | help="Module name (default: distributask.example.worker)",
36 | )
37 | parser.add_argument(
38 | "--number_of_tasks", type=int, default=10, help="Number of tasks (default: 10)"
39 | )
40 |
41 | args = parser.parse_args()
42 |
43 | completed = False
44 |
45 | # Register function to distributask object
46 | distributask.register_function(example_function)
47 |
48 | # Initialize the dataset on Hugging Face
49 | distributask.initialize_dataset()
50 |
51 | # Create a file with the current date and time and save it as "datetime.txt"
52 | with open("datetime.txt", "w") as f:
53 | f.write(time.strftime("%Y-%m-%d %H:%M:%S"))
54 |
55 | # Upload file to the repository
56 | distributask.upload_file("datetime.txt")
57 |
58 | # Remove the example file from local
59 | os.remove("datetime.txt")
60 |
61 | vast_api_key = distributask.get_env("VAST_API_KEY")
62 | if not vast_api_key:
63 | raise ValueError("Vast API key not found in configuration.")
64 |
65 | job_configs = []
66 |
67 | # Compile parameters for tasks
68 | for i in range(args.number_of_tasks):
69 | job_configs.append(
70 | {
71 | "outputs": [f"result_{i}.txt"],
72 | "task_params": {"index": i, "arg1": 1, "arg2": 2},
73 | }
74 | )
75 |
76 | # Rent Vast.ai nodes and get list of node ids
77 | print("Renting nodes...")
78 | rented_nodes = distributask.rent_nodes(
79 | args.max_price, args.max_nodes, args.docker_image, args.module_name
80 | )
81 |
82 | print("Total rented nodes: ", len(rented_nodes))
83 |
84 | tasks = []
85 |
86 | # Submit the tasks to the queue for the Vast.ai worker nodes to execute
87 | for i in range(args.number_of_tasks):
88 | job_config = job_configs[i]
89 | print(f"Task {i}")
90 | print(job_config)
91 | print("Task params: ", job_config["task_params"])
92 |
93 | params = job_config["task_params"]
94 |
95 | # Each task executes the function "example_function", defined in shared.py
96 | task = distributask.execute_function(example_function.__name__, params)
97 |
98 | # Add the task to the list of tasks
99 | tasks.append(task)
100 |
101 | def terminate_workers():
102 | distributask.terminate_nodes(rented_nodes)
103 | print("Workers terminated.")
104 |
105 | # Terminate Vast.ai nodes on exit of script
106 | atexit.register(terminate_workers)
107 |
108 | # Monitor the status of the tasks with tqdm
109 | distributask.monitor_tasks(tasks)
110 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .DS_Store
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # poetry
99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | # This is especially recommended for binary packages to ensure reproducibility, and is more
101 | # commonly ignored for libraries.
102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 |
105 | # pdm
106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | # in version control.
110 | # https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 |
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 |
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 |
120 | # SageMath parsed files
121 | *.sage.py
122 |
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 |
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 |
136 | # Rope project settings
137 | .ropeproject
138 |
139 | # mkdocs documentation
140 | /site
141 |
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 |
147 | # Pyre type checker
148 | .pyre/
149 |
150 | # pytype static type analyzer
151 | .pytype/
152 |
153 | # Cython debug symbols
154 | cython_debug/
155 |
156 | # PyCharm
157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | # and can be added to the global gitignore or merged into this file. For a more nuclear
160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 |
163 | .vscode/
164 | .chroma
165 | memory
166 | test
167 | version.txt
168 | config.json
--------------------------------------------------------------------------------
/distributask/example/local.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import os
3 | import subprocess
4 | import time
5 |
6 | from .shared import distributask, example_function
7 |
8 |
9 | if __name__ == "__main__":
10 | completed = False
11 |
12 | # Register function to distributask object
13 | distributask.register_function(example_function)
14 |
15 | # First, initialize the dataset on Hugging Face
16 | distributask.initialize_dataset()
17 |
18 | # Create a file with the current date and time and save it as "datetime.txt"
19 | with open("datetime.txt", "w") as f:
20 | f.write(time.strftime("%Y-%m-%d %H:%M:%S"))
21 |
22 | # Upload this to the repository
23 | distributask.upload_file("datetime.txt")
24 |
25 | # Remove the example file from local
26 | os.remove("datetime.txt")
27 |
28 | vast_api_key = distributask.get_env("VAST_API_KEY")
29 | if not vast_api_key:
30 | raise ValueError("Vast API key not found in configuration.")
31 |
32 | job_configs = []
33 | number_of_tasks = 3
34 |
35 | # Compile parameters for tasks
36 | for i in range(number_of_tasks):
37 | job_configs.append(
38 | {
39 | "outputs": [f"result_{i}.txt"],
40 | "task_params": {"index": i, "arg1": 1, "arg2": 2},
41 | }
42 | )
43 |
44 | tasks = []
45 |
46 | repo_id = distributask.get_env("HF_REPO_ID")
47 |
48 | # Submit the tasks to the queue for the Vast.ai worker nodes to execute
49 | for i in range(number_of_tasks):
50 | job_config = job_configs[i]
51 | print(f"Task {i}")
52 | print(job_config)
53 | print("Task params: ", job_config["task_params"])
54 |
55 | params = job_config["task_params"]
56 |
57 | # Each task executes the function "example_function", defined in shared.py
58 | task = distributask.execute_function(example_function.__name__, params)
59 |
60 | # Add the task to the list of tasks
61 | tasks.append(task)
62 |
63 | # Start the local worker
64 | docker_installed = False
65 | # Check if docker is installed
66 | try:
67 | subprocess.run(["docker", "version"], check=True)
68 | docker_installed = True
69 | except Exception as e:
70 | print("Docker is not installed. Starting worker locally.")
71 | print(e)
72 |
73 | docker_process = None
74 | # If docker is installed, start local Docker worker
75 | # If docker is not installed, start local Celery worker
76 | if docker_installed is False:
77 | print("Docker is not installed. Starting worker locally.")
78 | celery_worker = subprocess.Popen(
79 | ["celery", "-A", "distributask.example.worker", "worker", "--loglevel=info"]
80 | )
81 |
82 | else:
83 | build_process = subprocess.Popen(
84 | [
85 | "docker",
86 | "build",
87 | "-t",
88 | "distributask-example-worker",
89 | ".",
90 | ]
91 | )
92 | build_process.wait()
93 |
94 | docker_process = subprocess.Popen(
95 | [
96 | "docker",
97 | "run",
98 | "-e",
99 | f"VAST_API_KEY={vast_api_key}",
100 | "-e",
101 | f"REDIS_HOST={distributask.get_env('REDIS_HOST')}",
102 | "-e",
103 | f"REDIS_PORT={distributask.get_env('REDIS_PORT')}",
104 | "-e",
105 | f"REDIS_PASSWORD={distributask.get_env('REDIS_PASSWORD')}",
106 | "-e",
107 | f"REDIS_USER={distributask.get_env('REDIS_USER')}",
108 | "-e",
109 | f"HF_TOKEN={distributask.get_env('HF_TOKEN')}",
110 | "-e",
111 | f"HF_REPO_ID={repo_id}",
112 | "distributask-example-worker",
113 | ]
114 | )
115 |
116 | def kill_docker():
117 | print("Killing docker container")
118 | docker_process.terminate()
119 |
120 | # Terminate Docker worker on exit of script
121 | atexit.register(kill_docker)
122 |
123 | # Monitor the status of the tasks with tqdm
124 | distributask.monitor_tasks(tasks)
125 |
126 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Distributask
2 |
3 |
4 | A simple way to distribute rendering tasks across multiple machines.
5 |
6 | [](https://github.com/DeepAI-Research/Distributask/actions/workflows/test.yml)
7 | [](https://badge.fury.io/py/distributask)
8 | [](https://github.com/DeepAI-Research/Distributask/blob/main/LICENSE)
9 |
10 |
11 | # Description
12 |
13 | Distributask is a package that automatically queues, executes, and uploads the results of any task you want using Vast.ai, a decentralized network of GPUs. It works by first creating a Celery queue of tasks, each containing the code you want run on a GPU. The tasks are then passed to the Vast.ai GPU workers using Redis as the message broker. Once a worker has completed a task, the result is uploaded to Hugging Face.
14 |
15 | # Installation
16 |
17 | ```bash
18 | pip install distributask
19 | ```
20 |
21 | # Development
22 |
23 | ### Setup
24 |
25 | Clone the repository and navigate to the project directory:
26 |
27 | ```bash
28 | git clone https://github.com/DeepAI-Research/Distributask.git
29 | cd Distributask
30 | ```
31 |
32 | Install the required packages:
33 |
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 |
38 | Or install Distributask as a package:
39 |
40 | ```bash
41 | pip install distributask
42 | ```
43 |
44 | ### Configuration
45 |
46 | Create a `.env` file in the root directory of your project or set environment variables to match your setup:
47 |
48 | ```plaintext
49 | REDIS_HOST="name of your redis server"
50 | REDIS_PORT="port of your redis server"
51 | REDIS_USER="username to login to redis server"
52 | REDIS_PASSWORD="password to login to redis server"
53 | VAST_API_KEY="your Vast.ai API key"
54 | HF_TOKEN="your Hugging Face token"
55 | HF_REPO_ID="name of your Hugging Face repository"
56 | BROKER_POOL_LIMIT="your broker pool limit setting"
57 | ```
58 |
59 | ## Getting Started
60 |
61 | ### Running an Example Task
62 |
63 | To run an example task and see Distributask in action, you can execute the example script provided in the project:
64 |
65 | ```bash
66 | # Run the example task locally using either a Docker container or a Celery worker:
67 | python -m distributask.example.local
68 |
69 | # Run the example task on Vast.ai ("kitchen sink" example):
70 | python -m distributask.example.distributed
71 |
72 | ```
73 |
74 | This script configures the environment, registers a sample function, creates a queue of tasks, and monitors their execution on the workers.
75 |
76 | ### Command Options
77 |
78 | - `--max_price` is the max price (in $/hour) a node can be rented for.
79 | - `--max_nodes` is the max number of Vast.ai nodes that can be rented.
80 | - `--docker_image` is the name of the Docker image to load onto the Vast.ai node.
81 | - `--module_name` is the module name of the Celery worker.
82 | - `--number_of_tasks` is the number of example tasks that will be added to the queue and run by the workers.
83 |
84 | ## Documentation
85 |
86 | For more info checkout our in-depth [documentation](https://deepai-research.github.io/Distributask)!
87 |
88 | ## Contributing
89 |
90 | Contributions are welcome! If there is a change you would like to see, please open an issue to discuss it or to volunteer to make the change yourself.
91 |
92 | ## License
93 |
94 | This project is licensed under the MIT License - see the `LICENSE` file for details.
95 |
96 | ## Citation
97 |
98 | ```bibtex
99 | @misc{Distributask,
100 | author = {DeepAIResearch},
101 |     title = {Distributask: a simple way to distribute rendering tasks across multiple machines},
102 | year = {2024},
103 | publisher = {GitHub},
104 | howpublished = {\url{https://github.com/DeepAI-Research/Distributask}}
105 | }
106 | ```
107 |
108 | ## Contributors
109 |
110 |
111 | - M̵̞̗̝̼̅̏̎͝Ȯ̴̝̻̊̃̋̀Õ̷̼͋N̸̩̿͜ ̶̜̠̹̼̩͒
112 | - Anthony