├── .github └── workflows │ ├── check-style.yaml │ ├── push-docker-image.yaml │ └── run-tests.yaml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── benchmarks ├── benchmark_forward.py ├── benchmark_inference.py └── benchmark_training.py ├── examples ├── prompt-tuning-personachat.ipynb └── prompt-tuning-sst2.ipynb ├── pyproject.toml ├── setup.cfg ├── src └── petals │ ├── __init__.py │ ├── cli │ ├── __init__.py │ ├── run_dht.py │ ├── run_prod_server.sh │ └── run_server.py │ ├── client │ ├── __init__.py │ ├── config.py │ ├── from_pretrained.py │ ├── inference_session.py │ ├── lm_head.py │ ├── ptune.py │ ├── remote_forward_backward.py │ ├── remote_generation.py │ ├── remote_sequential.py │ ├── routing │ │ ├── __init__.py │ │ ├── sequence_info.py │ │ ├── sequence_manager.py │ │ └── spending_policy.py │ └── sequential_autograd.py │ ├── constants.py │ ├── data_structures.py │ ├── dht_utils.py │ ├── models │ ├── __init__.py │ ├── bloom │ │ ├── __init__.py │ │ ├── block.py │ │ ├── config.py │ │ └── model.py │ ├── falcon │ │ ├── __init__.py │ │ ├── block.py │ │ ├── config.py │ │ └── model.py │ ├── llama │ │ ├── __init__.py │ │ ├── block.py │ │ ├── config.py │ │ ├── model.py │ │ └── speculative_model.py │ └── mixtral │ │ ├── __init__.py │ │ ├── block.py │ │ ├── config.py │ │ └── model.py │ ├── server │ ├── __init__.py │ ├── backend.py │ ├── block_functions.py │ ├── block_selection.py │ ├── block_utils.py │ ├── from_pretrained.py │ ├── handler.py │ ├── memory_cache.py │ ├── reachability.py │ ├── server.py │ ├── task_pool.py │ ├── task_prioritizer.py │ └── throughput.py │ └── utils │ ├── __init__.py │ ├── asyncio.py │ ├── auto_config.py │ ├── convert_block.py │ ├── cuda_graphs.py │ ├── dht.py │ ├── disk_cache.py │ ├── hf_auth.py │ ├── logging.py │ ├── misc.py │ ├── packaging.py │ ├── peft.py │ ├── ping.py │ ├── random.py │ └── version.py └── tests ├── bootstrap.id ├── conftest.py ├── server2.id ├── test_aux_functions.py ├── test_block_exact_match.py ├── test_cache.py ├── test_chained_calls.py ├── test_dtype.py ├── test_full_model.py ├── test_optimized_layers.py ├── test_peft.py ├── test_priority_pool.py ├── test_remote_sequential.py ├── test_sequence_manager.py ├── test_server_stats.py ├── test_speculative_generation.py ├── test_tensor_parallel.py └── test_utils.py /.github/workflows/check-style.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/.github/workflows/check-style.yaml -------------------------------------------------------------------------------- /.github/workflows/push-docker-image.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/.github/workflows/push-docker-image.yaml -------------------------------------------------------------------------------- /.github/workflows/run-tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/.github/workflows/run-tests.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/benchmark_forward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/benchmarks/benchmark_forward.py -------------------------------------------------------------------------------- /benchmarks/benchmark_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/benchmarks/benchmark_inference.py -------------------------------------------------------------------------------- /benchmarks/benchmark_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/benchmarks/benchmark_training.py -------------------------------------------------------------------------------- /examples/prompt-tuning-personachat.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/examples/prompt-tuning-personachat.ipynb -------------------------------------------------------------------------------- /examples/prompt-tuning-sst2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/examples/prompt-tuning-sst2.ipynb -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/pyproject.toml -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/setup.cfg -------------------------------------------------------------------------------- /src/petals/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/__init__.py -------------------------------------------------------------------------------- /src/petals/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/petals/cli/run_dht.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/cli/run_dht.py -------------------------------------------------------------------------------- /src/petals/cli/run_prod_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/cli/run_prod_server.sh -------------------------------------------------------------------------------- /src/petals/cli/run_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/cli/run_server.py -------------------------------------------------------------------------------- /src/petals/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/__init__.py -------------------------------------------------------------------------------- /src/petals/client/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/config.py -------------------------------------------------------------------------------- /src/petals/client/from_pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/from_pretrained.py -------------------------------------------------------------------------------- /src/petals/client/inference_session.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/inference_session.py -------------------------------------------------------------------------------- /src/petals/client/lm_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/lm_head.py -------------------------------------------------------------------------------- /src/petals/client/ptune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/ptune.py -------------------------------------------------------------------------------- /src/petals/client/remote_forward_backward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/remote_forward_backward.py -------------------------------------------------------------------------------- /src/petals/client/remote_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/remote_generation.py -------------------------------------------------------------------------------- /src/petals/client/remote_sequential.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/remote_sequential.py -------------------------------------------------------------------------------- /src/petals/client/routing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/routing/__init__.py -------------------------------------------------------------------------------- /src/petals/client/routing/sequence_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/routing/sequence_info.py -------------------------------------------------------------------------------- /src/petals/client/routing/sequence_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/routing/sequence_manager.py -------------------------------------------------------------------------------- /src/petals/client/routing/spending_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/routing/spending_policy.py -------------------------------------------------------------------------------- /src/petals/client/sequential_autograd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/client/sequential_autograd.py -------------------------------------------------------------------------------- /src/petals/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/constants.py -------------------------------------------------------------------------------- /src/petals/data_structures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/data_structures.py -------------------------------------------------------------------------------- /src/petals/dht_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/dht_utils.py -------------------------------------------------------------------------------- /src/petals/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/__init__.py -------------------------------------------------------------------------------- /src/petals/models/bloom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/bloom/__init__.py -------------------------------------------------------------------------------- /src/petals/models/bloom/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/bloom/block.py -------------------------------------------------------------------------------- /src/petals/models/bloom/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/bloom/config.py -------------------------------------------------------------------------------- /src/petals/models/bloom/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/bloom/model.py -------------------------------------------------------------------------------- /src/petals/models/falcon/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/falcon/__init__.py -------------------------------------------------------------------------------- /src/petals/models/falcon/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/falcon/block.py -------------------------------------------------------------------------------- /src/petals/models/falcon/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/falcon/config.py -------------------------------------------------------------------------------- /src/petals/models/falcon/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/falcon/model.py -------------------------------------------------------------------------------- /src/petals/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/llama/__init__.py -------------------------------------------------------------------------------- /src/petals/models/llama/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/llama/block.py -------------------------------------------------------------------------------- /src/petals/models/llama/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/llama/config.py -------------------------------------------------------------------------------- /src/petals/models/llama/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/llama/model.py -------------------------------------------------------------------------------- /src/petals/models/llama/speculative_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/llama/speculative_model.py -------------------------------------------------------------------------------- /src/petals/models/mixtral/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/mixtral/__init__.py -------------------------------------------------------------------------------- /src/petals/models/mixtral/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/mixtral/block.py -------------------------------------------------------------------------------- /src/petals/models/mixtral/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/mixtral/config.py -------------------------------------------------------------------------------- /src/petals/models/mixtral/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/models/mixtral/model.py -------------------------------------------------------------------------------- /src/petals/server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/petals/server/backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/backend.py -------------------------------------------------------------------------------- /src/petals/server/block_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/block_functions.py -------------------------------------------------------------------------------- /src/petals/server/block_selection.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/block_selection.py -------------------------------------------------------------------------------- /src/petals/server/block_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/block_utils.py -------------------------------------------------------------------------------- /src/petals/server/from_pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/from_pretrained.py -------------------------------------------------------------------------------- /src/petals/server/handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/handler.py -------------------------------------------------------------------------------- /src/petals/server/memory_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/memory_cache.py -------------------------------------------------------------------------------- /src/petals/server/reachability.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/reachability.py -------------------------------------------------------------------------------- /src/petals/server/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/server.py -------------------------------------------------------------------------------- /src/petals/server/task_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/task_pool.py -------------------------------------------------------------------------------- /src/petals/server/task_prioritizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/task_prioritizer.py -------------------------------------------------------------------------------- /src/petals/server/throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/server/throughput.py -------------------------------------------------------------------------------- /src/petals/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/__init__.py -------------------------------------------------------------------------------- /src/petals/utils/asyncio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/asyncio.py -------------------------------------------------------------------------------- /src/petals/utils/auto_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/auto_config.py -------------------------------------------------------------------------------- /src/petals/utils/convert_block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/convert_block.py -------------------------------------------------------------------------------- /src/petals/utils/cuda_graphs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/cuda_graphs.py -------------------------------------------------------------------------------- /src/petals/utils/dht.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/dht.py -------------------------------------------------------------------------------- /src/petals/utils/disk_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/disk_cache.py -------------------------------------------------------------------------------- /src/petals/utils/hf_auth.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/hf_auth.py -------------------------------------------------------------------------------- /src/petals/utils/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/logging.py -------------------------------------------------------------------------------- /src/petals/utils/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/misc.py -------------------------------------------------------------------------------- /src/petals/utils/packaging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/packaging.py -------------------------------------------------------------------------------- /src/petals/utils/peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/peft.py -------------------------------------------------------------------------------- /src/petals/utils/ping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/ping.py -------------------------------------------------------------------------------- /src/petals/utils/random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/random.py -------------------------------------------------------------------------------- /src/petals/utils/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/src/petals/utils/version.py -------------------------------------------------------------------------------- /tests/bootstrap.id: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/bootstrap.id -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/server2.id: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/server2.id -------------------------------------------------------------------------------- /tests/test_aux_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_aux_functions.py -------------------------------------------------------------------------------- /tests/test_block_exact_match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_block_exact_match.py -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_cache.py -------------------------------------------------------------------------------- /tests/test_chained_calls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_chained_calls.py -------------------------------------------------------------------------------- /tests/test_dtype.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_dtype.py -------------------------------------------------------------------------------- /tests/test_full_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_full_model.py -------------------------------------------------------------------------------- /tests/test_optimized_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_optimized_layers.py -------------------------------------------------------------------------------- /tests/test_peft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_peft.py -------------------------------------------------------------------------------- /tests/test_priority_pool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_priority_pool.py -------------------------------------------------------------------------------- /tests/test_remote_sequential.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_remote_sequential.py -------------------------------------------------------------------------------- /tests/test_sequence_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_sequence_manager.py -------------------------------------------------------------------------------- /tests/test_server_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_server_stats.py -------------------------------------------------------------------------------- /tests/test_speculative_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_speculative_generation.py -------------------------------------------------------------------------------- /tests/test_tensor_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_tensor_parallel.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bigscience-workshop/petals/HEAD/tests/test_utils.py --------------------------------------------------------------------------------