├── .buildkite
    ├── generate_pipeline.py
    └── test_buildkite_pipeline_generation.py
├── .dockerignore
├── .github
    ├── issue_template.md
    ├── pull_request_template.md
    └── workflows
    │   ├── dashboard.yml
    │   ├── docker-build.yaml
    │   ├── extract-buildkite.yml
    │   ├── format.yml
    │   ├── fusermount-server-image.yaml
    │   ├── go-reviewable.yaml
    │   ├── helm-docker-release.yaml
    │   ├── mypy.yml
    │   ├── nightly-build.yml
    │   ├── publish-and-validate.yml
    │   ├── publish-helm.yml
    │   ├── pylint.yml
    │   ├── pytest-generic.yml
    │   ├── pytest.yml
    │   ├── release-build.yml
    │   ├── release-publish.yml
    │   ├── smoke-tests-trigger.yaml
    │   ├── stale.yml
    │   ├── test-doc-build.yml
    │   └── wait-for-buildkite.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .pylintrc
├── .readthedocs.yml
├── CONTRIBUTING.md
├── Dockerfile
├── Dockerfile_k8s
├── Dockerfile_k8s_gpu
├── LICENSE
├── MANIFEST.in
├── README.md
├── addons
    └── fuse-proxy
    │   ├── .gitignore
    │   ├── Dockerfile
    │   ├── Makefile
    │   ├── README.md
    │   ├── cmd
    │       ├── fusermount-server
    │       │   └── main.go
    │       ├── fusermount-shim
    │       │   └── main.go
    │       └── fusermount-wrapper
    │       │   └── main.go
    │   ├── go.mod
    │   ├── go.sum
    │   └── pkg
    │       ├── client
    │           └── client.go
    │       ├── common
    │           └── common.go
    │       └── server
    │           └── server.go
├── charts
    └── skypilot
    │   ├── .gitignore
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── developer.md
    │   ├── templates
    │       ├── NOTES.txt
    │       ├── _helpers.tpl
    │       ├── api-configmap.yaml
    │       ├── api-deployment.yaml
    │       ├── api-secrets.yaml
    │       ├── api-service.yaml
    │       ├── auth.yaml
    │       ├── ingress-nodeport.yaml
    │       ├── ingress.yaml
    │       ├── oauth2-proxy-deployment.yaml
    │       ├── oauth2-proxy-ingress.yaml
    │       ├── oauth2-proxy-redis.yaml
    │       ├── oauth2-proxy-service.yaml
    │       ├── pvc.yaml
    │       ├── rbac.yaml
    │       └── system-rbac.yaml
    │   └── values.yaml
├── docs
    ├── .nojekyll
    ├── Makefile
    ├── README.md
    ├── build.sh
    ├── make.bat
    ├── repo-images
    │   ├── README
    │   ├── managed-job-schedule-state-diagram.png
    │   └── managed-job-status-diagram.png
    ├── requirements-docs.txt
    └── source
    │   ├── .gitignore
    │   ├── _static
    │       ├── SkyPilot_wide_dark.svg
    │       ├── SkyPilot_wide_light.svg
    │       ├── custom.css
    │       ├── custom.js
    │       ├── favicon.ico
    │       ├── intro.gif
    │       ├── intro.mp4
    │       └── rtd-data.js
    │   ├── _templates
    │       ├── author.html
    │       ├── header.html
    │       ├── layout.html
    │       ├── main-sidebar-home.html
    │       ├── main-sidebar.html
    │       └── navbar-skypilot-logo.html
    │   ├── admin
    │       └── workspaces.rst
    │   ├── cloud-setup
    │       ├── cloud-permissions
    │       │   ├── aws.rst
    │       │   ├── gcp.rst
    │       │   ├── index.rst
    │       │   ├── kubernetes.rst
    │       │   ├── nebius.rst
    │       │   └── vsphere.rst
    │       ├── policy.rst
    │       └── quota.rst
    │   ├── compute
    │       ├── cloud-vm.rst
    │       ├── gpus.rst
    │       ├── show-gpus-all.txt
    │       └── show-gpus-h100-8.txt
    │   ├── conf.py
    │   ├── developers
    │       ├── CONTRIBUTING.md
    │       └── index.rst
    │   ├── docs
    │       └── index.rst
    │   ├── examples
    │       ├── applications
    │       │   ├── batch_inference.md
    │       │   ├── index.rst
    │       │   ├── localgpt.md
    │       │   ├── rag.md
    │       │   ├── stable_diffusion.md
    │       │   ├── tabby.md
    │       │   └── vector_database.md
    │       ├── auto-failover.rst
    │       ├── docker-containers.rst
    │       ├── frameworks
    │       │   ├── airflow.md
    │       │   ├── dvc.md
    │       │   ├── index.rst
    │       │   ├── jupyter.md
    │       │   └── mpi.md
    │       ├── index.rst
    │       ├── interactive-development.rst
    │       ├── managed-jobs.rst
    │       ├── models
    │       │   ├── codellama.md
    │       │   ├── dbrx.md
    │       │   ├── deepseek-janus.md
    │       │   ├── deepseek-r1-distilled.md
    │       │   ├── deepseek-r1.md
    │       │   ├── gemma.md
    │       │   ├── gemma3.md
    │       │   ├── gpt-2.md
    │       │   ├── index.rst
    │       │   ├── llama-2.md
    │       │   ├── llama-3.md
    │       │   ├── llama-3_1.md
    │       │   ├── llama-3_2.md
    │       │   ├── llama-4.md
    │       │   ├── mixtral.md
    │       │   ├── pixtral.md
    │       │   ├── qwen.md
    │       │   ├── vicuna.md
    │       │   └── yi.md
    │       ├── performance
    │       │   ├── aws_efa.md
    │       │   ├── gcp_gpu_direct_tcpx.md
    │       │   ├── index.rst
    │       │   └── nebius_infiniband.md
    │       ├── ports.rst
    │       ├── serving
    │       │   ├── cog.md
    │       │   ├── index.rst
    │       │   ├── lorax.md
    │       │   ├── ollama.md
    │       │   ├── sglang.md
    │       │   ├── tgi.md
    │       │   └── vllm.md
    │       ├── syncing-code-artifacts.rst
    │       └── training
    │       │   ├── axolotl.md
    │       │   ├── deepspeed.md
    │       │   ├── distributed-pytorch.md
    │       │   ├── distributed-tensorflow.md
    │       │   ├── index.rst
    │       │   ├── llama-2-finetuning.md
    │       │   ├── llama-3_1-finetuning.md
    │       │   ├── nemo.md
    │       │   ├── ray.md
    │       │   ├── tpu.md
    │       │   └── unsloth.md
    │   ├── extension
    │       └── linting.py
    │   ├── gallery
    │       ├── applications
    │       │   ├── localgpt.rst
    │       │   ├── rag.rst
    │       │   ├── tabby.rst
    │       │   └── vector_database.rst
    │       ├── frameworks
    │       │   ├── lorax.rst
    │       │   ├── ollama.rst
    │       │   ├── sglang.rst
    │       │   ├── tgi.rst
    │       │   └── vllm.rst
    │       ├── index.rst
    │       └── llms
    │       │   ├── codellama.rst
    │       │   ├── dbrx.rst
    │       │   ├── deepseek-janus.rst
    │       │   ├── deepseek-r1-distilled.rst
    │       │   ├── deepseek-r1.rst
    │       │   ├── gemma.rst
    │       │   ├── gpt-2.rst
    │       │   ├── index.rst
    │       │   ├── llama-2.rst
    │       │   ├── llama-3.rst
    │       │   ├── llama-3_1.rst
    │       │   ├── llama-3_2.rst
    │       │   ├── mixtral.rst
    │       │   ├── pixtral.rst
    │       │   ├── qwen.rst
    │       │   ├── vicuna.rst
    │       │   └── yi.rst
    │   ├── generate_examples.py
    │   ├── getting-started
    │       ├── installation.rst
    │       ├── quickstart.rst
    │       └── tutorial.rst
    │   ├── images
    │       ├── SkyPilot-logo-wide.png
    │       ├── ai-gallery-cover.png
    │       ├── client-server
    │       │   ├── arch.png
    │       │   ├── auth-proxy-internals.svg
    │       │   ├── auth-proxy-user-flow.svg
    │       │   ├── cluster-users.png
    │       │   ├── executor.png
    │       │   ├── high-level-arch.png
    │       │   ├── local.png
    │       │   ├── okta-setup.png
    │       │   ├── okta.png
    │       │   ├── remote.png
    │       │   └── token-page.png
    │       ├── cloud-logos-dark.png
    │       ├── cloud-logos-light.png
    │       ├── config-cheatsheet-dark.svg
    │       ├── config-cheatsheet-light.svg
    │       ├── dashboard-clusters.png
    │       ├── dashboard-managed-jobs.png
    │       ├── gcp-vm.png
    │       ├── jupyter-auth.png
    │       ├── jupyter-covid.png
    │       ├── jupyter-create.png
    │       ├── jupyter-gpu.png
    │       ├── k8s-pod.png
    │       ├── k8s-skypilot-architecture-dark.png
    │       ├── k8s-skypilot-architecture-light.png
    │       ├── managed-jobs-arch.png
    │       ├── managed-jobs-dashboard.png
    │       ├── multi-kubernetes.svg
    │       ├── screenshots
    │       │   ├── aws
    │       │   │   ├── aws-add-policy.png
    │       │   │   ├── aws-add-role-entity.png
    │       │   │   ├── aws-add-role.png
    │       │   │   ├── aws-add-user.png
    │       │   │   ├── aws-create-access-key.png
    │       │   │   └── aws-create-policy.png
    │       │   ├── gcp
    │       │   │   ├── cloud-nat.png
    │       │   │   ├── create-iam.png
    │       │   │   ├── create-role.png
    │       │   │   ├── create-service-account.png
    │       │   │   ├── service-account-grant-role.png
    │       │   │   └── service-account-name.png
    │       │   ├── kubernetes
    │       │   │   └── kubernetes-dashboard.png
    │       │   ├── nebius
    │       │   │   └── nebius-k8s-attach-fs.png
    │       │   └── vsphere
    │       │   │   ├── content-lib-item-tag-adding.png
    │       │   │   ├── content-lib-item.png
    │       │   │   ├── content-lib-local.png
    │       │   │   ├── content-lib-name.png
    │       │   │   ├── content-lib-security-policy.png
    │       │   │   ├── content-lib-storage.png
    │       │   │   ├── content-libs-navigate.png
    │       │   │   ├── content-libs.png
    │       │   │   ├── vm-clone-to-template-cl.png
    │       │   │   ├── vm-clone-to-template-ovf.png
    │       │   │   ├── vm-clone-to-template.png
    │       │   │   ├── vsphere-catagory-create.png
    │       │   │   ├── vsphere-catagory-create_navigate.png
    │       │   │   ├── vsphere-catagory-create_navigate_new.png
    │       │   │   ├── vsphere-datastore-tag-adding.png
    │       │   │   ├── vsphere-tags-create.png
    │       │   │   ├── vsphere-tags-create_navigate.png
    │       │   │   ├── vsphere-vm-storage-policy-inventory.png
    │       │   │   ├── vsphere-vm-storage-policy-name.png
    │       │   │   ├── vsphere-vm-storage-policy-navigate-new.png
    │       │   │   ├── vsphere-vm-storage-policy-navigate.png
    │       │   │   ├── vsphere-vm-storage-policy-review.png
    │       │   │   ├── vsphere-vm-storage-policy-rule.png
    │       │   │   └── vsphere-vm-storage-policy-tag.png
    │       ├── sky-above-clouds-gen.jpg
    │       ├── sky-existing-infra-workflow-dark.png
    │       ├── sky-existing-infra-workflow-light.png
    │       ├── sky-serve-architecture.png
    │       ├── sky-serve-status-full.png
    │       ├── sky-serve-status-output-provisioning.png
    │       ├── sky-serve-status-tgi.png
    │       ├── sky-serve-status-vicuna-ready.png
    │       ├── sky-serve-status-vllm.png
    │       ├── sky-storage-modes.svg
    │       ├── skypilot-abstractions-long-2.png
    │       ├── skypilot-wide-dark-1k.png
    │       ├── skypilot-wide-light-1k.png
    │       ├── ssh-node-pools
    │       │   ├── infra.png
    │       │   └── pool-details.png
    │       └── workspaces
    │       │   ├── config.png
    │       │   ├── edit.png
    │       │   ├── overview.png
    │       │   └── resources.png
    │   ├── index.rst
    │   ├── overview.rst
    │   ├── reference
    │       ├── api-server
    │       │   ├── api-server-admin-deploy.rst
    │       │   ├── api-server-troubleshooting.rst
    │       │   ├── api-server-tunning.rst
    │       │   ├── api-server-upgrade.rst
    │       │   ├── api-server.rst
    │       │   ├── examples
    │       │   │   ├── api-server-auth-proxy.rst
    │       │   │   ├── api-server-persistence.rst
    │       │   │   └── example-deploy-gke-nebius-okta.rst
    │       │   └── helm-values-spec.rst
    │       ├── api.rst
    │       ├── async.rst
    │       ├── auto-stop.rst
    │       ├── cli.rst
    │       ├── comparison.rst
    │       ├── config-sources.rst
    │       ├── config.rst
    │       ├── faq.rst
    │       ├── job-queue.rst
    │       ├── kubernetes
    │       │   ├── examples
    │       │   │   └── index.rst
    │       │   ├── index.rst
    │       │   ├── kubernetes-deployment.rst
    │       │   ├── kubernetes-getting-started.rst
    │       │   ├── kubernetes-ports.rst
    │       │   ├── kubernetes-priorities.rst
    │       │   ├── kubernetes-setup.rst
    │       │   ├── kubernetes-troubleshooting.rst
    │       │   ├── multi-kubernetes.rst
    │       │   └── skypilot-and-vanilla-k8s.rst
    │       ├── logging.rst
    │       ├── storage.rst
    │       ├── tpu.rst
    │       ├── training-guide.rst
    │       ├── volumes.rst
    │       └── yaml-spec.rst
    │   ├── reservations
    │       ├── existing-machines.rst
    │       └── reservations.rst
    │   ├── running-jobs
    │       ├── distributed-jobs.rst
    │       ├── environment-variables.rst
    │       └── many-jobs.rst
    │   ├── serving
    │       ├── auth.rst
    │       ├── autoscaling.rst
    │       ├── https.rst
    │       ├── sky-serve.rst
    │       ├── spot-policy.rst
    │       ├── update.rst
    │       └── user-guides.rst
    │   └── sky-computing.rst
├── examples
    ├── README.md
    ├── admin_policy
    │   ├── add_labels.yaml
    │   ├── disable_public_ip.yaml
    │   ├── dynamic_kubernetes_contexts_update.yaml
    │   ├── enforce_autostop.yaml
    │   ├── example_policy
    │   │   ├── example_policy
    │   │   │   ├── __init__.py
    │   │   │   └── skypilot_policy.py
    │   │   └── pyproject.toml
    │   ├── reject_all.yaml
    │   ├── task.yaml
    │   └── use_spot_for_gpu.yaml
    ├── airflow
    │   ├── README.md
    │   ├── data_preprocessing.yaml
    │   ├── eval.yaml
    │   ├── sky_train_dag.py
    │   └── train.yaml
    ├── api-deploy-gke-nebius-okta
    │   └── README.md
    ├── autogluon.yaml
    ├── aws-neuron
    │   ├── README.md
    │   ├── inferentia.yaml
    │   └── multi-accelerator.yaml
    ├── aws_efa
    │   ├── README.md
    │   └── nccl_efa.yaml
    ├── azure_start_stop.yaml
    ├── benchmark
    │   ├── keras_asr.yaml
    │   ├── keras_asr
    │   │   └── callback.patch
    │   ├── lightning_gan.yaml
    │   ├── lightning_gan
    │   │   └── callback.patch
    │   ├── timm.yaml
    │   ├── timm
    │   │   ├── callback.patch
    │   │   └── dummy_dataset.patch
    │   ├── transformers_qa.yaml
    │   └── transformers_qa
    │   │   └── callback.patch
    ├── cog
    │   ├── README.md
    │   ├── cog.yaml
    │   ├── predict.py
    │   └── sky.yaml
    ├── containerized_app.py
    ├── custom_image.yaml
    ├── deepspeed-multinode
    │   ├── README.md
    │   └── sky.yaml
    ├── detectron2_app.yaml
    ├── detectron2_docker.yaml
    ├── disk_size.yaml
    ├── distributed-pytorch
    │   ├── README.md
    │   ├── train-rdzv.yaml
    │   └── train.yaml
    ├── distributed_ray_train
    │   ├── README.md
    │   ├── ray_train.yaml
    │   └── train.py
    ├── docker
    │   ├── compose
    │   │   ├── compose_example.yaml
    │   │   └── docker-compose.yml
    │   ├── detectron2
    │   │   ├── Dockerfile
    │   │   └── deploy.Dockerfile
    │   ├── echo_app.py
    │   ├── echo_app.yaml
    │   └── echo_app
    │   │   ├── Dockerfile
    │   │   ├── README.md
    │   │   └── echo.py
    ├── dvc
    │   ├── README.md
    │   └── dvc_pipeline.yaml
    ├── env_check.yaml
    ├── example_app.py
    ├── gcp_gpu_direct_tcpx
    │   ├── README.md
    │   ├── gpu_direct_tcpx.yaml
    │   └── tcpx_sglang_serving.yaml
    ├── gcp_start_stop.yaml
    ├── horovod_distributed_tf_app.py
    ├── http_server_with_custom_ports
    │   ├── server.py
    │   └── task.yaml
    ├── huggingface_glue_imdb_app.py
    ├── huggingface_glue_imdb_app.yaml
    ├── huggingface_glue_imdb_grid_search_app.py
    ├── hyperpod-eks
    │   ├── README.md
    │   └── train.yaml
    ├── image_with_tag.yaml
    ├── job_queue
    │   ├── cluster.yaml
    │   ├── cluster_docker.yaml
    │   ├── cluster_multinode.yaml
    │   ├── job.yaml
    │   ├── job_docker.yaml
    │   ├── job_gpu.yaml
    │   ├── job_ibm.yaml
    │   ├── job_multinode.yaml
    │   └── job_multinode_ibm.yaml
    ├── jupyter_app.py
    ├── jupyter_lab.yaml
    ├── k8s_cloud_deploy
    │   ├── README.md
    │   ├── cloud_k8s.yaml
    │   └── launch_k8s.sh
    ├── managed_job.yaml
    ├── managed_job_with_storage.yaml
    ├── managed_spot.yaml
    ├── many_gpu_vms.yaml
    ├── minimal.yaml
    ├── mpirun.yaml
    ├── multi_accelerators.yaml
    ├── multi_echo.py
    ├── multi_hostname.py
    ├── multi_hostname.yaml
    ├── multi_resources.yaml
    ├── nccl_test.yaml
    ├── nebius_infiniband
    │   ├── README.md
    │   ├── infiniband.yaml
    │   ├── nccl.yaml
    │   ├── nccl_network_tier.yaml
    │   ├── nccl_no_ib.yaml
    │   └── nccl_vm_ib.yaml
    ├── nemo
    │   ├── README.md
    │   ├── nemo_bert.yaml
    │   ├── nemo_gpt_distributed.yaml
    │   ├── nemo_gpt_preprocessing.yaml
    │   └── nemo_gpt_singlenode.yaml
    ├── oci
    │   ├── config.yaml
    │   ├── dataset-mount.yaml
    │   ├── dataset-upload-and-mount.yaml
    │   ├── gpu-oraclelinux9.yaml
    │   ├── gpu-ubuntu-2204.yaml
    │   ├── oci-mounts.yaml
    │   ├── oci_cpu-sky-preemptible.yaml
    │   ├── oci_cpu-sky.yaml
    │   ├── oci_gpu-sky.yaml
    │   ├── serve-http-cpu.yaml
    │   └── serve-qwen-7b.yaml
    ├── per_region_images.yaml
    ├── perf
    │   ├── results.md
    │   └── storage_rawperf.yaml
    ├── playground
    │   ├── min_fail.yaml
    │   ├── min_progress_bar.yaml
    │   ├── storage_playground.py
    │   └── symlink_playground.yaml
    ├── ray_tune_app.py
    ├── ray_tune_app.yaml
    ├── ray_tune_examples
    │   ├── tune_basic_example.py
    │   └── tune_ptl_example.py
    ├── resnet_app.py
    ├── resnet_app.yaml
    ├── resnet_app_storage.py
    ├── resnet_app_storage.yaml
    ├── resnet_app_storage_spot.yaml
    ├── resnet_distributed_tf_app.py
    ├── resnet_distributed_torch.yaml
    ├── resnet_distributed_torch_app.py
    ├── resnet_distributed_torch_scripts
    │   ├── run.sh
    │   └── setup.sh
    ├── resnet_distributed_torch_with_script.yaml
    ├── resnet_inference_app.yaml
    ├── sample_dotenv
    ├── serve
    │   ├── gorilla
    │   │   ├── gorilla.yaml
    │   │   └── use_gorilla.ipynb
    │   ├── http_server
    │   │   ├── server.py
    │   │   └── task.yaml
    │   ├── https
    │   │   └── service.yaml
    │   ├── huggingface-tgi.yaml
    │   ├── llama2
    │   │   ├── chat.py
    │   │   └── llama2.yaml
    │   ├── load_balancing_policies_example.yaml
    │   ├── min_replicas_zero.yaml
    │   ├── minimal.yaml
    │   ├── misc
    │   │   └── cancel
    │   │   │   ├── README.md
    │   │   │   ├── send_cancel_request.py
    │   │   │   ├── server.py
    │   │   │   └── service.yaml
    │   ├── ray_serve
    │   │   ├── ray_serve.yaml
    │   │   └── serve.py
    │   ├── spot_policy
    │   │   ├── base_on_demand_fallback_replicas.yaml
    │   │   ├── dynamic_on_demand_fallback.yaml
    │   │   └── multi_accelerators.yaml
    │   ├── stable_diffusion_service.yaml
    │   ├── vicuna-v1.5.yaml
    │   └── vllm.yaml
    ├── show_gpus.sh
    ├── spot
    │   ├── bert_qa.yaml
    │   ├── lightning_cifar10.yaml
    │   ├── lightning_cifar10
    │   │   ├── requirements.txt
    │   │   └── train.py
    │   ├── resnet.yaml
    │   └── resnet_ddp
    │   │   ├── README.md
    │   │   ├── requirements.txt
    │   │   └── resnet_ddp.py
    ├── spot_pipeline
    │   ├── bert_qa_train_eval.yaml
    │   ├── multi_jobs.yaml
    │   └── single.yaml
    ├── stable_diffusion
    │   ├── Dockerfile
    │   ├── README.md
    │   ├── docker-compose.yml
    │   ├── pushing_docker_image.md
    │   └── stable_diffusion_docker.yaml
    ├── storage
    │   ├── checkpointed_training.yaml
    │   ├── hostname_echo_demo.yaml
    │   └── pingpong.yaml
    ├── storage_demo.yaml
    ├── tensorboard_app.py
    ├── tensorflow_distributed
    │   ├── README.md
    │   ├── tf_distributed.yaml
    │   └── train.py
    ├── time_estimators.py
    ├── timm_app.py
    ├── torch_ddp_benchmark
    │   ├── torch_ddp_benchmark.py
    │   └── torch_ddp_benchmark.yaml
    ├── tpu
    │   ├── README.md
    │   ├── tpu_app.py
    │   ├── tpu_app.yaml
    │   ├── tpu_app_code
    │   │   ├── requirements.txt
    │   │   └── run_tpu.py
    │   ├── tpu_node_mnist.yaml
    │   ├── tpuvm_mnist.yaml
    │   └── v6e
    │   │   ├── README.md
    │   │   ├── benchmark-llama2-7b.yaml
    │   │   ├── config-8B.json
    │   │   ├── fsdp_config.json
    │   │   ├── serve-llama2-7b.yaml
    │   │   └── train-llama3-8b.yaml
    ├── unsloth
    │   ├── README.md
    │   ├── unsloth.yaml
    │   └── unsloth_example.py
    ├── using_file_mounts.yaml
    ├── using_file_mounts_with_env_vars.yaml
    └── vector_database
    │   ├── README.md
    │   ├── batch_compute_vectors.py
    │   ├── build_vectordb.yaml
    │   ├── compute_vectors.yaml
    │   ├── scripts
    │       ├── build_vectordb.py
    │       ├── compute_vectors.py
    │       └── serve_vectordb.py
    │   └── serve_vectordb.yaml
├── format.sh
├── llm
    ├── axolotl
    │   ├── axolotl-docker.yaml
    │   ├── axolotl-spot.yaml
    │   ├── axolotl.yaml
    │   ├── mistral
    │   │   ├── qlora-checkpoint.yaml
    │   │   └── qlora.yaml
    │   └── readme.md
    ├── batch_inference
    │   ├── README.md
    │   ├── batch_compute_vectors.py
    │   ├── compute_text_vectors.yaml
    │   ├── monitor_progress.yaml
    │   └── scripts
    │   │   ├── base_vector_processor.py
    │   │   ├── monitor_progress.py
    │   │   └── text_vector_processor.py
    ├── codellama
    │   ├── README.md
    │   ├── complete.py
    │   ├── endpoint.yaml
    │   ├── gui.yaml
    │   └── tabby.yaml
    ├── dbrx
    │   ├── README.md
    │   └── dbrx.yaml
    ├── deepseek-janus
    │   ├── README.md
    │   ├── janus_1.5b.yaml
    │   └── januspro_7b.yaml
    ├── deepseek-r1-distilled
    │   ├── README.md
    │   └── deepseek-r1-vllm.yaml
    ├── deepseek-r1
    │   ├── README.md
    │   ├── deepseek-r1-671B-A100.yaml
    │   └── deepseek-r1-671B.yaml
    ├── falcon
    │   ├── README.md
    │   ├── falcon.yaml
    │   └── train.py
    ├── gemma
    │   ├── README.md
    │   └── serve.yaml
    ├── gemma3
    │   ├── README.md
    │   └── gemma3.yaml
    ├── gpt-2
    │   ├── README.md
    │   ├── gpt2-data.yaml
    │   ├── gpt2-pipeline.yaml
    │   ├── gpt2-train.yaml
    │   └── gpt2.yaml
    ├── llama-2
    │   ├── README.md
    │   ├── chatbot-hf.yaml
    │   └── chatbot-meta.yaml
    ├── llama-3
    │   ├── README.md
    │   ├── gui.yaml
    │   └── llama3.yaml
    ├── llama-3_1-finetuning
    │   ├── configs
    │   │   ├── 70B-lora.yaml
    │   │   └── 8B-lora.yaml
    │   ├── lora.yaml
    │   ├── readme.md
    │   └── serve.yaml
    ├── llama-3_1
    │   ├── README.md
    │   └── llama-3_1.yaml
    ├── llama-3_2
    │   ├── README.md
    │   ├── llama3_2-vision-11b.yaml
    │   └── llama3_2.yaml
    ├── llama-4
    │   ├── README.md
    │   └── llama4.yaml
    ├── llama-chatbots
    │   ├── README.md
    │   ├── llama-13b-upload.yaml
    │   ├── llama-13b.yaml
    │   ├── llama-30b-upload.yaml
    │   ├── llama-30b.yaml
    │   ├── llama-65b-upload.yaml
    │   ├── llama-65b.yaml
    │   ├── llama-7b-upload.yaml
    │   └── llama-7b.yaml
    ├── localgpt
    │   ├── README.md
    │   └── localgpt.yaml
    ├── lorax
    │   ├── README.md
    │   └── lorax.yaml
    ├── mixtral
    │   ├── README.md
    │   └── serve.yaml
    ├── ollama
    │   ├── README.md
    │   └── ollama.yaml
    ├── pixtral
    │   ├── README.md
    │   └── pixtral.yaml
    ├── qwen
    │   ├── README.md
    │   ├── gui.yaml
    │   ├── qwen15-110b.yaml
    │   ├── qwen2-vl-7b.yaml
    │   ├── qwen25-72b.yaml
    │   ├── qwen25-7b.yaml
    │   └── qwen3-235b.yaml
    ├── rag
    │   ├── README.md
    │   ├── batch_compute_embeddings.py
    │   ├── build_rag.yaml
    │   ├── compute_embeddings.yaml
    │   ├── scripts
    │   │   ├── build_rag.py
    │   │   ├── compute_embeddings.py
    │   │   ├── serve_rag.py
    │   │   └── templates
    │   │   │   └── index.html
    │   └── serve_rag.yaml
    ├── sglang
    │   ├── README.md
    │   ├── llama2.yaml
    │   └── llava.yaml
    ├── tabby
    │   ├── README.md
    │   ├── docker-compose.cuda.yaml
    │   ├── docker-compose.yaml
    │   ├── tabby.yaml
    │   └── tabby
    │   │   └── config.toml
    ├── tgi
    │   ├── README.md
    │   └── serve.yaml
    ├── vicuna-llama-2
    │   ├── README.md
    │   ├── scripts
    │   │   ├── flash_attn_patch.py
    │   │   ├── hardcoded_questions.py
    │   │   ├── train.py
    │   │   ├── train_flash_attn.py
    │   │   ├── train_xformers.py
    │   │   └── xformers_patch.py
    │   ├── serve.yaml
    │   └── train.yaml
    ├── vicuna
    │   ├── LICENSE
    │   ├── README.md
    │   ├── dummy.json
    │   ├── scripts
    │   │   └── sync_local_checkpoint.sh
    │   ├── serve-openai-api-endpoint.yaml
    │   ├── serve.yaml
    │   └── train.yaml
    ├── vllm
    │   ├── README.md
    │   ├── serve-openai-api-docker.yaml
    │   ├── serve-openai-api.yaml
    │   ├── serve.yaml
    │   ├── service-with-auth.yaml
    │   └── service.yaml
    └── yi
    │   ├── README.md
    │   ├── yi15-34b.yaml
    │   ├── yi15-6b.yaml
    │   ├── yi15-9b.yaml
    │   ├── yicoder-1_5b.yaml
    │   └── yicoder-9b.yaml
├── pyproject.toml
├── requirements-dev.txt
├── setup.py
├── sky
    ├── __init__.py
    ├── adaptors
    │   ├── README.md
    │   ├── __init__.py
    │   ├── aws.py
    │   ├── azure.py
    │   ├── cloudflare.py
    │   ├── common.py
    │   ├── cudo.py
    │   ├── do.py
    │   ├── docker.py
    │   ├── gcp.py
    │   ├── ibm.py
    │   ├── kubernetes.py
    │   ├── nebius.py
    │   ├── oci.py
    │   ├── runpod.py
    │   ├── vast.py
    │   └── vsphere.py
    ├── admin_policy.py
    ├── authentication.py
    ├── backends
    │   ├── __init__.py
    │   ├── backend.py
    │   ├── backend_utils.py
    │   ├── cloud_vm_ray_backend.py
    │   ├── docker_utils.py
    │   ├── local_docker_backend.py
    │   ├── monkey_patches
    │   │   └── monkey_patch_ray_up.py
    │   ├── playground
    │   │   └── demo_dockerutils.py
    │   └── wheel_utils.py
    ├── benchmark
    │   ├── __init__.py
    │   ├── benchmark_state.py
    │   └── benchmark_utils.py
    ├── callbacks
    │   ├── setup.py
    │   └── sky_callback
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── base.py
    │   │   ├── integrations
    │   │       ├── __init__.py
    │   │       ├── keras.py
    │   │       ├── pytorch_lightning.py
    │   │       └── transformers.py
    │   │   └── utils.py
    ├── check.py
    ├── cli.py
    ├── client
    │   ├── __init__.py
    │   ├── cli.py
    │   ├── common.py
    │   ├── oauth.py
    │   └── sdk.py
    ├── cloud_stores.py
    ├── clouds
    │   ├── __init__.py
    │   ├── aws.py
    │   ├── azure.py
    │   ├── cloud.py
    │   ├── cudo.py
    │   ├── do.py
    │   ├── fluidstack.py
    │   ├── gcp.py
    │   ├── ibm.py
    │   ├── kubernetes.py
    │   ├── lambda_cloud.py
    │   ├── nebius.py
    │   ├── oci.py
    │   ├── paperspace.py
    │   ├── runpod.py
    │   ├── scp.py
    │   ├── service_catalog
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── aws_catalog.py
    │   │   ├── azure_catalog.py
    │   │   ├── common.py
    │   │   ├── config.py
    │   │   ├── constants.py
    │   │   ├── cudo_catalog.py
    │   │   ├── data_fetchers
    │   │   │   ├── __init__.py
    │   │   │   ├── analyze.py
    │   │   │   ├── fetch_aws.py
    │   │   │   ├── fetch_azure.py
    │   │   │   ├── fetch_cudo.py
    │   │   │   ├── fetch_fluidstack.py
    │   │   │   ├── fetch_gcp.py
    │   │   │   ├── fetch_ibm.py
    │   │   │   ├── fetch_lambda_cloud.py
    │   │   │   ├── fetch_vast.py
    │   │   │   ├── fetch_vsphere.py
    │   │   │   └── requirements.txt
    │   │   ├── do_catalog.py
    │   │   ├── fluidstack_catalog.py
    │   │   ├── gcp_catalog.py
    │   │   ├── ibm_catalog.py
    │   │   ├── images
    │   │   │   ├── README.md
    │   │   │   ├── aws_utils
    │   │   │   │   ├── image_delete.py
    │   │   │   │   └── image_gen.py
    │   │   │   ├── plugins.pkr.hcl
    │   │   │   ├── provisioners
    │   │   │   │   ├── cuda-azure-grid.sh
    │   │   │   │   ├── cuda.sh
    │   │   │   │   ├── docker.sh
    │   │   │   │   ├── nvidia-container-toolkit.sh
    │   │   │   │   ├── skypilot.sh
    │   │   │   │   └── user-toolkit.sh
    │   │   │   ├── skypilot-aws-cpu-ubuntu.pkr.hcl
    │   │   │   ├── skypilot-aws-gpu-ubuntu.pkr.hcl
    │   │   │   ├── skypilot-azure-cpu-ubuntu.pkr.hcl
    │   │   │   ├── skypilot-azure-gpu-ubuntu.pkr.hcl
    │   │   │   ├── skypilot-gcp-cpu-ubuntu.pkr.hcl
    │   │   │   ├── skypilot-gcp-gpu-ubuntu.pkr.hcl
    │   │   │   └── skypilot-k8s-image.sh
    │   │   ├── kubernetes_catalog.py
    │   │   ├── lambda_catalog.py
    │   │   ├── nebius_catalog.py
    │   │   ├── oci_catalog.py
    │   │   ├── paperspace_catalog.py
    │   │   ├── runpod_catalog.py
    │   │   ├── scp_catalog.py
    │   │   ├── ssh_catalog.py
    │   │   ├── vast_catalog.py
    │   │   └── vsphere_catalog.py
    │   ├── ssh.py
    │   ├── utils
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── aws_utils.py
    │   │   ├── azure_utils.py
    │   │   ├── gcp_utils.py
    │   │   ├── oci_utils.py
    │   │   └── scp_utils.py
    │   ├── vast.py
    │   └── vsphere.py
    ├── core.py
    ├── dag.py
    ├── dashboard
    │   ├── .eslintrc.json
    │   ├── .gitignore
    │   ├── .prettierrc
    │   ├── README.md
    │   ├── components.json
    │   ├── eslint.config.mjs
    │   ├── jest.config.js
    │   ├── jest.setup.js
    │   ├── jsconfig.json
    │   ├── next.config.mjs
    │   ├── package-lock.json
    │   ├── package.json
    │   ├── postcss.config.mjs
    │   ├── public
    │   │   ├── favicon.ico
    │   │   ├── skypilot.svg
    │   │   └── videos
    │   │   │   └── cursor-small.mp4
    │   ├── server.js
    │   ├── src
    │   │   ├── app
    │   │   │   └── globals.css
    │   │   ├── components
    │   │   │   ├── clusters.jsx
    │   │   │   ├── elements
    │   │   │   │   ├── ErrorDisplay.jsx
    │   │   │   │   ├── StatusBadge.jsx
    │   │   │   │   ├── events.jsx
    │   │   │   │   ├── icons.jsx
    │   │   │   │   ├── layout.jsx
    │   │   │   │   ├── modals.jsx
    │   │   │   │   ├── sidebar.jsx
    │   │   │   │   └── version-display.jsx
    │   │   │   ├── infra.jsx
    │   │   │   ├── jobs.jsx
    │   │   │   ├── ui
    │   │   │   │   ├── alert.jsx
    │   │   │   │   ├── avatar.jsx
    │   │   │   │   ├── button.jsx
    │   │   │   │   ├── card.jsx
    │   │   │   │   ├── dialog.jsx
    │   │   │   │   ├── flip_card.jsx
    │   │   │   │   ├── input.jsx
    │   │   │   │   ├── label.jsx
    │   │   │   │   ├── select.jsx
    │   │   │   │   ├── table.jsx
    │   │   │   │   ├── tabs.jsx
    │   │   │   │   └── textarea.jsx
    │   │   │   ├── users.jsx
    │   │   │   ├── utils.jsx
    │   │   │   ├── workspace-editor.jsx
    │   │   │   └── workspaces.jsx
    │   │   ├── data
    │   │   │   ├── connectors
    │   │   │   │   ├── clusters.jsx
    │   │   │   │   ├── constants.jsx
    │   │   │   │   ├── infra.jsx
    │   │   │   │   ├── jobs.jsx
    │   │   │   │   ├── toast.jsx
    │   │   │   │   ├── users.js
    │   │   │   │   └── workspaces.jsx
    │   │   │   └── utils.jsx
    │   │   ├── hooks
    │   │   │   └── useMobile.js
    │   │   ├── lib
    │   │   │   ├── README.md
    │   │   │   ├── cache-preloader.js
    │   │   │   ├── cache.js
    │   │   │   ├── config.js
    │   │   │   └── utils.js
    │   │   └── pages
    │   │   │   ├── _app.js
    │   │   │   ├── clusters.js
    │   │   │   ├── clusters
    │   │   │       ├── [cluster].js
    │   │   │       └── [cluster]
    │   │   │       │   └── [job].js
    │   │   │   ├── config.js
    │   │   │   ├── index.js
    │   │   │   ├── infra.js
    │   │   │   ├── infra
    │   │   │       └── [context].js
    │   │   │   ├── jobs.js
    │   │   │   ├── jobs
    │   │   │       └── [job].js
    │   │   │   ├── users.js
    │   │   │   ├── workspace
    │   │   │       └── new.js
    │   │   │   ├── workspaces.js
    │   │   │   └── workspaces
    │   │   │       └── [name].js
    │   └── tailwind.config.js
    ├── data
    │   ├── __init__.py
    │   ├── data_transfer.py
    │   ├── data_utils.py
    │   ├── mounting_utils.py
    │   ├── storage.py
    │   └── storage_utils.py
    ├── design_docs
    │   ├── client_server.md
    │   ├── cluster_name.md
    │   ├── cluster_status.md
    │   ├── figures
    │   │   ├── cluster-state-transition.svg
    │   │   └── grafana-loki-setup.png
    │   ├── usage_collection.md
    │   └── workspaces.md
    ├── exceptions.py
    ├── execution.py
    ├── global_user_state.py
    ├── jobs
    │   ├── README.md
    │   ├── __init__.py
    │   ├── client
    │   │   ├── __init__.py
    │   │   └── sdk.py
    │   ├── constants.py
    │   ├── controller.py
    │   ├── dashboard
    │   │   ├── dashboard.py
    │   │   ├── static
    │   │   │   └── favicon.ico
    │   │   └── templates
    │   │   │   └── index.html
    │   ├── recovery_strategy.py
    │   ├── scheduler.py
    │   ├── server
    │   │   ├── __init__.py
    │   │   ├── core.py
    │   │   ├── dashboard_utils.py
    │   │   └── server.py
    │   ├── state.py
    │   └── utils.py
    ├── models.py
    ├── optimizer.py
    ├── provision
    │   ├── __init__.py
    │   ├── aws
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── instance.py
    │   │   └── utils.py
    │   ├── azure
    │   │   ├── __init__.py
    │   │   ├── azure-config-template.json
    │   │   ├── config.py
    │   │   └── instance.py
    │   ├── common.py
    │   ├── constants.py
    │   ├── cudo
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── cudo_machine_type.py
    │   │   ├── cudo_utils.py
    │   │   ├── cudo_wrapper.py
    │   │   └── instance.py
    │   ├── do
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── constants.py
    │   │   ├── instance.py
    │   │   └── utils.py
    │   ├── docker_utils.py
    │   ├── fluidstack
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── fluidstack_utils.py
    │   │   └── instance.py
    │   ├── gcp
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── constants.py
    │   │   ├── instance.py
    │   │   ├── instance_utils.py
    │   │   ├── mig_utils.py
    │   │   └── volume_utils.py
    │   ├── instance_setup.py
    │   ├── kubernetes
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── constants.py
    │   │   ├── instance.py
    │   │   ├── manifests
    │   │   │   └── fusermount-server-daemonset.yaml
    │   │   ├── network.py
    │   │   ├── network_utils.py
    │   │   └── utils.py
    │   ├── lambda_cloud
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── instance.py
    │   │   └── lambda_utils.py
    │   ├── logging.py
    │   ├── metadata_utils.py
    │   ├── nebius
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── instance.py
    │   │   └── utils.py
    │   ├── oci
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── instance.py
    │   │   └── query_utils.py
    │   ├── paperspace
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── constants.py
    │   │   ├── instance.py
    │   │   └── utils.py
    │   ├── provisioner.py
    │   ├── runpod
    │   │   ├── __init__.py
    │   │   ├── api
    │   │   │   ├── __init__.py
    │   │   │   ├── commands.py
    │   │   │   └── pods.py
    │   │   ├── config.py
    │   │   ├── instance.py
    │   │   └── utils.py
    │   ├── ssh
    │   │   └── __init__.py
    │   ├── vast
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   ├── instance.py
    │   │   └── utils.py
    │   └── vsphere
    │   │   ├── __init__.py
    │   │   ├── common
    │   │       ├── __init__.py
    │   │       ├── cls_api_client.py
    │   │       ├── cls_api_helper.py
    │   │       ├── custom_script.py
    │   │       ├── id_generator.py
    │   │       ├── metadata_utils.py
    │   │       ├── service_manager.py
    │   │       ├── service_manager_factory.py
    │   │       ├── ssl_helper.py
    │   │       ├── vapiconnect.py
    │   │       └── vim_utils.py
    │   │   ├── config.py
    │   │   ├── instance.py
    │   │   └── vsphere_utils.py
    ├── resources.py
    ├── serve
    │   ├── README.md
    │   ├── __init__.py
    │   ├── autoscalers.py
    │   ├── client
    │   │   ├── __init__.py
    │   │   └── sdk.py
    │   ├── constants.py
    │   ├── controller.py
    │   ├── load_balancer.py
    │   ├── load_balancing_policies.py
    │   ├── replica_managers.py
    │   ├── serve_state.py
    │   ├── serve_utils.py
    │   ├── server
    │   │   ├── __init__.py
    │   │   ├── core.py
    │   │   └── server.py
    │   ├── service.py
    │   ├── service_spec.py
    │   └── spot_placer.py
    ├── server
    │   ├── __init__.py
    │   ├── common.py
    │   ├── config.py
    │   ├── constants.py
    │   ├── html
    │   │   ├── log.html
    │   │   └── token_page.html
    │   ├── requests
    │   │   ├── __init__.py
    │   │   ├── event_loop.py
    │   │   ├── executor.py
    │   │   ├── payloads.py
    │   │   ├── preconditions.py
    │   │   ├── process.py
    │   │   ├── queues
    │   │   │   ├── __init__.py
    │   │   │   ├── local_queue.py
    │   │   │   └── mp_queue.py
    │   │   ├── requests.py
    │   │   └── serializers
    │   │   │   ├── __init__.py
    │   │   │   ├── decoders.py
    │   │   │   └── encoders.py
    │   ├── server.py
    │   ├── stream_utils.py
    │   └── uvicorn.py
    ├── setup_files
    │   ├── MANIFEST.in
    │   ├── dependencies.py
    │   └── setup.py
    ├── sky_logging.py
    ├── skylet
    │   ├── LICENSE
    │   ├── README.md
    │   ├── __init__.py
    │   ├── attempt_skylet.py
    │   ├── autostop_lib.py
    │   ├── configs.py
    │   ├── constants.py
    │   ├── events.py
    │   ├── job_lib.py
    │   ├── log_lib.py
    │   ├── log_lib.pyi
    │   ├── providers
    │   │   ├── __init__.py
    │   │   ├── command_runner.py
    │   │   ├── ibm
    │   │   │   ├── __init__.py
    │   │   │   ├── node_provider.py
    │   │   │   ├── utils.py
    │   │   │   └── vpc_provider.py
    │   │   └── scp
    │   │   │   ├── __init__.py
    │   │   │   ├── config.py
    │   │   │   └── node_provider.py
    │   ├── ray_patches
    │   │   ├── __init__.py
    │   │   ├── autoscaler.py.patch
    │   │   ├── cli.py.patch
    │   │   ├── command_runner.py.patch
    │   │   ├── log_monitor.py.patch
    │   │   ├── resource_demand_scheduler.py.patch
    │   │   ├── updater.py.patch
    │   │   └── worker.py.patch
    │   ├── skylet.py
    │   └── subprocess_daemon.py
    ├── skypilot_config.py
    ├── task.py
    ├── templates
    │   ├── aws-ray.yml.j2
    │   ├── azure-ray.yml.j2
    │   ├── cudo-ray.yml.j2
    │   ├── do-ray.yml.j2
    │   ├── fluidstack-ray.yml.j2
    │   ├── gcp-ray.yml.j2
    │   ├── ibm-ray.yml.j2
    │   ├── jobs-controller.yaml.j2
    │   ├── kubernetes-ingress.yml.j2
    │   ├── kubernetes-loadbalancer.yml.j2
    │   ├── kubernetes-port-forward-proxy-command.sh
    │   ├── kubernetes-ray.yml.j2
    │   ├── kubernetes-ssh-jump.yml.j2
    │   ├── lambda-ray.yml.j2
    │   ├── local-ray.yml.j2
    │   ├── nebius-ray.yml.j2
    │   ├── oci-ray.yml.j2
    │   ├── paperspace-ray.yml.j2
    │   ├── runpod-ray.yml.j2
    │   ├── scp-ray.yml.j2
    │   ├── sky-serve-controller.yaml.j2
    │   ├── skypilot-server-kubernetes-proxy.sh
    │   ├── vast-ray.yml.j2
    │   ├── vsphere-ray.yml.j2
    │   └── websocket_proxy.py
    ├── usage
    │   ├── __init__.py
    │   ├── constants.py
    │   ├── loki-s3-config.yaml
    │   └── usage_lib.py
    ├── utils
    │   ├── __init__.py
    │   ├── accelerator_registry.py
    │   ├── admin_policy_utils.py
    │   ├── annotations.py
    │   ├── atomic.py
    │   ├── aws
    │   │   ├── __init__.py
    │   │   └── get_default_security_group.py
    │   ├── cli_utils
    │   │   ├── __init__.py
    │   │   └── status_utils.py
    │   ├── cluster_utils.py
    │   ├── command_runner.py
    │   ├── command_runner.pyi
    │   ├── common.py
    │   ├── common_utils.py
    │   ├── config_utils.py
    │   ├── context.py
    │   ├── context_utils.py
    │   ├── control_master_utils.py
    │   ├── controller_utils.py
    │   ├── dag_utils.py
    │   ├── db_utils.py
    │   ├── env_options.py
    │   ├── infra_utils.py
    │   ├── kubernetes
    │   │   ├── __init__.py
    │   │   ├── cleanup-tunnel.sh
    │   │   ├── config_map_utils.py
    │   │   ├── create_cluster.sh
    │   │   ├── delete_cluster.sh
    │   │   ├── deploy_remote_cluster.py
    │   │   ├── exec_kubeconfig_converter.py
    │   │   ├── generate_kind_config.py
    │   │   ├── generate_kubeconfig.sh
    │   │   ├── gpu_labeler.py
    │   │   ├── k8s_gpu_labeler_job.yaml
    │   │   ├── k8s_gpu_labeler_setup.yaml
    │   │   ├── kubernetes_deploy_utils.py
    │   │   ├── rsync_helper.sh
    │   │   ├── ssh-tunnel.sh
    │   │   └── ssh_jump_lifecycle_manager.py
    │   ├── kubernetes_enums.py
    │   ├── log_utils.py
    │   ├── message_utils.py
    │   ├── registry.py
    │   ├── resources_utils.py
    │   ├── rich_console_utils.py
    │   ├── rich_utils.py
    │   ├── schemas.py
    │   ├── status_lib.py
    │   ├── subprocess_utils.py
    │   ├── timeline.py
    │   ├── ux_utils.py
    │   └── validator.py
    └── workspaces
    │   ├── __init__.py
    │   ├── core.py
    │   └── server.py
└── tests
    ├── common_test_fixtures.py
    ├── conftest.py
    ├── default_aws_az_mappings.csv
    ├── default_vsphere_vms.csv
    ├── git_info_exclude_test
    ├── gitignore_test
    ├── kubernetes
        ├── README.md
        ├── cpu_test_pod.yaml
        ├── eks_test_cluster.yaml
        ├── gpu_test_pod.yaml
        ├── ingress_test.yaml
        ├── loadbalancer_test_svc.yaml
        ├── networking_benchmarks
        │   ├── k8s_network_benchmarks.md
        │   ├── rsync_bench.sh
        │   └── skylaunch_bench.sh
        └── scripts
        │   ├── clean_k8s.sh
        │   ├── dashboard.yaml
        │   ├── delete.sh
        │   ├── deploy_k3s.sh
        │   ├── install_dashboard.sh
        │   ├── ray_k8s_sky.yaml
        │   ├── run.sh
        │   └── skypilot_ssh_k8s_deployment.yaml
    ├── load_tests
        ├── README.md
        ├── serve.yaml
        ├── sys_profiling.py
        ├── test_distribute_load_on_server.py
        ├── test_load_on_server.py
        └── test_queue_dispatcher.py
    ├── mypy_files.txt
    ├── run_smoke_tests.yaml
    ├── skyserve
        ├── auto_restart.yaml
        ├── cancel
        │   ├── cancel.yaml
        │   └── send_cancel_request.py
        ├── failures
        │   ├── initial_delay.yaml
        │   ├── probing.py
        │   └── probing.yaml
        ├── high_availability
        │   ├── config.yaml
        │   └── service.yaml
        ├── http
        │   ├── aws.yaml
        │   ├── azure.yaml
        │   ├── gcp.yaml
        │   ├── kubernetes.yaml
        │   └── oci.yaml
        ├── https
        │   └── service.yaml
        ├── llm
        │   ├── get_response.py
        │   ├── prompt_output.json
        │   └── service.yaml
        ├── load_balancer
        │   ├── server.py
        │   ├── service.yaml
        │   └── test_round_robin.py
        ├── multi_ports.yaml
        ├── readiness_timeout
        │   ├── server.py
        │   ├── task.yaml
        │   └── task_large_timeout.yaml
        ├── restart
        │   ├── user_bug.py
        │   └── user_bug.yaml
        ├── spot
        │   ├── base_ondemand_fallback.yaml
        │   ├── dynamic_ondemand_fallback.yaml
        │   ├── recovery.yaml
        │   ├── spot_hedge.yaml
        │   └── spot_hedge_T4.yaml
        ├── streaming
        │   ├── example.txt
        │   ├── send_streaming_request.py
        │   ├── server.py
        │   └── streaming.yaml
        └── update
        │   ├── bump_version_after.yaml
        │   ├── bump_version_before.yaml
        │   ├── new.yaml
        │   ├── new_autoscaler_after.yaml
        │   ├── new_autoscaler_before.yaml
        │   ├── new_server.py
        │   ├── num_min_one.yaml
        │   ├── num_min_two.yaml
        │   ├── old.yaml
        │   └── old_server.py
    ├── smoke_tests
        ├── __init__.py
        ├── backward_compat
        │   ├── sdk_backward_compat_utils.py
        │   └── test_backward_compat.py
        ├── docker
        │   ├── Dockerfile_test
        │   ├── __init__.py
        │   ├── docker_utils.py
        │   ├── entrypoint.sh
        │   └── stop_sky_resource.sh
        ├── smoke_tests_utils.py
        ├── test_api_server.py
        ├── test_basic.py
        ├── test_cluster_job.py
        ├── test_images.py
        ├── test_managed_job.py
        ├── test_mount_and_storage.py
        ├── test_quick_tests_core.py
        ├── test_region_and_zone.py
        ├── test_sky_serve.py
        ├── test_volume_mount.py
        └── test_workspaces.py
    ├── stress
        └── mountedstorage
        │   ├── mount_stress.yaml
        │   └── read_parallel.py
    ├── test_api.py
    ├── test_cli.py
    ├── test_config.py
    ├── test_db_utils.py
    ├── test_failover.py
    ├── test_global_user_state.py
    ├── test_jobs.py
    ├── test_jobs_and_serve.py
    ├── test_list_accelerators.py
    ├── test_optimizer_dryruns.py
    ├── test_optimizer_random_dag.py
    ├── test_serve_autoscaler.py
    ├── test_smoke.py
    ├── test_storage.py
    ├── test_wheels.py
    ├── test_yaml_parser.py
    ├── test_yamls
        ├── different_default_conda_env.yaml
        ├── failed_setup.yaml
        ├── failed_setup_pipeline.yaml
        ├── failed_worker_run.yaml
        ├── failed_worker_setup.yaml
        ├── force_enable_external_ips_config.yaml
        ├── gcp_per_region_images.yaml
        ├── intermediate_bucket.yaml
        ├── low_resource_sky_config.yaml
        ├── minimal.yaml
        ├── minimal_test_quick_tests_core.yaml
        ├── pipeline.yaml
        ├── pipeline_aws.yaml
        ├── pipeline_gcp.yaml
        ├── test_aws_config.yaml
        ├── test_custom_default_conda_env.yaml
        ├── test_custom_image.yaml
        ├── test_ibm_cos_storage_mounting.yaml
        ├── test_k8s_logs.yaml
        ├── test_labels.yaml.j2
        ├── test_long_setup.yaml
        ├── test_managed_jobs_retry.yaml
        ├── test_multiple_accelerators_ordered.yaml
        ├── test_multiple_accelerators_ordered_with_default.yaml
        ├── test_multiple_accelerators_unordered.yaml
        ├── test_multiple_accelerators_unordered_with_default.yaml
        ├── test_multiple_resources.yaml
        ├── test_nebius_storage_mounting.yaml
        ├── test_only_setup.yaml
        ├── test_r2_storage_mounting.yaml
        ├── test_rclone_mount.yaml
        ├── test_serve_autoscaler.yaml
        ├── test_skyignore.yaml
        ├── test_skyignore_verification.py
        ├── test_storage_mounting.yaml.j2
        ├── test_volume_mount.yaml.j2
        ├── use_intermediate_bucket_config.yaml
        ├── use_internal_ips_config.yaml
        └── use_mig_config.yaml
    └── unit_tests
        ├── kubernetes
            ├── test_gpu_label_formatters.py
            ├── test_instance_type.py
            └── test_kubernetes_utils.py
        ├── test_adaptor.py
        ├── test_admin_policy.py
        ├── test_authentication.py
        ├── test_aws.py
        ├── test_aws_utils.py
        ├── test_azure_utils.py
        ├── test_backend_utils.py
        ├── test_cloud.py
        ├── test_controller_utils.py
        ├── test_dag.py
        ├── test_dag_utils.py
        ├── test_exceptions.py
        ├── test_gcp.py
        ├── test_jobs_utils.py
        ├── test_lambda.py
        ├── test_resources.py
        ├── test_sky
            ├── adaptors
            │   └── test_oci.py
            ├── clouds
            │   ├── test_kubernetes.py
            │   └── test_ssh.py
            ├── server
            │   ├── requests
            │   │   ├── queues
            │   │   │   └── test_mp_queue.py
            │   │   ├── test_precond.py
            │   │   ├── test_process.py
            │   │   └── test_requests.py
            │   ├── test_common.py
            │   ├── test_config.py
            │   ├── test_sdk.py
            │   └── test_server.py
            ├── storage
            │   └── test_storage_utils.py
            ├── test_sky_logging.py
            ├── test_task.py
            ├── utils
            │   ├── kubernetes
            │   │   └── test_skypilot_config_configmap_sync.py
            │   ├── test_cli_utils.py
            │   ├── test_common_utils.py
            │   ├── test_config_utils.py
            │   ├── test_context_utils.py
            │   ├── test_infra_utils.py
            │   ├── test_rich_utils.py
            │   ├── test_schemas.py
            │   ├── test_subprocess_utils.py
            │   └── text_context.py
            └── workspaces
            │   ├── test_workspace_config_concurrency.py
            │   ├── test_workspace_management.py
            │   └── test_workspace_race_condition_demo.py
        ├── test_sky_import.py
        └── test_zip_and_unzip.py


/.dockerignore:
--------------------------------------------------------------------------------
1 | **/.git
2 | 


--------------------------------------------------------------------------------
/.github/issue_template.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Issue
 3 | about: Use this to open new issues.
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | <!-- Describe the bug report / feature request here -->
10 | 
11 | 
12 | 
13 | 
14 | <!-- If relevant, fill in versioning info to help us troubleshoot -->
15 | _Version & Commit info:_
16 | * `sky -v`: PLEASE_FILL_IN
17 | * `sky -c`: PLEASE_FILL_IN
18 | 


--------------------------------------------------------------------------------
/.github/workflows/dashboard.yml:
--------------------------------------------------------------------------------
 1 | name: Dashboard Linting and Formatting
 2 | 
 3 | on:
 4 |   # Trigger the workflow on push or pull request,
 5 |   # but only for the master and release branches
 6 |   push:
 7 |     branches:
 8 |       - master
 9 |       - 'releases/**'
10 |   pull_request:
11 |     branches:
12 |       - master
13 |       - 'releases/**'
14 |   merge_group:
15 | 
16 | jobs:
17 |   dashboard:
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |     - uses: actions/checkout@v3
21 |     - name: Install dependencies and check
22 |       run: |
23 |         npm --prefix sky/dashboard install
24 |         npm --prefix sky/dashboard run lint
25 |         npm --prefix sky/dashboard run format:check
26 |         npm --prefix sky/dashboard run build
27 | 


--------------------------------------------------------------------------------
/.github/workflows/go-reviewable.yaml:
--------------------------------------------------------------------------------
 1 | name: go-reviewable
 2 | 
 3 | on:
 4 |     pull_request:
 5 |       branches:
 6 |         - master
 7 |         - 'releases/**'
 8 |       paths:
 9 |         - 'addons/fuse-proxy/**'
10 | 
11 | jobs:
12 |   reviewable:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |       - name: Set up Go
17 |         uses: actions/setup-go@v4
18 |         with:
19 |           go-version: '1.23'
20 |       - name: Run reviewable
21 |         working-directory: addons/fuse-proxy
22 |         run: make reviewable
23 | 


--------------------------------------------------------------------------------
/.github/workflows/pytest-generic.yml:
--------------------------------------------------------------------------------
 1 | # This is needed for GitHub Actions for the "Waiting for status to be reported" problem,
 2 | # according to https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/troubleshooting-required-status-checks
 3 | name: Python Tests
 4 | on:
 5 |   # Trigger the workflow on push or pull request,
 6 |   # but only for the master and release branches
 7 |   push:
 8 |     branches:
 9 |       - master
10 |       - 'releases/**'
11 |   pull_request:
12 |     branches:
13 |       - master
14 |       - 'releases/**'
15 |   merge_group:
16 | 
17 | jobs:
18 |   python-test:
19 |     runs-on: ubuntu-latest
20 |     steps:
21 |       - run: 'echo "No tests to run"'
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | **/.DS_Store
 2 | **/*.pyc
 3 | **/__pycache__/
 4 | *.egg-info/
 5 | *.eggs/
 6 | 
 7 | docs/build/
 8 | docs/_build/
 9 | build/
10 | sky_logs/
11 | sky/clouds/service_catalog/data_fetchers/*.csv
12 | .vscode/
13 | .idea/
14 | .env
15 | 
16 | # For editor files
17 | *.swp
18 | .buildkite/*.yaml
19 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | build:
 9 |   os: "ubuntu-22.04"
10 |   tools:
11 |     python: "3.10"
12 | 
13 | # Build documentation in the docs/ directory with Sphinx
14 | sphinx:
15 |   configuration: docs/source/conf.py
16 | 
17 | # Optionally set the version of Python and requirements required to build your docs
18 | python:
19 |   install:
20 |     - method: pip
21 |       path: .
22 |     - requirements: docs/requirements-docs.txt
23 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | sky/setup_files/MANIFEST.in


--------------------------------------------------------------------------------
/addons/fuse-proxy/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | 


--------------------------------------------------------------------------------
/addons/fuse-proxy/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/skypilot-org/skypilot/addons/fuse-proxy
 2 | 
 3 | go 1.23.3
 4 | 
 5 | require (
 6 | 	github.com/pfnet-research/meta-fuse-csi-plugin v0.2.2
 7 | 	github.com/sevlyar/go-daemon v0.1.6
 8 | 	k8s.io/klog/v2 v2.100.1
 9 | )
10 | 
11 | require (
12 | 	github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
13 | 	golang.org/x/sys v0.18.0 // indirect
14 | )
15 | 
16 | require (
17 | 	github.com/go-logr/logr v1.2.4 // indirect
18 | 	github.com/spf13/pflag v1.0.6
19 | 	k8s.io/apimachinery v0.28.1 // indirect
20 | )
21 | 


--------------------------------------------------------------------------------
/charts/skypilot/.gitignore:
--------------------------------------------------------------------------------
1 | Chart.lock
2 | charts/
3 | 


--------------------------------------------------------------------------------
/charts/skypilot/.helmignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/charts/skypilot/.helmignore


--------------------------------------------------------------------------------
/charts/skypilot/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v2
 2 | name: skypilot
 3 | description: A Helm chart for deploying SkyPilot API server on Kubernetes
 4 | type: application
 5 | version: 0.0.0
 6 | appVersion: "0.0"
 7 | dependencies:
 8 |   - name: ingress-nginx
 9 |     version: 4.11.3
10 |     repository: https://kubernetes.github.io/ingress-nginx
11 |     condition: ingress-nginx.enabled
12 | 


--------------------------------------------------------------------------------
/charts/skypilot/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | {{- if not .Values.apiService.skipResourceCheck }}
2 | {{- include "skypilot.checkResources" . }}
3 | {{- end }}
4 | {{- include "skypilot.checkUpgradeConfig" . }}
5 | 


--------------------------------------------------------------------------------
/charts/skypilot/templates/api-configmap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap
 3 | metadata:
 4 |   name: {{ .Release.Name }}-config
 5 |   namespace: {{ .Release.Namespace }}
 6 | data:
 7 |   config.yaml: |-
 8 |     {{- if .Values.apiService.config }}
 9 |     {{- .Values.apiService.config | nindent 4 }}
10 |     {{- else }}
11 |     {}
12 |     {{- end }} 
13 | 


--------------------------------------------------------------------------------
/charts/skypilot/templates/api-secrets.yaml:
--------------------------------------------------------------------------------
 1 | {{- /* Use secret since sshNodePools config may contain credentials */ -}}
 2 | {{- if .Values.apiService.sshNodePools}}
 3 | apiVersion: v1
 4 | kind: Secret
 5 | metadata:
 6 |   name: {{ .Release.Name }}-ssh-node-pools
 7 |   namespace: {{ .Release.Namespace }}
 8 | stringData:
 9 |   ssh_node_pools.yaml: |
10 | {{ .Values.apiService.sshNodePools | indent 4 }} 
11 | {{- end }}
12 | 


--------------------------------------------------------------------------------
/charts/skypilot/templates/api-service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: {{ .Release.Name }}-api-service
 5 |   namespace: {{ .Release.Namespace }}
 6 | spec:
 7 |   type: ClusterIP  # Use clusterIP to allow ingress to authenticate
 8 |   ports:
 9 |     - port: 80
10 |       targetPort: 46580  # Assuming your container listens on port 46580
11 |       protocol: TCP
12 |   selector:
13 |     app: {{ .Release.Name }}-api
14 |     skypilot.co/ready: "true"
15 | 


--------------------------------------------------------------------------------
/charts/skypilot/templates/auth.yaml:
--------------------------------------------------------------------------------
 1 | {{- if and (not .Values.ingress.authSecret) .Values.ingress.authCredentials (not (index .Values.ingress "oauth2-proxy" "enabled")) }}
 2 | apiVersion: v1
 3 | kind: Secret
 4 | metadata:
 5 |   name: {{ .Release.Name }}-basic-auth
 6 |   namespace: {{ .Release.Namespace }}
 7 | type: Opaque
 8 | stringData:
 9 |   auth: {{ .Values.ingress.authCredentials | quote }}
10 | {{- end }}
11 | 


--------------------------------------------------------------------------------
/charts/skypilot/templates/oauth2-proxy-service.yaml:
--------------------------------------------------------------------------------
 1 | {{- if and .Values.ingress.enabled (index .Values.ingress "oauth2-proxy" "enabled") }}
 2 | apiVersion: v1
 3 | kind: Service
 4 | metadata:
 5 |   labels:
 6 |     app: {{ .Release.Name }}-oauth2-proxy
 7 |     skypilot.co/component: oauth2-proxy
 8 |   name: {{ .Release.Name }}-oauth2-proxy
 9 |   namespace: {{ .Release.Namespace }}
10 | spec:
11 |   ports:
12 |   - name: http
13 |     port: 4180
14 |     protocol: TCP
15 |     targetPort: 4180
16 |   selector:
17 |     app: {{ .Release.Name }}-oauth2-proxy
18 | {{- end }}
19 | 


--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/.nojekyll


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 
22 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # Documentation
 2 | Sphinx docs based on ReadTheDocs.
 3 | 
 4 | ## Styleguide
 5 | 
 6 | - Each page's title is in [Title Case](https://en.wikipedia.org/wiki/Title_case).
 7 | - Each subsection's title is in [Sentence case](https://en.wikipedia.org/wiki/Sentence_case).
 8 | 
 9 | ## Build and view locally
10 | ```bash
11 | pip install -r requirements-docs.txt
12 | ./build.sh --watch --port 8000
13 | ```
14 | 


--------------------------------------------------------------------------------
/docs/repo-images/README:
--------------------------------------------------------------------------------
1 | Images in this directory are intended to be used by README.md files in the repo.
2 | They should not be used in the docs.
3 | 


--------------------------------------------------------------------------------
/docs/repo-images/managed-job-schedule-state-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/repo-images/managed-job-schedule-state-diagram.png


--------------------------------------------------------------------------------
/docs/repo-images/managed-job-status-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/repo-images/managed-job-status-diagram.png


--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
 1 | linkify-it-py==2.0.3
 2 | myst-parser==2.0.0
 3 | sphinx==7.1.2
 4 | sphinx-click==5.0.1
 5 | sphinx-copybutton==0.5.2
 6 | sphinxemoji==0.2.0
 7 | sphinx-design==0.5.0
 8 | pydata-sphinx-theme==0.14.4
 9 | Pygments==2.16.1
10 | sphinx-autobuild==2021.3.14
11 | sphinx-autodoc-typehints==1.25.2
12 | sphinx-book-theme==1.1.0
13 | sphinx-togglebutton==0.3.2
14 | sphinx-notfound-page==1.0.4
15 | sphinxcontrib-applehelp==1.0.7
16 | sphinxcontrib-devhelp==1.0.5
17 | sphinxcontrib-googleanalytics==0.4
18 | sphinxcontrib-htmlhelp==2.0.4
19 | sphinxcontrib-jsmath==1.0.1
20 | sphinxcontrib-qthelp==1.0.6
21 | sphinxcontrib-serializinghtml==1.1.9
22 | 


--------------------------------------------------------------------------------
/docs/source/.gitignore:
--------------------------------------------------------------------------------
1 | generated-examples/
2 | 


--------------------------------------------------------------------------------
/docs/source/_static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/_static/favicon.ico


--------------------------------------------------------------------------------
/docs/source/_static/intro.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/_static/intro.gif


--------------------------------------------------------------------------------
/docs/source/_static/intro.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/_static/intro.mp4


--------------------------------------------------------------------------------
/docs/source/_static/rtd-data.js:
--------------------------------------------------------------------------------
1 | // Dummy data for testing ReadTheDocs footer insertion
2 | // This mimics RTD data for a project that uses both versions + languages
3 | var READTHEDOCS_DATA = {
4 |     project: "frc-docs",
5 |     version: "latest",
6 |     language: "en",
7 |     proxied_api_host: "https://readthedocs.org",
8 | };
9 | 


--------------------------------------------------------------------------------
/docs/source/_templates/author.html:
--------------------------------------------------------------------------------
1 | {% if author %}
2 | <p class="component-author">
3 |     By {{ author }}
4 | </p>
5 | {% endif %}
6 | 


--------------------------------------------------------------------------------
/docs/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "pydata_sphinx_theme/layout.html" %}
2 | 
3 | {% block docs_navbar %}
4 | {% include "header.html" ignore missing %}
5 | {{ super() }}
6 | {% endblock %} 
7 | 


--------------------------------------------------------------------------------
/docs/source/_templates/main-sidebar-home.html:
--------------------------------------------------------------------------------
 1 | {# Displays the full site TOC tree starting from the top level (startdepth=0), collapsed, rather than only the subtree under the currently active top-level TOCtree element. #}
 2 | <nav class="bd-docs-nav bd-links" aria-label="{{ _('SkyPilot') }}">
 3 |     <!-- <p class="bd-links__title" role="heading" aria-level="1">{{ _("SkyPilot") }}</p> -->
 4 |     <div class="bd-toc-item navbar-nav">
 5 |     {{- generate_toctree_html(
 6 |     "sidebar",
 7 |     startdepth=0,
 8 |     show_nav_level=theme_show_nav_level | int,
 9 |     maxdepth=theme_navigation_depth | int,
10 |     collapse=True,
11 |     includehidden=theme_sidebar_includehidden | tobool,
12 |     titles_only=True
13 |     )
14 |     -}}
15 |     </div>
16 | </nav>
17 | 


--------------------------------------------------------------------------------
/docs/source/_templates/main-sidebar.html:
--------------------------------------------------------------------------------
 1 | {# Displays the TOC-subtree for pages nested under the currently active top-level TOCtree element. #}
 2 | <nav class="bd-docs-nav bd-links" aria-label="{{ _('SkyPilot') }}">
 3 |     <!-- <p class="bd-links__title" role="heading" aria-level="1">{{ _("SkyPilot") }}</p> -->
 4 |     <div class="bd-toc-item navbar-nav">
 5 |     {{- generate_toctree_html(
 6 |     "sidebar",
 7 |     show_nav_level=theme_show_nav_level | int,
 8 |     maxdepth=theme_navigation_depth | int,
 9 |     collapse=theme_collapse_navigation | tobool,
10 |     includehidden=theme_sidebar_includehidden | tobool,
11 |     titles_only=True
12 |     )
13 |     -}}
14 |     </div>
15 | </nav>
16 | 


--------------------------------------------------------------------------------
/docs/source/developers/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ../../../CONTRIBUTING.md


--------------------------------------------------------------------------------
/docs/source/developers/index.rst:
--------------------------------------------------------------------------------
1 | Developer Guides
2 | =================
3 | 
4 | .. toctree::
5 |    :maxdepth: 1
6 | 
7 |    ../developers/CONTRIBUTING
8 |    Guide: Adding a New Cloud <https://docs.google.com/document/d/1oWox3qb3Kz3wXXSGg9ZJWwijoa99a3PIQUHBR8UgEGs/edit?usp=sharing>
9 | 


--------------------------------------------------------------------------------
/docs/source/examples/applications/batch_inference.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/batch_inference.md


--------------------------------------------------------------------------------
/docs/source/examples/applications/index.rst:
--------------------------------------------------------------------------------
 1 | AI Applications
 2 | ====================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 | 
 7 |    DeepSeek-R1 for RAG <rag>
 8 |    Large-Scale Batch Inference <batch_inference>
 9 |    Image Vector Database <vector_database>
10 |    Tabby: Coding Assistant <tabby>
11 |    LocalGPT: Chat with PDF <localgpt>
12 |    Stable Diffusion <stable_diffusion>
13 | 


--------------------------------------------------------------------------------
/docs/source/examples/applications/localgpt.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/localgpt.md


--------------------------------------------------------------------------------
/docs/source/examples/applications/rag.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/rag.md


--------------------------------------------------------------------------------
/docs/source/examples/applications/stable_diffusion.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/stable_diffusion.md


--------------------------------------------------------------------------------
/docs/source/examples/applications/tabby.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/tabby.md


--------------------------------------------------------------------------------
/docs/source/examples/applications/vector_database.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/vector_database.md


--------------------------------------------------------------------------------
/docs/source/examples/frameworks/airflow.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/airflow.md


--------------------------------------------------------------------------------
/docs/source/examples/frameworks/dvc.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/dvc.md


--------------------------------------------------------------------------------
/docs/source/examples/frameworks/index.rst:
--------------------------------------------------------------------------------
 1 | Frameworks
 2 | ====================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 | 
 7 |    Airflow <airflow>
 8 |    Cross-cloud data transfer <https://nebius.com/blog/posts/bulk-object-storage-s3-data-migration-with-skypilot>
 9 |    DVC <dvc>
10 |    GCP DWS/Kueue <https://gke-ai-labs.dev/docs/tutorials/skypilot/resource-management-using-kueue/>
11 |    Jupyter <jupyter>
12 |    MLFlow <https://nebius.com/blog/posts/orchestrating-llm-fine-tuning-k8s-skypilot-mlflow>
13 |    MPI <mpi>
14 | 


--------------------------------------------------------------------------------
/docs/source/examples/frameworks/jupyter.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/jupyter_lab.md


--------------------------------------------------------------------------------
/docs/source/examples/frameworks/mpi.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/mpirun.md


--------------------------------------------------------------------------------
/docs/source/examples/models/codellama.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/codellama.md


--------------------------------------------------------------------------------
/docs/source/examples/models/dbrx.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/dbrx.md


--------------------------------------------------------------------------------
/docs/source/examples/models/deepseek-janus.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/deepseek-janus.md


--------------------------------------------------------------------------------
/docs/source/examples/models/deepseek-r1-distilled.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/deepseek-r1-distilled.md


--------------------------------------------------------------------------------
/docs/source/examples/models/deepseek-r1.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/deepseek-r1.md


--------------------------------------------------------------------------------
/docs/source/examples/models/gemma.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/gemma.md


--------------------------------------------------------------------------------
/docs/source/examples/models/gemma3.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/gemma3.md


--------------------------------------------------------------------------------
/docs/source/examples/models/gpt-2.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/gpt-2.md


--------------------------------------------------------------------------------
/docs/source/examples/models/index.rst:
--------------------------------------------------------------------------------
 1 | Models
 2 | ============
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 | 
 7 |    DeepSeek-R1 <deepseek-r1>
 8 |    DeepSeek-R1 Distilled <deepseek-r1-distilled>
 9 |    DeepSeek-Janus <deepseek-janus>
10 |    Gemma 3 <gemma3>
11 |    Llama 4 <llama-4>
12 |    Llama 3.2 <llama-3_2>
13 |    Llama 3.1 <llama-3_1>
14 |    Llama 3 <llama-3>
15 |    Llama 2 <llama-2>
16 |    CodeLlama <codellama>
17 |    Pixtral <pixtral>
18 |    Mixtral <mixtral>
19 |    Mistral 7B <https://docs.mistral.ai/self-deployment/skypilot/>
20 |    Qwen 2.5 <qwen>
21 |    Yi <yi>
22 |    Gemma <gemma>
23 |    DBRX <dbrx>
24 |    GPT-2 via llm.c <gpt-2>
25 |    Vicuna <vicuna>
26 | 


--------------------------------------------------------------------------------
/docs/source/examples/models/llama-2.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/llama-2.md


--------------------------------------------------------------------------------
/docs/source/examples/models/llama-3.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/llama-3.md


--------------------------------------------------------------------------------
/docs/source/examples/models/llama-3_1.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/llama-3_1.md


--------------------------------------------------------------------------------
/docs/source/examples/models/llama-3_2.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/llama-3_2.md


--------------------------------------------------------------------------------
/docs/source/examples/models/llama-4.md:
--------------------------------------------------------------------------------
1 | ../../../../llm/llama-4/README.md


--------------------------------------------------------------------------------
/docs/source/examples/models/mixtral.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/mixtral.md


--------------------------------------------------------------------------------
/docs/source/examples/models/pixtral.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/pixtral.md


--------------------------------------------------------------------------------
/docs/source/examples/models/qwen.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/qwen.md


--------------------------------------------------------------------------------
/docs/source/examples/models/vicuna.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/vicuna.md


--------------------------------------------------------------------------------
/docs/source/examples/models/yi.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/yi.md


--------------------------------------------------------------------------------
/docs/source/examples/performance/aws_efa.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/aws_efa.md


--------------------------------------------------------------------------------
/docs/source/examples/performance/gcp_gpu_direct_tcpx.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/gcp_gpu_direct_tcpx.md


--------------------------------------------------------------------------------
/docs/source/examples/performance/index.rst:
--------------------------------------------------------------------------------
 1 | AI Performance
 2 | ====================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 | 
 7 |    AWS EFA <aws_efa>
 8 |    GCP GPUDirect-TCPX <gcp_gpu_direct_tcpx>
 9 |    Nebius with InfiniBand <nebius_infiniband>
10 | 


--------------------------------------------------------------------------------
/docs/source/examples/performance/nebius_infiniband.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/nebius_infiniband.md


--------------------------------------------------------------------------------
/docs/source/examples/serving/cog.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/cog.md


--------------------------------------------------------------------------------
/docs/source/examples/serving/index.rst:
--------------------------------------------------------------------------------
 1 | Serving
 2 | ====================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 | 
 7 |    vLLM <vllm>
 8 |    SGLang <sglang>
 9 |    Ollama <ollama>
10 |    Hugging Face TGI <tgi>
11 |    LoRAX <lorax>
12 |    Cog <cog>
13 | 


--------------------------------------------------------------------------------
/docs/source/examples/serving/lorax.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/lorax.md


--------------------------------------------------------------------------------
/docs/source/examples/serving/ollama.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/ollama.md


--------------------------------------------------------------------------------
/docs/source/examples/serving/sglang.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/sglang.md


--------------------------------------------------------------------------------
/docs/source/examples/serving/tgi.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/tgi.md


--------------------------------------------------------------------------------
/docs/source/examples/serving/vllm.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/vllm.md


--------------------------------------------------------------------------------
/docs/source/examples/training/axolotl.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/axolotl.md


--------------------------------------------------------------------------------
/docs/source/examples/training/deepspeed.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/deepspeed-multinode.md


--------------------------------------------------------------------------------
/docs/source/examples/training/distributed-pytorch.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/distributed-pytorch.md


--------------------------------------------------------------------------------
/docs/source/examples/training/distributed-tensorflow.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/tensorflow_distributed.md


--------------------------------------------------------------------------------
/docs/source/examples/training/index.rst:
--------------------------------------------------------------------------------
 1 | Training
 2 | =========
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 | 
 7 |    Axolotl <axolotl.md>
 8 |    DeepSpeed <deepspeed.md>
 9 |    Distributed PyTorch <distributed-pytorch.md>
10 |    Distributed TensorFlow <distributed-tensorflow.md>
11 |    Finetuning Llama 3.1 <llama-3_1-finetuning.md>
12 |    Finetuning Llama 2 <llama-2-finetuning.md>
13 |    NeMo <nemo.md>
14 |    Ray <ray.md>
15 |    Training on TPUs <tpu.md>
16 |    Unsloth <unsloth.md>
17 |    Vertex AI <https://medium.com/google-cloud/streamline-ai-ml-model-development-on-gke-with-skypilot-and-vertex-ai-workbench-453729a8897c>
18 | 


--------------------------------------------------------------------------------
/docs/source/examples/training/llama-2-finetuning.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/vicuna-llama-2.md


--------------------------------------------------------------------------------
/docs/source/examples/training/llama-3_1-finetuning.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/llama-3_1-finetuning.md


--------------------------------------------------------------------------------
/docs/source/examples/training/nemo.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/nemo.md


--------------------------------------------------------------------------------
/docs/source/examples/training/ray.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/distributed_ray_train.md


--------------------------------------------------------------------------------
/docs/source/examples/training/tpu.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/tpu.md


--------------------------------------------------------------------------------
/docs/source/examples/training/unsloth.md:
--------------------------------------------------------------------------------
1 | ../../generated-examples/unsloth.md


--------------------------------------------------------------------------------
/docs/source/gallery/applications/localgpt.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/applications/localgpt.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/applications/localgpt.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/applications/rag.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/applications/rag.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/applications/rag.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/applications/tabby.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/applications/tabby.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/applications/tabby.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/applications/vector_database.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/applications/vector_database.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/applications/vector_database.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/frameworks/lorax.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/serving/lorax.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/serving/lorax.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/frameworks/ollama.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/serving/ollama.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/serving/ollama.html">
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/frameworks/sglang.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/serving/sglang.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/serving/sglang.html">
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/frameworks/tgi.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/serving/tgi.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/serving/tgi.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/frameworks/vllm.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/serving/vllm.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/serving/vllm.html">
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/index.rst:
--------------------------------------------------------------------------------
 1 | .. The whole gallery/ folder is deprecated and has been replaced by examples/. It is kept only to redirect old URLs.
 2 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 3 | 
 4 | :orphan:
 5 | 
 6 | .. _ai-gallery:
 7 | 
 8 | .. raw:: html
 9 | 
10 |    <script type="text/javascript">
11 |        window.location.href = "../examples/index.html";
12 |    </script>
13 |    <meta http-equiv="refresh" content="0; url=../examples/index.html">
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/codellama.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/codellama.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/codellama.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/dbrx.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/dbrx.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/dbrx.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/deepseek-janus.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/deepseek-janus.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/deepseek-janus.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/deepseek-r1-distilled.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/deepseek-r1-distilled.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/deepseek-r1-distilled.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/deepseek-r1.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/deepseek-r1.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/deepseek-r1.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/gemma.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/gemma.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/gemma.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/gpt-2.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/gpt-2.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/gpt-2.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/index.rst:
--------------------------------------------------------------------------------
 1 | .. The whole gallery/ folder is deprecated and has been replaced by examples/. It is kept only to redirect old URLs.
 2 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 3 | 
 4 | :orphan:
 5 | 
 6 | .. raw:: html
 7 | 
 8 |    <script type="text/javascript">
 9 |        window.location.href = "../../examples/models/index.html";
10 |    </script>
11 |    <meta http-equiv="refresh" content="0; url=../../examples/models/index.html">
12 | 
13 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/llama-2.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/llama-2.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/llama-2.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/llama-3.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/llama-3.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/llama-3.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/llama-3_1.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/llama-3_1.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/llama-3_1.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/llama-3_2.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/llama-3_2.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/llama-3_2.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/mixtral.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/mixtral.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/mixtral.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/pixtral.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/pixtral.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/pixtral.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/qwen.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/qwen.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/qwen.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/vicuna.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/vicuna.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/vicuna.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/gallery/llms/yi.rst:
--------------------------------------------------------------------------------
 1 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 2 | 
 3 | :orphan:
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "../../examples/models/yi.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=../../examples/models/yi.html"> 
11 | 


--------------------------------------------------------------------------------
/docs/source/images/SkyPilot-logo-wide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/SkyPilot-logo-wide.png


--------------------------------------------------------------------------------
/docs/source/images/ai-gallery-cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/ai-gallery-cover.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/arch.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/cluster-users.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/cluster-users.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/executor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/executor.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/high-level-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/high-level-arch.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/local.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/local.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/okta-setup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/okta-setup.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/okta.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/okta.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/remote.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/remote.png


--------------------------------------------------------------------------------
/docs/source/images/client-server/token-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/client-server/token-page.png


--------------------------------------------------------------------------------
/docs/source/images/cloud-logos-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/cloud-logos-dark.png


--------------------------------------------------------------------------------
/docs/source/images/cloud-logos-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/cloud-logos-light.png


--------------------------------------------------------------------------------
/docs/source/images/dashboard-clusters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/dashboard-clusters.png


--------------------------------------------------------------------------------
/docs/source/images/dashboard-managed-jobs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/dashboard-managed-jobs.png


--------------------------------------------------------------------------------
/docs/source/images/gcp-vm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/gcp-vm.png


--------------------------------------------------------------------------------
/docs/source/images/jupyter-auth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/jupyter-auth.png


--------------------------------------------------------------------------------
/docs/source/images/jupyter-covid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/jupyter-covid.png


--------------------------------------------------------------------------------
/docs/source/images/jupyter-create.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/jupyter-create.png


--------------------------------------------------------------------------------
/docs/source/images/jupyter-gpu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/jupyter-gpu.png


--------------------------------------------------------------------------------
/docs/source/images/k8s-pod.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/k8s-pod.png


--------------------------------------------------------------------------------
/docs/source/images/k8s-skypilot-architecture-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/k8s-skypilot-architecture-dark.png


--------------------------------------------------------------------------------
/docs/source/images/k8s-skypilot-architecture-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/k8s-skypilot-architecture-light.png


--------------------------------------------------------------------------------
/docs/source/images/managed-jobs-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/managed-jobs-arch.png


--------------------------------------------------------------------------------
/docs/source/images/managed-jobs-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/managed-jobs-dashboard.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/aws/aws-add-policy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/aws/aws-add-policy.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/aws/aws-add-role-entity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/aws/aws-add-role-entity.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/aws/aws-add-role.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/aws/aws-add-role.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/aws/aws-add-user.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/aws/aws-add-user.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/aws/aws-create-access-key.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/aws/aws-create-access-key.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/aws/aws-create-policy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/aws/aws-create-policy.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/gcp/cloud-nat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/gcp/cloud-nat.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/gcp/create-iam.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/gcp/create-iam.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/gcp/create-role.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/gcp/create-role.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/gcp/create-service-account.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/gcp/create-service-account.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/gcp/service-account-grant-role.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/gcp/service-account-grant-role.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/gcp/service-account-name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/gcp/service-account-name.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/kubernetes/kubernetes-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/kubernetes/kubernetes-dashboard.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/nebius/nebius-k8s-attach-fs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/nebius/nebius-k8s-attach-fs.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-lib-item-tag-adding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-lib-item-tag-adding.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-lib-item.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-lib-item.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-lib-local.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-lib-local.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-lib-name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-lib-name.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-lib-security-policy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-lib-security-policy.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-lib-storage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-lib-storage.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-libs-navigate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-libs-navigate.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/content-libs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/content-libs.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vm-clone-to-template-cl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vm-clone-to-template-cl.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vm-clone-to-template-ovf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vm-clone-to-template-ovf.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vm-clone-to-template.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vm-clone-to-template.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-catagory-create.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-catagory-create.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-catagory-create_navigate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-catagory-create_navigate.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-catagory-create_navigate_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-catagory-create_navigate_new.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-datastore-tag-adding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-datastore-tag-adding.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-tags-create.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-tags-create.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-tags-create_navigate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-tags-create_navigate.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-inventory.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-inventory.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-name.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-navigate-new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-navigate-new.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-navigate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-navigate.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-review.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-review.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-rule.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-rule.png


--------------------------------------------------------------------------------
/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-tag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/screenshots/vsphere/vsphere-vm-storage-policy-tag.png


--------------------------------------------------------------------------------
/docs/source/images/sky-above-clouds-gen.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-above-clouds-gen.jpg


--------------------------------------------------------------------------------
/docs/source/images/sky-existing-infra-workflow-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-existing-infra-workflow-dark.png


--------------------------------------------------------------------------------
/docs/source/images/sky-existing-infra-workflow-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-existing-infra-workflow-light.png


--------------------------------------------------------------------------------
/docs/source/images/sky-serve-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-serve-architecture.png


--------------------------------------------------------------------------------
/docs/source/images/sky-serve-status-full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-serve-status-full.png


--------------------------------------------------------------------------------
/docs/source/images/sky-serve-status-output-provisioning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-serve-status-output-provisioning.png


--------------------------------------------------------------------------------
/docs/source/images/sky-serve-status-tgi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-serve-status-tgi.png


--------------------------------------------------------------------------------
/docs/source/images/sky-serve-status-vicuna-ready.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-serve-status-vicuna-ready.png


--------------------------------------------------------------------------------
/docs/source/images/sky-serve-status-vllm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/sky-serve-status-vllm.png


--------------------------------------------------------------------------------
/docs/source/images/skypilot-abstractions-long-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/skypilot-abstractions-long-2.png


--------------------------------------------------------------------------------
/docs/source/images/skypilot-wide-dark-1k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/skypilot-wide-dark-1k.png


--------------------------------------------------------------------------------
/docs/source/images/skypilot-wide-light-1k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/skypilot-wide-light-1k.png


--------------------------------------------------------------------------------
/docs/source/images/ssh-node-pools/infra.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/ssh-node-pools/infra.png


--------------------------------------------------------------------------------
/docs/source/images/ssh-node-pools/pool-details.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/ssh-node-pools/pool-details.png


--------------------------------------------------------------------------------
/docs/source/images/workspaces/config.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/workspaces/config.png


--------------------------------------------------------------------------------
/docs/source/images/workspaces/edit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/workspaces/edit.png


--------------------------------------------------------------------------------
/docs/source/images/workspaces/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/workspaces/overview.png


--------------------------------------------------------------------------------
/docs/source/images/workspaces/resources.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/docs/source/images/workspaces/resources.png


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | Welcome to SkyPilot!
 2 | ====================
 3 | 
 4 | 
 5 | .. raw:: html
 6 | 
 7 |    <script type="text/javascript">
 8 |        window.location.href = "docs/index.html";
 9 |    </script>
10 |    <meta http-equiv="refresh" content="0; url=./docs/index.html">
11 | 
12 | .. toctree::
13 |    :maxdepth: 1
14 |    :caption: Contents
15 |    :hidden:
16 | 
17 |    Docs <docs/index>
18 |    Blog <https://blog.skypilot.co/>
19 |    Community <https://blog.skypilot.co/community/>
20 | 
21 | 


--------------------------------------------------------------------------------
/docs/source/reference/comparison.rst:
--------------------------------------------------------------------------------
 1 | :orphan:
 2 | 
 3 | .. People may hit this old URL; we redirect to the new one instead of 404ing.
 4 | .. raw:: html
 5 | 
 6 |    <script type="text/javascript">
 7 |        window.location.href = "kubernetes/skypilot-and-vanilla-k8s.html";
 8 |    </script>
 9 |    <meta http-equiv="refresh" content="0; url=kubernetes/skypilot-and-vanilla-k8s.html">
10 | 


--------------------------------------------------------------------------------
/docs/source/reference/kubernetes/examples/index.rst:
--------------------------------------------------------------------------------
 1 | .. _kubernetes-examples:
 2 | 
 3 | Kubernetes Examples
 4 | ===================
 5 | 
 6 | .. toctree::
 7 |    :maxdepth: 1
 8 | 
 9 |    Dynamic Workload Scheduler <https://gke-ai-labs.dev/docs/tutorials/skypilot/resource-management-using-kueue/>
10 |    Kueue <https://gke-ai-labs.dev/docs/tutorials/skypilot/resource-management-using-kueue/>
11 |    Multi-region Kubernetes <https://gke-ai-labs.dev/docs/tutorials/skypilot/cross-region-capacity-chasing/>
12 | 


--------------------------------------------------------------------------------
/docs/source/serving/user-guides.rst:
--------------------------------------------------------------------------------
 1 | Serving User Guides
 2 | ================================================
 3 | 
 4 | .. toctree::
 5 | 
 6 |    autoscaling
 7 |    update
 8 |    auth
 9 |    spot-policy
10 |    https
11 | 


--------------------------------------------------------------------------------
/examples/admin_policy/add_labels.yaml:
--------------------------------------------------------------------------------
1 | admin_policy: example_policy.AddLabelsPolicy
2 | 


--------------------------------------------------------------------------------
/examples/admin_policy/disable_public_ip.yaml:
--------------------------------------------------------------------------------
1 | admin_policy: example_policy.DisablePublicIpPolicy
2 | 


--------------------------------------------------------------------------------
/examples/admin_policy/dynamic_kubernetes_contexts_update.yaml:
--------------------------------------------------------------------------------
1 | admin_policy: example_policy.DynamicKubernetesContextsUpdatePolicy
2 | 


--------------------------------------------------------------------------------
/examples/admin_policy/enforce_autostop.yaml:
--------------------------------------------------------------------------------
1 | admin_policy: example_policy.EnforceAutostopPolicy
2 | 


--------------------------------------------------------------------------------
/examples/admin_policy/example_policy/example_policy/__init__.py:
--------------------------------------------------------------------------------
1 | """Example admin policy module and prebuilt policies."""
2 | from example_policy.skypilot_policy import AddLabelsPolicy
3 | from example_policy.skypilot_policy import DisablePublicIpPolicy
4 | from example_policy.skypilot_policy import DynamicKubernetesContextsUpdatePolicy
5 | from example_policy.skypilot_policy import EnforceAutostopPolicy
6 | from example_policy.skypilot_policy import RejectAllPolicy
7 | from example_policy.skypilot_policy import UseSpotForGpuPolicy
8 | 


--------------------------------------------------------------------------------
/examples/admin_policy/example_policy/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [project]
6 | name = "example_policy"
7 | version = "0.0.1"
8 | 


--------------------------------------------------------------------------------
/examples/admin_policy/reject_all.yaml:
--------------------------------------------------------------------------------
1 | admin_policy: example_policy.RejectAllPolicy
2 | 


--------------------------------------------------------------------------------
/examples/admin_policy/task.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws
 3 |   cpus: 2
 4 |   labels:
 5 |     other_labels: test
 6 | 
 7 | 
 8 | setup: |
 9 |   echo "setup"
10 | 
11 | run: |
12 |   echo "run"
13 | 


--------------------------------------------------------------------------------
/examples/admin_policy/use_spot_for_gpu.yaml:
--------------------------------------------------------------------------------
1 | admin_policy: example_policy.UseSpotForGpuPolicy
2 | 


--------------------------------------------------------------------------------
/examples/airflow/data_preprocessing.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   cpus: 1
 3 | 
 4 | envs:
 5 |   DATA_BUCKET_NAME: sky-demo-data-test
 6 |   DATA_BUCKET_STORE_TYPE: s3
 7 | 
 8 | file_mounts:
 9 |   /data:
10 |     name: $DATA_BUCKET_NAME
11 |     store: $DATA_BUCKET_STORE_TYPE
12 | 
13 | setup: |
14 |   echo "Setting up dependencies for data preprocessing..."
15 | 
16 | run: |
17 |   echo "Running data preprocessing..."
18 |   
19 |   # Generate a few files with random data to simulate data preprocessing
20 |   for i in {0..9}; do
21 |       dd if=/dev/urandom of=/data/file_$i bs=1M count=10
22 |   done
23 |   
24 |   echo "Data preprocessing completed, wrote to $DATA_BUCKET_NAME"
25 | 
26 | 


--------------------------------------------------------------------------------
/examples/airflow/eval.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   cpus: 1
 3 |   # Add GPUs here
 4 | 
 5 | envs:
 6 |   DATA_BUCKET_NAME: sky-demo-data-test
 7 |   DATA_BUCKET_STORE_TYPE: s3
 8 | 
 9 | file_mounts:
10 |   /data:
11 |     name: $DATA_BUCKET_NAME
12 |     store: $DATA_BUCKET_STORE_TYPE
13 | 
14 | setup: |
15 |   echo "Setting up dependencies for eval..."
16 | 
17 | run: |
18 |   echo "Evaluating the trained model..."
19 |   
20 |   # Run a mock evaluation job that reads the trained model from /data/trained_model.txt
21 |   cat /data/trained_model.txt || true
22 |   # Generate a mock accuracy
23 |   ACCURACY=$(shuf -i 90-100 -n 1)
24 |   echo "Metric - accuracy: $ACCURACY%"
25 |   echo "Evaluation report" > /data/evaluation_report.txt
26 |   
27 |   echo "Evaluation completed, report written to $DATA_BUCKET_NAME"
28 | 


--------------------------------------------------------------------------------
/examples/autogluon.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: gcp
 3 | 
 4 | setup: |
 5 |   git clone https://github.com/autogluon/autogluon.git
 6 | 
 7 |   conda activate autogluon
 8 |   if [ $? -eq 0 ]; then
 9 |     echo 'conda env exists'
10 |   else
11 |     conda create -n autogluon python=3.8 -y
12 |     conda activate autogluon
13 |     pip install torch==1.13.1+cpu torchvision==0.14.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
14 |     pip install autogluon
15 |     # Ray + Torch Dataloader failed with latest grpcio
16 |     # See: https://github.com/ray-project/ray/pull/33903
17 |     pip install grpcio==1.51.3
18 |   fi
19 | 
20 | run: |
21 |   conda activate autogluon
22 |   cd autogluon
23 |   python examples/automm/tabular_dl/example_tabular.py --mode single_hpo
24 | 


--------------------------------------------------------------------------------
/examples/azure_start_stop.yaml:
--------------------------------------------------------------------------------
 1 | # start and stop Azure instances
 2 | name: azure-start-stop
 3 | 
 4 | resources:
 5 |   infra: azure
 6 | 
 7 | # Optimizing for smoke tests
 8 | #   2 nodes: smoke tests ~37 mins
 9 | #   1 node: smoke tests ~19 mins
10 | # num_nodes: 2
11 | 
12 | # The setup command.  Will be run under the working directory.
13 | setup: 'echo "azure-start-stop [setup]"'
14 | 
15 | # The command to run.  Will be run under the working directory.
16 | run: 'echo "azure-start-stop [run]"'
17 | 


--------------------------------------------------------------------------------
/examples/cog/cog.yaml:
--------------------------------------------------------------------------------
1 | build:
2 |   python_version: "3.8"
3 |   python_packages:
4 |     - "pillow==8.2.0"
5 |   system_packages:
6 |     - "libpng-dev"
7 |     - "libjpeg-dev"
8 | predict: "predict.py:Predictor"
9 | 


--------------------------------------------------------------------------------
/examples/cog/predict.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | 
 3 | import cog
 4 | from PIL import Image
 5 | from PIL import ImageFilter
 6 | 
 7 | 
 8 | class Predictor(cog.BasePredictor):
 9 | 
10 |     def predict(
11 |         self,
12 |         image: cog.Path = cog.Input(description='Input image'),
13 |         blur: float = cog.Input(description='Blur radius', default=5),
14 |     ) -> cog.Path:
15 |         if blur == 0:
16 |             return input
17 |         im = Image.open(str(image))
18 |         im = im.filter(ImageFilter.BoxBlur(blur))
19 |         out_path = cog.Path(tempfile.mkdtemp()) / 'out.png'
20 |         im.save(str(out_path))
21 |         return out_path
22 | 


--------------------------------------------------------------------------------
/examples/custom_image.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws/us-east-2
 3 |   # Nvidia image from
 4 |   # https://aws.amazon.com/marketplace/pp/prodview-rf7na2b2ttvdg
 5 |   image_id: ami-062ddd90fb6f8267a
 6 |   accelerators: V100
 7 | 
 8 | setup: |
 9 |   echo "running setup"
10 | 
11 | run: |
12 |   echo "hello sky"
13 | 


--------------------------------------------------------------------------------
/examples/deepspeed-multinode/README.md:
--------------------------------------------------------------------------------
1 | # DeepSpeed
2 | 
3 | This example shows how to launch a multinode DeepSpeed training job with SkyPilot.
4 | 
5 | 


--------------------------------------------------------------------------------
/examples/disk_size.yaml:
--------------------------------------------------------------------------------
 1 | # A minimal example that requests a 512GB OS disk.
 2 | #
 3 | # Runs a task that simply lists the default conda environments.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c min disk_size.yaml
 7 | #   sky down min
 8 | 
 9 | name: minimal
10 | 
11 | resources:
12 |   infra: azure
13 |   disk_size: 512
14 | 
15 | setup: |
16 |   echo "running setup"
17 |   lsblk
18 | 
19 | run: |
20 |   conda env list
21 | 


--------------------------------------------------------------------------------
/examples/distributed_ray_train/README.md:
--------------------------------------------------------------------------------
1 | # Ray
2 | 
3 | This example shows how to launch distributed Ray jobs with SkyPilot.
4 | 


--------------------------------------------------------------------------------
/examples/docker/compose/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   gpu-app1:
 5 |     image: nvidia/cuda:11.5.2-runtime-ubuntu20.04
 6 |     command: nvidia-smi # To keep running in a loop, add -l 1
 7 |     deploy:
 8 |       resources:
 9 |         reservations:
10 |           devices:
11 |           - driver: nvidia
12 |             device_ids: ['0']
13 |             capabilities: [gpu]
14 | 
15 |   gpu-app2:
16 |     image: nvidia/cuda:11.5.2-runtime-ubuntu20.04
17 |     command: nvidia-smi
18 |     deploy:
19 |       resources:
20 |         reservations:
21 |           devices:
22 |           - driver: nvidia
23 |             device_ids: ['1'] # Allocates GPU ID 1 to this container. Inside the container, this will be visible as device id 0
24 |             capabilities: [gpu]
25 | 


--------------------------------------------------------------------------------
/examples/docker/echo_app/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python
2 | 
3 | ADD echo.py /app/echo.py
4 | 
5 | WORKDIR /app
6 | 
7 | ENTRYPOINT ["python", "echo.py"]
8 | 


--------------------------------------------------------------------------------
/examples/docker/echo_app/README.md:
--------------------------------------------------------------------------------
1 | # Echo App
2 | 
3 | A simple app that ingests a file and writes it out back to a specified path.
4 | 


--------------------------------------------------------------------------------
/examples/docker/echo_app/echo.py:
--------------------------------------------------------------------------------
 1 | """Echo app
 2 | 
 3 | Reads a file, echoes it and writes back to a specified path.
 4 | """
 5 | import argparse
 6 | 
 7 | 
 8 | def main():
 9 |     """Main function"""
10 |     parser = argparse.ArgumentParser(description='Echo app')
11 |     parser.add_argument('input', type=str)
12 |     parser.add_argument('output', type=str)
13 |     args = parser.parse_args()
14 | 
15 |     with open(args.input, 'r') as input_file:
16 |         content = input_file.read()
17 |     print("===== echo app =====")
18 |     print("Input file content:")
19 |     print(content)
20 |     with open(args.output, 'w') as output_file:
21 |         output_file.write(content)
22 |     print("Output written to {}".format(args.output))
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 


--------------------------------------------------------------------------------
/examples/dvc/README.md:
--------------------------------------------------------------------------------
1 | # DVC
2 | 
3 | This example shows how to use DVC with SkyPilot.
4 | 


--------------------------------------------------------------------------------
/examples/dvc/dvc_pipeline.yaml:
--------------------------------------------------------------------------------
 1 | # adapted from https://alex000kim.com/posts/2023-08-10-ml-experiments-in-cloud-skypilot-dvc/
 2 | name: dvc-pipeline
 3 | resources:
 4 |   accelerators: T4:1
 5 |   infra: aws/us-east-2
 6 | 
 7 | workdir: .
 8 | file_mounts: 
 9 |   ~/.ssh/id_rsa: ~/.ssh/id_rsa
10 |   ~/.ssh/id_rsa.pub: ~/.ssh/id_rsa.pub
11 |   ~/.gitconfig: ~/.gitconfig
12 | setup: |
13 |   pip install -r requirements.txt
14 |   pip install dvc[s3]
15 | run: |
16 |   # pull data versioned by DVC from DVC remote
17 |   dvc pull 
18 |   # run DVC pipeline as an experiment
19 |   dvc exp run --pull --allow-missing
20 |   # push experiment results to DVC remote
21 |   dvc exp push origin 
22 | 


--------------------------------------------------------------------------------
/examples/gcp_start_stop.yaml:
--------------------------------------------------------------------------------
 1 | # start and stop GCP instances
 2 | name: gcp-start-stop
 3 | 
 4 | resources:
 5 |   infra: gcp
 6 | 
 7 | num_nodes: 2
 8 | 
 9 | # The setup command.  Will be run under the working directory.
10 | setup: 'echo "gcp_start_stop [setup]"'
11 | 
12 | # The command to run.  Will be run under the working directory.
13 | run: 'echo "gcp_start_stop [run]"'
14 | 


--------------------------------------------------------------------------------
/examples/http_server_with_custom_ports/task.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 |   ports:
3 |     - 33828
4 | 
5 | workdir: ./examples/http_server_with_custom_ports
6 | 
7 | run: python3 server.py
8 | 


--------------------------------------------------------------------------------
/examples/image_with_tag.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws
 3 |   image_id: skypilot:gpu-ubuntu-1804
 4 | 
 5 | 
 6 | setup: |
 7 |   echo "running setup"
 8 | 
 9 | run: |
10 |   conda env list
11 | 


--------------------------------------------------------------------------------
/examples/job_queue/cluster.yaml:
--------------------------------------------------------------------------------
 1 | # A dummy task for cluster creation.
 2 | #
 3 | # Runs a dummy task that provisions a cluster.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c jq cluster.yaml
 7 | #   sky exec jq job.yaml
 8 | 
 9 | resources:
10 |   accelerators: T4
11 | 


--------------------------------------------------------------------------------
/examples/job_queue/cluster_docker.yaml:
--------------------------------------------------------------------------------
 1 | # A dummy task for cluster creation.
 2 | #
 3 | # Runs a dummy task that provisions a cluster.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c djq cluster_docker.yaml
 7 | #   sky exec djq job_docker.yaml
 8 | 
 9 | resources:
10 |   accelerators: T4
11 |   image_id: docker:ubuntu:20.04
12 | 


--------------------------------------------------------------------------------
/examples/job_queue/cluster_multinode.yaml:
--------------------------------------------------------------------------------
 1 | # A dummy task for multinode cluster creation.
 2 | #
 3 | # Runs a dummy task that provisions a cluster.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c mjq cluster_multinode.yaml
 7 | #   sky exec mjq job_multinode.yaml
 8 | #   sky exec mjq job.yaml
 9 | 
10 | resources:
11 |   accelerators: T4
12 | 
13 | num_nodes: 2
14 | 


--------------------------------------------------------------------------------
/examples/job_queue/job.yaml:
--------------------------------------------------------------------------------
 1 | # A task submitted to an existing cluster.
 2 | #
 3 | # Runs a task on an existing cluster.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c jq cluster.yaml
 7 | #   sky exec jq job.yaml
 8 | 
 9 | name: job
10 | 
11 | resources:
12 |   accelerators: T4:0.5
13 | 
14 | setup: |
15 |   echo "running setup"
16 | 
17 | run: |
18 |   timestamp=$(date +%s)
19 |   conda env list
20 |   for i in {1..180}; do
21 |     echo "$timestamp $i"
22 |     sleep 1
23 |   done
24 | 


--------------------------------------------------------------------------------
/examples/job_queue/job_docker.yaml:
--------------------------------------------------------------------------------
 1 | # A task submitted to an existing cluster.
 2 | #
 3 | # Runs a task on an existing cluster with docker.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c djq cluster_docker.yaml
 7 | #   sky exec djq job_docker.yaml
 8 | 
 9 | name: job_docker
10 | 
11 | envs:
12 |   TIME_TO_SLEEP: 180
13 | 
14 | resources:
15 |   accelerators: T4:0.5
16 |   image_id: docker:ubuntu:20.04
17 | 
18 | setup: |
19 |   echo "running setup"
20 | 
21 | run: |
22 |   timestamp=$(date +%s)
23 |   conda env list
24 |   for i in $(seq 1 $TIME_TO_SLEEP); do
25 |     echo "$timestamp $i"
26 |     sleep 1
27 |   done
28 | 


--------------------------------------------------------------------------------
/examples/job_queue/job_gpu.yaml:
--------------------------------------------------------------------------------
 1 | # A task submitted to an existing cluster.
 2 | #
 3 | # Runs a task on an existing cluster.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c jq cluster.yaml
 7 | #   sky exec jq job_gpu.yaml
 8 | 
 9 | name: job
10 | 
11 | resources:
12 |   accelerators: K80:0.5
13 | 
14 | # setup: |
15 | #   conda create -n test python=3.7 -y
16 | #   conda activate test
17 | #   conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch
18 | 
19 | 
20 | 
21 | run: |
22 |   timestamp=$(date +%s)
23 |   conda activate test
24 |   echo "started"
25 |   python -u -c "import torch; a = torch.randn(10000, 10000).cuda(); b = torch.randn(10000, 10000).cuda(); [print((a @ b).sum()) for _ in range(10000000000)]"
26 |   echo "ended"
27 | 


--------------------------------------------------------------------------------
/examples/job_queue/job_ibm.yaml:
--------------------------------------------------------------------------------
 1 | # A task submitted to an existing cluster.
 2 | #
 3 | # Runs a task on an existing cluster.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c jq cluster.yaml
 7 | #   sky exec jq job_ibm.yaml
 8 | 
 9 | name: job
10 | 
11 | resources:
12 |   accelerators: v100:0.5
13 | 
14 | setup: |
15 |   echo "running setup"
16 | 
17 | run: |
18 |   timestamp=$(date +%s)
19 |   conda env list
20 |   for i in {1..120}; do
21 |     echo "$timestamp $i"
22 |     sleep 1
23 |   done
24 | 


--------------------------------------------------------------------------------
/examples/job_queue/job_multinode.yaml:
--------------------------------------------------------------------------------
 1 | # A task that runs on an existing multinode cluster.
 2 | #
 3 | # Runs a task that requires multinode.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c mjq cluster_multinode.yaml
 7 | #   sky exec mjq job_multinode.yaml
 8 | #   sky exec mjq job.yaml
 9 | 
10 | name: job_multinode
11 | 
12 | resources:
13 |   accelerators: T4:0.5
14 | 
15 | num_nodes: 2
16 | 
17 | setup: |
18 |   echo "running setup"
19 |   sleep 80
20 | 
21 | run: |
22 |   timestamp=$(date +%s)
23 |   conda env list
24 |   for i in {1..360}; do
25 |     echo "$timestamp $i"
26 |     sleep 1
27 |   done
28 | 


--------------------------------------------------------------------------------
/examples/job_queue/job_multinode_ibm.yaml:
--------------------------------------------------------------------------------
 1 | # A task that runs on an existing multinode cluster.
 2 | #
 3 | # Runs a task that requires multinode.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c mjq cluster_multinode.yaml
 7 | #   sky exec mjq job_multinode_ibm.yaml
 8 | #   sky exec mjq job_ibm.yaml
 9 | 
10 | name: job_multinode
11 | 
12 | resources:
13 |   accelerators: v100:0.5
14 | 
15 | num_nodes: 2
16 | 
17 | setup: |
18 |   echo "running setup"
19 |   sleep 80
20 | 
21 | run: |
22 |   timestamp=$(date +%s)
23 |   conda env list
24 |   for i in {1..240}; do
25 |     echo "$timestamp $i"
26 |     sleep 1
27 |   done
28 | 


--------------------------------------------------------------------------------
/examples/jupyter_lab.yaml:
--------------------------------------------------------------------------------
 1 | # Example: Launch Jupyter Lab and auto-expose its port to the Internet.
 2 | #
 3 | # Usage:
 4 | #     $ sky launch -c jupyter jupyter_lab.yaml
 5 | #     # Then look in the logs for output like:
 6 | #     # Jupyter Server 2.7.0 is running at:
 7 | #     #     http://127.0.0.1:29324/lab?token=<token>
 8 | #     # Run
 9 | #     $ sky status -a jupyter
10 | #     # to get the HEAD_IP of the cluster, replace the 127.0.0.1 with
11 | #     # the HEAD_IP and open browser for the URL.
12 | #     
13 | #     # This is an alternative to port forwarding.
14 | 
15 | resources:
16 |   ports:
17 |     - 29324
18 | 
19 | setup: pip install jupyter
20 | 
21 | run: jupyter lab --port 29324 --no-browser --ip=0.0.0.0
22 | 


--------------------------------------------------------------------------------
/examples/managed_job.yaml:
--------------------------------------------------------------------------------
 1 | name: minimal
 2 | 
 3 | setup: |
 4 |   echo "running setup"
 5 |   pip install tqdm
 6 | 
 7 | run: |
 8 |   conda env list
 9 |   echo "start counting"
10 |   python -u - << EOF
11 |   import time
12 |   import tqdm
13 | 
14 |   for i in tqdm.trange(240):
15 |     time.sleep(1)
16 |   
17 |   EOF
18 | 


--------------------------------------------------------------------------------
/examples/managed_spot.yaml:
--------------------------------------------------------------------------------
 1 | name: minimal
 2 | 
 3 | resources:
 4 |   use_spot: true
 5 | 
 6 | setup: |
 7 |   echo "running setup"
 8 |   pip install tqdm
 9 | 
10 | run: |
11 |   conda env list
12 |   python -u - << EOF
13 |   import time
14 |   import tqdm
15 | 
16 |   for i in tqdm.trange(240):
17 |     time.sleep(1)
18 |   
19 |   EOF
20 | 


--------------------------------------------------------------------------------
/examples/many_gpu_vms.yaml:
--------------------------------------------------------------------------------
 1 | name: many_gpu_vms
 2 | 
 3 | resources:
 4 |   infra: aws
 5 |   accelerators: V100:8
 6 |   # use_spot: true
 7 | 
 8 | num_nodes: 16
 9 | 
10 | setup: 'pip3 install wandb'
11 | 
12 | run: "python3 -c 'import wandb; print(wandb.__path__)'; nvidia-smi"
13 | 


--------------------------------------------------------------------------------
/examples/minimal.yaml:
--------------------------------------------------------------------------------
 1 | # A minimal example.
 2 | #
 3 | # Runs a task that simply lists the default conda environments.
 4 | #
 5 | # Usage:
 6 | #   sky launch -c min minimal.yaml
 7 | #   sky down min
 8 | 
 9 | name: minimal
10 | 
11 | resources:
12 |   infra: aws
13 | 
14 | setup: |
15 |   echo "running setup"
16 | 
17 | run: |
18 |   conda env list
19 | 


--------------------------------------------------------------------------------
/examples/mpirun.yaml:
--------------------------------------------------------------------------------
 1 | workdir: .
 2 | 
 3 | resources:
 4 |   infra: aws
 5 | 
 6 | num_nodes: 2  # Total number of nodes (1 head + 1 worker)
 7 | 
 8 | setup: |
 9 |   echo "Running setup on node ${SKYPILOT_NODE_RANK}."
10 |   # Install MPI if not already present. This will vary based on your OS/distro.
11 |   sudo apt update
12 |   sudo apt install -y openmpi-bin openmpi-common libopenmpi-dev
13 | 
14 | run: |
15 |   if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then
16 |     echo "head node"
17 |     num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l`
18 |     mpi_nodes=$(echo "$SKYPILOT_NODE_IPS" | tr '\n' ',')
19 |     mpi_nodes=${mpi_nodes::-1}
20 |     echo "$mpi_nodes"
21 |     mpirun -np $num_nodes -H $mpi_nodes bash -c 'echo "mpirun hello from IP $(hostname -I)"'
22 |   else
23 |     echo "worker nodes"
24 |   fi
25 | 


--------------------------------------------------------------------------------
/examples/multi_accelerators.yaml:
--------------------------------------------------------------------------------
 1 | name: multi-accelerators
 2 | 
 3 | resources:
 4 | 
 5 |   # Ordered list of accelerators: Try the accelerators in the specified order.
 6 |   # accelerators: ['A100-40GB:1', 'V100:1', 'K80:1', 'T4:1']
 7 | 
 8 |   # Unordered set of accelerators: Optimize all specified accelerators together, and try accelerator with lowest cost first.
 9 |   accelerators: {'A100-40GB:1', 'K80:1', 'V100:1', 'T4:1', 'T4:4'}
10 | 
11 | run: |
12 |   nvidia-smi
13 | 


--------------------------------------------------------------------------------
/examples/multi_hostname.py:
--------------------------------------------------------------------------------
 1 | import sky
 2 | 
 3 | with sky.Dag() as dag:
 4 |     # Two-node task on AWS. The run command is run on *all* nodes,
 5 |     # so the output should contain two lines:
 6 |     #   My hostname: <host1>
 7 |     #   My hostname: <host2>
 8 |     sky.Task(run='echo My hostname: $(hostname)',
 9 |              num_nodes=2).set_resources(sky.Resources(infra='aws'))
10 | 
11 | sky.launch(dag)
12 | 


--------------------------------------------------------------------------------
/examples/multi_hostname.yaml:
--------------------------------------------------------------------------------
 1 | name: multi_hostname
 2 | 
 3 | num_nodes: 2
 4 | 
 5 | # The run command will be run on *all* nodes.
 6 | # Should see two lines:
 7 | #   My hostname: <host1>
 8 | #   My hostname: <host2>
 9 | run: 'echo My hostname: $(hostname)'
10 | 


--------------------------------------------------------------------------------
/examples/multi_resources.yaml:
--------------------------------------------------------------------------------
 1 | name: multi-resources
 2 | 
 3 | resources:
 4 |   ordered:
 5 |     - infra: aws
 6 |       accelerators: A10g
 7 |     - infra: gcp
 8 |       accelerators: L4
 9 | 
10 | # resources:
11 | #   any_of:
12 |     # - infra: aws
13 |     #   accelerators: A10g
14 |     # - infra: gcp
15 |     #   accelerators: L4
16 | 
17 | run: |
18 |   nvidia-smi
19 | 


--------------------------------------------------------------------------------
/examples/nebius_infiniband/infiniband.yaml:
--------------------------------------------------------------------------------
 1 | # This example is used to test the InfiniBand
 2 | # connection between two VMs.
 3 | resources:
 4 |   cloud: nebius
 5 |   region: eu-north1
 6 |   accelerators: H100:8
 7 |   
 8 | num_nodes: 2
 9 | 
10 | setup: |
11 |   sudo apt install perftest -y
12 | 
13 | run: |
14 |   MASTER_ADDR=$(echo "$SKYPILOT_NODE_IPS" | head -n1)
15 |   if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then
16 |       ib_send_bw --report_gbits -n 1000 -F > /dev/null
17 |   elif [ "${SKYPILOT_NODE_RANK}" == "1" ]; then
18 |       echo "MASTER_ADDR: $MASTER_ADDR"
19 |       sleep 2 # wait for the master to start
20 |       ib_send_bw $MASTER_ADDR --report_gbits -n 1000 -F
21 |   fi
22 | 


--------------------------------------------------------------------------------
/examples/nemo/README.md:
--------------------------------------------------------------------------------
1 | # Nvidia NeMo
2 | 
3 | This example shows how to launch Nvidia NeMo jobs with SkyPilot.
4 | 


--------------------------------------------------------------------------------
/examples/oci/config.yaml:
--------------------------------------------------------------------------------
 1 | oci:
 2 |   default:
 3 |     # oci_config_profile: DEFAULT
 4 |     compartment_ocid: ocid1.compartment.oc1..aaaaaaaahr7aicqtodxmcfor6pbqn3hvsngpftozyxzqw36gj4kh3w3kkj4q
 5 |     # image_tag_general: skypilot:cpu-ubuntu-2004
 6 |     # image_tag_gpu: skypilot:gpu-ubuntu-2004
 7 | 
 8 |   ap-seoul-1:
 9 |     vcn_subnet: ocid1.subnet.oc1.ap-seoul-1.aaaaaaaa5c6wndifsij6yfyfehmi3tazn6mvhhiewqmajzcrlryurnl7nuja
10 | 
11 |   us-ashburn-1:
12 |     vcn_subnet: ocid1.subnet.oc1.iad.aaaaaaaafbj7i3aqc4ofjaapa5edakde6g4ea2yaslcsay32cthp7qo55pxa
13 | 


--------------------------------------------------------------------------------
/examples/oci/oci-mounts.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: oci
 3 | 
 4 | file_mounts:
 5 |   ~/tmpfile: ~/tmpfile
 6 |   ~/a/b/c/tmpfile: ~/tmpfile
 7 |   /tmp/workdir: ~/tmp-workdir
 8 | 
 9 |   /mydir:
10 |     name: skybucket
11 |     source: ['~/tmp-workdir']
12 |     store: oci
13 |     mode: MOUNT
14 | 
15 | setup: |
16 |   echo "*** Setup ***"
17 | 
18 | run: |
19 |   echo "*** Run ***"
20 | 
21 |   ls -lthr ~/tmpfile
22 |   ls -lthr ~/a/b/c
23 |   echo hi >> /tmp/workdir/new_file
24 |   ls -lthr /tmp/workdir
25 | 
26 |   ls -lthr /mydir
27 | 


--------------------------------------------------------------------------------
/examples/oci/serve-http-cpu.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe: /
 3 |   replicas: 2
 4 | 
 5 | resources:
 6 |   infra: oci/us-sanjose-1
 7 |   ports: 8080
 8 |   cpus: 2+
 9 | 
10 | run: python -m http.server 8080
11 | 


--------------------------------------------------------------------------------
/examples/oci/serve-qwen-7b.yaml:
--------------------------------------------------------------------------------
 1 | # service.yaml
 2 | service:
 3 |   readiness_probe: /v1/models
 4 |   replicas: 2
 5 | 
 6 | # Fields below describe each replica.
 7 | resources:
 8 |   infra: oci/us-sanjose-1
 9 |   ports: 8080
10 |   accelerators: {A10:1}
11 | 
12 | setup: |
13 |   conda create -n vllm python=3.12 -y
14 |   conda activate vllm
15 |   pip install vllm==0.6.3.post1
16 |   pip install vllm-flash-attn==2.6.2
17 | 
18 | run: |
19 |   conda activate vllm
20 |   python -u -m vllm.entrypoints.openai.api_server \
21 |     --host 0.0.0.0 --port 8080 \
22 |     --model Qwen/Qwen2-7B-Instruct \
23 |     --served-model-name Qwen2-7B-Instruct \
24 |     --device=cuda --dtype auto --max-model-len=2048
25 | 


--------------------------------------------------------------------------------
/examples/per_region_images.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws
 3 |   instance_type: g4dn.xlarge
 4 |   image_id:
 5 |     us-west-2: skypilot:gpu-ubuntu-1804
 6 |     us-east-2: skypilot:gpu-ubuntu-2004
 7 | 
 8 | 
 9 | setup: |
10 |   echo "running setup"
11 | 
12 | run: |
13 |   conda env list
14 |   nvidia-smi
15 | 


--------------------------------------------------------------------------------
/examples/playground/min_fail.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws
 3 | 
 4 | setup: |
 5 |   echo "running setup"
 6 | 
 7 | run: |
 8 |   conda env list
 9 |   exit 1
10 | 


--------------------------------------------------------------------------------
/examples/playground/min_progress_bar.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws
 3 | 
 4 | setup: |
 5 |   echo "running setup"
 6 | 
 7 | run: |
 8 |   conda env list
 9 |   python3 -u -c "from tqdm import tqdm; import time; import sys; [time.sleep(0.5) for i in tqdm(range(120))]; print('done'); print('new')"
10 | 


--------------------------------------------------------------------------------
/examples/ray_tune_app.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |     infra: aws
 3 |     accelerators: V100
 4 | 
 5 | num_nodes: 2
 6 | 
 7 | workdir: examples/ray_tune_examples
 8 | 
 9 | setup: |
10 |     pip3 install --upgrade pip
11 |     pip3 install ray[tune] pytorch-lightning==1.4.9 lightning-bolts torchvision
12 | 
13 | run: |
14 |     if [ "${SKYPILOT_NODE_RANK}" == "0" ]; then
15 |         python3 tune_ptl_example.py
16 |     fi
17 | 


--------------------------------------------------------------------------------
/examples/resnet_distributed_torch_scripts/run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | conda activate resnet
 4 | conda env list
 5 | 
 6 | cd pytorch-distributed-resnet
 7 | num_nodes=`echo "$SKYPILOT_NODE_IPS" | wc -l`
 8 | master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1`
 9 | echo MASTER_ADDR $master_addr
10 | python -m torch.distributed.launch --nproc_per_node=1 \
11 | --nnodes=$num_nodes --node_rank=${SKYPILOT_NODE_RANK} --master_addr=$master_addr \
12 | --master_port=8008 resnet_ddp.py --num_epochs 20
13 | 


--------------------------------------------------------------------------------
/examples/resnet_distributed_torch_scripts/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | [ -d pytorch-distributed-resnet ] || git clone https://github.com/michaelzhiluo/pytorch-distributed-resnet
 3 | cd pytorch-distributed-resnet
 4 | 
 5 | conda activate resnet
 6 | if [ $? -eq 0 ]; then
 7 |     echo "conda env exists"
 8 | else
 9 |     echo "conda env does not exist"
10 |     conda create -n resnet python=3.7 -y
11 |     conda activate resnet
12 | fi
13 | # SkyPilot's default image on AWS/GCP has CUDA 11.6 (Azure 11.5).
14 | pip install -r requirements.txt torch==1.12.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
15 | 
16 | mkdir -p data
17 | mkdir -p saved_models
18 | cd data
19 | wget -c --quiet https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
20 | tar -xvzf cifar-10-python.tar.gz
21 | 


--------------------------------------------------------------------------------
/examples/resnet_distributed_torch_with_script.yaml:
--------------------------------------------------------------------------------
 1 | name: resnet-distributed-app
 2 | 
 3 | 
 4 | resources:
 5 |     infra: aws
 6 |     accelerators: V100
 7 | 
 8 | num_nodes: 2
 9 | 
10 | workdir: ./examples/resnet_distributed_torch_scripts
11 | 
12 | setup: |
13 |     bash ./setup.sh
14 | 
15 | run: |
16 |     bash ./run.sh
17 | 


--------------------------------------------------------------------------------
/examples/sample_dotenv:
--------------------------------------------------------------------------------
1 | TEST_ENV2="success" 
2 | 


--------------------------------------------------------------------------------
/examples/serve/http_server/task.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to run a simple http server.
 2 | #
 3 | # Usage:
 4 | #   sky serve up -n http examples/serve/http_server/task.yaml
 5 | # The endpoint will be printed in the console. You
 6 | # could also check the endpoint by running:
 7 | #   sky serve status --endpoint http
 8 | 
 9 | service:
10 |   readiness_probe:
11 |     path: /health
12 |     initial_delay_seconds: 20
13 |   replicas: 2
14 | 
15 | resources:
16 |   ports: 8080
17 |   cpus: 2+
18 | 
19 | workdir: examples/serve/http_server
20 | 
21 | run: python3 server.py
22 | 


--------------------------------------------------------------------------------
/examples/serve/huggingface-tgi.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to run HuggingFace TGI
 2 | #
 3 | # Usage:
 4 | #   sky serve up -n tgi huggingface-tgi.yaml \
 5 | #     [--env MODEL_ID=<model-id-on-huggingface>]
 6 | # Then visit the endpoint printed in the console. You could also
 7 | # check the endpoint by running:
 8 | #   sky serve status --endpoint tgi
 9 | 
10 | envs:
11 |   MODEL_ID: lmsys/vicuna-13b-v1.5
12 | 
13 | service:
14 |   readiness_probe: /health
15 |   replicas: 2
16 | 
17 | resources:
18 |   ports: 8080
19 |   accelerators: A100:1
20 | 
21 | run: |
22 |   docker run --gpus all --shm-size 1g -p 8080:80 \
23 |     -v ~/data:/data ghcr.io/huggingface/text-generation-inference \
24 |     --model-id $MODEL_ID
25 | 


--------------------------------------------------------------------------------
/examples/serve/minimal.yaml:
--------------------------------------------------------------------------------
 1 | # A minimal example of a serve application.
 2 | 
 3 | service:
 4 |   readiness_probe: /
 5 |   replicas: 1
 6 | 
 7 | resources:
 8 |   ports: 8080
 9 |   cpus: 2+
10 | 
11 | run: python3 -m http.server 8080
12 | 


--------------------------------------------------------------------------------
/examples/serve/misc/cancel/service.yaml:
--------------------------------------------------------------------------------
 1 | # Usage: Please refer to the README.md in this directory.
 2 | 
 3 | service:
 4 |   readiness_probe:
 5 |     path: /health
 6 |     initial_delay_seconds: 120
 7 | 
 8 | resources:
 9 |   ports: 9000
10 |   cpus: 2+
11 | 
12 | workdir: examples/serve/misc/cancel
13 | 
14 | run: python3 server.py --port 9000
15 | 


--------------------------------------------------------------------------------
/examples/serve/ray_serve/ray_serve.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to run a simple rayserve endpoint.
 2 | # 
 3 | # Usage:
 4 | #   sky serve up examples/serve/ray_serve/ray_serve.yaml
 5 | 
 6 | service:
 7 |   readiness_probe: /
 8 |   replicas: 1
 9 | 
10 | resources:
11 |   ports: 8000
12 |   cpus: 2+
13 | 
14 | workdir: examples/serve/ray_serve
15 | 
16 | setup: pip install "ray[serve]"
17 | 
18 | run: serve run serve:app --host 0.0.0.0
19 | 


--------------------------------------------------------------------------------
/examples/serve/ray_serve/serve.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict
 2 | 
 3 | from ray import serve
 4 | from starlette import requests
 5 | 
 6 | 
 7 | # 2 Ray actors, each running on 1 vCPU.
 8 | @serve.deployment(route_prefix='/', num_replicas=2)
 9 | class ModelDeployment:
10 | 
11 |     def __init__(self, msg: str):
12 |         self._msg = msg
13 | 
14 |     def __call__(self, request: requests.Request) -> Dict:
15 |         del request  # unused
16 |         return {'result': self._msg}
17 | 
18 | 
19 | app = ModelDeployment.bind(msg='Hello Ray Serve!')
20 | 


--------------------------------------------------------------------------------
/examples/serve/spot_policy/base_on_demand_fallback_replicas.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to launch a service with mixed spot and on-demand instances.
 2 | # The policy will maintain `base_ondemand_fallback_replicas` number of on-demand instances, in addition to spot instances.
 3 | # On-demand instances are counted in autoscaling decisions (i.e., between `min_replicas` and `max_replicas`).
 4 | 
 5 | service:
 6 |   readiness_probe: /health
 7 |   replica_policy:
 8 |     min_replicas: 2
 9 |     max_replicas: 3
10 |     target_qps_per_replica: 1
11 |     base_ondemand_fallback_replicas: 1
12 | 
13 | resources:
14 |   ports: 8081
15 |   cpus: 2+
16 |   # use_spot is needed for ondemand fallback
17 |   use_spot: true
18 | 
19 | workdir: examples/serve/http_server
20 | 
21 | run: python3 server.py
22 | 


--------------------------------------------------------------------------------
/examples/serve/spot_policy/dynamic_on_demand_fallback.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to launch a service with mixed spot and on-demand instances.
 2 | # The policy will dynamically fall back to on-demand instances when spot instances are not available.
 3 | 
 4 | service:
 5 |   readiness_probe: /health
 6 |   replica_policy:
 7 |     min_replicas: 2
 8 |     max_replicas: 3
 9 |     target_qps_per_replica: 1
10 |     dynamic_ondemand_fallback: true
11 | 
12 | resources:
13 |   any_of:
14 |   - infra: gcp/*/us-central1-a
15 |   - infra: gcp/us-east1
16 |   ports: 8081
17 |   cpus: 2+
18 |   # use_spot is needed for ondemand fallback
19 |   use_spot: true
20 | 
21 | workdir: examples/serve/http_server
22 | 
23 | run: python3 server.py
24 | 


--------------------------------------------------------------------------------
/examples/serve/spot_policy/multi_accelerators.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to launch a service with mixed spot and on-demand instances and an ordered preference for accelerators.
 2 | # The policy will maintain `base_ondemand_fallback_replicas` number of on-demand instances, in addition to spot instances.
 3 | 
 4 | service:
 5 |   readiness_probe: /health
 6 |   replica_policy:
 7 |     min_replicas: 2
 8 |     max_replicas: 3
 9 |     target_qps_per_replica: 1
10 |     base_ondemand_fallback_replicas: 1
11 | 
12 | resources:
13 |   ordered:
14 |   - accelerators: V100
15 |   - accelerators: T4
16 |   ports: 8081
17 |   cpus: 2+
18 |   # use_spot is needed for ondemand fallback
19 |   use_spot: true
20 | 
21 | workdir: examples/serve/http_server
22 | 
23 | run: python3 server.py
24 | 


--------------------------------------------------------------------------------
/examples/show_gpus.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -ex
3 | 
4 | sky show-gpus --help
5 | sky show-gpus
6 | sky show-gpus V100
7 | sky show-gpus A100
8 | sky show-gpus --all
9 | 


--------------------------------------------------------------------------------
/examples/spot/lightning_cifar10/requirements.txt:
--------------------------------------------------------------------------------
1 | pytorch-lightning>=1.3
2 | torchvision
3 | wandb
4 | torchmetrics==0.4.1
5 | torch>=1.6, <1.9
6 | lightning-bolts
7 | 


--------------------------------------------------------------------------------
/examples/spot/resnet_ddp/README.md:
--------------------------------------------------------------------------------
1 | # pytorch-distributed-resnet
2 | Example of PyTorch ResNet Distributed Training - pulled from https://leimao.github.io/blog/PyTorch-Distributed-Training/
3 | 


--------------------------------------------------------------------------------
/examples/spot/resnet_ddp/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | wandb
4 | absl-py
5 | 


--------------------------------------------------------------------------------
/examples/spot_pipeline/single.yaml:
--------------------------------------------------------------------------------
 1 | name: dag-name
 2 | 
 3 | ---
 4 | name: simple-task
 5 | 
 6 | resources:
 7 |   cpus: 2+
 8 |   memory: 8+
 9 | 
10 | setup: |
11 |   echo setup for task 1
12 | 
13 | run: |
14 |   echo run for task 1
15 | 
16 | 


--------------------------------------------------------------------------------
/examples/stable_diffusion/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.9'
 2 | 
 3 | services:
 4 |   model:
 5 |     image: berkeleyskypilot/stable-diffusion
 6 |     restart: on-failure
 7 |     ports:
 8 |       - "7860:7860"
 9 |     volumes:
10 |       - ./cache:/cache
11 |       - ./output:/output
12 |       - ./models:/models
13 |     environment:
14 |       - CLI_ARGS=--extra-models-cpu --optimized-turbo
15 |     deploy:
16 |       resources:
17 |         reservations:
18 |           devices:
19 |               - driver: nvidia
20 |                 device_ids: ['0']
21 |                 capabilities: [gpu]
22 | 


--------------------------------------------------------------------------------
/examples/tensorflow_distributed/README.md:
--------------------------------------------------------------------------------
1 | # Distributed TensorFlow
2 | 
3 | This example shows how to launch a distributed TensorFlow training job with SkyPilot.
4 | 


--------------------------------------------------------------------------------
/examples/tpu/README.md:
--------------------------------------------------------------------------------
1 | # TPU
2 | 
3 | This example shows how to launch TPU jobs with SkyPilot.
4 | 
5 | > Note: Some examples may be old. See the `v6e/` files for the latest examples. See also: https://docs.skypilot.co/en/latest/reference/tpu.html.
6 | 


--------------------------------------------------------------------------------
/examples/tpu/tpu_app.yaml:
--------------------------------------------------------------------------------
 1 | name: tpu_app
 2 | 
 3 | # The working directory contains all code and will be synced to remote.
 4 | workdir: ./examples/tpu/tpu_app_code
 5 | 
 6 | resources:
 7 |   accelerators: tpu-v2-8
 8 | 
 9 | # The setup command.  Will be run under the working directory.
10 | setup: |
11 |   pip install --upgrade pip
12 | 
13 |   conda activate huggingface
14 |   if [ $? -eq 0 ]; then
15 |     echo 'conda env exists'
16 |   else
17 |     conda create -n huggingface python=3.8 -y
18 |     conda activate huggingface
19 |     pip install -r requirements.txt
20 |   fi
21 | 
22 | # The command to run.  Will be run under the working directory.
23 | run: |
24 |   conda activate huggingface
25 |   python -u run_tpu.py
26 | 


--------------------------------------------------------------------------------
/examples/tpu/tpu_app_code/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==2.5.1
2 | tensorflow-datasets==4.4.0
3 | transformers==4.12.0
4 | tensorflow-text==2.5.0
5 | cloud-tpu-client==0.10
6 | 


--------------------------------------------------------------------------------
/examples/tpu/v6e/benchmark-llama2-7b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   model_name: llama-2
 3 |   tokenizer_path: /home/gcpuser/sky_workdir/ckpt/llama2-7b/original/tokenizer.model
 4 | 
 5 | run: |
 6 |   cd JetStream
 7 |   python benchmarks/benchmark_serving.py \
 8 |     --tokenizer=$tokenizer_path --num-prompts=100 \
 9 |     --dataset openorca --save-request-outputs \
10 |     --warmup-mode=sampled --model=$model_name
11 | 


--------------------------------------------------------------------------------
/examples/tpu/v6e/fsdp_config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "fsdp_transformer_layer_cls_to_wrap": [
3 |         "LlamaDecoderLayer"
4 |     ],
5 |     "xla": true,
6 |     "xla_fsdp_v2": true,
7 |     "xla_fsdp_grad_ckpt": true
8 | }
9 | 


--------------------------------------------------------------------------------
/examples/unsloth/README.md:
--------------------------------------------------------------------------------
1 | # Unsloth
2 | 
3 | This example shows how to launch Unsloth jobs with SkyPilot.
4 | 


--------------------------------------------------------------------------------
/llm/axolotl/axolotl.yaml:
--------------------------------------------------------------------------------
 1 | # Usage:
 2 | #   HF_TOKEN=abc sky launch -c axolotl axolotl.yaml --env HF_TOKEN -y -i30 --down
 3 | 
 4 | name: axolotl
 5 | 
 6 | resources:
 7 |   accelerators: L4:1
 8 |   image_id: docker:winglian/axolotl:main-py3.10-cu118-2.0.1
 9 | 
10 | workdir: mistral
11 | 
12 | run: |
13 |   huggingface-cli login --token ${HF_TOKEN} 
14 | 
15 |   accelerate launch -m axolotl.cli.train qlora.yaml
16 | 
17 | envs:
18 |   HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
19 | 


--------------------------------------------------------------------------------
/llm/batch_inference/monitor_progress.yaml:
--------------------------------------------------------------------------------
 1 | name: batch-inference-monitor-progress
 2 | 
 3 | workdir: .
 4 | 
 5 | resources:
 6 |   cpus: 2
 7 |   memory: 8+
 8 |   infra: aws
 9 |   ports:
10 |     - 8000
11 | 
12 | envs:
13 |   # make sure this is the same as the source in compute_vectors.yaml
14 |   EMBEDDINGS_BUCKET_NAME: sky-text-embeddings 
15 | 
16 | file_mounts:
17 |   /output:
18 |     name: ${EMBEDDINGS_BUCKET_NAME}
19 |     # this needs to be the same as the source in compute_vectors.yaml
20 |     mode: MOUNT
21 |     store: s3
22 | 
23 | 
24 | setup: |
25 |   pip install fastapi uvicorn aiofiles
26 |   pip install pandas pyarrow plotly
27 | 
28 | run: |
29 |   python scripts/monitor_progress.py --metrics-dir /output/metrics 
30 | 


--------------------------------------------------------------------------------
/llm/deepseek-janus/janus_1.5b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
 3 | 
 4 | resources:
 5 |   accelerators: {L4:1, A10G:1, A10:1, A100:8, A100-80GB:1}
 6 |   ports:
 7 |     - 8000
 8 |   disk_tier: best
 9 |   memory: 32+
10 | 
11 | setup: |
12 |   git clone https://github.com/deepseek-ai/Janus.git
13 |   pip install -e Janus[gradio]
14 |   pip install diffusers==0.32.2
15 |   python -c "import huggingface_hub; huggingface_hub.login('${HF_TOKEN}')"
16 | 
17 | run: |
18 |   cd Janus && python demo/app.py


--------------------------------------------------------------------------------
/llm/deepseek-janus/januspro_7b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
 3 | 
 4 | resources:
 5 |   accelerators: {L4:1, A10G:1, A10:1, A100:8, A100-80GB:1}
 6 |   ports:
 7 |     - 8000
 8 |   disk_tier: best
 9 |   memory: 64+
10 | 
11 | setup: |
12 |   git clone https://github.com/deepseek-ai/Janus.git
13 |   pip install -e Janus[gradio]
14 |   pip install diffusers==0.32.2
15 |   python -c "import huggingface_hub; huggingface_hub.login('${HF_TOKEN}')"
16 | 
17 | run: |
18 |   cd Janus && python demo/app_januspro.py


--------------------------------------------------------------------------------
/llm/deepseek-r1-distilled/deepseek-r1-vllm.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   MODEL_NAME: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
 3 |   MAX_MODEL_LEN: 4096
 4 | 
 5 | resources:
 6 |   accelerators: {L4:1, A10G:1, A10:1, A100:1, A100-80GB:1}
 7 |   ports:
 8 |     - 8000
 9 |   disk_tier: best
10 | 
11 | setup: |
12 |   uv pip install transformers==4.48.1
13 |   uv pip install vllm==0.6.6.post1
14 | 
15 | 
16 | run: |
17 |   echo 'Starting vllm openai api server...'
18 |   python -m vllm.entrypoints.openai.api_server \
19 |     --host 0.0.0.0 \
20 |     --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
21 |     --model $MODEL_NAME \
22 |     --max-model-len $MAX_MODEL_LEN
23 | 


--------------------------------------------------------------------------------
/llm/lorax/lorax.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   accelerators: {A10G, A10, L4, A100, A100-80GB}
 3 |   memory: 32+
 4 |   ports: 
 5 |     - 8080
 6 | 
 7 | envs:
 8 |   MODEL_ID: mistralai/Mistral-7B-Instruct-v0.1
 9 | 
10 | run: |
11 |   docker run --gpus all --shm-size 1g -p 8080:80 -v ~/data:/data \
12 |     ghcr.io/predibase/lorax:latest \
13 |     --model-id $MODEL_ID
14 | 


--------------------------------------------------------------------------------
/llm/rag/build_rag.yaml:
--------------------------------------------------------------------------------
 1 | name: build-legal-rag
 2 | 
 3 | workdir: .
 4 | 
 5 | resources:
 6 |   memory: 32+  # Need more memory for merging embeddings
 7 |   infra: aws
 8 | 
 9 | envs:
10 |   EMBEDDINGS_BUCKET_NAME: sky-rag-embeddings
11 |   VECTORDB_BUCKET_NAME: sky-rag-vectordb
12 | 
13 | file_mounts:
14 |   /embeddings:
15 |     name: ${EMBEDDINGS_BUCKET_NAME}
16 |     # this needs to be the same as the output in compute_embeddings.yaml
17 |     mode: MOUNT
18 |   
19 |   /vectordb:
20 |     name: ${VECTORDB_BUCKET_NAME}
21 |     mode: MOUNT
22 | 
23 | setup: |
24 |   pip install chromadb pandas tqdm pyarrow
25 | 
26 | run: |
27 |   python scripts/build_rag.py \
28 |     --collection-name legal_docs \
29 |     --persist-dir /vectordb/chroma \
30 |     --embeddings-dir /embeddings \
31 |     --batch-size 1000 
32 | 


--------------------------------------------------------------------------------
/llm/tabby/docker-compose.cuda.yaml:
--------------------------------------------------------------------------------
 1 | version: '3.5'
 2 | 
 3 | services:
 4 |   tabby:
 5 |     restart: always
 6 |     container_name: tabby
 7 |     image: tabbyml/tabby
 8 |     command: serve --model TabbyML/StarCoder-1B --device cuda
 9 |     volumes:
10 |       - "./tabby:/data"
11 |     ports:
12 |       - 8080:8080
13 |     deploy:
14 |       resources:
15 |         reservations:
16 |           devices:
17 |             - driver: nvidia
18 |               count: 1
19 |               capabilities: [gpu]
20 | 


--------------------------------------------------------------------------------
/llm/tabby/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: '3.5'
 2 | 
 3 | services:
 4 |   tabby:
 5 |     restart: always
 6 |     container_name: tabby
 7 |     image: tabbyml/tabby
 8 |     command: serve --model TabbyML/StarCoder-1B
 9 |     volumes:
10 |       - "./tabby:/data"
11 |     ports:
12 |       - 8080:8080
13 | 


--------------------------------------------------------------------------------
/llm/tabby/tabby/config.toml:
--------------------------------------------------------------------------------
1 | [[repositories]]
2 | git_url = "https://github.com/skypilot-org/skypilot"
3 | 


--------------------------------------------------------------------------------
/llm/tgi/serve.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to run HuggingFace TGI
 2 | #
 3 | # Usage:
 4 | #   sky serve up -n tgi llm/tgi/serve.yaml \
 5 | #     [--env MODEL_ID=<model-id-on-huggingface>]
 6 | # Then visit the endpoint printed in the console. You could also
 7 | # check the endpoint by running:
 8 | #   sky serve status --endpoint tgi
 9 | 
10 | envs:
11 |   MODEL_ID: lmsys/vicuna-13b-v1.5
12 | 
13 | service:
14 |   readiness_probe: /health
15 |   replicas: 2
16 | 
17 | resources:
18 |   ports: 8080
19 |   accelerators: A100:1
20 | 
21 | run: |
22 |   docker run --gpus all --shm-size 1g -p 8080:80 \
23 |     -v ~/data:/data ghcr.io/huggingface/text-generation-inference \
24 |     --model-id $MODEL_ID
25 | 


--------------------------------------------------------------------------------
/llm/vicuna-llama-2/scripts/train_flash_attn.py:
--------------------------------------------------------------------------------
 1 | # Make it more memory efficient by monkey patching the LLaMA model with FlashAttn.
 2 | 
 3 | # Need to call this before importing transformers.
 4 | from flash_attn_patch import replace_llama_attn_with_flash_attn
 5 | 
 6 | replace_llama_attn_with_flash_attn()
 7 | 
 8 | from train import train
 9 | 
10 | if __name__ == "__main__":
11 |     train()
12 | 


--------------------------------------------------------------------------------
/llm/vllm/serve-openai-api-docker.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   MODEL_NAME: meta-llama/Llama-2-7b-chat-hf
 3 |   HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
 4 | 
 5 | resources:
 6 |   image_id: docker:vllm/vllm-openai:latest
 7 |   accelerators: {L4:1, A10G:1, A10:1, A100:1, A100-80GB:1}
 8 |   ports:
 9 |     - 8000
10 | 
11 | setup: |
12 |   conda deactivate
13 |   python3 -c "import huggingface_hub; huggingface_hub.login('${HF_TOKEN}')"
14 | 
15 | run: |
16 |   conda deactivate
17 |   echo 'Starting vllm openai api server...'
18 |   python -m vllm.entrypoints.openai.api_server \
19 |     --model $MODEL_NAME --tokenizer hf-internal-testing/llama-tokenizer \
20 |     --host 0.0.0.0
21 | 


--------------------------------------------------------------------------------
/llm/yi/yi15-34b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   MODEL_NAME: 01-ai/Yi-1.5-34B-Chat
 3 |   
 4 | resources:
 5 |   accelerators: {A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
 6 |   disk_size: 1024
 7 |   disk_tier: best
 8 |   memory: 32+
 9 |   ports: 8000
10 | 
11 | setup: |
12 |   pip install vllm==0.6.1.post2
13 |   pip install vllm-flash-attn
14 | 
15 | run: |
16 |   export PATH=$PATH:/sbin
17 |   vllm serve $MODEL_NAME \
18 |     --host 0.0.0.0 \
19 |     --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
20 |     --max-model-len 1024 | tee ~/openai_api_server.log
21 | 


--------------------------------------------------------------------------------
/llm/yi/yi15-6b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   MODEL_NAME: 01-ai/Yi-1.5-6B-Chat
 3 | 
 4 | resources:
 5 |   accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
 6 |   disk_tier: best
 7 |   ports: 8000
 8 | 
 9 | setup: |
10 |   pip install vllm==0.6.1.post2
11 |   pip install vllm-flash-attn
12 | 
13 | run: |
14 |   export PATH=$PATH:/sbin
15 |   vllm serve $MODEL_NAME \
16 |     --host 0.0.0.0 \
17 |     --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
18 |     --max-model-len 1024 | tee ~/openai_api_server.log
19 | 


--------------------------------------------------------------------------------
/llm/yi/yi15-9b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   MODEL_NAME: 01-ai/Yi-1.5-9B-Chat
 3 | 
 4 | resources:
 5 |   accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
 6 |   disk_tier: best
 7 |   ports: 8000
 8 | 
 9 | setup: |
10 |   pip install vllm==0.6.1.post2
11 |   pip install vllm-flash-attn
12 | 
13 | run: |
14 |   export PATH=$PATH:/sbin
15 |   vllm serve $MODEL_NAME \
16 |     --host 0.0.0.0 \
17 |     --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
18 |     --max-model-len 1024 | tee ~/openai_api_server.log
19 | 


--------------------------------------------------------------------------------
/llm/yi/yicoder-1_5b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   MODEL_NAME: 01-ai/Yi-Coder-1.5B-Chat
 3 |   
 4 | resources:
 5 |   accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB}
 6 |   disk_tier: best
 7 |   ports: 8000
 8 | 
 9 | setup: |
10 |   pip install vllm==0.6.1.post2
11 |   pip install vllm-flash-attn
12 | 
13 | run: |
14 |   export PATH=$PATH:/sbin
15 |   vllm serve $MODEL_NAME \
16 |     --host 0.0.0.0 \
17 |     --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
18 |     --max-model-len 1024 | tee ~/openai_api_server.log
19 | 


--------------------------------------------------------------------------------
/llm/yi/yicoder-9b.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   MODEL_NAME: 01-ai/Yi-Coder-9B-Chat  # read by the run section as $MODEL_NAME
 3 |   
 4 | resources:
 5 |   accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}  # type:count options; every entry asks for 2+ GPUs
 6 |   disk_tier: best
 7 |   ports: 8000  # NOTE(review): run passes no --port, so this presumably matches the server's default port; confirm
 8 | 
 9 | setup: |  # pins vllm to 0.6.1.post2; vllm-flash-attn is installed unpinned
10 |   pip install vllm==0.6.1.post2
11 |   pip install vllm-flash-attn
12 | 
13 | run: |  # tensor-parallel size follows the node's GPU count; output is also copied to ~/openai_api_server.log
14 |   export PATH=$PATH:/sbin
15 |   vllm serve $MODEL_NAME \
16 |     --host 0.0.0.0 \
17 |     --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
18 |     --max-model-len 1024 | tee ~/openai_api_server.log
19 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | sky/setup_files/setup.py


--------------------------------------------------------------------------------
/sky/adaptors/README.md:
--------------------------------------------------------------------------------
1 | This directory is for third-party cloud adaptors. These adaptors wrap the underlying packages, so cloud-specific packages are loaded on demand.
2 | 


--------------------------------------------------------------------------------
/sky/adaptors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/adaptors/__init__.py


--------------------------------------------------------------------------------
/sky/adaptors/cudo.py:
--------------------------------------------------------------------------------
1 | """Cudo Compute cloud adaptor."""
2 | 
3 | from sky.adaptors import common
4 | 
5 | cudo = common.LazyImport(  # module-level handle for the 'cudo_compute' package, wrapped by LazyImport
6 |     'cudo_compute',
7 |     import_error_message='Failed to import dependencies for Cudo Compute. '
8 |     'Try running: pip install "skypilot[cudo]"')  # the two literals above concatenate into one message
9 | 


--------------------------------------------------------------------------------
/sky/adaptors/docker.py:
--------------------------------------------------------------------------------
 1 | """Docker adaptors"""
 2 | 
 3 | # pylint: disable=import-outside-toplevel
 4 | 
 5 | from sky.adaptors import common
 6 | 
 7 | docker = common.LazyImport(  # module-level handle for the 'docker' package; used by the helpers below
 8 |     'docker',
 9 |     import_error_message='Failed to import dependencies for Docker. '
10 |     'See README for how to install it.')  # the two literals above concatenate into one message
11 | 
12 | 
13 | def from_env():  # thin wrapper: forwards to docker.from_env() and returns its result
14 |     return docker.from_env()
15 | 
16 | 
17 | def build_error():  # returns docker.errors.BuildError itself (not called), resolved at call time
18 |     return docker.errors.BuildError
19 | 
20 | 
21 | def not_found_error():  # returns docker.errors.NotFound itself (not called), resolved at call time
22 |     return docker.errors.NotFound
23 | 
24 | 
25 | def api_error():  # returns docker.errors.APIError itself (not called), resolved at call time
26 |     return docker.errors.APIError
27 | 


--------------------------------------------------------------------------------
/sky/adaptors/runpod.py:
--------------------------------------------------------------------------------
1 | """RunPod cloud adaptor."""
2 | 
3 | from sky.adaptors import common
4 | 
5 | runpod = common.LazyImport(  # module-level handle for the 'runpod' package, wrapped by LazyImport
6 |     'runpod',
7 |     import_error_message='Failed to import dependencies for RunPod. '
8 |     'Try running: pip install "skypilot[runpod]"')  # the two literals above concatenate into one message
9 | 


--------------------------------------------------------------------------------
/sky/backends/__init__.py:
--------------------------------------------------------------------------------
 1 | """Sky Backends."""
 2 | from sky.backends.backend import Backend
 3 | from sky.backends.backend import ResourceHandle
 4 | from sky.backends.cloud_vm_ray_backend import CloudVmRayBackend
 5 | from sky.backends.cloud_vm_ray_backend import CloudVmRayResourceHandle
 6 | from sky.backends.local_docker_backend import LocalDockerBackend
 7 | from sky.backends.local_docker_backend import LocalDockerResourceHandle
 8 | 
 9 | __all__ = [
10 |     'Backend', 'ResourceHandle', 'CloudVmRayBackend',
11 |     'CloudVmRayResourceHandle', 'LocalDockerBackend',
12 |     'LocalDockerResourceHandle'
13 | ]
14 | 


--------------------------------------------------------------------------------
/sky/benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/benchmark/__init__.py


--------------------------------------------------------------------------------
/sky/callbacks/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | setuptools.setup(  # packaging metadata for the 'sky-callback' distribution
4 |     name='sky-callback',
5 |     version='0.1.1-dev0',
6 |     packages=setuptools.find_packages(),  # discovers packages relative to this setup.py
7 |     install_requires=['psutil'],  # the only install-time dependency declared here
8 | )
9 | 


--------------------------------------------------------------------------------
/sky/callbacks/sky_callback/integrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/callbacks/sky_callback/integrations/__init__.py


--------------------------------------------------------------------------------
/sky/client/__init__.py:
--------------------------------------------------------------------------------
1 | """Module for the SkyPilot Client."""
2 | 


--------------------------------------------------------------------------------
/sky/client/cli.py:
--------------------------------------------------------------------------------
1 | ../cli.py


--------------------------------------------------------------------------------
/sky/clouds/service_catalog/README.md:
--------------------------------------------------------------------------------
1 | # Service Catalog
2 | 
3 | This module provides information about the clouds supported by SkyPilot, including their instance type offerings, pricing, and data transfer costs. It also provides functions to query this information and to select the most suitable instance types based on resource requirements. It is primarily used by the Clouds module.
4 | 
5 | - `data_fetchers/fetch_{aws,azure}.py`: each file is a standalone script that queries the cloud APIs to produce the pricing list files.
6 | - `data_fetchers/fetch_gcp.py`: A script that generates the GCP catalog by crawling GCP websites.
7 | - `{aws,azure,gcp}_catalog.py`: Singleton-classes that load the data files and provide functions to query for instance offerings based on resource requirements.
8 | 


--------------------------------------------------------------------------------
/sky/clouds/service_catalog/constants.py:
--------------------------------------------------------------------------------
1 | """Constants used for service catalog."""
2 | HOSTED_CATALOG_DIR_URL = 'https://raw.githubusercontent.com/skypilot-org/skypilot-catalog/master/catalogs'  # pylint: disable=line-too-long
3 | HOSTED_CATALOG_DIR_URL_S3_MIRROR = 'https://skypilot-catalog.s3.us-east-1.amazonaws.com/catalogs'  # pylint: disable=line-too-long
4 | CATALOG_SCHEMA_VERSION = 'v7'  # NOTE(review): presumably a path component under the catalog dir/URLs; confirm
5 | CATALOG_DIR = '~/.sky/catalogs'  # contains a literal '~'; it is not expanded in this module
6 | ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',  # lowercase cloud identifiers
7 |               'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
8 |               'paperspace', 'do', 'nebius', 'ssh')
9 | 


--------------------------------------------------------------------------------
/sky/clouds/service_catalog/data_fetchers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/clouds/service_catalog/data_fetchers/__init__.py


--------------------------------------------------------------------------------
/sky/clouds/service_catalog/data_fetchers/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py
2 | boto3
3 | lxml
4 | pandas
5 | ray
6 | requests
7 | 


--------------------------------------------------------------------------------
/sky/clouds/service_catalog/images/plugins.pkr.hcl:
--------------------------------------------------------------------------------
 1 | packer {
 2 |   required_plugins {
 3 |     amazon = {  # declares the Amazon plugin as a requirement
 4 |       version = ">= 1.2.8"  # accepts any release at or above 1.2.8
 5 |       source  = "github.com/hashicorp/amazon"
 6 |     }
 7 |   }
 8 | }
 9 | 
10 | packer {
11 |   required_plugins {
12 |     googlecompute = {  # declares the Google Compute plugin as a requirement
13 |       version = ">= 1.1.1"  # accepts any release at or above 1.1.1
14 |       source  = "github.com/hashicorp/googlecompute"
15 |     }
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/sky/clouds/service_catalog/images/provisioners/user-toolkit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # This script installs popular toolkits for users to use in the base environment.
 3 | 
 4 | eval "$(~/miniconda3/bin/conda shell.bash hook)"  # evaluates the hook printed by the miniconda install under $HOME
 5 | conda activate base  # the pip installs below therefore target the base environment
 6 | pip install numpy
 7 | pip install pandas
 8 | 
 9 | if [ "$AZURE_GRID_DRIVER" = 1 ]; then  # string comparison; an unset or empty variable skips this branch
10 |     # Need PyTorch X.X.X+cu121 version to be compatible with older NVIDIA driver (535.161.08 or lower)
11 |     pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
12 | fi
13 | 


--------------------------------------------------------------------------------
/sky/clouds/utils/README.md:
--------------------------------------------------------------------------------
 1 | # Utility for Clouds
 2 | 
 3 | This folder contains the utility functions for clouds which are required by both
 4 | the `sky.skylet.providers` and other modules in SkyPilot.
 5 | 
 6 | This is to avoid importing other unnecessary modules in `sky.skylet.providers`.
 7 | When a utility file is placed under, e.g., `sky.skylet.providers.<cloud>`, and is
 8 | imported by other modules in SkyPilot, Python will import the `__init__.py` file in
 9 | the folder, which will then import
10 | `sky.skylet.providers.<cloud>.node_provider`, causing the import of `ray`.
11 | Importing `ray` will cause failures for clouds that have adopted the new provisioner
12 | (#1702) and removed the dependency on `ray` (#2625).
13 | 


--------------------------------------------------------------------------------
/sky/clouds/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/clouds/utils/__init__.py


--------------------------------------------------------------------------------
/sky/dashboard/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": ["next/core-web-vitals", "prettier"]
3 | }
4 | 


--------------------------------------------------------------------------------
/sky/dashboard/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 2 | 
 3 | # dependencies
 4 | /node_modules
 5 | /.pnp
 6 | .pnp.js
 7 | .yarn/install-state.gz
 8 | 
 9 | # testing
10 | /coverage
11 | 
12 | # next.js
13 | /.next/
14 | /out/
15 | 
16 | # production
17 | /build
18 | 
19 | # misc
20 | .DS_Store
21 | *.pem
22 | 
23 | # debug
24 | npm-debug.log*
25 | yarn-debug.log*
26 | yarn-error.log*
27 | 
28 | # local env files
29 | .env*.local
30 | 
31 | # vercel
32 | .vercel
33 | 
34 | # typescript
35 | *.tsbuildinfo
36 | next-env.d.ts
37 | 
38 | .vscode
39 | .swc
40 | 


--------------------------------------------------------------------------------
/sky/dashboard/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 |   "semi": true,
3 |   "trailingComma": "es5",
4 |   "singleQuote": true,
5 |   "tabWidth": 2,
6 |   "useTabs": false
7 | }
8 | 


--------------------------------------------------------------------------------
/sky/dashboard/components.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://ui.shadcn.com/schema.json",
 3 |   "style": "default",
 4 |   "rsc": true,
 5 |   "tsx": false,
 6 |   "tailwind": {
 7 |     "config": "tailwind.config.js",
 8 |     "css": "src/app/globals.css",
 9 |     "baseColor": "slate",
10 |     "cssVariables": true
11 |   },
12 |   "aliases": {
13 |     "components": "@/components",
14 |     "utils": "@/lib/utils"
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/sky/dashboard/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import globals from 'globals';
 2 | import pluginJs from '@eslint/js';
 3 | import pluginReactConfig from 'eslint-plugin-react/configs/recommended.js';
 4 | 
 5 | export default [ // array of config objects, exported as the module default
 6 |   { files: ['**/*.{js,mjs,cjs,jsx}'] }, // file globs this config names
 7 |   { languageOptions: { parserOptions: { ecmaFeatures: { jsx: true } } } }, // turn on JSX parsing
 8 |   { languageOptions: { globals: globals.browser } }, // declare browser globals
 9 |   pluginJs.configs.recommended,
10 |   pluginReactConfig,
11 | ];
12 | 


--------------------------------------------------------------------------------
/sky/dashboard/jest.setup.js:
--------------------------------------------------------------------------------
 1 | require('@testing-library/jest-dom');
 2 | 
 3 | // Mock fetch
 4 | global.fetch = jest.fn(); // bare mock with no implementation; tests must stub whatever response they need
 5 | 
 6 | // Keep original console methods for testing
 7 | const originalConsole = { ...console }; // shallow snapshot taken before global.console is replaced
 8 | global.console = {
 9 |   ...console, // keep every other console member as-is
10 |   error: (...args) => {
11 |     originalConsole.error(...args); // each wrapper below forwards its arguments to the snapshot
12 |   },
13 |   warn: (...args) => {
14 |     originalConsole.warn(...args);
15 |   },
16 |   log: (...args) => {
17 |     originalConsole.log(...args);
18 |   },
19 |   info: (...args) => {
20 |     originalConsole.info(...args);
21 |   },
22 |   debug: (...args) => {
23 |     originalConsole.debug(...args);
24 |   },
25 | };
26 | 
27 | // Add Jest globals (NOTE(review): the right-hand names already resolve unqualified here, so these look redundant; confirm)
28 | global.describe = describe;
29 | global.test = test;
30 | global.expect = expect;
31 | 
32 | // Add any global test setup here
33 | 


--------------------------------------------------------------------------------
/sky/dashboard/jsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "baseUrl": ".",
 4 |     "paths": {
 5 |       "@/*": ["./src/*"]
 6 |     },
 7 |     "jsx": "react",
 8 |     "checkJs": true,
 9 |     "resolveJsonModule": true,
10 |     "moduleResolution": "node",
11 |     "target": "es6",
12 |     "module": "commonjs",
13 |     "allowSyntheticDefaultImports": true,
14 |     "esModuleInterop": true
15 |   },
16 |   "include": ["src/**/*"],
17 |   "exclude": ["node_modules", ".next", "out"]
18 | }
19 | 


--------------------------------------------------------------------------------
/sky/dashboard/next.config.mjs:
--------------------------------------------------------------------------------
 1 | /** @type {import('next').NextConfig} */
 2 | const nextConfig = {
 3 |   basePath: '/dashboard', // the app is mounted under the /dashboard path prefix
 4 |   output: 'export', // build as a static export
 5 |   images: {
 6 |     unoptimized: true,
 7 |   },
 8 |   env: { // read from process.env; only the two INFRA_* entries have fallbacks
 9 |     SKYPILOT_API_SERVER_ENDPOINT: process.env.SKYPILOT_API_SERVER_ENDPOINT, // undefined when unset
10 |     INFRA_CACHE_DURATION_MINUTES:
11 |       process.env.INFRA_CACHE_DURATION_MINUTES || '10', // string default, not a number
12 |     INFRA_CACHE_DEBUG: process.env.INFRA_CACHE_DEBUG || 'false', // string default, not a boolean
13 |   },
14 | };
15 | 
16 | export default nextConfig;
17 | 


--------------------------------------------------------------------------------
/sky/dashboard/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('postcss-load-config').Config} */
2 | const config = {
3 |   plugins: {
4 |     tailwindcss: {}, // the only plugin listed; empty options object
5 |   },
6 | };
7 | 
8 | export default config;
9 | 


--------------------------------------------------------------------------------
/sky/dashboard/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/dashboard/public/favicon.ico


--------------------------------------------------------------------------------
/sky/dashboard/public/videos/cursor-small.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/dashboard/public/videos/cursor-small.mp4


--------------------------------------------------------------------------------
/sky/dashboard/src/components/elements/version-display.jsx:
--------------------------------------------------------------------------------
 1 | import React, { useState, useEffect } from 'react';
 2 | import { ENDPOINT } from '@/data/connectors/constants';
 3 | 
 4 | export function VersionDisplay() { // shows the `version` field returned by `${ENDPOINT}/api/health`
 5 |   const [version, setVersion] = useState(null); // stays null until a response carries a truthy version
 6 | 
 7 |   useEffect(() => {
 8 |     fetch(`${ENDPOINT}/api/health`)
 9 |       .then((res) => res.json()) // NOTE(review): res.ok is not checked; a non-JSON body rejects into the catch below
10 |       .then((data) => {
11 |         if (data.version) {
12 |           setVersion(data.version);
13 |         }
14 |       })
15 |       .catch((error) => {
16 |         console.error('Error fetching version:', error); // failure is logged only; the component keeps rendering null
17 |       });
18 |   }, []); // empty dependency list: the request is issued once, after mount
19 | 
20 |   if (!version) return null; // nothing is rendered while loading, on failure, or when version is missing
21 | 
22 |   return <div className="text-sm text-gray-500">Version: {version}</div>;
23 | }
24 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/components/ui/input.jsx:
--------------------------------------------------------------------------------
 1 | import * as React from 'react';
 2 | 
 3 | import { cn } from '@/lib/utils';
 4 | 
 5 | const Input = React.forwardRef(({ className, type, ...props }, ref) => { // styled <input>; forwards ref and all remaining props
 6 |   return (
 7 |     <input
 8 |       type={type}
 9 |       className={cn(
10 |         'flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50',
11 |         className
12 |       )}
13 |       ref={ref}
14 |       {...props}
15 |     />
16 |   );
17 | });
18 | Input.displayName = 'Input'; // set explicitly because the wrapped component is an anonymous arrow function
19 | 
20 | export { Input };
21 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/components/ui/label.jsx:
--------------------------------------------------------------------------------
 1 | 'use client';
 2 | 
 3 | import * as React from 'react';
 4 | import * as LabelPrimitive from '@radix-ui/react-label';
 5 | import { cva } from 'class-variance-authority';
 6 | 
 7 | import { cn } from '@/lib/utils';
 8 | 
 9 | const labelVariants = cva( // called with base classes only; no variants object is passed
10 |   'text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70'
11 | );
12 | 
13 | const Label = React.forwardRef(({ className, ...props }, ref) => ( // LabelPrimitive.Root with base classes; forwards ref and remaining props
14 |   <LabelPrimitive.Root
15 |     ref={ref}
16 |     className={cn(labelVariants(), className)}
17 |     {...props}
18 |   />
19 | ));
20 | Label.displayName = LabelPrimitive.Root.displayName; // reuse the primitive's display name
21 | 
22 | export { Label };
23 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/components/ui/textarea.jsx:
--------------------------------------------------------------------------------
 1 | import * as React from 'react';
 2 | 
 3 | import { cn } from '@/lib/utils';
 4 | 
 5 | const Textarea = React.forwardRef(({ className, ...props }, ref) => { // styled <textarea>; forwards ref and all remaining props
 6 |   return (
 7 |     <textarea
 8 |       className={cn(
 9 |         'flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50',
10 |         className
11 |       )}
12 |       ref={ref}
13 |       {...props}
14 |     />
15 |   );
16 | });
17 | Textarea.displayName = 'Textarea'; // set explicitly because the wrapped component is an anonymous arrow function
18 | 
19 | export { Textarea };
20 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/hooks/useMobile.js:
--------------------------------------------------------------------------------
 1 | import { useState, useEffect } from 'react';
 2 | 
 3 | export function useMobile(breakpoint = 768) { // hook: true while window.innerWidth < breakpoint
 4 |   const [isMobile, setIsMobile] = useState(false); // false on the first render, corrected once the effect runs
 5 | 
 6 |   useEffect(() => {
 7 |     const handleResize = () => {
 8 |       setIsMobile(window.innerWidth < breakpoint); // strict comparison: a width equal to breakpoint is not mobile
 9 |     };
10 | 
11 |     // Set initial value
12 |     handleResize();
13 | 
14 |     // Add event listener
15 |     window.addEventListener('resize', handleResize);
16 | 
17 |     // Cleanup
18 |     return () => {
19 |       window.removeEventListener('resize', handleResize);
20 |     };
21 |   }, [breakpoint]); // re-run, and re-register the listener, whenever breakpoint changes
22 | 
23 |   return isMobile;
24 | }
25 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/lib/config.js:
--------------------------------------------------------------------------------
 1 | // Configuration for dashboard cache and UI settings
 2 | 
 3 | // Cache TTL durations (in milliseconds)
 4 | export const CACHE_CONFIG = {
 5 |   DEFAULT_TTL: 2 * 60 * 1000, // 2 minutes
 6 | };
 7 | 
 8 | // Refresh intervals for different data types (in milliseconds)
 9 | export const REFRESH_INTERVALS = {
10 |   REFRESH_INTERVAL: 30 * 1000, // 30 seconds - standard refresh interval for all pages
11 |   GPU_REFRESH_INTERVAL: 30 * 1000, // 30 seconds - aligned with standard refresh interval
12 | };
13 | 
14 | // UI configuration
15 | export const UI_CONFIG = {
16 |   NAME_TRUNCATE_LENGTH: 20, // Maximum length for truncated names
17 | };
18 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/lib/utils.js:
--------------------------------------------------------------------------------
1 | import { clsx } from 'clsx';
2 | import { twMerge } from 'tailwind-merge';
3 | 
4 | export function cn(...inputs) { // collects all arguments, passes them to clsx, then passes that result to twMerge
5 |   return twMerge(clsx(inputs));
6 | }
7 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/_app.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import PropTypes from 'prop-types';
 3 | import '@/app/globals.css';
 4 | import { useEffect } from 'react';
 5 | import { BASE_PATH } from '@/data/connectors/constants';
 6 | 
 7 | function MyApp({ Component, pageProps }) { // appends a favicon <link> on mount, then renders Component with pageProps
 8 |   useEffect(() => {
 9 |     const link = document.createElement('link');
10 |     link.rel = 'icon';
11 |     link.href = `${BASE_PATH}/favicon.ico`;
12 |     document.head.appendChild(link);
13 |   }, []); // empty dependency list; there is no cleanup, so the link is never removed
14 | 
15 |   return <Component {...pageProps} />;
16 | }
17 | 
18 | MyApp.propTypes = { // both props destructured by MyApp are declared required
19 |   Component: PropTypes.elementType.isRequired,
20 |   pageProps: PropTypes.object.isRequired,
21 | };
22 | 
23 | export default MyApp;
24 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/clusters.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import Head from 'next/head';
 3 | import { Clusters } from '@/components/clusters';
 4 | 
 5 | export default function ClustersPage() { // sets the document title and renders <Clusters />
 6 |   return (
 7 |     <>
 8 |       <Head>
 9 |         <title>Clusters | SkyPilot Dashboard</title>
10 |       </Head>
11 |       <Clusters />
12 |     </>
13 |   );
14 | }
15 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/index.js:
--------------------------------------------------------------------------------
 1 | import { useEffect } from 'react';
 2 | import { useRouter } from 'next/router';
 3 | 
 4 | export default function Index() { // renders nothing; navigates away from an effect after mount
 5 |   const router = useRouter();
 6 | 
 7 |   useEffect(() => {
 8 |     if (router.asPath === '/') { // the bare root path is sent to /clusters
 9 |       router.push('/clusters');
10 |     } else {
11 |       router.push(router.asPath); // any other path is pushed again unchanged
12 |     }
13 |   }, [router]); // runs again whenever the router object changes
14 | 
15 |   // Return null or a loading state while redirecting
16 |   return null;
17 | }
18 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/infra.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { GPUs } from '@/components/infra';
 3 | import Head from 'next/head';
 4 | 
 5 | export default function InfraPage() { // sets the document title and renders <GPUs />
 6 |   return (
 7 |     <>
 8 |       <Head>
 9 |         <title>Infra | SkyPilot Dashboard</title>
10 |       </Head>
11 |       <GPUs />
12 |     </>
13 |   );
14 | }
15 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/infra/[context].js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { GPUs } from '@/components/infra';
 3 | import Head from 'next/head';
 4 | 
 5 | export default function InfraContextPage() { // same title and <GPUs /> markup as the page in pages/infra.js; NOTE(review): presumably <GPUs /> reads the [context] segment itself; confirm
 6 |   return (
 7 |     <>
 8 |       <Head>
 9 |         <title>Infra | SkyPilot Dashboard</title>
10 |       </Head>
11 |       <GPUs />
12 |     </>
13 |   );
14 | }
15 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/jobs.js:
--------------------------------------------------------------------------------
 1 | import Head from 'next/head';
 2 | import { ManagedJobs } from '@/components/jobs';
 3 | 
 4 | export default function JobsPage() { // sets the document title and renders <ManagedJobs />
 5 |   return (
 6 |     <>
 7 |       <Head>
 8 |         <title>Managed Jobs | SkyPilot Dashboard</title>
 9 |       </Head>
10 |       <ManagedJobs />
11 |     </>
12 |   );
13 | }
14 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/users.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import Head from 'next/head';
 3 | import { Users } from '@/components/users';
 4 | 
 5 | export default function UsersPage() { // sets the document title and renders <Users />
 6 |   return (
 7 |     <>
 8 |       <Head>
 9 |         <title>Users | SkyPilot Dashboard</title>
10 |       </Head>
11 |       <Users />
12 |     </>
13 |   );
14 | }
15 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/workspaces.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import Head from 'next/head';
 3 | import { Layout } from '@/components/elements/layout';
 4 | import { Workspaces } from '@/components/workspaces'; // This component will be created next
 5 | 
 6 | export default function WorkspacesPage() { // sets the document title and renders <Workspaces /> inside <Layout>
 7 |   return (
 8 |     <>
 9 |       <Head>
10 |         <title>Workspaces | SkyPilot Dashboard</title>
11 |       </Head>
12 |       <Layout highlighted="workspaces">
13 |         <Workspaces />
14 |       </Layout>
15 |     </>
16 |   );
17 | }
18 | 


--------------------------------------------------------------------------------
/sky/dashboard/src/pages/workspaces/[name].js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { useRouter } from 'next/router';
 3 | import { WorkspaceEditor } from '@/components/workspace-editor';
 4 | 
 5 | export default function WorkspacePage() { // renders <WorkspaceEditor /> for the `name` route parameter
 6 |   const router = useRouter();
 7 |   const { name } = router.query; // `name` comes from the [name] segment in this file's route
 8 | 
 9 |   // Show loading while router is not ready or name param is not available
10 |   if (!router.isReady || !name) {
11 |     return <div>Loading...</div>;
12 |   }
13 | 
14 |   return <WorkspaceEditor workspaceName={name} />;
15 | }
16 | 


--------------------------------------------------------------------------------
/sky/data/__init__.py:
--------------------------------------------------------------------------------
1 | """Sky Data."""
2 | from sky.data.storage import Storage
3 | from sky.data.storage import StorageMode
4 | from sky.data.storage import StoreType
5 | 
6 | __all__ = ['Storage', 'StorageMode', 'StoreType']
7 | 


--------------------------------------------------------------------------------
/sky/design_docs/cluster_status.md:
--------------------------------------------------------------------------------
 1 | # Cluster State: Definition & Transition Diagram
 2 | 
 3 | * **INIT**: Provisioning / runtime setup has not finished, or the cluster is in an abnormal state, e.g. partially UP.
 4 | * **UP**: The cluster is healthy, i.e. all the nodes are UP and the ray cluster is correctly running.
 5 | * **STOPPED**: All the nodes in the cluster are STOPPED.
 6 | * **TERMINATED**: The cluster has been terminated or does not exist. This is implicitly indicated by the cluster not appearing in the database.
 7 | 
 8 | ## Cluster State Transition for Refreshing
 9 | ### On-Demand Cluster
10 | <!-- Image edited in https://docs.google.com/presentation/d/1PFNw6OYnr5rh4gKPvg43nmP_t1W0AXvyjszE7nNHPQ0/edit?usp=sharing -->
11 | ![Transition Diagram for On Demand](figures/cluster-state-transition.svg)
12 | 


--------------------------------------------------------------------------------
/sky/design_docs/figures/grafana-loki-setup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/design_docs/figures/grafana-loki-setup.png


--------------------------------------------------------------------------------
/sky/design_docs/workspaces.md:
--------------------------------------------------------------------------------
 1 | # Workspaces
 2 | 
 3 | Workspaces are a way to group clusters/jobs together. A workspace could contain different cloud credentials and different configurations.
 4 | 
 5 | 
 6 | ## Workspace verification
 7 | 
 8 | For each cluster creation / stop / down request, the active workspace is verified against the workspace of the cluster, and an error is raised if they differ.
 9 | 
10 | The jobs controller is always launched in the default workspace, and managed jobs are assigned to the active workspace according to the user's client settings.
11 | 


--------------------------------------------------------------------------------
/sky/jobs/client/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/jobs/client/__init__.py


--------------------------------------------------------------------------------
/sky/jobs/dashboard/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/jobs/dashboard/static/favicon.ico


--------------------------------------------------------------------------------
/sky/jobs/server/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/sky/provision/aws/__init__.py:
--------------------------------------------------------------------------------
 1 | """AWS provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.aws.config import bootstrap_instances
 4 | from sky.provision.aws.instance import cleanup_ports
 5 | from sky.provision.aws.instance import get_cluster_info
 6 | from sky.provision.aws.instance import open_ports
 7 | from sky.provision.aws.instance import query_instances
 8 | from sky.provision.aws.instance import run_instances
 9 | from sky.provision.aws.instance import stop_instances
10 | from sky.provision.aws.instance import terminate_instances
11 | from sky.provision.aws.instance import wait_instances
12 | 
13 | __all__ = ('bootstrap_instances', 'run_instances', 'stop_instances',
14 |            'terminate_instances', 'wait_instances', 'get_cluster_info',
15 |            'open_ports', 'cleanup_ports', 'query_instances')
16 | 


--------------------------------------------------------------------------------
/sky/provision/azure/__init__.py:
--------------------------------------------------------------------------------
 1 | """Azure provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.azure.config import bootstrap_instances
 4 | from sky.provision.azure.instance import cleanup_ports
 5 | from sky.provision.azure.instance import get_cluster_info
 6 | from sky.provision.azure.instance import open_ports
 7 | from sky.provision.azure.instance import query_instances
 8 | from sky.provision.azure.instance import run_instances
 9 | from sky.provision.azure.instance import stop_instances
10 | from sky.provision.azure.instance import terminate_instances
11 | from sky.provision.azure.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/cudo/config.py:
--------------------------------------------------------------------------------
 1 | """Cudo Compute configuration bootstrapping."""
 2 | 
 3 | from sky.provision import common
 4 | 
 5 | 
 6 | def bootstrap_instances(
 7 |         region: str, cluster_name: str,
 8 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
 9 |     """Bootstraps instances for the given cluster; returns config as-is."""
10 |     del region, cluster_name  # unused
11 |     return config
12 | 


--------------------------------------------------------------------------------
/sky/provision/do/__init__.py:
--------------------------------------------------------------------------------
 1 | """DO provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.do.config import bootstrap_instances
 4 | from sky.provision.do.instance import cleanup_ports
 5 | from sky.provision.do.instance import get_cluster_info
 6 | from sky.provision.do.instance import open_ports
 7 | from sky.provision.do.instance import query_instances
 8 | from sky.provision.do.instance import run_instances
 9 | from sky.provision.do.instance import stop_instances
10 | from sky.provision.do.instance import terminate_instances
11 | from sky.provision.do.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/do/config.py:
--------------------------------------------------------------------------------
 1 | """DO configuration bootstrapping."""
 2 | 
 3 | from sky import sky_logging
 4 | from sky.provision import common
 5 | 
 6 | logger = sky_logging.init_logger(__name__)
 7 | 
 8 | 
 9 | def bootstrap_instances(
10 |         region: str, cluster_name: str,
11 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
12 |     """Bootstraps instances for the given cluster; returns config as-is."""
13 |     del region, cluster_name  # unused
14 |     return config
15 | 


--------------------------------------------------------------------------------
/sky/provision/do/constants.py:
--------------------------------------------------------------------------------
 1 | """DO cloud constants
 2 | """
 3 | 
 4 | POLL_INTERVAL = 5
 5 | WAIT_DELETE_VOLUMES = 5
 6 | 
 7 | GPU_IMAGES = {
 8 |     'gpu-h100x1-80gb': 'gpu-h100x1-base',
 9 |     'gpu-h100x8-640gb': 'gpu-h100x8-base',
10 | }
11 | 


--------------------------------------------------------------------------------
/sky/provision/fluidstack/__init__.py:
--------------------------------------------------------------------------------
 1 | """Fluidstack provisioner module."""
 2 | 
 3 | from sky.provision.fluidstack.config import bootstrap_instances
 4 | from sky.provision.fluidstack.instance import cleanup_ports
 5 | from sky.provision.fluidstack.instance import get_cluster_info
 6 | from sky.provision.fluidstack.instance import open_ports
 7 | from sky.provision.fluidstack.instance import query_instances
 8 | from sky.provision.fluidstack.instance import run_instances
 9 | from sky.provision.fluidstack.instance import stop_instances
10 | from sky.provision.fluidstack.instance import terminate_instances
11 | from sky.provision.fluidstack.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/fluidstack/config.py:
--------------------------------------------------------------------------------
 1 | """FluidStack configuration bootstrapping."""
 2 | 
 3 | from sky.provision import common
 4 | 
 5 | 
 6 | def bootstrap_instances(
 7 |         region: str, cluster_name: str,
 8 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
 9 |     """Bootstraps instances for the given cluster; returns config as-is."""
10 |     del region, cluster_name  # unused
11 | 
12 |     return config
13 | 


--------------------------------------------------------------------------------
/sky/provision/gcp/__init__.py:
--------------------------------------------------------------------------------
 1 | """GCP provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.gcp.config import bootstrap_instances
 4 | from sky.provision.gcp.instance import cleanup_ports
 5 | from sky.provision.gcp.instance import get_cluster_info
 6 | from sky.provision.gcp.instance import open_ports
 7 | from sky.provision.gcp.instance import query_instances
 8 | from sky.provision.gcp.instance import run_instances
 9 | from sky.provision.gcp.instance import stop_instances
10 | from sky.provision.gcp.instance import terminate_instances
11 | from sky.provision.gcp.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/kubernetes/__init__.py:
--------------------------------------------------------------------------------
 1 | """Kubernetes provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.kubernetes.config import bootstrap_instances
 4 | from sky.provision.kubernetes.instance import get_cluster_info
 5 | from sky.provision.kubernetes.instance import get_command_runners
 6 | from sky.provision.kubernetes.instance import query_instances
 7 | from sky.provision.kubernetes.instance import run_instances
 8 | from sky.provision.kubernetes.instance import stop_instances
 9 | from sky.provision.kubernetes.instance import terminate_instances
10 | from sky.provision.kubernetes.instance import wait_instances
11 | from sky.provision.kubernetes.network import cleanup_ports
12 | from sky.provision.kubernetes.network import open_ports
13 | from sky.provision.kubernetes.network import query_ports
14 | 


--------------------------------------------------------------------------------
/sky/provision/kubernetes/constants.py:
--------------------------------------------------------------------------------
1 | """Constants for Kubernetes provisioning."""
2 | 
3 | NO_GPU_HELP_MESSAGE = ('If your cluster contains GPUs, make sure '
4 |                        'nvidia.com/gpu resource is available on the nodes and '
5 |                        'the node labels for identifying GPUs '
6 |                        '(e.g., skypilot.co/accelerator) are setup correctly. ')
7 | 
8 | KUBERNETES_IN_CLUSTER_NAMESPACE_ENV_VAR = 'SKYPILOT_IN_CLUSTER_NAMESPACE'
9 | 


--------------------------------------------------------------------------------
/sky/provision/lambda_cloud/__init__.py:
--------------------------------------------------------------------------------
 1 | """Lambda provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.lambda_cloud.config import bootstrap_instances
 4 | from sky.provision.lambda_cloud.instance import cleanup_ports
 5 | from sky.provision.lambda_cloud.instance import get_cluster_info
 6 | from sky.provision.lambda_cloud.instance import open_ports
 7 | from sky.provision.lambda_cloud.instance import query_instances
 8 | from sky.provision.lambda_cloud.instance import run_instances
 9 | from sky.provision.lambda_cloud.instance import stop_instances
10 | from sky.provision.lambda_cloud.instance import terminate_instances
11 | from sky.provision.lambda_cloud.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/lambda_cloud/config.py:
--------------------------------------------------------------------------------
 1 | """Lambda Cloud configuration bootstrapping"""
 2 | 
 3 | from sky.provision import common
 4 | 
 5 | 
 6 | def bootstrap_instances(
 7 |         region: str, cluster_name: str,
 8 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
 9 |     del region, cluster_name  # unused
10 |     return config  # the provision config is returned unmodified
11 | 


--------------------------------------------------------------------------------
/sky/provision/nebius/__init__.py:
--------------------------------------------------------------------------------
 1 | """Nebius provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.nebius.config import bootstrap_instances
 4 | from sky.provision.nebius.instance import cleanup_ports
 5 | from sky.provision.nebius.instance import get_cluster_info
 6 | from sky.provision.nebius.instance import open_ports
 7 | from sky.provision.nebius.instance import query_instances
 8 | from sky.provision.nebius.instance import run_instances
 9 | from sky.provision.nebius.instance import stop_instances
10 | from sky.provision.nebius.instance import terminate_instances
11 | from sky.provision.nebius.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/nebius/config.py:
--------------------------------------------------------------------------------
 1 | """Nebius configuration bootstrapping."""
 2 | 
 3 | from sky.provision import common
 4 | 
 5 | 
 6 | def bootstrap_instances(
 7 |         region: str, cluster_name: str,
 8 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
 9 |     """Bootstraps instances for the given cluster; returns config as-is."""
10 |     del region, cluster_name  # unused
11 |     return config
12 | 


--------------------------------------------------------------------------------
/sky/provision/oci/__init__.py:
--------------------------------------------------------------------------------
 1 | """OCI provisioner for SkyPilot.
 2 | 
 3 | History:
 4 |  - Hysun He (hysun.he@oracle.com) @ Oct.16, 2024: Initial implementation
 5 | """
 6 | 
 7 | from sky.provision.oci.config import bootstrap_instances
 8 | from sky.provision.oci.instance import cleanup_ports
 9 | from sky.provision.oci.instance import get_cluster_info
10 | from sky.provision.oci.instance import open_ports
11 | from sky.provision.oci.instance import query_instances
12 | from sky.provision.oci.instance import run_instances
13 | from sky.provision.oci.instance import stop_instances
14 | from sky.provision.oci.instance import terminate_instances
15 | from sky.provision.oci.instance import wait_instances
16 | 


--------------------------------------------------------------------------------
/sky/provision/paperspace/__init__.py:
--------------------------------------------------------------------------------
 1 | """Paperspace provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.paperspace.config import bootstrap_instances
 4 | from sky.provision.paperspace.instance import cleanup_ports
 5 | from sky.provision.paperspace.instance import get_cluster_info
 6 | from sky.provision.paperspace.instance import open_ports
 7 | from sky.provision.paperspace.instance import query_instances
 8 | from sky.provision.paperspace.instance import run_instances
 9 | from sky.provision.paperspace.instance import stop_instances
10 | from sky.provision.paperspace.instance import terminate_instances
11 | from sky.provision.paperspace.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/runpod/__init__.py:
--------------------------------------------------------------------------------
 1 | """RunPod provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.runpod.config import bootstrap_instances
 4 | from sky.provision.runpod.instance import cleanup_ports
 5 | from sky.provision.runpod.instance import get_cluster_info
 6 | from sky.provision.runpod.instance import query_instances
 7 | from sky.provision.runpod.instance import query_ports
 8 | from sky.provision.runpod.instance import run_instances
 9 | from sky.provision.runpod.instance import stop_instances
10 | from sky.provision.runpod.instance import terminate_instances
11 | from sky.provision.runpod.instance import wait_instances
12 | 


--------------------------------------------------------------------------------
/sky/provision/runpod/api/__init__.py:
--------------------------------------------------------------------------------
1 | """RunPod low level API support for spot pod."""
2 | 
3 | from sky.provision.runpod.api.commands import create_spot_pod
4 | 


--------------------------------------------------------------------------------
/sky/provision/runpod/config.py:
--------------------------------------------------------------------------------
 1 | """Runpod configuration bootstrapping."""
 2 | 
 3 | from sky.provision import common
 4 | 
 5 | 
 6 | def bootstrap_instances(
 7 |         region: str, cluster_name: str,
 8 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
 9 |     """Bootstraps instances for the given cluster; returns config as-is."""
10 |     del region, cluster_name  # unused
11 |     return config
12 | 


--------------------------------------------------------------------------------
/sky/provision/vast/__init__.py:
--------------------------------------------------------------------------------
 1 | """Vast provisioner for SkyPilot."""
 2 | 
 3 | from sky.provision.vast.config import bootstrap_instances
 4 | from sky.provision.vast.instance import cleanup_ports
 5 | from sky.provision.vast.instance import get_cluster_info
 6 | from sky.provision.vast.instance import query_instances
 7 | from sky.provision.vast.instance import run_instances
 8 | from sky.provision.vast.instance import stop_instances
 9 | from sky.provision.vast.instance import terminate_instances
10 | from sky.provision.vast.instance import wait_instances
11 | 


--------------------------------------------------------------------------------
/sky/provision/vast/config.py:
--------------------------------------------------------------------------------
 1 | """Vast configuration bootstrapping."""
 2 | 
 3 | from sky.provision import common
 4 | 
 5 | 
 6 | def bootstrap_instances(
 7 |         region: str, cluster_name: str,
 8 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
 9 |     """Bootstraps instances for the given cluster; returns config as-is."""
10 |     del region, cluster_name  # unused
11 |     return config
12 | 


--------------------------------------------------------------------------------
/sky/provision/vsphere/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/provision/vsphere/common/__init__.py


--------------------------------------------------------------------------------
/sky/provision/vsphere/common/custom_script.py:
--------------------------------------------------------------------------------
 1 | """Vsphere custom script."""
 2 | CUSTOMIZED_SCRIPT = """#!/bin/bash
 3 | if [ x$1 = x"precustomization" ]; then
 4 |     sudo mkdir -p /home/user_placeholder/.ssh
 5 |     sudo echo "ssh_public_key" | sudo tee /home/user_placeholder/.ssh/authorized_keys
 6 |     sudo mkdir -p /home/user_placeholder/.ssh/pre
 7 |     sudo chown -R user_placeholder:user_placeholder /home/user_placeholder/.ssh
 8 |     sudo chmod 700 /home/user_placeholder/.ssh
 9 |     sudo chmod 644 /home/user_placeholder/.ssh/authorized_keys
10 | fi"""
11 | 


--------------------------------------------------------------------------------
/sky/provision/vsphere/common/id_generator.py:
--------------------------------------------------------------------------------
 1 | """ID Generator
 2 | """
 3 | 
 4 | import random
 5 | import string
 6 | import uuid
 7 | 
 8 | 
 9 | def generate_random_uuid():
10 |     return str(uuid.uuid4())  # random (version 4) UUID in string form
11 | 
12 | 
13 | def rand(value):
14 |     return value + generate_random_string(5)  # value + 5 random letters
15 | 
16 | 
17 | def generate_random_string(length):  # `length` random uppercase letters
18 |     return ''.join(random.choice(string.ascii_uppercase) for _ in range(length))
19 | 


--------------------------------------------------------------------------------
/sky/provision/vsphere/common/service_manager_factory.py:
--------------------------------------------------------------------------------
 1 | """Service manager factory
 2 | """
 3 | 
 4 | from sky.provision.vsphere.common import service_manager as service_manager_lib
 5 | 
 6 | 
 7 | class ServiceManagerFactory(object):
 8 |     """Factory class for getting a connected service manager for a node.
 9 |     """
10 | 
11 |     service_manager = None  # not assigned by get_service_manager() below
12 | 
13 |     @classmethod
14 |     def get_service_manager(cls, server, username, password, skip_verification):
15 |         service_manager = service_manager_lib.ServiceManager(
16 |             server, username, password, skip_verification)
17 |         service_manager.connect()  # connect before returning to the caller
18 |         return service_manager  # a new manager is built on every call
19 | 


--------------------------------------------------------------------------------
/sky/provision/vsphere/config.py:
--------------------------------------------------------------------------------
 1 | """Vsphere configuration bootstrapping."""
 2 | 
 3 | from sky import sky_logging
 4 | from sky.provision import common
 5 | 
 6 | logger = sky_logging.init_logger(__name__)
 7 | 
 8 | 
 9 | def bootstrap_instances(
10 |         region: str, cluster_name: str,
11 |         config: common.ProvisionConfig) -> common.ProvisionConfig:
12 |     """See sky/provision/__init__.py"""
13 |     logger.info(f'New provision of Vsphere: bootstrap_instances().Region: '
14 |                 f'{region} Cluster Name:{cluster_name}')
15 | 
16 |     # TODO: process config.
17 | 
18 |     return config  # currently returned unchanged (see TODO above)
19 | 


--------------------------------------------------------------------------------
/sky/serve/client/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/serve/client/__init__.py


--------------------------------------------------------------------------------
/sky/serve/server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/serve/server/__init__.py


--------------------------------------------------------------------------------
/sky/server/__init__.py:
--------------------------------------------------------------------------------
1 | """SkyPilot API Server."""
2 | 


--------------------------------------------------------------------------------
/sky/server/requests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/server/requests/__init__.py


--------------------------------------------------------------------------------
/sky/server/requests/queues/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/server/requests/queues/__init__.py


--------------------------------------------------------------------------------
/sky/server/requests/queues/local_queue.py:
--------------------------------------------------------------------------------
 1 | """Process-local queue implementation."""
 2 | import queue
 3 | import threading
 4 | from typing import Dict
 5 | 
 6 | # Global dict to store queues
 7 | _queues: Dict[str, queue.Queue] = {}
 8 | _lock = threading.Lock()
 9 | 
10 | 
11 | def get_queue(queue_name: str) -> queue.Queue:
12 |     """Get or create a queue by name (one shared queue per name)."""
13 |     with _lock:  # make the check-and-create step atomic across threads
14 |         if queue_name not in _queues:
15 |             _queues[queue_name] = queue.Queue()
16 |         return _queues[queue_name]
17 | 


--------------------------------------------------------------------------------
/sky/server/requests/serializers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/server/requests/serializers/__init__.py


--------------------------------------------------------------------------------
/sky/setup_files/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include sky/backends/monkey_patches/*.py
 2 | exclude sky/clouds/service_catalog/data_fetchers/analyze.py
 3 | include sky/provision/kubernetes/manifests/*
 4 | include sky/provision/azure/*
 5 | include sky/setup_files/*
 6 | include sky/skylet/*.sh
 7 | include sky/skylet/LICENSE
 8 | include sky/skylet/providers/ibm/*
 9 | include sky/skylet/providers/scp/*
10 | include sky/skylet/providers/*.py
11 | include sky/skylet/ray_patches/*.patch
12 | include sky/jobs/dashboard/*
13 | include sky/jobs/dashboard/templates/*
14 | include sky/jobs/dashboard/static/*
15 | include sky/templates/*
16 | include sky/utils/kubernetes/*
17 | include sky/server/html/*
18 | recursive-include sky/dashboard/out *
19 | 


--------------------------------------------------------------------------------
/sky/skylet/README.md:
--------------------------------------------------------------------------------
1 | # Skylet
2 | 
3 | Skylet is a subpackage of SkyPilot. It provides utilities to be installed and used by a remote cluster.
4 | 


--------------------------------------------------------------------------------
/sky/skylet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/skylet/__init__.py


--------------------------------------------------------------------------------
/sky/skylet/providers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/skylet/providers/__init__.py


--------------------------------------------------------------------------------
/sky/skylet/providers/ibm/__init__.py:
--------------------------------------------------------------------------------
1 | """IBM node provider"""
2 | from sky.skylet.providers.ibm.node_provider import IBMVPCNodeProvider
3 | 


--------------------------------------------------------------------------------
/sky/skylet/providers/scp/__init__.py:
--------------------------------------------------------------------------------
1 | """SCP node provider"""
2 | from sky.skylet.providers.scp.node_provider import SCPNodeProvider
3 | 


--------------------------------------------------------------------------------
/sky/skylet/ray_patches/autoscaler.py.patch:
--------------------------------------------------------------------------------
1 | 0a1,3
2 | > # From https://github.com/ray-project/ray/blob/ray-2.9.3/python/ray/autoscaler/_private/autoscaler.py
3 | > # Sky patch changes:
4 | > #  - enable upscaling_speed to be 0.0
5 | 1074c1077
6 | <             if upscaling_speed:
7 | ---
8 | >             if upscaling_speed is not None: # NOTE(sky): enable 0.0
9 | 


--------------------------------------------------------------------------------
/sky/skylet/ray_patches/cli.py.patch:
--------------------------------------------------------------------------------
 1 | 0a1,4
 2 | > # Adapted from https://github.com/ray-project/ray/blob/ray-2.9.3/dashboard/modules/job/cli.py
 3 | > # Fixed the problem in ray's issue https://github.com/ray-project/ray/issues/26514
 4 | > # Otherwise, the output redirection ">" will not work.
 5 | > 
 6 | 273c277
 7 | <         entrypoint=list2cmdline(entrypoint),
 8 | ---
 9 | >         entrypoint=" ".join(entrypoint),
10 | 


--------------------------------------------------------------------------------
/sky/skylet/ray_patches/command_runner.py.patch:
--------------------------------------------------------------------------------
1 | 0a1,2
2 | > # From https://github.com/ray-project/ray/blob/ray-2.9.3/python/ray/autoscaler/_private/command_runner.py
3 | > 
4 | 140c142
5 | <                     "ControlPersist": "10s",
6 | ---
7 | >                     "ControlPersist": "300s",
8 | 


--------------------------------------------------------------------------------
/sky/skylet/ray_patches/log_monitor.py.patch:
--------------------------------------------------------------------------------
 1 | 0a1,4
 2 | > # Original file https://github.com/ray-project/ray/blob/ray-2.9.3/python/ray/_private/log_monitor.py
 3 | > # Fixed the problem for progress bar, as the latest version does not preserve \r for progress bar.
 4 | > # We change the newline handling back to https://github.com/ray-project/ray/blob/ray-1.10.0/python/ray/_private/log_monitor.py#L299-L300
 5 | > 
 6 | 377c381,382
 7 | <                     next_line = next_line.rstrip("\r\n")
 8 | ---
 9 | >                     if next_line.endswith("\n"):
10 | >                         next_line = next_line[:-1]
11 | 


--------------------------------------------------------------------------------
/sky/skylet/ray_patches/resource_demand_scheduler.py.patch:
--------------------------------------------------------------------------------
 1 | 0a1,5
 2 | > # From https://github.com/ray-project/ray/blob/ray-2.9.3/python/ray/autoscaler/_private/resource_demand_scheduler.py
 3 | > # Sky patch changes:
 4 | > #  - no new nodes are allowed to be launched when the upscaling_speed is 0
 5 | > #  - comment out "assert not unfulfilled": this seems a buggy assert
 6 | > 
 7 | 451c456,459
 8 | <             if upper_bound > 0:
 9 | ---
10 | >             # NOTE(sky): do not autoscale when upscaling speed is 0.
11 | >             if self.upscaling_speed == 0:
12 | >                 upper_bound = 0
13 | >             if upper_bound >= 0:
14 | 595c603
15 | <             assert not unfulfilled
16 | ---
17 | >             # assert not unfulfilled  # NOTE(sky): buggy assert.
18 | 


--------------------------------------------------------------------------------
/sky/skylet/ray_patches/updater.py.patch:
--------------------------------------------------------------------------------
1 | 0a1,4
2 | > # From https://github.com/ray-project/ray/blob/releases/2.9.3/python/ray/autoscaler/_private/updater.py
3 | > # Sky patch changes:
4 | > #  - Ensure the node state is refreshed before checking the node is terminated.
5 | > 
6 | 327a332
7 | >                     self.provider.non_terminated_nodes({})
8 | 


--------------------------------------------------------------------------------
/sky/skylet/ray_patches/worker.py.patch:
--------------------------------------------------------------------------------
 1 | 0a1,4
 2 | > # Adapted from https://github.com/ray-project/ray/blob/ray-2.9.3/python/ray/_private/worker.py
 3 | > # Fixed the problem in ray's issue https://github.com/ray-project/ray/issues/9233
 4 | > # Tracked in PR https://github.com/ray-project/ray/pull/21977/files.
 5 | > 
 6 | 2022a2027,2034
 7 | >     def end_for(line: str) -> str:
 8 | >         if sys.platform == "win32":
 9 | >             return "\n"
10 | >         if line.endswith("\r"):
11 | >             return ""
12 | >         return "\n"
13 | > 
14 | > 
15 | 2037a2050
16 | >                     end=end_for(line),
17 | 2054a2068
18 | >                     end=end_for(line),
19 | 


--------------------------------------------------------------------------------
/sky/templates/kubernetes-loadbalancer.yml.j2:
--------------------------------------------------------------------------------
 1 | service_spec:
 2 |   apiVersion: v1
 3 |   kind: Service
 4 |   metadata:
 5 |     name: {{ service_name }}
 6 |     labels:
 7 |       parent: skypilot
 8 |       {%- for label_key, label_value in labels.items() %}
 9 |       {{ label_key }}: {{ label_value|tojson }}
10 |       {%- endfor %}
11 |     annotations:
12 |       {%- for key, value in annotations.items() %}
13 |       {{ key }}: {{ value|tojson }}
14 |       {%- endfor %}
15 |   spec:
16 |     type: LoadBalancer
17 |     selector:
18 |       {{ selector_key }}: {{ selector_value }}
19 |     ports:
20 | {%- for port in ports %}
21 |     - port: {{ port | int }}
22 |       targetPort: {{ port | int }}
23 |       protocol: TCP
24 |       name: port{{ port }}
25 | {%- endfor %}
26 | 


--------------------------------------------------------------------------------
/sky/usage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/usage/__init__.py


--------------------------------------------------------------------------------
/sky/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/utils/__init__.py


--------------------------------------------------------------------------------
/sky/utils/aws/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/utils/aws/__init__.py


--------------------------------------------------------------------------------
/sky/utils/aws/get_default_security_group.py:
--------------------------------------------------------------------------------
 1 | """Script to get the default security group"""
 2 | from sky.clouds import aws
 3 | 
 4 | 
 5 | def main():  # prints aws.DEFAULT_SECURITY_GROUP_NAME to stdout
 6 |     default_security_group = aws.DEFAULT_SECURITY_GROUP_NAME
 7 |     print(f'{default_security_group}')
 8 | 
 9 | 
10 | if __name__ == '__main__':
11 |     main()
12 | 


--------------------------------------------------------------------------------
/sky/utils/cli_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/utils/cli_utils/__init__.py


--------------------------------------------------------------------------------
/sky/utils/kubernetes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/utils/kubernetes/__init__.py


--------------------------------------------------------------------------------
/sky/utils/rich_console_utils.py:
--------------------------------------------------------------------------------
 1 | """Utility functions for rich console."""
 2 | import typing
 3 | 
 4 | from sky.adaptors import common as adaptors_common
 5 | 
 6 | if typing.TYPE_CHECKING:
 7 |     import rich.console as rich_console
 8 | else:
 9 |     rich_console = adaptors_common.LazyImport('rich.console')
10 | 
11 | _console = None  # Lazy initialized console
12 | 
13 | 
14 | # Move global console to a function to avoid
15 | # importing rich console if not used
16 | def get_console():
17 |     """Get or create the rich console (created once, then reused)."""
18 |     global _console  # module-level cache; None until the first call
19 |     if _console is None:
20 |         _console = rich_console.Console(soft_wrap=True)
21 |     return _console
22 | 


--------------------------------------------------------------------------------
/sky/workspaces/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/sky/workspaces/__init__.py


--------------------------------------------------------------------------------
/tests/git_info_exclude_test:
--------------------------------------------------------------------------------
1 | question_mark/excluded?.txt
2 | square_bracket/excluded[0-9].txt
3 | square_bracket_single/excluded[01].txt
4 | square_bracket_excla/excluded[!01].txt
5 | square_bracket_alpha/excluded[a-z].txt
6 | excluded_dir/
7 | nested_double_asterisk/**/also_exclude.txt
8 | nested_wildcard_dir/*day/also_exclude.txt
9 | 


--------------------------------------------------------------------------------
/tests/gitignore_test:
--------------------------------------------------------------------------------
 1 | # This test should upload only included.txt, included.log, and included_dir/included.log
 2 | **/double_asterisk_excluded
 3 | **/double_asterisk_excluded_dir
 4 | **/parent/*.txt
 5 | **/parent/child/*.txt
 6 | *.log
 7 | exp-*/
 8 | !included.*
 9 | excluded.*
10 | /front_slash_excluded
11 | no_slash_excluded
12 | 


--------------------------------------------------------------------------------
/tests/kubernetes/eks_test_cluster.yaml:
--------------------------------------------------------------------------------
 1 | # Usage:
 2 | # eksctl create cluster -f eks_test_cluster.yaml
 3 | # eksctl delete cluster -f eks_test_cluster.yaml
 4 | apiVersion: eksctl.io/v1alpha5
 5 | kind: ClusterConfig
 6 | 
 7 | metadata:
 8 |   name: my-cluster
 9 |   region: us-west-2
10 | 
11 | managedNodeGroups:
12 |   - name: v100-nodes
13 |     instanceType: p3.2xlarge  # This instance type provides 1 NVIDIA V100 GPU.
14 |     desiredCapacity: 1
15 | 
16 |   - name: t4-nodes
17 |     instanceType: g4dn.2xlarge  # This instance type provides 1 NVIDIA T4 GPU.
18 |     desiredCapacity: 1
19 | 
20 |   - name: cpu-nodes # Creates one node with 16 vCPUs
21 |     instanceType: m5.4xlarge
22 |     desiredCapacity: 1
23 | 


--------------------------------------------------------------------------------
/tests/kubernetes/gpu_test_pod.yaml:
--------------------------------------------------------------------------------
 1 | # Runs nvidia-smi in a pod to test that the GPU operator and NVIDIA runtime are set up correctly
 2 | # Run with kubectl apply -f gpu_test_pod.yaml
 3 | apiVersion: v1
 4 | kind: Pod
 5 | metadata:
 6 |   name: skygputest
 7 | spec:
 8 |   restartPolicy: Never
 9 |   containers:
10 |   - name: skygputest
11 |     image: us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot-gpu:latest
12 |     command: ["nvidia-smi"]
13 |     resources:
14 |       limits:
15 |          nvidia.com/gpu: "1"
16 | 


--------------------------------------------------------------------------------
/tests/kubernetes/loadbalancer_test_svc.yaml:
--------------------------------------------------------------------------------
 1 | # Creates a service of type LoadBalancer for the skytest pod in cpu_test_pod.yaml
 2 | #
 3 | # Usage:
 4 | #   kubectl apply -f cpu_test_pod.yaml
 5 | #   kubectl apply -f loadbalancer_test_svc.yaml
 6 | #
 7 | # Access the server by running:
 8 | #  kubectl get svc skytest-loadbalancer
 9 | apiVersion: v1
10 | kind: Service
11 | metadata:
12 |   name: skytest-loadbalancer
13 | spec:
14 |   type: LoadBalancer
15 |   ports:
16 |   - port: 8080
17 |     targetPort: 8080
18 |   selector:
19 |     app: skytest
20 | 


--------------------------------------------------------------------------------
/tests/kubernetes/scripts/clean_k8s.sh:
--------------------------------------------------------------------------------
1 | kubectl delete all -l parent=skypilot
2 | 


--------------------------------------------------------------------------------
/tests/kubernetes/scripts/delete.sh:
--------------------------------------------------------------------------------
1 | kubectl delete -f skypilot_ssh_k8s_deployment.yaml
2 | 


--------------------------------------------------------------------------------
/tests/kubernetes/scripts/install_dashboard.sh:
--------------------------------------------------------------------------------
1 | kubectl apply -f dashboard.yaml
2 | echo "Dashboard installed, please run 'kubectl proxy' and visit http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/#/node?namespace=default"
3 | kubectl proxy
4 | 
5 | # kubectl get ns kubernetes-dashboard -o json | jq '.spec.finalizers = []' | kubectl replace --raw "/api/v1/namespaces/kubernetes-dashboard/finalize" -f -


--------------------------------------------------------------------------------
/tests/kubernetes/scripts/run.sh:
--------------------------------------------------------------------------------
1 | # TODO(kbrgl): Fix secret creation since all SSH keys are now stored in one secret object.
2 | kubectl create secret generic ssh-key-secret --from-file=ssh-publickey=/Users/romilb/.ssh/sky-key.pub
3 | kubectl apply -f skypilot_ssh_k8s_deployment.yaml
4 | # Use kubectl describe service skypilot-service to get the port of the service
5 | kubectl describe service skypilot-service | grep NodePort
6 | echo Run the following command to ssh into the container:
7 | echo ssh sky@127.0.0.1 -p port -i ~/.ssh/sky-key
8 | 


--------------------------------------------------------------------------------
/tests/load_tests/serve.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 180
 5 |   replicas: 2
 6 | 
 7 | resources:
 8 |   ports: 8080
 9 |   cpus: 2+
10 | 
11 | workdir: examples/serve/http_server
12 | run: python3 server.py --port 8080
13 | 


--------------------------------------------------------------------------------
/tests/mypy_files.txt:
--------------------------------------------------------------------------------
1 | sky
2 | 
3 | --exclude sky/benchmark
4 | --exclude sky/callbacks
5 | --exclude sky/backends/monkey_patches
6 | 


--------------------------------------------------------------------------------
/tests/skyserve/auto_restart.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 60
 5 |   replicas: 1
 6 | 
 7 | 
 8 | resources:
 9 |   ports: 8080
10 |   infra: gcp
11 | 
12 | workdir: examples/serve/http_server
13 | 
14 | run: python3 server.py --port 8080
15 | 


--------------------------------------------------------------------------------
/tests/skyserve/cancel/cancel.yaml:
--------------------------------------------------------------------------------
 1 | # This is copied from examples/serve/misc/cancel/service.yaml, but with
 2 | # cloud set to gcp.
 3 | 
 4 | service:
 5 |   readiness_probe:
 6 |     path: /health
 7 |     initial_delay_seconds: 120
 8 | 
 9 | resources:
10 |   ports: 9000
11 |   infra: gcp
12 | 
13 | workdir: examples/serve/misc/cancel
14 | 
15 | setup: pip install aiohttp
16 | 
17 | run: python3 server.py --port 9000
18 | 


--------------------------------------------------------------------------------
/tests/skyserve/failures/initial_delay.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 10
 5 |   replicas: 2
 6 | 
 7 | resources:
 8 |   ports: 8081
 9 | 
10 | run: |
11 |   sleep 1000
12 | 


--------------------------------------------------------------------------------
/tests/skyserve/failures/probing.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 240
 5 |   replicas: 1
 6 | 
 7 | resources:
 8 |   ports: 8081
 9 | 
10 | workdir: tests/skyserve/failures
11 | 
12 | run: python3 probing.py
13 | 


--------------------------------------------------------------------------------
/tests/skyserve/high_availability/config.yaml:
--------------------------------------------------------------------------------
1 | serve:
2 |   controller:
3 |     resources:
4 |       infra: kubernetes
5 |       cpus: 2
6 |     high_availability: true
7 | 


--------------------------------------------------------------------------------
/tests/skyserve/high_availability/service.yaml:
--------------------------------------------------------------------------------
 1 | # Adapted from tests/skyserve/http/gcp.yaml
 2 | service:
 3 |   readiness_probe:
 4 |     path: /health
 5 |     initial_delay_seconds: 20
 6 |   replicas: 1
 7 | 
 8 | resources:
 9 |   ports: 8080
10 |   infra: gcp
11 |   cpus: 2+
12 | 
13 | workdir: examples/serve/http_server
14 | 
15 | # Use 8080 to test jupyter service is terminated
16 | run: python3 server.py --port 8080
17 | 


--------------------------------------------------------------------------------
/tests/skyserve/http/aws.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 20
 5 |   replicas: 2
 6 | 
 7 | resources:
 8 |   ports: 8080
 9 |   infra: aws
10 | 
11 | workdir: examples/serve/http_server
12 | 
13 | run: python3 server.py --port 8080
14 | 


--------------------------------------------------------------------------------
/tests/skyserve/http/azure.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 200
 5 |   replicas: 2
 6 | 
 7 | resources:
 8 |   ports: 8081
 9 |   infra: azure
10 | 
11 | workdir: examples/serve/http_server
12 | 
13 | # Use 8081 to test jupyterhub service is terminated
14 | run: python3 server.py --port 8081
15 | 


--------------------------------------------------------------------------------
/tests/skyserve/http/gcp.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 20
 5 |   replicas: 2
 6 | 
 7 | resources:
 8 |   ports: 8080
 9 |   infra: gcp
10 | 
11 | workdir: examples/serve/http_server
12 | 
13 | # Use 8080 to test jupyter service is terminated
14 | run: python3 server.py --port 8080
15 | 


--------------------------------------------------------------------------------
/tests/skyserve/http/kubernetes.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 180  # Use a large delay for EKS LB to be ready
 5 |   replicas: 2
 6 | 
 7 | resources:
 8 |   ports: 8080
 9 |   infra: kubernetes
10 | 
11 | workdir: examples/serve/http_server
12 | 
13 | # Use 8080 to test jupyter service is terminated
14 | run: python3 server.py --port 8080
15 | 


--------------------------------------------------------------------------------
/tests/skyserve/http/oci.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe: /
 3 |   replicas: 2
 4 | 
 5 | resources:
 6 |   infra: oci
 7 |   ports: 8080
 8 |   cpus: 2+
 9 | 
10 | run: python -m http.server 8080
11 | 


--------------------------------------------------------------------------------
/tests/skyserve/https/service.yaml:
--------------------------------------------------------------------------------
 1 | envs:
 2 |   TLS_KEYFILE_ENV_VAR:
 3 |   TLS_CERTFILE_ENV_VAR:
 4 | 
 5 | service:
 6 |   readiness_probe: /health
 7 |   replicas: 1
 8 |   tls:
 9 |     keyfile: $TLS_KEYFILE_ENV_VAR
10 |     certfile: $TLS_CERTFILE_ENV_VAR
11 | 
12 | resources:
13 |   ports: 8081
14 | 
15 | workdir: examples/serve/http_server
16 | 
17 | # Use 8081 to test jupyterhub service is terminated
18 | run: python3 server.py --port 8081
19 | 


--------------------------------------------------------------------------------
/tests/skyserve/load_balancer/server.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import functools
 3 | 
 4 | from fastapi import FastAPI
 5 | import requests
 6 | import uvicorn
 7 | 
 8 | app = FastAPI()
 9 | 
10 | 
11 | @functools.lru_cache(maxsize=1)
12 | def get_self_ip() -> str:
13 |     return requests.get('http://ifconfig.me').text
14 | 
15 | 
16 | @app.get('/get_ip')
17 | async def get_ip():
18 |     return {'ip': get_self_ip()}
19 | 
20 | 
21 | @app.get('/health')
22 | async def health():
23 |     return {'status': 'ok'}
24 | 
25 | 
26 | if __name__ == '__main__':
27 |     parser = argparse.ArgumentParser(description='SkyServe HTTP Test Server')
28 |     parser.add_argument('--port', type=int, required=True)
29 |     args = parser.parse_args()
30 |     uvicorn.run(app, host='0.0.0.0', port=args.port)
31 | 


--------------------------------------------------------------------------------
/tests/skyserve/load_balancer/service.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     # Allow time for installing dependencies
 5 |     initial_delay_seconds: 180
 6 |   replica_policy:
 7 |     min_replicas: 3
 8 |   load_balancing_policy: round_robin
 9 | 
10 | resources:
11 |   ports: 8080
12 | 
13 | workdir: tests/skyserve/load_balancer
14 | 
15 | setup: pip install fastapi[all] uvicorn
16 | 
17 | run: python3 server.py --port 8080
18 | 


--------------------------------------------------------------------------------
/tests/skyserve/multi_ports.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 180  # Use a large delay for EKS LB to be ready
 5 |   replicas: 1
 6 |   ports: 8080
 7 | 
 8 | resources:
 9 |   ports:
10 |     - 8080
11 |     - 8081
12 | 
13 | setup: |
14 |   wget https://raw.githubusercontent.com/skypilot-org/skypilot/refs/heads/master/examples/serve/http_server/server.py
15 | 
16 | run: |
17 |   python3 server.py --port 8080 &
18 |   python3 server.py --port 8081
19 | 


--------------------------------------------------------------------------------
/tests/skyserve/readiness_timeout/server.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import asyncio
 3 | 
 4 | import fastapi
 5 | import uvicorn
 6 | 
 7 | app = fastapi.FastAPI()
 8 | 
 9 | 
10 | @app.get('/')
11 | async def root():
12 |     return 'Hi, SkyPilot here!'
13 | 
14 | 
15 | @app.get('/health')
16 | async def health():
17 |     # Simulate a readiness probe with long processing time.
18 |     await asyncio.sleep(20)
19 |     return {'status': 'ok'}
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser(
24 |         description='SkyServe Readiness Timeout Test Server')
25 |     parser.add_argument('--port', type=int, required=True)
26 |     args = parser.parse_args()
27 |     uvicorn.run(app, host='0.0.0.0', port=args.port)
28 | 


--------------------------------------------------------------------------------
/tests/skyserve/readiness_timeout/task.yaml:
--------------------------------------------------------------------------------
 1 | # task.yaml
 2 | service:
 3 |   readiness_probe:
 4 |     path: /health
 5 |     initial_delay_seconds: 120
 6 |   replicas: 1
 7 | 
 8 | workdir: tests/skyserve/readiness_timeout
 9 | 
10 | resources:
11 |   ports: 8081
12 | 
13 | setup: pip install fastapi uvicorn
14 | 
15 | run: python3 server.py --port 8081
16 | 


--------------------------------------------------------------------------------
/tests/skyserve/readiness_timeout/task_large_timeout.yaml:
--------------------------------------------------------------------------------
 1 | # task_large_timeout.yaml
 2 | service:
 3 |   readiness_probe:
 4 |     path: /health
 5 |     initial_delay_seconds: 120
 6 |     timeout_seconds: 30
 7 |   replicas: 1
 8 | 
 9 | workdir: tests/skyserve/readiness_timeout
10 | 
11 | resources:
12 |   ports: 8081
13 | 
14 | setup: pip install fastapi uvicorn
15 | 
16 | run: python3 server.py --port 8081
17 | 


--------------------------------------------------------------------------------
/tests/skyserve/restart/user_bug.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | # The program exits to simulate a user app bug.
4 | if __name__ == "__main__":
5 |     time.sleep(1)
6 |     assert False
7 | 


--------------------------------------------------------------------------------
/tests/skyserve/restart/user_bug.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 60
 5 |   replicas: 1
 6 | 
 7 | 
 8 | resources:
 9 |   ports: 8080
10 | 
11 | workdir: tests/skyserve/restart
12 | 
13 | run: python3 user_bug.py
14 | 


--------------------------------------------------------------------------------
/tests/skyserve/spot/base_ondemand_fallback.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 20
 5 |   replica_policy:
 6 |     min_replicas: 2
 7 |     max_replicas: 3
 8 |     base_ondemand_fallback_replicas: 1
 9 |     # Use a large qps per replica to avoid scale up for testing purpose.
10 |     target_qps_per_replica: 10000
11 | 
12 | resources:
13 |   ports: 8080
14 |   use_spot: true
15 | 
16 | setup: |
17 |   wget https://raw.githubusercontent.com/skypilot-org/skypilot/refs/heads/master/examples/serve/http_server/server.py
18 | 
19 | # Use 8080 to test jupyter service is terminated
20 | run: python3 server.py --port 8080
21 | 


--------------------------------------------------------------------------------
/tests/skyserve/spot/dynamic_ondemand_fallback.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 20
 5 |   replica_policy:
 6 |     min_replicas: 2
 7 |     max_replicas: 3
 8 |     dynamic_ondemand_fallback: true
 9 |     # Use a large qps per replica to avoid scale up for testing purpose.
10 |     target_qps_per_replica: 10000
11 | 
12 | resources:
13 |   ports: 8080
14 |   infra: gcp/*/us-central1-a
15 |   cpus: 2+
16 |   use_spot: true
17 | 
18 | workdir: examples/serve/http_server
19 | 
20 | # Use 8080 to test jupyter service is terminated
21 | run: python3 server.py --port 8080
22 | 


--------------------------------------------------------------------------------
/tests/skyserve/spot/recovery.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 20
 5 |   replicas: 1
 6 | 
 7 | 
 8 | resources:
 9 |   ports: 8080
10 |   infra: gcp/*/us-central1-a
11 |   use_spot: true
12 | 
13 | workdir: examples/serve/http_server
14 | 
15 | # Use 8080 to test jupyter service is terminated
16 | run: python3 server.py --port 8080
17 | 


--------------------------------------------------------------------------------
/tests/skyserve/streaming/example.txt:
--------------------------------------------------------------------------------
1 | Hello! How can I help you today?


--------------------------------------------------------------------------------
/tests/skyserve/streaming/server.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | import fastapi
 4 | import uvicorn
 5 | 
 6 | with open('example.txt', 'r') as f:
 7 |     WORD_TO_STREAM = f.read()
 8 | 
 9 | app = fastapi.FastAPI()
10 | 
11 | 
12 | @app.get('/')
13 | async def stream():
14 | 
15 |     async def generate_words():
16 |         for word in WORD_TO_STREAM.split():
17 |             yield word + "\n"
18 |             await asyncio.sleep(0.2)
19 | 
20 |     return fastapi.responses.StreamingResponse(generate_words(),
21 |                                                media_type="text/plain")
22 | 
23 | 
24 | uvicorn.run(app, host='0.0.0.0', port=8080)
25 | 


--------------------------------------------------------------------------------
/tests/skyserve/streaming/streaming.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe: /
 3 |   replicas: 1
 4 | 
 5 | resources:
 6 |   ports: 8080
 7 | 
 8 | workdir: tests/skyserve/streaming
 9 | 
10 | setup: pip install fastapi uvicorn
11 | 
12 | run: python server.py
13 | 


--------------------------------------------------------------------------------
/tests/skyserve/update/new.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 100
 5 |   replicas: 2
 6 |   load_balancing_policy: round_robin
 7 | 
 8 | resources:
 9 |   ports: 8081
10 |   infra: gcp
11 | 
12 | workdir: tests/skyserve/update
13 | 
14 | run: |
15 |   if (( $SKYPILOT_SERVE_REPLICA_ID % 2 == 0 )); then
16 |     # Sleep for replicas with even id, so that we can test the mixing traffic
17 |     # of rolling update
18 |     sleep 120
19 |   fi
20 |   python3 new_server.py --port 8081
21 | 


--------------------------------------------------------------------------------
/tests/skyserve/update/new_autoscaler_after.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 150
 5 |   replica_policy:
 6 |     min_replicas: 5
 7 |     max_replicas: 5
 8 |     base_ondemand_fallback_replicas: 1
 9 | 
10 | resources:
11 |   ports: 8081
12 |   use_spot: true
13 | 
14 | setup: |
15 |   wget https://raw.githubusercontent.com/skypilot-org/skypilot/refs/heads/master/examples/serve/http_server/server.py
16 | 
17 | run: |
18 |   if [ $SKYPILOT_SERVE_REPLICA_ID -eq 7 ]; then
19 |     # Sleep for the last replica in the test_skyserve_new_autoscaler_update
20 |     # so that we can check the behavior difference between rolling and
21 |     # blue-green update.
22 |     sleep 120
23 |   fi
24 |   python3 server.py --port 8081
25 | 


--------------------------------------------------------------------------------
/tests/skyserve/update/new_autoscaler_before.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 60
 5 |   replicas: 2
 6 | 
 7 | resources:
 8 |   ports: 8081
 9 | 
10 | setup: |
11 |   wget https://raw.githubusercontent.com/skypilot-org/skypilot/refs/heads/master/examples/serve/http_server/server.py
12 | 
13 | run: python3 server.py --port 8081
14 | 


--------------------------------------------------------------------------------
/tests/skyserve/update/num_min_one.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 60
 5 |   replica_policy:
 6 |     min_replicas: 1
 7 | 
 8 | resources:
 9 |   ports: 8080
10 |   infra: gcp
11 | 
12 | workdir: examples/serve/http_server
13 | 
14 | run: python3 server.py --port 8080
15 | 


--------------------------------------------------------------------------------
/tests/skyserve/update/num_min_two.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 60
 5 |   replica_policy:
 6 |     min_replicas: 2
 7 | 
 8 | resources:
 9 |   ports: 8080
10 |   infra: gcp
11 | 
12 | workdir: examples/serve/http_server
13 | 
14 | run: python3 server.py --port 8080
15 | 


--------------------------------------------------------------------------------
/tests/skyserve/update/old.yaml:
--------------------------------------------------------------------------------
 1 | service:
 2 |   readiness_probe:
 3 |     path: /health
 4 |     initial_delay_seconds: 100
 5 |   replicas: 2
 6 |   load_balancing_policy: round_robin
 7 | 
 8 | resources:
 9 |   ports: 8080
10 |   infra: gcp
11 | 
12 | workdir: tests/skyserve/update
13 | 
14 | run: python3 old_server.py --port 8080
15 | 


--------------------------------------------------------------------------------
/tests/smoke_tests/__init__.py:
--------------------------------------------------------------------------------
1 | """For smoke tests import."""
2 | __all__ = ['smoke_tests_utils']
3 | 


--------------------------------------------------------------------------------
/tests/smoke_tests/docker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skypilot-org/skypilot/370d5fa04663145f36b1dc67ab8ec229b190bc54/tests/smoke_tests/docker/__init__.py


--------------------------------------------------------------------------------
/tests/test_global_user_state.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import pytest
 4 | 
 5 | import sky
 6 | 
 7 | 
 8 | @pytest.mark.skipif(sys.platform != 'linux', reason='Only test in CI.')
 9 | def test_enabled_clouds_empty():
10 |     # In test environment, no cloud should be enabled.
11 |     assert sky.global_user_state.get_cached_enabled_clouds(
12 |         sky.clouds.cloud.CloudCapability.COMPUTE, workspace='default') == []
13 | 


--------------------------------------------------------------------------------
/tests/test_yamls/different_default_conda_env.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   cpus: 2+
 3 | 
 4 | 
 5 | setup: |
 6 |   conda create -n testenv python=3.6 -y
 7 | 
 8 |   echo "conda activate testenv" >> ~/.bashrc
 9 | 
10 | run: |
11 |   python --version 2>&1 | grep "Python 3.6" || exit 1
12 | 


--------------------------------------------------------------------------------
/tests/test_yamls/failed_setup.yaml:
--------------------------------------------------------------------------------
1 | setup: |
2 |   echo failed setup
3 |   exit 1
4 | 
5 | run: |
6 |   echo run
7 | 


--------------------------------------------------------------------------------
/tests/test_yamls/failed_worker_run.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   cpus: 2+
 3 | 
 4 | num_nodes: 3
 5 | 
 6 | run: |
 7 |   if [ "$SKYPILOT_NODE_RANK" == "1" ]; then
 8 |       sleep 2
 9 |       exit 1
10 |   fi
11 |   echo My hostname: $(hostname)
12 |   sleep 10000
13 | 


--------------------------------------------------------------------------------
/tests/test_yamls/failed_worker_setup.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   cpus: 2+
 3 | 
 4 | num_nodes: 3
 5 | 
 6 | setup: |
 7 |   echo "Setting up nodes"
 8 |   echo "$SKYPILOT_SETUP_NODE_RANK"
 9 |   if [ "$SKYPILOT_SETUP_NODE_RANK" == "1" ]; then
10 |       echo FAILING $SKYPILOT_SETUP_NODE_RANK
11 |       exit 1
12 |   fi
13 |   sleep 10000
14 | 
15 | run: |
16 |   echo Should not get here
17 | 


--------------------------------------------------------------------------------
/tests/test_yamls/force_enable_external_ips_config.yaml:
--------------------------------------------------------------------------------
1 | gcp:
2 |   vpc_name: default
3 |   use_internal_ips: true
4 |   force_enable_external_ips: true
5 | 


--------------------------------------------------------------------------------
/tests/test_yamls/gcp_per_region_images.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: gcp
 3 |   image_id: 
 4 |     us-central1: skypilot:cpu-debian-10
 5 |     us-west3: projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20230112
 6 | 
 7 | 
 8 | setup: |
 9 |   echo "running setup"
10 | 
11 | run: |
12 |   conda env list
13 | 


--------------------------------------------------------------------------------
/tests/test_yamls/intermediate_bucket.yaml:
--------------------------------------------------------------------------------
 1 | name: intermediate-bucket
 2 | 
 3 | file_mounts:
 4 |   /setup.py: ./setup.py
 5 |   /sky: .
 6 |   /train-00001-of-01024: gs://cloud-tpu-test-datasets/fake_imagenet/train-00001-of-01024
 7 | 
 8 | workdir: .
 9 | 
10 | 
11 | setup: |
12 |   echo "running setup"
13 | 
14 | run: |
15 |   echo "listing workdir"
16 |   ls .
17 |   echo "listing file_mounts"
18 |   ls /setup.py
19 |   ls /sky
20 |   ls /train-00001-of-01024
21 |   echo "task run finish"
22 | 


--------------------------------------------------------------------------------
/tests/test_yamls/low_resource_sky_config.yaml:
--------------------------------------------------------------------------------
 1 | jobs:
 2 |   controller:
 3 |     resources:
 4 |       cpus: 2+
 5 |       memory: 4+
 6 | serve:
 7 |   controller:
 8 |     resources:
 9 |       cpus: 2+
10 |       memory: 4+
11 | 


--------------------------------------------------------------------------------
/tests/test_yamls/minimal.yaml:
--------------------------------------------------------------------------------
1 | name: min
2 | 
3 | setup: |
4 |   echo "running setup"
5 | 
6 | run: |
7 |   conda env list
8 |   echo "task run finish"
9 | 


--------------------------------------------------------------------------------
/tests/test_yamls/minimal_test_quick_tests_core.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws
 3 |   instance_type: t3.small
 4 | 
 5 | file_mounts:
 6 |   ~/aws: .
 7 | 
 8 | workdir: .
 9 | 
10 | num_nodes: 1
11 | 
12 | run: |
13 |   ls -l ~/aws/tests/test_yamls/minimal_test_quick_tests_core.yaml
14 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_aws_config.yaml:
--------------------------------------------------------------------------------
 1 | aws:
 2 |   vpc_name: fake-vpc
 3 |   remote_identity:
 4 |     - sky-serve-fake1-*: fake1-skypilot-role
 5 |     - sky-serve-fake2-*: fake2-skypilot-role
 6 | 
 7 |   security_group_name:
 8 |     - sky-serve-fake1-*: fake-1-sg
 9 |     - sky-serve-fake2-*: fake-2-sg
10 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_custom_default_conda_env.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   cpus: 2+
 3 | 
 4 | setup: |
 5 |   conda activate myenv
 6 |   if [ $? -ne 0 ]; then
 7 |     conda create -n myenv python=3.7 -y
 8 |     conda activate myenv
 9 |   fi
10 | 
11 |   # Set user's conda environment as default, which does not have the SkyPilot
12 |   # runtime installed.
13 |   grep -qxF 'conda activate myenv' ~/.bashrc || echo "conda activate myenv" >> ~/.bashrc
14 |   # Further install the older version of Ray to test SkyPilot being able to use
15 |   # the original environment to submit jobs.
16 |   pip install ray==2.6.0
17 | 
18 | run: |
19 |   conda env list
20 |   echo hi
21 |   echo bye
22 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_custom_image.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   infra: aws/us-east-2
 3 |   # Nvidia image from
 4 |   # https://aws.amazon.com/marketplace/pp/prodview-rf7na2b2ttvdg
 5 |   image_id: ami-062ddd90fb6f8267a
 6 |   accelerators: T4:1
 7 | 
 8 | # Test file_mounts - both rsync and mounting based.
 9 | file_mounts:
10 |   /setup.py: ./setup.py
11 |   /mount_public_s3:
12 |     source: s3://digitalcorpora
13 |     mode: MOUNT
14 | 
15 | setup: |
16 |   echo "running setup"
17 | 
18 | run: |
19 |   set -ex
20 |   
21 |   ls -ltr /mount_public_s3/corpora
22 |   
23 |   for i in {1..100}; do
24 |     echo "Hello $i"
25 |     sleep 0.25
26 |   done
27 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_k8s_logs.yaml:
--------------------------------------------------------------------------------
1 | name: test-k8s-logs
2 | 
3 | run: |
4 |   for i in $(seq 1 9)
5 |   do
6 |     echo "$i"
7 |     sleep 0.1
8 |   done
9 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_labels.yaml.j2:
--------------------------------------------------------------------------------
1 | resources:
2 |   cloud: {{cloud}}
3 |   {% if region %}
4 |   region: {{region}}
5 |   {% endif %}
6 |   labels:
7 |     inlinelabel1: inlinevalue1
8 |     inlinelabel2: inlinevalue2


--------------------------------------------------------------------------------
/tests/test_yamls/test_long_setup.yaml:
--------------------------------------------------------------------------------
 1 | setup: |
 2 |   echo long setup
 3 |   for i in {1..10000}; do
 4 |     echo $i
 5 |     sleep 1
 6 |   done
 7 | 
 8 | run: |
 9 |   echo run
10 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_managed_jobs_retry.yaml:
--------------------------------------------------------------------------------
 1 | resources:
 2 |   cpus: 2+
 3 |   job_recovery:
 4 |     max_restarts_on_errors: 1
 5 | 
 6 | # Task 1: Always fails
 7 | run: |
 8 |   echo "Task 1 starting"
 9 |   exit 1
10 | ---
11 | # Task 2: Never reached due to Task 1 failure
12 | run: |
13 |   echo "Task 2 starting"
14 |   exit 0


--------------------------------------------------------------------------------
/tests/test_yamls/test_multiple_accelerators_ordered.yaml:
--------------------------------------------------------------------------------
1 | name: multi-accelerators-ordered
2 | 
3 | resources:
4 |   accelerators: ['A100-40GB:1', 'T4:1', 'V100:1', 'K80:1']
5 | 
6 | run: |
7 |   nvidia-smi
8 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_multiple_accelerators_ordered_with_default.yaml:
--------------------------------------------------------------------------------
1 | name: multi-accelerators-ordered
2 | 
3 | resources:
4 |   use_spot: true
5 |   accelerators: ['A100:1', 'T4:1', 'V100:1', 'K80:1']
6 | 
7 | run: |
8 |   nvidia-smi
9 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_multiple_accelerators_unordered.yaml:
--------------------------------------------------------------------------------
1 | name: multi-accelerators-unordered
2 | 
3 | resources:
4 |   accelerators: {'A100-40GB:1', 'T4:1', 'V100:1'}
5 | 
6 | run: |
7 |   nvidia-smi
8 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_multiple_accelerators_unordered_with_default.yaml:
--------------------------------------------------------------------------------
 1 | name: multi-accelerators-unordered
 2 | 
 3 | resources:
 4 |   use_spot: true
 5 |   accelerators: {'A100:1', 'T4:1', 'V100:1'}
 6 |   any_of:
 7 |     - infra: aws
 8 |     - infra: gcp
 9 | 
10 | run: |
11 |   nvidia-smi
12 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_multiple_resources.yaml:
--------------------------------------------------------------------------------
 1 | name: multi-resources
 2 | 
 3 | resources:
 4 |   any_of:
 5 |     - infra: aws/us-east-1
 6 |       accelerators: A100:8
 7 |     - infra: gcp
 8 |       accelerators: T4:4
 9 |     - infra: aws
10 | 
11 | run:
12 |   echo hi
13 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_only_setup.yaml:
--------------------------------------------------------------------------------
1 | setup: |
2 |   echo "hello world"
3 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_rclone_mount.yaml:
--------------------------------------------------------------------------------
 1 | file_mounts:
 2 |   # Mounting private buckets in RCLONE mode
 3 |   /mount_private_rclone:
 4 |     name: {{storage_name}}
 5 |     source: ~/tmp-workdir
 6 |     store: {{store_type}}
 7 |     mode: RCLONE
 8 | 
 9 | run: |
10 |   set -ex
11 | 
12 |   # Check private bucket contents
13 |   ls -ltr /mount_private_rclone/foo
14 |   ls -ltr /mount_private_rclone/tmp\ file
15 |   
16 |   # Symlinks are not copied to buckets
17 |   ! ls /mount_private_rclone/circle-link
18 |   
19 |   # Write to private bucket in RCLONE mode should pass
20 |   echo "hello" > /mount_private_rclone/hello.txt
21 | 
22 |   # Ensure that write is reflected in bucket
23 |   rclone ls {{ rclone_profile_name }}:{{ storage_name }}/hello.txt
24 | 
25 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_serve_autoscaler.yaml:
--------------------------------------------------------------------------------
 1 | # SkyServe YAML to run a simple http server.
 2 | #
 3 | # Usage:
 4 | #   sky serve up -n http examples/serve/http_server/task.yaml
 5 | # The endpoint will be printed in the console. You
 6 | # could also check the endpoint by running:
 7 | #   sky serve status --endpoint http
 8 | 
 9 | service:
10 |   readiness_probe:
11 |     path: /health
12 |     initial_delay_seconds: 20
13 | 
14 |   replica_policy:
15 |     min_replicas: 1
16 |     max_replicas: 3
17 |     target_qps_per_replica: 1
18 | 
19 | resources:
20 |   ports: 8081
21 |   cpus: 2+
22 | 
23 | workdir: examples/serve/http_server
24 | 
25 | run: python3 server.py
26 | 


--------------------------------------------------------------------------------
/tests/test_yamls/test_skyignore.yaml:
--------------------------------------------------------------------------------
 1 | name: skyignore-test
 2 | 
 3 | file_mounts:
 4 |   /script.py: ./tests/test_yamls/test_skyignore_verification.py
 5 | 
 6 | setup: |
 7 |   echo "Setting up skyignore test environment"
 8 | 
 9 | run: |
10 |   set -e
11 |   python /script.py ~/sky_workdir
12 | 


--------------------------------------------------------------------------------
/tests/test_yamls/use_intermediate_bucket_config.yaml:
--------------------------------------------------------------------------------
1 | jobs:
2 |   bucket: "s3://bucket-jobs-s3"
3 | 
4 |   controller:
5 |     resources:
6 |         cpus: 2+
7 |         memory: 4+
8 | 


--------------------------------------------------------------------------------
/tests/test_yamls/use_internal_ips_config.yaml:
--------------------------------------------------------------------------------
1 | gcp:
2 |   vpc_name: default
3 |   use_internal_ips: true
4 | 


--------------------------------------------------------------------------------
/tests/test_yamls/use_mig_config.yaml:
--------------------------------------------------------------------------------
1 | gcp:
2 |   managed_instance_group:
3 |     run_duration: 36000
4 |     provision_timeout: 900
5 | 


--------------------------------------------------------------------------------
/tests/unit_tests/test_azure_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from sky.clouds.utils import azure_utils
 4 | 
 5 | 
 6 | def test_validate_image_id():
 7 |     # Valid marketplace image ID
 8 |     azure_utils.validate_image_id("publisher:offer:sku:version")
 9 | 
10 |     # Valid community image ID
11 |     azure_utils.validate_image_id(
12 |         "/CommunityGalleries/gallery-name/Images/image-name")
13 | 
14 |     # Invalid format (neither marketplace nor community)
15 |     with pytest.raises(ValueError):
16 |         azure_utils.validate_image_id(
17 |             "CommunityGalleries/gallery-name/Images/image-name")
18 | 
19 |     # Invalid marketplace image ID (too few parts)
20 |     with pytest.raises(ValueError):
21 |         azure_utils.validate_image_id("publisher:offer:sku")
22 | 


--------------------------------------------------------------------------------
/tests/unit_tests/test_cloud.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from sky.clouds.cloud import Cloud
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(("specific_reservations", "expected"), [({"a"}, {
 7 |     "a": 0
 8 | }), ((set(), {}))])
 9 | def test_cloud_get_reservations_available_resources(specific_reservations,
10 |                                                     expected):
11 | 
12 |     available_resources = Cloud().get_reservations_available_resources(
13 |         "instance_type", "region", "zone", specific_reservations)
14 |     assert available_resources == expected
15 | 


--------------------------------------------------------------------------------