├── .gitignore ├── README.md ├── alpa_serve ├── __init__.py ├── controller.py ├── http_util.py ├── placement_policy │ ├── __init__.py │ ├── base_policy.py │ ├── model_parallelism.py │ └── selective_replication.py ├── profiling.py ├── run.py ├── simulator │ ├── cluster.py │ ├── controller.py │ ├── event_loop.py │ ├── executable.py │ ├── util.py │ └── workload.py ├── trace │ ├── README.md │ ├── __init__.py │ ├── benchmark_trace.py │ ├── distribution.py │ ├── test_trace.py │ └── trace.py └── util.py ├── benchmarks └── alpa │ ├── README.md │ ├── approximate_one_case.py │ ├── bert_model.py │ ├── compare_waiting_time.py │ ├── equal_model_case.py │ ├── gen_data_goodput_vs_slo.py │ ├── gen_data_notebook.py │ ├── gen_data_simulator_align.py │ ├── gen_data_various_metrics.py │ ├── general_model_case.py │ ├── inspect_profiling_result.py │ ├── interactive_benchmarking.ipynb │ ├── plot_goodput_vs_slo.py │ ├── plot_various_metrics.py │ ├── prepare_trace.py │ ├── run_one_case.py │ ├── simulate_one_case.py │ ├── suite_debug.py │ └── util.py ├── deprecated ├── README.md ├── alpasim │ ├── __init__.py │ ├── cluster.py │ ├── model.py │ ├── scheduler.py │ ├── simulator.py │ ├── utils.py │ └── workload.py ├── azuretrace │ ├── README.md │ └── analyse.ipynb ├── cluster_traces │ ├── README.md │ ├── test_workload_8to2_30Hz_60s_interop_trace.json │ ├── test_workload_8to2_30Hz_60s_intraop_trace.json │ ├── test_workload_8to2_50Hz_60s_interop_trace.json │ ├── test_workload_8to2_50Hz_60s_intraop_trace.json │ ├── test_workload_8to2_6.667Hz_20s_baseline_trace.json │ ├── test_workload_8to2_6.667Hz_20s_interop_trace.json │ └── test_workload_8to2_6.667Hz_20s_intraop_trace.json ├── placements │ ├── README.md │ ├── placement_125M_baseline.json │ ├── placement_125M_interop.json │ ├── placement_125M_intraop.json │ ├── placement_125M_strong_baseline.json │ ├── placement_baseline.json │ ├── placement_interop.json │ ├── placement_intraop.json │ └── placement_test.json ├── scripts │ ├── memory_saving │ │ ├── benchmark.py │ │ └── placements │ │ │ ├── placement_baseline_2GPUs.json │ │ │ ├── placement_baseline_4GPUs_memx1.json │ │ │ ├── placement_baseline_4GPUs_memx2.json │ │ │ ├── placement_baseline_4GPUs_memx2_3to1.json │ │ │ ├── placement_baseline_4GPUs_memx3_3to1.json │ │ │ ├── placement_baseline_4GPUs_memx4.json │ │ │ ├── placement_pipeline_2GPUs.json │ │ │ ├── placement_pipeline_4GPUs_memx1.json │ │ │ ├── placement_pipeline_4GPUs_memx1dot5_3to1.json │ │ │ ├── placement_pipeline_4GPUs_memx2.json │ │ │ └── placement_strong_2GPUs.json │ ├── pipeline_latency │ │ ├── 2.6B.png │ │ ├── 6.7B.png │ │ └── plot.py │ └── small_model_benchmark │ │ └── strong_baseline.py ├── simulator.py ├── test.py └── workload │ ├── README.md │ ├── test_workload_8to2_10Hz_20s │ ├── test_workload_8to2_30Hz_60s │ ├── test_workload_8to2_50Hz_60s │ └── test_workload_8to2_6.667Hz_20s ├── experiments ├── ablation │ ├── ablation_general_synthetic_bert_all │ │ └── res_general_vs_all.tsv │ ├── ablation_general_synthetic_bert_all_fix_trace_seed │ │ └── res_general_vs_all.tsv │ ├── ablation_general_synthetic_mixed_all │ │ └── res_general_vs_all.tsv │ ├── ablation_general_synthetic_mixed_all_fix_trace_seed │ │ └── res_general_vs_all.tsv │ ├── align_simulator_2022_12_12 │ │ ├── res_real.tsv │ │ └── res_sim.tsv │ ├── general_synthetic_bert │ │ └── res_general_model_cases.tsv │ └── general_synthetic_mixed │ │ └── res_general_model_cases.tsv ├── batching │ └── gen_data_goodput_vs_slo.py ├── e2e_goodput │ ├── equal_model_exp.py │ ├── equal_model_suite.py │ ├── general_model_exp.py │ ├── general_model_suite.py │ ├── plot_sec6_2.py │ ├── plot_sec6_3.py │ ├── plot_sec6_4.py │ ├── plot_sec6_5.py │ ├── plot_sec6_6.py │ ├── plot_various_metrics.py │ └── visualize.py ├── motivation │ ├── README.md │ ├── changing_pipeline_overhead.py │ ├── changing_rate_cv_slo.py │ ├── illustrative_example.py │ ├── illustrative_example_slides.py │ ├── memory_budget_vs_latency.py │ ├── model_parallel_latency_throughput.py │ ├── overhead_decomposition.py │ └── queueing_theory_plot.py └── robustness │ ├── plot_average_performance.py │ ├── robustness_exp.py │ └── robustness_suite.py ├── osdi23_artifact ├── README.md ├── cleanup.sh ├── equal_model_exp.py ├── equal_model_suite.py ├── gen_data_sec6_2_e2e.sh ├── gen_data_sec6_3_large.sh ├── gen_data_sec6_4_robust.sh ├── gen_data_sec6_5_ab.sh ├── general_model_exp.py ├── general_model_suite.py ├── plot_sec6_2_e2e.py ├── plot_sec6_3_large.py ├── plot_sec6_4_robust.py ├── plot_sec6_5_ab.py ├── robustness_exp.py ├── robustness_suite.py └── sec6_2_data │ ├── azure_v1_mixed.tsv │ └── azure_v2_mixed.tsv ├── setup.py └── tests ├── run_all.py └── serve ├── test_controller.py ├── test_placement_policy.py └── test_simulator.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/README.md -------------------------------------------------------------------------------- /alpa_serve/__init__.py: -------------------------------------------------------------------------------- 1 | """Alpa serving backend""" 2 | -------------------------------------------------------------------------------- /alpa_serve/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/controller.py -------------------------------------------------------------------------------- /alpa_serve/http_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/http_util.py -------------------------------------------------------------------------------- /alpa_serve/placement_policy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/placement_policy/__init__.py -------------------------------------------------------------------------------- /alpa_serve/placement_policy/base_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/placement_policy/base_policy.py -------------------------------------------------------------------------------- /alpa_serve/placement_policy/model_parallelism.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/placement_policy/model_parallelism.py -------------------------------------------------------------------------------- /alpa_serve/placement_policy/selective_replication.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/placement_policy/selective_replication.py -------------------------------------------------------------------------------- /alpa_serve/profiling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/profiling.py -------------------------------------------------------------------------------- /alpa_serve/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/run.py -------------------------------------------------------------------------------- /alpa_serve/simulator/cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/simulator/cluster.py -------------------------------------------------------------------------------- /alpa_serve/simulator/controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/simulator/controller.py -------------------------------------------------------------------------------- /alpa_serve/simulator/event_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/simulator/event_loop.py -------------------------------------------------------------------------------- /alpa_serve/simulator/executable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/simulator/executable.py -------------------------------------------------------------------------------- /alpa_serve/simulator/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/simulator/util.py -------------------------------------------------------------------------------- /alpa_serve/simulator/workload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/simulator/workload.py -------------------------------------------------------------------------------- /alpa_serve/trace/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/trace/README.md -------------------------------------------------------------------------------- /alpa_serve/trace/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/trace/__init__.py -------------------------------------------------------------------------------- /alpa_serve/trace/benchmark_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/trace/benchmark_trace.py -------------------------------------------------------------------------------- /alpa_serve/trace/distribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/trace/distribution.py -------------------------------------------------------------------------------- /alpa_serve/trace/test_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/trace/test_trace.py -------------------------------------------------------------------------------- /alpa_serve/trace/trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/trace/trace.py -------------------------------------------------------------------------------- /alpa_serve/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/alpa_serve/util.py -------------------------------------------------------------------------------- /benchmarks/alpa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/README.md -------------------------------------------------------------------------------- /benchmarks/alpa/approximate_one_case.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/approximate_one_case.py -------------------------------------------------------------------------------- /benchmarks/alpa/bert_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/bert_model.py -------------------------------------------------------------------------------- /benchmarks/alpa/compare_waiting_time.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/compare_waiting_time.py -------------------------------------------------------------------------------- /benchmarks/alpa/equal_model_case.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/equal_model_case.py -------------------------------------------------------------------------------- /benchmarks/alpa/gen_data_goodput_vs_slo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/gen_data_goodput_vs_slo.py -------------------------------------------------------------------------------- /benchmarks/alpa/gen_data_notebook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/gen_data_notebook.py -------------------------------------------------------------------------------- /benchmarks/alpa/gen_data_simulator_align.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/gen_data_simulator_align.py -------------------------------------------------------------------------------- /benchmarks/alpa/gen_data_various_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/gen_data_various_metrics.py -------------------------------------------------------------------------------- /benchmarks/alpa/general_model_case.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/general_model_case.py -------------------------------------------------------------------------------- /benchmarks/alpa/inspect_profiling_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/inspect_profiling_result.py -------------------------------------------------------------------------------- /benchmarks/alpa/interactive_benchmarking.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/interactive_benchmarking.ipynb -------------------------------------------------------------------------------- /benchmarks/alpa/plot_goodput_vs_slo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/plot_goodput_vs_slo.py -------------------------------------------------------------------------------- /benchmarks/alpa/plot_various_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/plot_various_metrics.py -------------------------------------------------------------------------------- /benchmarks/alpa/prepare_trace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/prepare_trace.py -------------------------------------------------------------------------------- /benchmarks/alpa/run_one_case.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/run_one_case.py -------------------------------------------------------------------------------- /benchmarks/alpa/simulate_one_case.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/simulate_one_case.py -------------------------------------------------------------------------------- /benchmarks/alpa/suite_debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/suite_debug.py -------------------------------------------------------------------------------- /benchmarks/alpa/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/benchmarks/alpa/util.py -------------------------------------------------------------------------------- /deprecated/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/README.md -------------------------------------------------------------------------------- /deprecated/alpasim/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/alpasim/__init__.py -------------------------------------------------------------------------------- /deprecated/alpasim/cluster.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/alpasim/cluster.py -------------------------------------------------------------------------------- /deprecated/alpasim/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/alpasim/model.py -------------------------------------------------------------------------------- /deprecated/alpasim/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/alpasim/scheduler.py -------------------------------------------------------------------------------- /deprecated/alpasim/simulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/alpasim/simulator.py -------------------------------------------------------------------------------- /deprecated/alpasim/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/alpasim/utils.py -------------------------------------------------------------------------------- /deprecated/alpasim/workload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/alpasim/workload.py -------------------------------------------------------------------------------- /deprecated/azuretrace/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/azuretrace/README.md -------------------------------------------------------------------------------- /deprecated/azuretrace/analyse.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/azuretrace/analyse.ipynb -------------------------------------------------------------------------------- /deprecated/cluster_traces/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/README.md -------------------------------------------------------------------------------- /deprecated/cluster_traces/test_workload_8to2_30Hz_60s_interop_trace.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/test_workload_8to2_30Hz_60s_interop_trace.json -------------------------------------------------------------------------------- /deprecated/cluster_traces/test_workload_8to2_30Hz_60s_intraop_trace.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/test_workload_8to2_30Hz_60s_intraop_trace.json -------------------------------------------------------------------------------- /deprecated/cluster_traces/test_workload_8to2_50Hz_60s_interop_trace.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/test_workload_8to2_50Hz_60s_interop_trace.json -------------------------------------------------------------------------------- /deprecated/cluster_traces/test_workload_8to2_50Hz_60s_intraop_trace.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/test_workload_8to2_50Hz_60s_intraop_trace.json -------------------------------------------------------------------------------- /deprecated/cluster_traces/test_workload_8to2_6.667Hz_20s_baseline_trace.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/test_workload_8to2_6.667Hz_20s_baseline_trace.json -------------------------------------------------------------------------------- /deprecated/cluster_traces/test_workload_8to2_6.667Hz_20s_interop_trace.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/test_workload_8to2_6.667Hz_20s_interop_trace.json -------------------------------------------------------------------------------- /deprecated/cluster_traces/test_workload_8to2_6.667Hz_20s_intraop_trace.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/cluster_traces/test_workload_8to2_6.667Hz_20s_intraop_trace.json -------------------------------------------------------------------------------- /deprecated/placements/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/README.md -------------------------------------------------------------------------------- /deprecated/placements/placement_125M_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_125M_baseline.json -------------------------------------------------------------------------------- /deprecated/placements/placement_125M_interop.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_125M_interop.json -------------------------------------------------------------------------------- /deprecated/placements/placement_125M_intraop.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_125M_intraop.json -------------------------------------------------------------------------------- /deprecated/placements/placement_125M_strong_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_125M_strong_baseline.json -------------------------------------------------------------------------------- /deprecated/placements/placement_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_baseline.json -------------------------------------------------------------------------------- /deprecated/placements/placement_interop.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_interop.json -------------------------------------------------------------------------------- /deprecated/placements/placement_intraop.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_intraop.json -------------------------------------------------------------------------------- /deprecated/placements/placement_test.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/placements/placement_test.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/benchmark.py -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_baseline_2GPUs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_baseline_2GPUs.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx1.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx2.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx2_3to1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx2_3to1.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx3_3to1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx3_3to1.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_baseline_4GPUs_memx4.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_pipeline_2GPUs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_pipeline_2GPUs.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_pipeline_4GPUs_memx1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_pipeline_4GPUs_memx1.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_pipeline_4GPUs_memx1dot5_3to1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_pipeline_4GPUs_memx1dot5_3to1.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_pipeline_4GPUs_memx2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_pipeline_4GPUs_memx2.json -------------------------------------------------------------------------------- /deprecated/scripts/memory_saving/placements/placement_strong_2GPUs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/memory_saving/placements/placement_strong_2GPUs.json -------------------------------------------------------------------------------- /deprecated/scripts/pipeline_latency/2.6B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/pipeline_latency/2.6B.png -------------------------------------------------------------------------------- /deprecated/scripts/pipeline_latency/6.7B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/pipeline_latency/6.7B.png -------------------------------------------------------------------------------- /deprecated/scripts/pipeline_latency/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/pipeline_latency/plot.py -------------------------------------------------------------------------------- /deprecated/scripts/small_model_benchmark/strong_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/scripts/small_model_benchmark/strong_baseline.py -------------------------------------------------------------------------------- /deprecated/simulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/simulator.py -------------------------------------------------------------------------------- /deprecated/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/test.py -------------------------------------------------------------------------------- /deprecated/workload/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/workload/README.md -------------------------------------------------------------------------------- /deprecated/workload/test_workload_8to2_10Hz_20s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/workload/test_workload_8to2_10Hz_20s -------------------------------------------------------------------------------- /deprecated/workload/test_workload_8to2_30Hz_60s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/workload/test_workload_8to2_30Hz_60s -------------------------------------------------------------------------------- /deprecated/workload/test_workload_8to2_50Hz_60s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/workload/test_workload_8to2_50Hz_60s -------------------------------------------------------------------------------- /deprecated/workload/test_workload_8to2_6.667Hz_20s: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/deprecated/workload/test_workload_8to2_6.667Hz_20s -------------------------------------------------------------------------------- /experiments/ablation/ablation_general_synthetic_bert_all/res_general_vs_all.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/ablation_general_synthetic_bert_all/res_general_vs_all.tsv -------------------------------------------------------------------------------- /experiments/ablation/ablation_general_synthetic_bert_all_fix_trace_seed/res_general_vs_all.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/ablation_general_synthetic_bert_all_fix_trace_seed/res_general_vs_all.tsv -------------------------------------------------------------------------------- /experiments/ablation/ablation_general_synthetic_mixed_all/res_general_vs_all.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/ablation_general_synthetic_mixed_all/res_general_vs_all.tsv -------------------------------------------------------------------------------- /experiments/ablation/ablation_general_synthetic_mixed_all_fix_trace_seed/res_general_vs_all.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/ablation_general_synthetic_mixed_all_fix_trace_seed/res_general_vs_all.tsv -------------------------------------------------------------------------------- /experiments/ablation/align_simulator_2022_12_12/res_real.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/align_simulator_2022_12_12/res_real.tsv -------------------------------------------------------------------------------- /experiments/ablation/align_simulator_2022_12_12/res_sim.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/align_simulator_2022_12_12/res_sim.tsv -------------------------------------------------------------------------------- /experiments/ablation/general_synthetic_bert/res_general_model_cases.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/general_synthetic_bert/res_general_model_cases.tsv -------------------------------------------------------------------------------- /experiments/ablation/general_synthetic_mixed/res_general_model_cases.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/ablation/general_synthetic_mixed/res_general_model_cases.tsv -------------------------------------------------------------------------------- /experiments/batching/gen_data_goodput_vs_slo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/batching/gen_data_goodput_vs_slo.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/equal_model_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/equal_model_exp.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/equal_model_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/equal_model_suite.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/general_model_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/general_model_exp.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/general_model_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/general_model_suite.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/plot_sec6_2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/plot_sec6_2.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/plot_sec6_3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/plot_sec6_3.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/plot_sec6_4.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/plot_sec6_4.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/plot_sec6_5.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/plot_sec6_5.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/plot_sec6_6.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/plot_sec6_6.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/plot_various_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/plot_various_metrics.py -------------------------------------------------------------------------------- /experiments/e2e_goodput/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/e2e_goodput/visualize.py -------------------------------------------------------------------------------- /experiments/motivation/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/README.md -------------------------------------------------------------------------------- /experiments/motivation/changing_pipeline_overhead.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/changing_pipeline_overhead.py -------------------------------------------------------------------------------- /experiments/motivation/changing_rate_cv_slo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/changing_rate_cv_slo.py -------------------------------------------------------------------------------- /experiments/motivation/illustrative_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/illustrative_example.py -------------------------------------------------------------------------------- /experiments/motivation/illustrative_example_slides.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/illustrative_example_slides.py -------------------------------------------------------------------------------- /experiments/motivation/memory_budget_vs_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/memory_budget_vs_latency.py -------------------------------------------------------------------------------- /experiments/motivation/model_parallel_latency_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/model_parallel_latency_throughput.py -------------------------------------------------------------------------------- /experiments/motivation/overhead_decomposition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/overhead_decomposition.py -------------------------------------------------------------------------------- /experiments/motivation/queueing_theory_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/motivation/queueing_theory_plot.py -------------------------------------------------------------------------------- /experiments/robustness/plot_average_performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/robustness/plot_average_performance.py -------------------------------------------------------------------------------- /experiments/robustness/robustness_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/robustness/robustness_exp.py -------------------------------------------------------------------------------- /experiments/robustness/robustness_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/experiments/robustness/robustness_suite.py -------------------------------------------------------------------------------- /osdi23_artifact/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/README.md -------------------------------------------------------------------------------- /osdi23_artifact/cleanup.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/cleanup.sh -------------------------------------------------------------------------------- /osdi23_artifact/equal_model_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/equal_model_exp.py -------------------------------------------------------------------------------- /osdi23_artifact/equal_model_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/equal_model_suite.py -------------------------------------------------------------------------------- /osdi23_artifact/gen_data_sec6_2_e2e.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/gen_data_sec6_2_e2e.sh -------------------------------------------------------------------------------- /osdi23_artifact/gen_data_sec6_3_large.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/gen_data_sec6_3_large.sh -------------------------------------------------------------------------------- /osdi23_artifact/gen_data_sec6_4_robust.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/gen_data_sec6_4_robust.sh -------------------------------------------------------------------------------- /osdi23_artifact/gen_data_sec6_5_ab.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/gen_data_sec6_5_ab.sh -------------------------------------------------------------------------------- /osdi23_artifact/general_model_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/general_model_exp.py -------------------------------------------------------------------------------- /osdi23_artifact/general_model_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/general_model_suite.py -------------------------------------------------------------------------------- /osdi23_artifact/plot_sec6_2_e2e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/plot_sec6_2_e2e.py -------------------------------------------------------------------------------- /osdi23_artifact/plot_sec6_3_large.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/plot_sec6_3_large.py -------------------------------------------------------------------------------- /osdi23_artifact/plot_sec6_4_robust.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/plot_sec6_4_robust.py -------------------------------------------------------------------------------- /osdi23_artifact/plot_sec6_5_ab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/plot_sec6_5_ab.py -------------------------------------------------------------------------------- /osdi23_artifact/robustness_exp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/robustness_exp.py -------------------------------------------------------------------------------- /osdi23_artifact/robustness_suite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/robustness_suite.py -------------------------------------------------------------------------------- /osdi23_artifact/sec6_2_data/azure_v1_mixed.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/sec6_2_data/azure_v1_mixed.tsv -------------------------------------------------------------------------------- /osdi23_artifact/sec6_2_data/azure_v2_mixed.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/osdi23_artifact/sec6_2_data/azure_v2_mixed.tsv -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/setup.py -------------------------------------------------------------------------------- /tests/run_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/tests/run_all.py -------------------------------------------------------------------------------- /tests/serve/test_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/tests/serve/test_controller.py -------------------------------------------------------------------------------- /tests/serve/test_placement_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/tests/serve/test_placement_policy.py -------------------------------------------------------------------------------- /tests/serve/test_simulator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alpa-projects/mms/HEAD/tests/serve/test_simulator.py --------------------------------------------------------------------------------