├── .claude └── settings.local.json ├── .gitignore ├── .gitmodules ├── .mcp.json ├── CLAUDE.md ├── LICENSE ├── Makefile ├── README.md ├── autotune ├── .gitignore ├── Cargo.toml ├── README.md └── src │ ├── bin │ └── cli.rs │ ├── daemon.rs │ ├── lib.rs │ ├── prompt.rs │ └── system_info.rs ├── document ├── 2509.01245v2.pdf ├── 2509.01245v2.txt ├── design.png ├── devlog.md ├── devlog │ ├── UPDATES_SUMMARY.md │ ├── ai-gen-paper.md │ ├── basic.md │ ├── dsl.md │ ├── improvement.md │ ├── linux_sched_summary.md │ ├── outline.md │ ├── related-and-modules.md │ └── review_suggestions.md ├── linux.gif ├── motivation_exp │ ├── README.md │ ├── test-claude-failed │ │ ├── Makefile │ │ ├── README.md │ │ ├── scheduler.bpf.c │ │ ├── scheduler.c │ │ ├── scheduler.h │ │ └── vmlinux.h │ ├── test-claude-success │ │ ├── 8b51c6a0-435d-4231-b5af-9bc85b745332.jsonl │ │ ├── Makefile │ │ ├── README_SCHEDULER.md │ │ ├── conversation-2025-07-18-161603.txt │ │ ├── ebpf-scheduler │ │ │ ├── .gitignore │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── benchmark.sh │ │ │ ├── scx_common.h │ │ │ ├── simple_scheduler.bpf.c │ │ │ ├── simple_scheduler.c │ │ │ └── test_scheduler.sh │ │ ├── minimal_scheduler.bpf.c │ │ ├── minimal_scheduler.c │ │ └── priority_scheduler.bpf.c │ └── test-sched-fake │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── fdeb2ecc-5a2d-4682-9c39-663334326689.jsonl │ │ ├── load_sched.sh │ │ ├── loader.c │ │ ├── read_stats.c │ │ └── sched.bpf.c ├── schbench-optimize.gif ├── sched-agent-design.md ├── schedcp-design.md └── scx │ ├── 01-overview.md │ ├── 02-schedulers-analysis.md │ ├── 03-tools-and-utilities.md │ ├── 04-build-system-infrastructure.md │ ├── 05-bpf-framework-kernel-integration.md │ ├── 06-testing-benchmarking-infrastructure.md │ ├── 07-summary-and-conclusions.md │ └── README.md ├── mcp ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.md ├── lib │ └── process_manager │ │ ├── Cargo.toml │ │ ├── README.md │ │ └── src │ │ ├── binary_extractor.rs │ │ ├── lib.rs │ │ ├── process_manager.rs │ │ ├── process_runner.rs │ │ └── types.rs ├── new_sched │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── example.bpf.c │ ├── loader.c │ └── pid_filename_header.h ├── setup_claude.sh ├── setup_claude_code.sh ├── src │ ├── cli.rs │ ├── lib.rs │ ├── main.rs │ ├── scheduler_generator.rs │ ├── scheduler_manager.rs │ ├── storage.rs │ ├── system_monitor.rs │ └── workload_profile.rs └── tests │ ├── scheduler_generator_test.rs │ ├── test_mcp_server.rs │ └── workload_integration_test.rs ├── scheduler ├── .gitignore ├── Makefile ├── README.md ├── __init__.py ├── custom_schedulers │ ├── .gitignore │ ├── Makefile │ ├── README_scx_cxl.md │ ├── runqslower.bpf.c │ ├── runqslower.c │ ├── runqslower.h │ ├── scx_cxl.bpf.c │ ├── scx_cxl.c │ ├── scx_cxl.h │ ├── scx_cxl_rl.bpf.c │ ├── scx_cxl_rl.c │ ├── scx_pmu.bpf.c │ ├── scx_pmu.c │ ├── scx_pmu.h │ ├── scx_simple_cxl_pmu.bpf.c │ ├── scx_simple_cxl_pmu.c │ ├── scx_simple_cxl_pmu.h │ └── test_scx_cxl.sh ├── json │ ├── SCHEDULER_JSON_SCHEMA.md │ ├── scheduler-schema.json │ ├── schedulers.json.backup │ └── validate_schedulers.py ├── merge_schedulers.py ├── ml-scheduler │ ├── .gitignore │ ├── Cargo.toml │ ├── Makefile │ ├── README.md │ ├── schedcp.log.2025-08-02 │ ├── schedcp.log.2025-08-03 │ ├── schedcp_workloads.json │ ├── scx_rusty_ml │ │ ├── Cargo.toml │ │ ├── LICENSE │ │ ├── README.md │ │ ├── build.rs │ │ ├── meson.build │ │ ├── rustfmt.toml │ │ └── src │ │ │ ├── bpf │ │ │ ├── intf.h │ │ │ └── main.bpf.c │ │ │ ├── bpf_intf.rs │ │ │ ├── bpf_skel.rs │ │ │ ├── domain.rs 
│ │ │ ├── load_balance.rs │ │ │ ├── main.rs │ │ │ ├── model_dir │ │ │ └── transfer.py │ │ │ └── tuner.rs │ └── src │ │ ├── bpf_interface.rs │ │ ├── exact_ml_implementation.rs │ │ ├── lib.rs │ │ ├── main.rs │ │ ├── ml_scheduler_exact.rs │ │ ├── model_dir │ │ ├── README.md │ │ ├── convert_model.py │ │ ├── convert_to_savedmodel.py │ │ ├── create_model.py │ │ ├── tf_convert.py │ │ └── transfer.py │ │ ├── model_export.py │ │ ├── scheduler_integration.rs │ │ └── test_ml.rs ├── prompt │ └── select.md ├── sche_description │ ├── scx_bpfland.json │ ├── scx_bpfland.md │ ├── scx_central.json │ ├── scx_central.md │ ├── scx_chaos.json │ ├── scx_chaos.md │ ├── scx_flash.json │ ├── scx_flash.md │ ├── scx_flatcg.json │ ├── scx_flatcg.md │ ├── scx_lavd.json │ ├── scx_lavd.md │ ├── scx_layered.json │ ├── scx_layered.md │ ├── scx_mitosis.json │ ├── scx_mitosis.md │ ├── scx_nest.json │ ├── scx_nest.md │ ├── scx_p2dq.json │ ├── scx_p2dq.md │ ├── scx_pair.json │ ├── scx_pair.md │ ├── scx_prev.json │ ├── scx_prev.md │ ├── scx_qmap.json │ ├── scx_qmap.md │ ├── scx_rlfifo.json │ ├── scx_rlfifo.md │ ├── scx_rustland.json │ ├── scx_rustland.md │ ├── scx_rusty.json │ ├── scx_rusty.md │ ├── scx_sdt.json │ ├── scx_sdt.md │ ├── scx_simple.json │ ├── scx_simple.md │ ├── scx_tickless.json │ ├── scx_tickless.md │ ├── scx_userland.json │ └── scx_userland.md ├── scheduler_runner.py ├── schedulers.json ├── schedulers.json.backup_20250719_062837 ├── scx_instruction.md ├── template │ ├── .gitignore │ ├── Makefile │ ├── cxl_schedulers │ │ ├── README.md │ │ ├── meson.build │ │ ├── scx_central.bpf.c │ │ ├── scx_central.c │ │ ├── scx_flatcg.bpf.c │ │ ├── scx_flatcg.c │ │ ├── scx_flatcg.h │ │ ├── scx_nest.bpf.c │ │ ├── scx_nest.c │ │ ├── scx_nest.h │ │ ├── scx_nest_stats_table.h │ │ ├── scx_prev.bpf.c │ │ ├── scx_prev.c │ │ ├── scx_qmap.bpf.c │ │ ├── scx_qmap.c │ │ ├── scx_userland.bpf.c │ │ ├── scx_userland.c │ │ └── scx_userland.h │ ├── fifo.bpf.c │ ├── loader.c │ └── vruntime.bpf.c └── update_scheduler_docs.sh └── workloads ├── basic ├── Makefile ├── README.md ├── schbench_test │ ├── README.md │ ├── requirements.txt │ ├── results │ │ ├── schbench_performance_comparison.png │ │ ├── schbench_results.json │ │ └── scheduler_performance_comparison.png │ ├── schbench_bench_start.py │ ├── schbench_simple_collect.py │ ├── schbench_tester.py │ ├── schedcp.log.2025-08-03 │ ├── schedcp_workloads.json │ └── test-record │ │ ├── APPLICATION_PROFILE.md │ │ ├── SCHBENCH_ANALYSIS.md │ │ ├── SCHEDULER_OPTIMIZATION_PLAN.md │ │ ├── default.json │ │ ├── plot_schbench.py │ │ ├── prompt.md │ │ ├── schbench_performance_comparison.pdf │ │ ├── schbench_performance_comparison.png │ │ ├── schedcp.log.2025-08-03 │ │ ├── schedcp_workloads.json │ │ ├── scx_bpfland_aggressive.json │ │ ├── scx_flash_latency.json │ │ └── scx_rusty_lowlat.json ├── scheduler_evaluation_plan.md ├── stress-ng_test │ ├── README.md │ ├── requirements.txt │ ├── results │ │ ├── stress_ng_normalized_performance.png │ │ ├── stress_ng_performance_comparison.png │ │ └── stress_ng_results.json │ ├── stress_ng_bench_start.py │ └── stress_ng_tester.py └── sysbench_guide.md ├── cxl-micro ├── .gitignore ├── Makefile ├── README.md ├── bandwidth_analysis.md ├── cxl_micro_bench_start.py ├── cxl_perf_bandwidth_bench.py ├── double_bandwidth.cpp ├── numa_results │ ├── SYSTEM_CONFIG.md │ ├── bandwidth_vs_read_ratio_combined.pdf │ ├── bandwidth_vs_read_ratio_comparison.pdf │ ├── bandwidth_vs_read_ratio_datasize_comparison.pdf │ ├── cxl_perf_parameter_sweep_numact_none.csv │ ├── 
cxl_perf_parameter_sweep_numactl0.csv │ ├── cxl_perf_parameter_sweep_numactl01.csv │ ├── cxl_perf_parameter_sweep_numactl01_seq.csv │ ├── cxl_perf_parameter_sweep_numactl0_seq.csv │ ├── cxl_perf_parameter_sweep_numactl2.csv │ ├── cxl_perf_parameter_sweep_numactl23.csv │ ├── cxl_perf_parameter_sweep_numactl23_seq.csv │ ├── cxl_perf_parameter_sweep_numactl2_seq.csv │ ├── cxl_perf_parameter_sweep_numactl3.csv │ ├── cxl_perf_parameter_sweep_numactl3_seq.csv │ ├── default_random_vs_seq_comparison.pdf │ ├── default_random_vs_seq_comparison.txt │ ├── numa_interleave_01_vs_23_fixed_172_threads.pdf │ ├── numa_interleave_01_vs_23_fixed_64gb_buffer.pdf │ ├── numa_results_numact_none.log │ ├── numa_results_numactl0.log │ ├── numa_results_numactl01.log │ ├── numa_results_numactl01_seq.log │ ├── numa_results_numactl0_seq.log │ ├── numa_results_numactl2.log │ ├── numa_results_numactl23.log │ ├── numa_results_numactl23_seq.log │ ├── numa_results_numactl2_seq.log │ ├── numa_results_numactl3.log │ ├── numa_results_numactl3_seq.log │ ├── plot_bandwidth_comparison.py │ ├── report.txt │ └── runnuma.sh ├── parameter_sweep_multi_schedulers_numactl3_random_32g_172t.log ├── results │ ├── cxl_perf_bandwidth_results.json │ ├── cxl_perf_parameter_sweep.csv │ ├── cxl_perf_parameter_sweep.json │ ├── cxl_results │ │ ├── cxl_scheduler_performance.png │ │ ├── cxl_scheduler_results.json │ │ ├── mini-perf │ │ │ ├── cxl_perf_parameter_sweep_numactl0.csv │ │ │ ├── cxl_perf_parameter_sweep_numactl01.csv │ │ │ ├── cxl_perf_parameter_sweep_numactl012.csv │ │ │ ├── cxl_perf_parameter_sweep_numactl1.csv │ │ │ └── cxl_perf_parameter_sweep_numactl3.csv │ │ ├── no_conf_results │ │ │ ├── cxl_scheduler_performance.png │ │ │ └── cxl_scheduler_results.json │ │ └── numactl_results │ │ │ ├── cxl_scheduler_performance.png │ │ │ └── cxl_scheduler_results.json │ ├── cxl_scheduler_performance.pdf │ ├── cxl_scheduler_performance.png │ ├── cxl_scheduler_performance_t172_s64gb_r0.50_seq.png │ ├── cxl_scheduler_performance_t256_s16gb_r0.50_random.png │ ├── cxl_scheduler_results.json │ ├── cxl_scheduler_results_t172_s64gb_r0.50_seq.json │ ├── cxl_scheduler_results_t256_s16gb_r0.50_random.json │ ├── default_random_vs_seq_comparison.pdf │ ├── default_random_vs_seq_comparison.txt │ ├── parameter_sweep_multi_schedulers_numactl3_random_16g_256t.csv │ ├── parameter_sweep_multi_schedulers_numactl3_random_32g_172t.csv │ ├── parameter_sweep_multi_schedulers_numactl3_seq_32g_172t.csv │ ├── parameter_sweep_multi_schedulers_scx_rustland.csv │ ├── plot_scheduler_comparison.py │ ├── random_schedulers_comparison.pdf │ ├── raw_schedulers_comparison.pdf │ ├── rustland_vs_default_comparison.pdf │ └── sequential_schedulers_comparison.pdf └── thread_workers.hpp ├── ktransformers ├── optimized_local_chat.py └── run_optimized_chat.sh ├── linux-build-bench ├── .gitignore ├── Makefile ├── README.md ├── linux_build_bench_start.py ├── linux_build_tester.py ├── monitor_linux_build.py ├── requirements.txt ├── results │ ├── defconfig │ │ ├── linux_build_normalized_performance.png │ │ ├── linux_build_results.json │ │ ├── linux_build_speedup.png │ │ └── linux_build_time_comparison.png │ ├── linux_build_normalized_performance.png │ ├── linux_build_results.json │ ├── linux_build_speedup.png │ ├── linux_build_time_comparison.png │ └── tiny-config │ │ ├── linux_build_normalized_performance.png │ │ ├── linux_build_results.json │ │ ├── linux_build_speedup.png │ │ └── linux_build_time_comparison.png ├── schedcp_workloads.json └── test-record │ ├── Linux_build_benchmark_results.pdf │ 
├── Linux_build_benchmark_results.png │ ├── README.md │ ├── plot_benchmark.py │ ├── schedcp.log.2025-08-01 │ └── schedcp_workloads.json ├── llama.cpp ├── .gitignore ├── Makefile ├── README.md ├── build_llama.sh ├── docs │ ├── SLO.md │ ├── SUPPORTED_BACKENDS.md │ └── ShareGPT_Research_Summary.md ├── download_sharegpt.py ├── download_test_model.py ├── llama_benchmark.py ├── llamacpp_bench_start.py ├── requirements.txt ├── results │ ├── benchmark_results.csv │ ├── duplex_scheduling_analysis.png │ ├── llama_benchmark_results.png │ ├── llama_scheduler_performance_128.png │ ├── llama_scheduler_results.json │ ├── llama_scheduler_results_128.json │ ├── roofline_analysis.png │ ├── roofline_analysis_results.csv │ ├── scheduler_comparison.pdf │ ├── sharegpt_llama_server_performance.png │ ├── sharegpt_llama_server_results.json │ ├── sharegpt_vicuna_s1000_c64_20250719_160620 │ │ ├── sharegpt_benchmark_summary.txt │ │ ├── sharegpt_llama_server_performance.png │ │ └── sharegpt_llama_server_results.json │ ├── sharegpt_vicuna_s100_c128_20250816_130207 │ │ ├── sharegpt_benchmark_summary.txt │ │ ├── sharegpt_llama_server_performance.png │ │ └── sharegpt_llama_server_results.json │ ├── sharegpt_vicuna_s30_c4_20250805_112829 │ │ ├── sharegpt_benchmark_summary.txt │ │ ├── sharegpt_llama_server_performance.png │ │ └── sharegpt_llama_server_results.json │ ├── sharegpt_vicuna_s5_c2_20250719_153411 │ │ ├── LOG_ANALYSIS.md │ │ ├── server_logs.stderr │ │ ├── server_logs.stdout │ │ ├── sharegpt_benchmark_summary.txt │ │ ├── sharegpt_llama_server_performance.png │ │ └── sharegpt_llama_server_results.json │ └── visualize_scheduler_comparison.py ├── roofline_analysis.py ├── roofline_deepseek_analysis.py ├── run_deepseek_roofline.sh ├── run_roofline_test.sh ├── sharegpt_llama_server_eval.py └── test_server.py ├── nginx ├── .gitignore ├── Makefile ├── index.html ├── mime.types ├── nginx.conf ├── nginx_benchmark.py ├── nginx_benchmark_simple.py └── requirements.txt ├── processing ├── .gitignore ├── README.md ├── assets │ ├── README.md │ ├── compression.py │ ├── dask_groupby_prepare.py │ ├── dask_groupby_test.py │ ├── file_checksum.py │ ├── flink_join_prepare.py │ ├── flink_join_test.py │ ├── git_add_libgit2.cpp │ ├── long.c │ ├── pandas_etl_prepare.py │ ├── pandas_etl_test.py │ ├── short.c │ ├── spark_skew_prepare.py │ ├── spark_skew_test.py │ └── video_transcode.cpp ├── case.md ├── evaluate_workloads_parallel.py ├── schedulers │ ├── .claude │ │ └── backup │ │ │ ├── comm.c │ │ │ └── file1.bpf.c │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── compression.bpf.c │ ├── compression │ │ ├── compression.md │ │ └── time │ ├── ctest_suite.bpf.c │ ├── ctest_suite │ │ ├── ctest_suite.md │ │ └── time │ ├── ddos_log_analysis.bpf.c │ ├── ddos_log_analysis │ │ ├── ddos_log_analysis.md │ │ └── time │ ├── example.bpf.c │ ├── fifo.bpf.c │ ├── file_checksum.bpf.c │ ├── file_checksum │ │ ├── file_checksum.md │ │ └── time │ ├── git_add_different.bpf.c │ ├── git_add_different │ │ ├── git_add_different.md │ │ └── time │ ├── hotkey_aggregation.bpf.c │ ├── hotkey_aggregation │ │ ├── hotkey_aggregation.md │ │ └── time │ ├── loader.c │ ├── pid_filename_header.h │ ├── video_transcode.bpf.c │ ├── video_transcode │ │ ├── time │ │ └── video_transcode.md │ ├── viral_product_analytics.bpf.c │ ├── viral_product_analytics │ │ ├── time │ │ └── viral_product_analytics.md │ └── vruntime.bpf.c ├── scripts │ ├── analyze_scheduler_results.py │ ├── desc_result │ │ ├── 73adda31-b9fa-456d-90c1-a09f05c83f17.jsonl │ │ └── time │ ├── install_deps.sh │ ├── 
json_to_markdown.py │ ├── prompt.md │ ├── quick_test_assets.sh │ ├── results_ctest.json │ ├── results_default.json │ ├── results_fifo.json │ ├── run_scheduler_tests.py │ ├── scheduler_comparison.png │ ├── scheduler_performance_comparison.pdf │ ├── scheduler_test_results │ │ ├── compression_custom_run1_20250731_213536.json │ │ ├── compression_custom_run1_20250731_213536.log │ │ ├── compression_custom_run2_20250731_213549.json │ │ ├── compression_custom_run2_20250731_213549.log │ │ ├── compression_custom_run3_20250731_213603.json │ │ ├── compression_custom_run3_20250731_213603.log │ │ ├── compression_default_run1_20250731_213457.json │ │ ├── compression_default_run1_20250731_213457.log │ │ ├── compression_default_run2_20250731_213510.json │ │ ├── compression_default_run2_20250731_213510.log │ │ ├── compression_default_run3_20250731_213523.json │ │ ├── compression_default_run3_20250731_213523.log │ │ ├── ctest_suite_custom_run1_20250731_214521.json │ │ ├── ctest_suite_custom_run1_20250731_214521.log │ │ ├── ctest_suite_custom_run2_20250731_214546.json │ │ ├── ctest_suite_custom_run2_20250731_214546.log │ │ ├── ctest_suite_custom_run3_20250731_214608.json │ │ ├── ctest_suite_custom_run3_20250731_214608.log │ │ ├── ctest_suite_default_run1_20250731_214406.json │ │ ├── ctest_suite_default_run1_20250731_214406.log │ │ ├── ctest_suite_default_run2_20250731_214433.json │ │ ├── ctest_suite_default_run2_20250731_214433.log │ │ ├── ctest_suite_default_run3_20250731_214500.json │ │ ├── ctest_suite_default_run3_20250731_214500.log │ │ ├── ddos_log_analysis_custom_run1_20250731_215650.json │ │ ├── ddos_log_analysis_custom_run1_20250731_215650.log │ │ ├── ddos_log_analysis_custom_run2_20250731_215724.json │ │ ├── ddos_log_analysis_custom_run2_20250731_215724.log │ │ ├── ddos_log_analysis_custom_run3_20250731_215759.json │ │ ├── ddos_log_analysis_custom_run3_20250731_215759.log │ │ ├── ddos_log_analysis_default_run1_20250731_215517.json │ │ ├── ddos_log_analysis_default_run1_20250731_215517.log │ │ ├── ddos_log_analysis_default_run2_20250731_215546.json │ │ ├── ddos_log_analysis_default_run2_20250731_215546.log │ │ ├── ddos_log_analysis_default_run3_20250731_215615.json │ │ ├── ddos_log_analysis_default_run3_20250731_215615.log │ │ ├── file_checksum_custom_run1_20250731_215036.json │ │ ├── file_checksum_custom_run1_20250731_215036.log │ │ ├── file_checksum_custom_run2_20250731_215104.json │ │ ├── file_checksum_custom_run2_20250731_215104.log │ │ ├── file_checksum_custom_run3_20250731_215131.json │ │ ├── file_checksum_custom_run3_20250731_215131.log │ │ ├── file_checksum_default_run1_20250731_214926.json │ │ ├── file_checksum_default_run1_20250731_214926.log │ │ ├── file_checksum_default_run2_20250731_214947.json │ │ ├── file_checksum_default_run2_20250731_214947.log │ │ ├── file_checksum_default_run3_20250731_215008.json │ │ ├── file_checksum_default_run3_20250731_215008.log │ │ ├── git_add_different_custom_run1_20250731_214806.json │ │ ├── git_add_different_custom_run1_20250731_214806.log │ │ ├── git_add_different_custom_run2_20250731_214836.json │ │ ├── git_add_different_custom_run2_20250731_214836.log │ │ ├── git_add_different_custom_run3_20250731_214905.json │ │ ├── git_add_different_custom_run3_20250731_214905.log │ │ ├── git_add_different_default_run1_20250731_214639.json │ │ ├── git_add_different_default_run1_20250731_214639.log │ │ ├── git_add_different_default_run2_20250731_214708.json │ │ ├── git_add_different_default_run2_20250731_214708.log │ │ ├── 
git_add_different_default_run3_20250731_214737.json │ │ ├── git_add_different_default_run3_20250731_214737.log │ │ ├── hotkey_aggregation_custom_run1_20250731_215337.json │ │ ├── hotkey_aggregation_custom_run1_20250731_215337.log │ │ ├── hotkey_aggregation_custom_run2_20250731_215412.json │ │ ├── hotkey_aggregation_custom_run2_20250731_215412.log │ │ ├── hotkey_aggregation_custom_run3_20250731_215447.json │ │ ├── hotkey_aggregation_custom_run3_20250731_215447.log │ │ ├── hotkey_aggregation_default_run1_20250731_215202.json │ │ ├── hotkey_aggregation_default_run1_20250731_215202.log │ │ ├── hotkey_aggregation_default_run2_20250731_215234.json │ │ ├── hotkey_aggregation_default_run2_20250731_215234.log │ │ ├── hotkey_aggregation_default_run3_20250731_215304.json │ │ ├── hotkey_aggregation_default_run3_20250731_215304.log │ │ ├── test_summary_20250731_220223.json │ │ ├── video_transcode_custom_run1_20250731_214113.json │ │ ├── video_transcode_custom_run1_20250731_214113.log │ │ ├── video_transcode_custom_run2_20250731_214227.json │ │ ├── video_transcode_custom_run2_20250731_214227.log │ │ ├── video_transcode_custom_run3_20250731_214339.json │ │ ├── video_transcode_custom_run3_20250731_214339.log │ │ ├── video_transcode_default_run1_20250731_213718.json │ │ ├── video_transcode_default_run1_20250731_213718.log │ │ ├── video_transcode_default_run2_20250731_213835.json │ │ ├── video_transcode_default_run2_20250731_213835.log │ │ ├── video_transcode_default_run3_20250731_213958.json │ │ ├── video_transcode_default_run3_20250731_213958.log │ │ ├── viral_product_analytics_custom_run1_20250731_220052.json │ │ ├── viral_product_analytics_custom_run1_20250731_220052.log │ │ ├── viral_product_analytics_custom_run2_20250731_220136.json │ │ ├── viral_product_analytics_custom_run2_20250731_220136.log │ │ ├── viral_product_analytics_custom_run3_20250731_220223.json │ │ ├── viral_product_analytics_custom_run3_20250731_220223.log │ │ ├── viral_product_analytics_default_run1_20250731_215841.json │ │ ├── viral_product_analytics_default_run1_20250731_215841.log │ │ ├── viral_product_analytics_default_run2_20250731_215924.json │ │ ├── viral_product_analytics_default_run2_20250731_215924.log │ │ ├── viral_product_analytics_default_run3_20250731_220007.json │ │ └── viral_product_analytics_default_run3_20250731_220007.log │ ├── test_cases_parallel_1_100.json │ ├── test_cases_parallel_old.json │ └── visualize_results.py └── test_cases_parallel.json ├── pyvsag ├── Makefile ├── README.md ├── pyvsag_bench_start.py ├── requirements.txt ├── results │ ├── pyvsag_scheduler_comparison.pdf │ ├── pyvsag_scheduler_results.json │ └── visualize_scheduler_comparison.py └── results_summary.txt ├── redis ├── .gitignore ├── Makefile ├── README.md ├── memtier_bench_start.py ├── memtier_benchmark.py ├── redis_bench_start.py ├── redis_benchmark.py ├── requirements.txt ├── results │ ├── analyze_memtier_results.py │ ├── memtier_benchmark_20250819_170157.json │ ├── memtier_benchmark_20250819_170603.json │ ├── memtier_detailed_comparison.png │ ├── memtier_latency_comparison.png │ ├── memtier_scheduler_comparison.png │ ├── memtier_scheduler_results.csv │ ├── memtier_scheduler_results.json │ ├── memtier_throughput_comparison.png │ ├── redis_combined_performance.png │ ├── redis_comparison.pdf │ ├── redis_data_size_sweep.csv │ ├── redis_data_size_sweep.png │ ├── redis_latency_comparison.png │ ├── redis_scheduler_results.json │ ├── redis_throughput_comparison.png │ └── summary_memtier.txt └── utils.py ├── rocksdb ├── Makefile ├── 
requirements.txt └── rocksdb_benchmark.py └── vllm ├── .gitignore ├── Makefile ├── README.md ├── llama.md ├── llamacpp_openai_client.py ├── moe_offload_analysis.md ├── vllm_bench_start.py ├── vllm_benchmark_tester.py ├── vllm_full_benchmark.py └── vllm_latency_comparison.png /.claude/settings.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "permissions": { 3 | "allow": [ 4 | "Bash(mkdir:*)", 5 | "Bash(cp:*)", 6 | "Bash(chmod:*)", 7 | "Bash(ls:*)", 8 | "Bash(make:*)", 9 | "Bash(cargo build:*)", 10 | "Bash(./target/release/schedcp-cli:*)", 11 | "Bash(cargo test:*)", 12 | "mcp__schedcp__list_schedulers", 13 | "mcp__schedcp__workload", 14 | "mcp__schedcp__run_scheduler", 15 | "mcp__schedcp__stop_scheduler", 16 | "mcp__schedcp__get_execution_status", 17 | "Bash(time bash -c:*)", 18 | "Bash(nproc)", 19 | "Bash(time make:*)", 20 | "Bash(find:*)", 21 | "Bash(/dev/null)", 22 | "Bash(workloads/basic/schbench/schbench)", 23 | "Bash(workloads/basic/schbench/schbench -r 30 -j results_default.json)", 24 | "Bash(workloads/basic/schbench/schbench -r 30 -j results_flash.json)", 25 | "Bash(workloads/basic/schbench/schbench -r 30 -j results_bpfland.json)", 26 | "Bash(workloads/basic/schbench/schbench -r 30 -j results_rusty.json)", 27 | "Bash(cargo run:*)", 28 | "Bash(sudo cargo test --test scheduler_generator_test test_execution_verify_real_scheduler -- --ignored --nocapture)", 29 | "Bash(timeout:*)", 30 | "Bash(sudo cargo test --test scheduler_generator_test test_execution_verify_real_scheduler -- --ignored --nocapture --test-threads=1)", 31 | "Bash(sudo cargo test --test scheduler_generator_test test_execute_real_scheduler -- --ignored --nocapture --test-threads=1)", 32 | "Bash(grep:*)" 33 | ], 34 | "deny": [] 35 | }, 36 | "enableAllProjectMcpServers": true, 37 | "enabledMcpjsonServers": [ 38 | "schedcp" 39 | ] 40 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cpu_bin 2 | venv/ 3 | __pycache__ 4 | results_*.json 5 | bcc 6 | schedcp.log.* 7 | /schedcp_workloads.json 8 | 9 | # Custom scheduler executables 10 | scheduler/custom_schedulers/scx_cxl 11 | scheduler/custom_schedulers/scx_cxl_rl 12 | scheduler/custom_schedulers/scx_simple_cxl_pmu 13 | scheduler/custom_schedulers/scx_pmu 14 | 15 | # BPF object files and skeletons 16 | scheduler/custom_schedulers/*.bpf.o 17 | scheduler/custom_schedulers/*.bpf.skel.h 18 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "scheduler/scx"] 2 | path = scheduler/scx 3 | url = https://github.com/sched-ext/scx/ 4 | [submodule "/home/yunwei37/ai-os/workloads/basic/schbench"] 5 | path = /home/yunwei37/ai-os/workloads/basic/schbench 6 | url = https://github.com/masoncl/schbench.git 7 | [submodule "workloads/basic/schbench"] 8 | path = workloads/basic/schbench 9 | url = https://github.com/masoncl/schbench 10 | [submodule "workloads/stress-ng"] 11 | path = workloads/basic/stress-ng 12 | url = https://github.com/ColinIanKing/stress-ng 13 | [submodule "workloads/cachyos-benchmarker"] 14 | path = workloads/basic/cachyos-benchmarker 15 | url = https://github.com/CachyOS/cachyos-benchmarker 16 | [submodule "workloads/llama.cpp/llama.cpp"] 17 | path = workloads/llama.cpp/llama.cpp 18 | url = https://github.com/ggerganov/llama.cpp.git 19 | [submodule 
"workloads/redis/redis-src"] 20 | path = workloads/redis/redis-src 21 | url = https://github.com/redis/redis.git 22 | [submodule "workloads/redis/memtier_benchmark"] 23 | path = workloads/redis/memtier_benchmark 24 | url = https://github.com/RedisLabs/memtier_benchmark.git 25 | [submodule "workloads/nginx/nginx"] 26 | path = workloads/nginx/nginx 27 | url = https://github.com/nginx/nginx.git 28 | -------------------------------------------------------------------------------- /.mcp.json: -------------------------------------------------------------------------------- 1 | { 2 | "mcpServers": { 3 | "schedcp": { 4 | "type": "stdio", 5 | "command": "mcp/target/release/schedcp", 6 | "args": [], 7 | "env": {} 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 eunomia-bpf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | make -C mcp/new_sched 3 | make -C scheduler 4 | cd mcp && cargo build --release 5 | cd autotune && cargo build --release -------------------------------------------------------------------------------- /autotune/.gitignore: -------------------------------------------------------------------------------- 1 | # Rust build artifacts 2 | target/ 3 | Cargo.lock 4 | 5 | # IDE 6 | .vscode/ 7 | .idea/ 8 | *.swp 9 | *.swo 10 | 11 | # OS 12 | .DS_Store 13 | Thumbs.db 14 | 15 | # Debug 16 | *.pdb -------------------------------------------------------------------------------- /autotune/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "autotune" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | serde = { version = "1.0", features = ["derive"] } 8 | serde_json = "1.0" 9 | tokio = { version = "1.41", features = ["full"] } 10 | clap = { version = "4.5", features = ["derive"] } 11 | chrono = "0.4" 12 | anyhow = "1.0" 13 | 14 | [[bin]] 15 | name = "autotune" 16 | path = "src/bin/cli.rs" -------------------------------------------------------------------------------- /autotune/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod prompt; 2 | pub mod daemon; 3 | pub mod system_info; -------------------------------------------------------------------------------- /autotune/src/prompt.rs: -------------------------------------------------------------------------------- 1 | use std::time::Duration; 2 | use crate::system_info::get_system_spec_prompt; 3 | 4 | pub fn create_optimization_prompt( 5 | command: &str, 6 | duration: Duration, 7 | exit_code: i32, 8 | stdout: &str, 9 | stderr: &str, 10 | ) -> String { 11 | format!( 12 | "The command '{}' took {:?} to execute with exit code {}.\n\ 13 | stdout: {}\n\ 14 | stderr: {}\n\ 15 | Please suggest optimizations for this command.", 16 | command, duration, exit_code, stdout, stderr 17 | ) 18 | } 19 | 20 | /// Creates a prompt specifically for the cc subcommand 21 | pub fn create_cc_prompt(command: &str) -> String { 22 | let system_spec = get_system_spec_prompt(); 23 | format!( 24 | "{}\n\n\ 25 | For the command '{}', optimize the scheduler with the schedcp MCP tools. When optimizing, run the exact command the user wants; do not change it or break it down. Analyze the workload, check the details (for example by reading the code), and create a profile that includes the exact full command the user wants together with your analysis of the workload. List the schedulers, then test the default plus at most 3 other schedulers (those likely to perform best) with their best configurations, update the history with all available metrics correctly after each test, and summarize the best one. If you know the command generates too much output, consider redirecting it (e.g. with 1>/dev/null) so you still capture the metrics; otherwise check the output of the command.
The exact command is '{}'.", 26 | system_spec, command, command 27 | ) 28 | } -------------------------------------------------------------------------------- /document/2509.01245v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/document/2509.01245v2.pdf -------------------------------------------------------------------------------- /document/design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/document/design.png -------------------------------------------------------------------------------- /document/devlog/related-and-modules.md: -------------------------------------------------------------------------------- 1 | # Ecosystem & Inspiration 2 | 3 | | Project | Focus | Why it matters to SchedCP | 4 | |---------|-------|--------------------------| 5 | | **bpftune** (Oracle) | Always-on autotuning of TCP buffers & other sysctls with BPF | Proven pattern for "observe → tune → verify" loops. | 6 | | **tuned** (Red Hat/SUSE) | Daemon that switches profiles and tweaks CPU / I/O / VM / power on the fly | Shows demand for profile-based tuning and plug-in architecture. | 7 | | **sched_ext / scx** | Framework for writing BPF-backed schedulers and loading them at run-time | SchedCP can load & auto-parametrize these schedulers. | 8 | | **KernelOracle** | Deep-learning model that predicts CFS decisions | Evidence that ML can improve scheduling; potential policy engine. | 9 | | **SchedViz** (Google) | Collects & visualises kernel scheduling traces | Useful companion for debugging SchedCP policies. | 10 | | **eBPF Energy Monitor** | Process-level power telemetry via eBPF | Feeds power-aware signals into SchedCP RL loops. | 11 | 12 | --- 13 | 14 | # Beyond Scheduler & Sysctl – Candidate Modules 15 | 16 | | Area | First shipping module idea | Quick win & data source | 17 | |------|---------------------------|--------------------------| 18 | | **CPU frequency / C-states** | Smart governor that biases P-cores vs E-cores based on latency SLA | `perf`, `intel_pstate`, sched_ext hooks | 19 | | **Memory management** | Adaptive `vm.swappiness` + DAMON-aware reclaim policy | DAMON stats via `damon_reclaim` events | 20 | | **Block-I/O** | Per-device IO-scheduler selector (`mq-deadline`, `bfq`, `kyber`) | `blk_iolatency` tracepoints | 21 | | **Networking** | Autoselect congestion control (`bbr2` vs `cubic`) & tune socket queues | `tcp:tcp_probe` + bpftune net-buffer lessons | 22 | | **IRQ / NUMA** | Automatic IRQ affinity & page migration to minimise remote-access stalls | `irqbalance` data + `sched:sched_stat_runtime` | 23 | | **Cgroups & QoS** | RL agent that rewrites weight/limit knobs for bursty workloads | cgroup v2 stats + container labels | 24 | | **Thermals / power** | DVFS policy that respects battery or data-centre carbon budget | RAPL / ACPI telemetry + eBPF energy monitor | 25 | | **Observability glue** | Unified ring-buffer exporter (Prometheus/OpenTelemetry) | Standard gRPC/OTLP for dashboards | 26 | | **Safety net** | A/B rollback & "flight-recorder" for every knob change | Git-style history + `bpftune-sysctl` guard rails | 27 | 28 | Each module follows the same pattern: **collect metrics → decide via RL/LLM or heuristics → apply with eBPF/`sysfs` → verify**. 
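A minimal sketch of that loop, with hypothetical `read_metric`, `decide`, `apply_knob`, and `rollback` hooks standing in for module-specific logic (none of these are APIs from the projects above):

```rust
use std::{thread, time::Duration};

// Hypothetical hooks: a real module would read DAMON/perf/tracepoint
// statistics here and write sysctls or load a BPF program.
fn read_metric() -> f64 { 0.0 }                       // collect: e.g. p99 latency
fn apply_knob(_value: u64) {}                          // apply: e.g. write /proc/sys/...
fn rollback(last_good: u64) { apply_knob(last_good) }  // safety net: restore last good value

// Heuristic stand-in for the RL/LLM policy engine.
fn decide(metric: f64, knob: u64) -> u64 {
    if metric > 1.0 { knob / 2 } else { knob }
}

fn main() {
    let mut knob = 64; // e.g. vm.swappiness or a queue depth
    loop {
        let before = read_metric();           // 1. collect metrics
        let candidate = decide(before, knob); // 2. decide
        apply_knob(candidate);                // 3. apply
        thread::sleep(Duration::from_secs(5));
        let after = read_metric();            // 4. verify
        if after > before {
            rollback(knob);                   // regressed: roll back
        } else {
            knob = candidate;                 // keep the improvement
        }
    }
}
```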
Start with scheduler + sysctl, then iterate down the list as the feedback-loop library stabilises. -------------------------------------------------------------------------------- /document/linux.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/document/linux.gif -------------------------------------------------------------------------------- /document/motivation_exp/README.md: -------------------------------------------------------------------------------- 1 | ● Yes, this is a FIFO scheduler. The key evidence: 2 | 3 | 1. Single shared queue: Uses one custom dispatch queue (MY_DSQ_ID) for all tasks 4 | 2. Simple enqueue: Tasks are dispatched to the queue in arrival order 5 | (simple_scheduler.bpf.c:93) 6 | 3. Sequential consumption: The simple_dispatch function consumes tasks from the queue 7 | in order (simple_scheduler.bpf.c:109) 8 | 4. No reordering: There's no priority-based reordering or preemption logic 9 | 10 | The scheduler maintains FIFO ordering within its custom dispatch queue, making it a 11 | basic FIFO scheduler with some CPU affinity optimizations for performance. 12 | -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-failed/Makefile: -------------------------------------------------------------------------------- 1 | CC = clang 2 | CFLAGS = -g -O2 -Wall 3 | INCLUDES = -I/usr/include/bpf -I. 4 | LIBS = -lbpf -lelf -lz 5 | 6 | ARCH := $(shell uname -m | sed 's/x86_64/x86/' | sed 's/aarch64/arm64/') 7 | VMLINUX_BTF := /sys/kernel/btf/vmlinux 8 | 9 | BPF_CFLAGS = -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) $(INCLUDES) 10 | 11 | all: scheduler 12 | 13 | vmlinux.h: 14 | bpftool btf dump file $(VMLINUX_BTF) format c > vmlinux.h 15 | 16 | scheduler.skel.h: scheduler.bpf.o 17 | bpftool gen skeleton scheduler.bpf.o > scheduler.skel.h 18 | 19 | scheduler.bpf.o: scheduler.bpf.c vmlinux.h scheduler.h 20 | $(CC) $(BPF_CFLAGS) -c scheduler.bpf.c -o scheduler.bpf.o 21 | 22 | scheduler: scheduler.c scheduler.skel.h scheduler.h 23 | $(CC) $(CFLAGS) $(INCLUDES) scheduler.c -o scheduler $(LIBS) 24 | 25 | clean: 26 | rm -f *.o scheduler scheduler.skel.h vmlinux.h 27 | 28 | install: scheduler 29 | install -m 755 scheduler /usr/local/bin/ 30 | 31 | uninstall: 32 | rm -f /usr/local/bin/scheduler 33 | 34 | .PHONY: all clean install uninstall -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-failed/README.md: -------------------------------------------------------------------------------- 1 | # eBPF-based Linux Scheduler 2 | 3 | { 4 | "sessionId": "-root-yunwei37-test-scheduler", 5 | "inputTokens": 39, 6 | "outputTokens": 6093, 7 | "cacheCreationTokens": 26947, 8 | "cacheReadTokens": 350957, 9 | "totalTokens": 384036, 10 | "totalCost": 1.4892517499999998, 11 | "lastActivity": "2025-07-18", 12 | "modelsUsed": [ 13 | "claude-opus-4-20250514" 14 | ], 15 | "modelBreakdowns": [ 16 | { 17 | "modelName": "claude-opus-4-20250514", 18 | "inputTokens": 39, 19 | "outputTokens": 6093, 20 | "cacheCreationTokens": 26947, 21 | "cacheReadTokens": 350957, 22 | "cost": 1.4892517499999998 23 | } 24 | ] 25 | }, 26 | 27 | A simple eBPF-based scheduler implementation using Linux's sched_ext framework. 
28 | 29 | ## Features 30 | 31 | - Fair scheduling based on virtual runtime (vruntime) 32 | - CPU load balancing 33 | - Task priority support 34 | - Real-time statistics monitoring 35 | - Minimal overhead 36 | 37 | ## Requirements 38 | 39 | - Linux kernel 6.6+ with CONFIG_SCHED_CLASS_EXT enabled 40 | - libbpf development files 41 | - clang and bpftool 42 | - Root privileges to load BPF programs 43 | 44 | ## Building 45 | 46 | ```bash 47 | make 48 | ``` 49 | 50 | ## Usage 51 | 52 | ```bash 53 | # Basic usage 54 | sudo ./scheduler 55 | 56 | # With statistics output 57 | sudo ./scheduler --stats 58 | 59 | # With verbose logging 60 | sudo ./scheduler --verbose 61 | 62 | # Custom stats interval (seconds) 63 | sudo ./scheduler --stats --interval 5 64 | ``` 65 | 66 | ## How it Works 67 | 68 | The scheduler implements a simple fair scheduling algorithm: 69 | 70 | 1. **Task Tracking**: Maintains per-task statistics including virtual runtime and priority 71 | 2. **CPU Selection**: Selects the least loaded CPU for new tasks 72 | 3. **Fair Scheduling**: Uses virtual runtime to ensure fair CPU time distribution 73 | 4. **Load Balancing**: Distributes tasks across available CPUs 74 | 75 | ## Architecture 76 | 77 | - `scheduler.bpf.c`: eBPF program implementing scheduling logic 78 | - `scheduler.c`: Userspace control program 79 | - `scheduler.h`: Shared definitions 80 | - `Makefile`: Build configuration 81 | 82 | ## Limitations 83 | 84 | This is a demonstration scheduler and may not be suitable for production use. Known limitations: 85 | - Limited to basic fair scheduling 86 | - No support for real-time tasks 87 | - Simple load balancing algorithm 88 | - No power-aware scheduling 89 | 90 | ## License 91 | 92 | GPL-2.0 -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-failed/scheduler.h: -------------------------------------------------------------------------------- 1 | #ifndef __SCHEDULER_H 2 | #define __SCHEDULER_H 3 | 4 | #define MAX_CPUS 128 5 | #define MAX_TASKS 10000 6 | #define SCHED_SLICE_NS 10000000 7 | 8 | struct sched_config { 9 | __u64 slice_ns; 10 | __u32 nr_cpus; 11 | __u32 debug; 12 | }; 13 | 14 | struct task_stats_user { 15 | __u64 runtime_ns; 16 | __u64 vruntime; 17 | __u32 cpu; 18 | __u32 priority; 19 | __u32 pid; 20 | char comm[16]; 21 | }; 22 | 23 | enum sched_event_type { 24 | SCHED_EVENT_ENQUEUE, 25 | SCHED_EVENT_DEQUEUE, 26 | SCHED_EVENT_DISPATCH, 27 | SCHED_EVENT_SWITCH, 28 | }; 29 | 30 | struct sched_event { 31 | __u64 timestamp; 32 | __u32 cpu; 33 | __u32 pid; 34 | __u32 event_type; 35 | __u32 extra; 36 | }; 37 | 38 | #endif -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-success/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for building eBPF scheduler 2 | 3 | # Compiler and tools 4 | CC := gcc 5 | CLANG := clang 6 | BPFTOOL := bpftool 7 | 8 | # Paths 9 | LIBBPF_DIR := /usr/include/bpf 10 | VMLINUX := vmlinux.h 11 | 12 | # Flags 13 | CFLAGS := -g -O2 -Wall 14 | BPF_CFLAGS := -g -O2 -target bpf -D__TARGET_ARCH_x86 15 | LDFLAGS := -lbpf -lelf -lz 16 | 17 | # Targets 18 | BPF_OBJS := minimal_scheduler.bpf.o 19 | USER_TARGETS := minimal_scheduler 20 | 21 | .PHONY: all clean vmlinux 22 | 23 | all: vmlinux $(BPF_OBJS) $(USER_TARGETS) 24 | 25 | # Generate vmlinux.h if it doesn't exist 26 | vmlinux: 27 | @if [ !
-f $(VMLINUX) ]; then \ 28 | echo "Generating vmlinux.h..."; \ 29 | $(BPFTOOL) btf dump file /sys/kernel/btf/vmlinux format c > $(VMLINUX); \ 30 | fi 31 | 32 | # Build BPF object 33 | %.bpf.o: %.bpf.c $(VMLINUX) 34 | $(CLANG) $(BPF_CFLAGS) -c $< -o $@ 35 | 36 | # Build user-space program 37 | %: %.c 38 | $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) 39 | 40 | # Load scheduler (requires root) 41 | load: all 42 | @echo "Loading minimal scheduler..." 43 | @sudo ./minimal_scheduler 44 | 45 | # Check if scheduler is loaded 46 | status: 47 | @if [ -f /sys/kernel/sched_ext/root/ops ]; then \ 48 | echo "Current scheduler: $$(cat /sys/kernel/sched_ext/root/ops)"; \ 49 | else \ 50 | echo "sched_ext not available or no custom scheduler loaded"; \ 51 | fi 52 | 53 | clean: 54 | rm -f $(BPF_OBJS) $(USER_TARGETS) $(VMLINUX) -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-success/conversation-2025-07-18-161603.txt: -------------------------------------------------------------------------------- 1 | ╭───────────────────────────────────────────────────╮ 2 | │ ✻ Welcome to Claude Code! │ 3 | │ │ 4 | │ /help for help, /status for your current setup │ 5 | │ │ 6 | │ cwd: /root/yunwei37/new-sched │ 7 | ╰───────────────────────────────────────────────────╯ 8 | 9 | Tips for getting started: 10 | 11 | 1. Run /init to create a CLAUDE.md file with instructions for Claude 12 | 2. Run /terminal-setup to set up terminal integration 13 | 3. Use Claude to help with file analysis, editing, bash commands and git 14 | 4. Be as specific as you would with another engineer for the best results 15 | 16 | -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-success/ebpf-scheduler/.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | *.o 3 | *.skel.h 4 | simple_scheduler 5 | vmlinux.h 6 | 7 | # Temporary files 8 | *.tmp 9 | *.swp 10 | *.swo 11 | *~ 12 | 13 | # Debug files 14 | *.log 15 | core 16 | core.* 17 | 18 | # IDE files 19 | .vscode/ 20 | .idea/ 21 | 22 | # Test output 23 | test_output/ 24 | benchmark_results/ -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-success/ebpf-scheduler/Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: GPL-2.0 2 | 3 | # Compiler and tools 4 | CC = clang 5 | CLANG = clang 6 | LLC = llc 7 | BPFTOOL = bpftool 8 | 9 | # Flags 10 | CFLAGS = -g -O2 -Wall 11 | BPF_CFLAGS = -g -O2 -target bpf -D__TARGET_ARCH_x86 -I. 12 | LDFLAGS = -lbpf -lelf -lz 13 | 14 | # Files 15 | BPF_PROG = simple_scheduler 16 | TARGETS = $(BPF_PROG) 17 | 18 | .PHONY: all clean 19 | 20 | all: $(TARGETS) 21 | 22 | # Generate vmlinux.h if it doesn't exist 23 | vmlinux.h: 24 | $(BPFTOOL) btf dump file /sys/kernel/btf/vmlinux format c > $@ 25 | 26 | # Compile BPF program 27 | $(BPF_PROG).bpf.o: $(BPF_PROG).bpf.c vmlinux.h 28 | $(CLANG) $(BPF_CFLAGS) -c $< -o $@ 29 | 30 | # Generate BPF skeleton 31 | $(BPF_PROG).skel.h: $(BPF_PROG).bpf.o 32 | $(BPFTOOL) gen skeleton $< > $@ 33 | 34 | # Compile userspace program 35 | $(BPF_PROG): $(BPF_PROG).c $(BPF_PROG).skel.h 36 | $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) 37 | 38 | clean: 39 | rm -f *.o *.skel.h $(TARGETS) vmlinux.h 40 | 41 | # Helper targets 42 | load: $(BPF_PROG) 43 | @echo "Loading scheduler..."
44 | @sudo ./$(BPF_PROG) 45 | 46 | status: 47 | @echo "Scheduler status:" 48 | @cat /sys/kernel/sched_ext/state 2>/dev/null || echo "sched_ext not available" 49 | @echo "Enabled:" 50 | @cat /sys/kernel/sched_ext/enabled 2>/dev/null || echo "0" 51 | 52 | enable: 53 | @echo "Enabling scheduler..." 54 | @echo 1 | sudo tee /sys/kernel/sched_ext/enabled 55 | 56 | disable: 57 | @echo "Disabling scheduler..." 58 | @echo 0 | sudo tee /sys/kernel/sched_ext/enabled -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-success/ebpf-scheduler/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Benchmark script for eBPF scheduler 4 | 5 | set -e 6 | 7 | echo "=== eBPF Scheduler Benchmark ===" 8 | echo 9 | 10 | # Check if running as root 11 | if [ "$EUID" -ne 0 ]; then 12 | echo "Error: This script must be run as root" 13 | exit 1 14 | fi 15 | 16 | # Function to run benchmark with a scheduler 17 | run_benchmark() { 18 | local scheduler_name=$1 19 | local enable_cmd=$2 20 | local disable_cmd=$3 21 | 22 | echo "Testing with $scheduler_name scheduler..." 23 | 24 | # Enable scheduler if needed 25 | eval "$enable_cmd" 26 | 27 | # Run sysbench CPU benchmark 28 | echo " CPU benchmark (single-threaded):" 29 | sysbench cpu --cpu-max-prime=20000 --time=10 run | grep -E "events per second|total time" | sed 's/^/ /' 30 | 31 | echo " CPU benchmark (multi-threaded):" 32 | sysbench cpu --cpu-max-prime=20000 --threads=4 --time=10 run | grep -E "events per second|total time" | sed 's/^/ /' 33 | 34 | # Run memory benchmark 35 | echo " Memory benchmark:" 36 | sysbench memory --memory-total-size=1G run | grep -E "transferred|total time" | sed 's/^/ /' 37 | 38 | # Disable scheduler if needed 39 | eval "$disable_cmd" 40 | 41 | echo 42 | } 43 | 44 | # Install sysbench if not available 45 | if ! command -v sysbench &> /dev/null; then 46 | echo "Installing sysbench..." 47 | apt-get update && apt-get install -y sysbench 48 | fi 49 | 50 | # Kill any existing scheduler 51 | pkill simple_scheduler 2>/dev/null || true 52 | 53 | # Benchmark with default scheduler 54 | run_benchmark "Default (CFS)" "true" "true" 55 | 56 | # Start our eBPF scheduler 57 | echo "Loading eBPF scheduler..." 58 | ./simple_scheduler & 59 | SCHED_PID=$! 60 | sleep 2 61 | 62 | if ! kill -0 $SCHED_PID 2>/dev/null; then 63 | echo "Error: Scheduler failed to load" 64 | exit 1 65 | fi 66 | 67 | # Benchmark with eBPF scheduler 68 | run_benchmark "Simple eBPF" "echo 1 > /sys/kernel/sched_ext/enabled" "echo 0 > /sys/kernel/sched_ext/enabled" 69 | 70 | # Clean up 71 | kill -SIGINT $SCHED_PID 2>/dev/null 72 | wait $SCHED_PID 2>/dev/null 73 | 74 | echo "=== Benchmark Complete ===" -------------------------------------------------------------------------------- /document/motivation_exp/test-claude-success/ebpf-scheduler/scx_common.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | #ifndef __SCX_COMMON_H 3 | #define __SCX_COMMON_H 4 | 5 | #include "vmlinux.h" 6 | #include <bpf/bpf_helpers.h> 7 | #include <bpf/bpf_tracing.h> 8 | 9 | /* BPF_STRUCT_OPS macro for sched_ext */ 10 | #define BPF_STRUCT_OPS(name, args...) \ 11 | SEC("struct_ops/"#name) \ 12 | BPF_PROG(name, ##args) 13 | 14 | #define BPF_STRUCT_OPS_SLEEPABLE(name, args...)
\ 15 | SEC("struct_ops.s/"#name) \ 16 | BPF_PROG(name, ##args) 17 | 18 | /* Default time slice */ 19 | #define SCX_SLICE_DFL 20000000ULL /* 20ms */ 20 | 21 | /* Helper function declarations */ 22 | s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym; 23 | void scx_bpf_destroy_dsq(u64 dsq_id) __ksym; 24 | void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym; 25 | void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym; 26 | bool scx_bpf_consume(u64 dsq_id) __ksym; 27 | s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *found) __ksym; 28 | s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags) __ksym; 29 | s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed, u64 flags) __ksym; 30 | bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym; 31 | void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; 32 | s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym; 33 | void scx_bpf_exit(s64 exit_code, const char *reason, u64 reason_len) __ksym; 34 | 35 | #endif /* __SCX_COMMON_H */ -------------------------------------------------------------------------------- /document/motivation_exp/test-sched-fake/.gitignore: -------------------------------------------------------------------------------- 1 | loader 2 | read_stats 3 | *.o 4 | -------------------------------------------------------------------------------- /document/motivation_exp/test-sched-fake/Makefile: -------------------------------------------------------------------------------- 1 | CLANG ?= clang 2 | LLVM_STRIP ?= llvm-strip 3 | BPFTOOL ?= bpftool 4 | ARCH := $(shell uname -m | sed 's/x86_64/x86/' | sed 's/aarch64/arm64/') 5 | 6 | INCLUDES := -I/usr/include -I/usr/include/$(ARCH)-linux-gnu 7 | CFLAGS := -g -O2 -Wall -target bpf -D__TARGET_ARCH_$(ARCH) 8 | 9 | all: sched.bpf.o 10 | 11 | sched.bpf.o: sched.bpf.c 12 | $(CLANG) $(CFLAGS) $(INCLUDES) -c $< -o $@ 13 | $(LLVM_STRIP) -g $@ 14 | 15 | clean: 16 | rm -f *.o 17 | 18 | .PHONY: all clean -------------------------------------------------------------------------------- /document/motivation_exp/test-sched-fake/README.md: -------------------------------------------------------------------------------- 1 | real 10m44.759s 2 | user 2m16.340s 3 | sys 0m12.405s 4 | 5 | 6 | I've successfully created and run an eBPF scheduler that: 7 | 8 | 1. Tracks scheduler events - monitors context switches, wakeups, new tasks, 9 | and CPU migrations 10 | 2. Collects statistics - counts total switches, wakeups, and per-CPU 11 | statistics 12 | 3. Outputs trace logs - prints periodic updates when thresholds are reached 13 | 4. Uses multiple tracepoints: 14 | - sched_switch - tracks context switches 15 | - sched_wakeup - tracks task wakeups 16 | - sched_wakeup_new - tracks new task creation 17 | - sched_migrate_task - tracks CPU migrations 18 | 19 | The scheduler compiled successfully and is running in the kernel, producing 20 | trace output visible in /sys/kernel/debug/tracing/trace_pipe. 21 | 22 | 23 | -------------------------------------------------------------------------------- /document/motivation_exp/test-sched-fake/load_sched.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Load the eBPF scheduler program 4 | echo "Loading eBPF scheduler..." 
5 | 6 | # Check if running as root 7 | if [ "$EUID" -ne 0 ]; then 8 | echo "Please run as root" 9 | exit 1 10 | fi 11 | 12 | # Load the program using bpftool 13 | echo "Loading sched.bpf.o..." 14 | bpftool prog load sched.bpf.o /sys/fs/bpf/sched_prog 15 | 16 | # List loaded programs 17 | echo -e "\nLoaded BPF programs:" 18 | bpftool prog list | grep -E "sched|tracepoint|kprobe" 19 | 20 | # Attach to tracepoints 21 | echo -e "\nAttaching to tracepoints..." 22 | 23 | # Get program IDs 24 | SCHED_SWITCH_ID=$(bpftool prog list | grep "sched_switch" | awk '{print $1}' | tr -d ':') 25 | SCHED_WAKEUP_ID=$(bpftool prog list | grep "sched_wakeup" | awk '{print $1}' | tr -d ':') 26 | FINISH_TASK_ID=$(bpftool prog list | grep "finish_task_switch" | awk '{print $1}' | tr -d ':') 27 | 28 | echo "Program IDs: switch=$SCHED_SWITCH_ID, wakeup=$SCHED_WAKEUP_ID, finish=$FINISH_TASK_ID" 29 | 30 | # Watch the trace pipe 31 | echo -e "\nMonitoring scheduler events (Ctrl+C to stop)..." 32 | echo "Check /sys/kernel/debug/tracing/trace_pipe for output" 33 | timeout 10 cat /sys/kernel/debug/tracing/trace_pipe || true 34 | 35 | # Show map contents 36 | echo -e "\nChecking BPF maps..." 37 | bpftool map list | grep -E "sched_stats|cpu_switch_count" 38 | 39 | echo -e "\nScheduler monitoring is running. Check trace_pipe for events." -------------------------------------------------------------------------------- /document/motivation_exp/test-sched-fake/loader.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdbool.h> 3 | #include <string.h> 4 | #include <errno.h> 5 | #include <signal.h> 6 | #include <unistd.h> 7 | #include <bpf/libbpf.h> 8 | 9 | static volatile bool exiting; 10 | 11 | static void sig_handler(int sig) 12 | { 13 | exiting = true; 14 | } 15 | 16 | static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) 17 | { 18 | return vfprintf(stderr, format, args); 19 | } 20 | 21 | int main(int argc, char **argv) 22 | { 23 | struct bpf_object *obj; 24 | struct bpf_program *prog; 25 | struct bpf_link *links[10]; 26 | int link_count = 0; 27 | int err; 28 | 29 | // Set up libbpf errors and debug info callback 30 | libbpf_set_print(libbpf_print_fn); 31 | 32 | // Load BPF object file 33 | obj = bpf_object__open_file("sched.bpf.o", NULL); 34 | if (libbpf_get_error(obj)) { 35 | fprintf(stderr, "ERROR: opening BPF object file failed\n"); 36 | return 1; 37 | } 38 | 39 | // Load BPF object into kernel 40 | if (bpf_object__load(obj)) { 41 | fprintf(stderr, "ERROR: loading BPF object file failed\n"); 42 | goto cleanup; 43 | } 44 | 45 | // Attach all programs 46 | bpf_object__for_each_program(prog, obj) { 47 | links[link_count] = bpf_program__attach(prog); 48 | if (libbpf_get_error(links[link_count])) { 49 | fprintf(stderr, "ERROR: attaching BPF program %s failed: %s\n", 50 | bpf_program__name(prog), strerror(errno)); 51 | links[link_count] = NULL; 52 | } else { 53 | printf("Successfully attached: %s\n", bpf_program__name(prog)); 54 | link_count++; 55 | } 56 | } 57 | 58 | if (link_count == 0) { 59 | fprintf(stderr, "ERROR: no programs were attached\n"); 60 | goto cleanup; 61 | } 62 | 63 | // Set up signal handlers 64 | signal(SIGINT, sig_handler); 65 | signal(SIGTERM, sig_handler); 66 | 67 | printf("\nScheduler monitoring is running.
Press Ctrl-C to stop.\n"); 68 | printf("Check /sys/kernel/debug/tracing/trace_pipe for output\n\n"); 69 | 70 | // Sleep until interrupted 71 | while (!exiting) { 72 | sleep(1); 73 | } 74 | 75 | cleanup: 76 | // Detach all links 77 | for (int i = 0; i < link_count; i++) { 78 | if (links[i]) 79 | bpf_link__destroy(links[i]); 80 | } 81 | bpf_object__close(obj); 82 | 83 | return 0; 84 | } -------------------------------------------------------------------------------- /document/motivation_exp/test-sched-fake/read_stats.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <string.h> 3 | #include <unistd.h> 4 | #include <bpf/bpf.h> 5 | #include <bpf/libbpf.h> 6 | 7 | int main(int argc, char **argv) 8 | { 9 | int switch_map_fd, wakeup_map_fd = -1, cpu_map_fd = -1; 10 | __u32 key = 0; 11 | __u64 switch_count = 0, wakeup_count = 0; 12 | __u64 cpu_counts[16] = {0}; 13 | 14 | // Find BPF maps by name 15 | switch_map_fd = bpf_obj_get("/sys/fs/bpf/sched_switch_count"); 16 | if (switch_map_fd < 0) { 17 | // Try to find by ID 18 | struct bpf_map_info info = {}; 19 | __u32 info_len = sizeof(info); 20 | int fd; 21 | 22 | for (int id = 1; id < 1000; id++) { 23 | fd = bpf_map_get_fd_by_id(id); 24 | if (fd < 0) continue; 25 | 26 | if (bpf_obj_get_info_by_fd(fd, &info, &info_len) == 0) { 27 | if (strstr(info.name, "sched_switch_count")) { 28 | switch_map_fd = fd; 29 | } else if (strstr(info.name, "sched_wakeup_count")) { 30 | wakeup_map_fd = fd; 31 | } else if (strstr(info.name, "cpu_switch_count")) { 32 | cpu_map_fd = fd; 33 | } else { 34 | close(fd); 35 | } 36 | } 37 | } 38 | } 39 | 40 | printf("eBPF Scheduler Statistics\n"); 41 | printf("========================\n\n"); 42 | 43 | // Read scheduler switch count 44 | if (switch_map_fd >= 0) { 45 | if (bpf_map_lookup_elem(switch_map_fd, &key, &switch_count) == 0) { 46 | printf("Total scheduler switches: %llu\n", switch_count); 47 | } 48 | } 49 | 50 | // Read wakeup count 51 | if (wakeup_map_fd >= 0) { 52 | if (bpf_map_lookup_elem(wakeup_map_fd, &key, &wakeup_count) == 0) { 53 | printf("Total wakeup events: %llu\n", wakeup_count); 54 | } 55 | } 56 | 57 | // Read per-CPU statistics 58 | if (cpu_map_fd >= 0) { 59 | printf("\nPer-CPU switch counts:\n"); 60 | for (int cpu = 0; cpu < 16; cpu++) { 61 | __u32 cpu_key = cpu; 62 | __u64 count = 0; 63 | if (bpf_map_lookup_elem(cpu_map_fd, &cpu_key, &count) == 0 && count > 0) { 64 | printf(" CPU %2d: %llu switches\n", cpu, count); 65 | } 66 | } 67 | } 68 | 69 | return 0; 70 | } -------------------------------------------------------------------------------- /document/schbench-optimize.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/document/schbench-optimize.gif -------------------------------------------------------------------------------- /mcp/.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug 4 | target 5 | 6 | # These are backup files generated by rustfmt 7 | **/*.rs.bk 8 | .env 9 | 10 | # MSVC Windows builds of rustc generate these, which store debugging information 11 | *.pdb 12 | 13 | # Generated by cargo mutants 14 | # Contains mutation testing data 15 | **/mutants.out*/ 16 | 17 | # RustRover 18 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 19 | be found at
https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 20 | # and can be added to the global gitignore or merged into this file. For a more nuclear 21 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 22 | #.idea/ 23 | 24 | # Python 25 | __pycache__/ 26 | *.py[cod] 27 | *$py.class 28 | *.so 29 | .Python 30 | env/ 31 | venv/ 32 | ENV/ 33 | .venv 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | .pytest_cache/ 37 | *.egg-info/ 38 | dist/ 39 | build/ 40 | 41 | # IDE 42 | .vscode/ 43 | .idea/ 44 | *.swp 45 | *.swo 46 | *~ 47 | 48 | # OS 49 | .DS_Store 50 | Thumbs.db 51 | 52 | # Project specific 53 | *.log 54 | execution_buffers/ 55 | /schedcp_workloads.json 56 | 57 | -------------------------------------------------------------------------------- /mcp/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | ".", 4 | "lib/process_manager", 5 | ] 6 | resolver = "2" 7 | 8 | [workspace.dependencies] 9 | tokio = { version = "1.40", features = ["full"] } 10 | serde = { version = "1.0", features = ["derive"] } 11 | serde_json = "1.0" 12 | log = "0.4" 13 | 14 | [package] 15 | name = "schedcp" 16 | version = "0.1.0" 17 | edition = "2021" 18 | 19 | [dependencies] 20 | rmcp = { version = "0.2.0", features = ["server", "transport-io"] } 21 | tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread", "io-std", "process", "time"] } 22 | serde = { version = "1.0", features = ["derive"] } 23 | serde_json = "1.0" 24 | anyhow = "1.0" 25 | tracing = "0.1" 26 | tracing-subscriber = { version = "0.3", features = ["env-filter", "std", "fmt", "registry"] } 27 | tracing-appender = "0.2" 28 | uuid = { version = "1.10", features = ["v4"] } 29 | schemars = "1.0" 30 | dashmap = "6.1" 31 | dotenv = "0.15" 32 | process_manager = { path = "lib/process_manager" } 33 | clap = { version = "4.5", features = ["derive"] } 34 | rust-embed = { version = "8.5", features = ["compression", "include-exclude"] } 35 | tempfile = "3.13" 36 | futures = "0.3" 37 | log = "0.4" 38 | 39 | [dev-dependencies] 40 | tempfile = "3.13" 41 | rmcp = { version = "0.2.0", features = ["server", "transport-io"] } 42 | 43 | [lib] 44 | name = "schedcp" 45 | path = "src/lib.rs" 46 | 47 | [[bin]] 48 | name = "schedcp" 49 | path = "src/main.rs" 50 | 51 | [[bin]] 52 | name = "schedcp-cli" 53 | path = "src/cli.rs" -------------------------------------------------------------------------------- /mcp/lib/process_manager/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "process_manager" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | tokio = { version = "1.40", features = ["full"] } 8 | async-trait = "0.1" 9 | serde = { version = "1.0", features = ["derive"] } 10 | serde_json = "1.0" 11 | log = "0.4" 12 | env_logger = "0.11" 13 | tempfile = "3.13" 14 | futures = "0.3" 15 | async-stream = "0.3" 16 | thiserror = "1.0" 17 | dashmap = "6.1" 18 | uuid = { version = "1.10", features = ["v4", "serde"] } 19 | chrono = { version = "0.4", features = ["serde"] } 20 | 21 | [dev-dependencies] 22 | tokio-test = "0.4" -------------------------------------------------------------------------------- /mcp/lib/process_manager/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod binary_extractor; 2 | pub mod process_manager; 3 | pub mod process_runner; 4 | pub mod types; 5 | 6 | pub use 
binary_extractor::BinaryExtractor; 7 | pub use process_manager::ProcessManager; 8 | pub use process_runner::ProcessRunner; 9 | pub use types::*; -------------------------------------------------------------------------------- /mcp/lib/process_manager/src/types.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use std::collections::HashMap; 3 | use uuid::Uuid; 4 | 5 | #[derive(Debug, Clone, Serialize, Deserialize)] 6 | pub struct ProcessInfo { 7 | pub id: Uuid, 8 | pub name: String, 9 | pub binary_name: String, 10 | pub pid: Option<u32>, 11 | pub status: ProcessStatus, 12 | pub args: Vec<String>, 13 | pub started_at: Option<chrono::DateTime<chrono::Utc>>, 14 | pub stopped_at: Option<chrono::DateTime<chrono::Utc>>, 15 | } 16 | 17 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] 18 | pub enum ProcessStatus { 19 | Pending, 20 | Running, 21 | Stopped, 22 | Failed, 23 | } 24 | 25 | #[derive(Debug, Clone)] 26 | pub struct ProcessConfig { 27 | pub name: String, 28 | pub binary_name: String, 29 | pub args: Vec<String>, 30 | pub env: HashMap<String, String>, 31 | pub working_dir: Option<String>, 32 | } 33 | 34 | #[derive(Debug, thiserror::Error)] 35 | pub enum ProcessError { 36 | #[error("Binary not found: {0}")] 37 | BinaryNotFound(String), 38 | 39 | #[error("Process not found: {0}")] 40 | ProcessNotFound(Uuid), 41 | 42 | #[error("Failed to start process: {0}")] 43 | StartFailed(String), 44 | 45 | #[error("Failed to stop process: {0}")] 46 | StopFailed(String), 47 | 48 | #[error("IO error: {0}")] 49 | Io(#[from] std::io::Error), 50 | 51 | #[error("Binary extraction failed: {0}")] 52 | ExtractionFailed(String), 53 | } -------------------------------------------------------------------------------- /mcp/new_sched/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.bpf.skel.h 3 | *.bpf.o 4 | *.bpf.l1o 5 | *.bpf.l2o 6 | *.bpf.l3o 7 | scx_simple 8 | loader 9 | example 10 | *.bpf.c 11 | -------------------------------------------------------------------------------- /mcp/new_sched/Makefile: -------------------------------------------------------------------------------- 1 | # Simplified config-driven Makefile for loader-based scheduler template 2 | # To add new scheduler: just add name to SCHEDULERS variable 3 | 4 | # Configuration - Add new schedulers here 5 | SCHEDULERS = example # vruntime fifo example compression ctest_suite ddos_log_analysis file_checksum git_add_different hotkey_aggregation video_transcode viral_product_analytics 6 | 7 | # Build targets 8 | LOADER = loader 9 | BPF_TARGETS = $(addsuffix .bpf.o,$(SCHEDULERS)) 10 | 11 | # Compiler and tools 12 | CLANG = clang 13 | CC = gcc 14 | BPFTOOL = bpftool 15 | 16 | # Include paths - matching the main scx build system 17 | PROJECT_ROOT = $(shell git rev-parse --show-toplevel) 18 | SCX_INCLUDES = \ 19 | -I$(PROJECT_ROOT)/scheduler/scx/scheds/include \ 20 | -I$(PROJECT_ROOT)/scheduler/scx/scheds/include/scx \ 21 | -I$(PROJECT_ROOT)/scheduler/scx/scheds/include/arch/x86 \ 22 | -I$(PROJECT_ROOT)/scheduler/scx/scheds/include/bpf-compat \ 23 | -I$(PROJECT_ROOT)/scheduler/scx/scheds/include/lib 24 | 25 | # System includes 26 | SYS_INCLUDES = \ 27 | -idirafter /usr/lib/llvm-19/lib/clang/19/include \ 28 | -idirafter /usr/local/include \ 29 | -idirafter /usr/include/x86_64-linux-gnu \ 30 | -idirafter /usr/include 31 | 32 | # BPF compilation flags 33 | BPF_CFLAGS = -g -O2 -Wall -Wno-compare-distinct-pointer-types \ 34 | -D__TARGET_ARCH_x86 -mcpu=v3 -mlittle-endian \ 35 | $(SYS_INCLUDES) \ 36 | 
$(SCX_INCLUDES) 37 | 38 | # User space compilation flags 39 | USER_CFLAGS = -O2 -g -Wall -Werror \ 40 | $(shell pkg-config --cflags libbpf) \ 41 | -I. \ 42 | $(SCX_INCLUDES) 43 | 44 | USER_LDFLAGS = $(shell pkg-config --libs libbpf) -lelf -lz -lzstd 45 | 46 | # Default target 47 | all: $(LOADER) $(BPF_TARGETS) 48 | 49 | # Compile BPF programs 50 | %.bpf.o: %.bpf.c 51 | @echo "Compiling BPF scheduler: $<" 52 | $(CLANG) $(BPF_CFLAGS) -target bpf -c $< -o $@ 53 | 54 | # Compile loader 55 | $(LOADER): $(LOADER).c 56 | @echo "Compiling loader: $<" 57 | $(CC) $(USER_CFLAGS) $< -o $@ $(USER_LDFLAGS) 58 | 59 | # Clean build artifacts 60 | clean: 61 | rm -f $(LOADER) $(BPF_TARGETS) 62 | 63 | # Install to scheduler bin directory 64 | install: all 65 | mkdir -p ../sche_bin/ 66 | cp $(LOADER) $(BPF_TARGETS) ../sche_bin/ 67 | 68 | # Show current configuration 69 | config: 70 | @echo "Current configuration:" 71 | @echo " Schedulers: $(SCHEDULERS)" 72 | @echo " BPF targets: $(BPF_TARGETS)" 73 | @echo " Loader: $(LOADER)" 74 | 75 | .PHONY: all clean install list help config -------------------------------------------------------------------------------- /scheduler/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore compiled scheduler binaries 2 | sche_bin/ 3 | 4 | # Ignore tools directory 5 | tools/ 6 | 7 | # Ignore build directories 8 | scx/build/ 9 | scx/target/ 10 | scx/scheds/rust/*/target/ 11 | scx/rust/scx_loader/target/ 12 | scx/tools/*/target/ 13 | 14 | # Ignore temporary files 15 | *.tmp 16 | *.swp 17 | *~ 18 | .DS_Store -------------------------------------------------------------------------------- /scheduler/README.md: -------------------------------------------------------------------------------- 1 | # Scheduler Components 2 | 3 | This directory contains scheduler implementations and tools for the AI-OS project. 4 | 5 | ## SCX (Sched-Ext) 6 | 7 | The `scx` subdirectory contains the sched-ext project, which provides extensible scheduling capabilities for Linux using BPF. 
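A quick way to check whether the running kernel can host these schedulers (a sketch; the sysfs node below assumes a kernel built with sched-ext support, e.g. CONFIG_SCHED_CLASS_EXT on 6.12+):

```bash
# Prints the current sched_ext state ("disabled", "enabled", ...) when supported
cat /sys/kernel/sched_ext/state 2>/dev/null || echo "sched_ext not available"
```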
8 | 9 | ### Building 10 | 11 | To build everything (schedulers and tools): 12 | 13 | ```bash 14 | make 15 | ``` 16 | 17 | To build all SCX schedulers (C and Rust) and copy binaries to `sche_bin/`: 18 | 19 | ```bash 20 | make build 21 | ``` 22 | 23 | To build only C schedulers: 24 | 25 | ```bash 26 | make build-c 27 | ``` 28 | 29 | To build only Rust schedulers: 30 | 31 | ```bash 32 | make build-rust 33 | ``` 34 | 35 | To build tools (scx_loader, scxctl, scxtop) and copy to `tools/`: 36 | 37 | ```bash 38 | make build-tools 39 | ``` 40 | 41 | To generate documentation for all schedulers in `sche_description/`: 42 | 43 | ```bash 44 | make doc 45 | ``` 46 | 47 | To clean build artifacts: 48 | 49 | ```bash 50 | make clean 51 | ``` 52 | 53 | To update the SCX submodule: 54 | 55 | ```bash 56 | make update 57 | ``` 58 | 59 | ### Tools 60 | 61 | The following tools are built and available in the `tools/` directory: 62 | 63 | - **scx_loader**: Service to manage sched_ext schedulers 64 | - **scxctl**: Command-line tool to control and monitor schedulers 65 | - **scxtop**: Real-time scheduler statistics viewer 66 | 67 | ### Available Schedulers 68 | 69 | SCX includes several scheduler implementations: 70 | - scx_simple: A simple scheduler example 71 | - scx_central: Central scheduling algorithm 72 | - scx_flatcg: Flattened cgroup scheduler 73 | - scx_lavd: LAVD (Latency-Aware Virtual Deadline) scheduler 74 | - scx_layered: Layered scheduler with priority tiers 75 | - scx_nest: Nested scheduling implementation 76 | - scx_pair: Pair-based scheduler 77 | - scx_qmap: Queue-map based scheduler 78 | - scx_rlfifo: Real-time FIFO scheduler 79 | - scx_rusty: Rust-based scheduler implementation 80 | - scx_userland: Userspace-driven scheduler 81 | 82 | ### Requirements 83 | 84 | - Linux kernel with sched-ext support 85 | - Rust toolchain 86 | - Cargo 87 | - Clang/LLVM for BPF compilation 88 | - libbpf development headers 89 | 90 | ### Usage 91 | 92 | After building, the schedulers can be found in `scx/scheds/rust/scx_*/target/release/`. 93 | 94 | Example usage: 95 | ```bash 96 | sudo ./scx/scheds/rust/scx_simple/target/release/scx_simple 97 | ``` 98 | 99 | For more information, see the [SCX project documentation](https://github.com/sched-ext/scx). -------------------------------------------------------------------------------- /scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | schedcp Scheduler Testing Package 3 | 4 | This package provides utilities for testing and benchmarking different schedulers 5 | in the schedcp project. It includes reusable components for scheduler management. 
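Hypothetical usage sketch (constructor arguments and method names are
assumptions; see scheduler_runner.py for the actual API):

    from scheduler import SchedulerRunner, SchedulerBenchmark

    runner = SchedulerRunner()
    # ... start a scheduler from sche_bin/, run a workload, collect results ...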
6 | """ 7 | 8 | from .scheduler_runner import SchedulerRunner, SchedulerBenchmark 9 | 10 | __all__ = ['SchedulerRunner', 'SchedulerBenchmark'] -------------------------------------------------------------------------------- /scheduler/custom_schedulers/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.bpf.skel.h 3 | *.bpf.o 4 | *.bpf.l1o 5 | *.bpf.l2o 6 | *.bpf.l3o 7 | *.bpf.l4o 8 | *.bpf.l5o 9 | *.bpf.l6o 10 | *.bpf.l7o 11 | *.bpf.l8o 12 | *.bpf.l9o 13 | *.bpf.l10o 14 | *.bpf.l11o 15 | *.bpf.l12o 16 | *.bpf.l13o 17 | *.bpf.l14o 18 | *.bpf.l15o 19 | *.bpf.l16o 20 | *.bpf.l17o 21 | *.bpf.l18o 22 | *.bpf.l19o 23 | scx_simple 24 | scx_cxl 25 | runqslower 26 | scx_simple_cxl_pmu 27 | 28 | -------------------------------------------------------------------------------- /scheduler/custom_schedulers/runqslower.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 | #ifndef __RUNQSLOWER_H 3 | #define __RUNQSLOWER_H 4 | 5 | #define TASK_COMM_LEN 16 6 | 7 | struct event { 8 | char task[TASK_COMM_LEN]; 9 | char prev_task[TASK_COMM_LEN]; 10 | __u64 delta_us; 11 | __s32 pid; 12 | __s32 prev_pid; 13 | __u32 cpu; 14 | /* Raw PMU counter values */ 15 | __u64 pmu_counter; 16 | __u64 pmu_enabled; 17 | __u64 pmu_running; 18 | }; 19 | 20 | #endif /* __RUNQSLOWER_H */ 21 | -------------------------------------------------------------------------------- /scheduler/custom_schedulers/scx_cxl.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * CXL Bandwidth-Aware Scheduler Header 4 | * 5 | * Common definitions for CXL scheduler BPF and userspace components 6 | */ 7 | 8 | #ifndef __SCX_CXL_H 9 | #define __SCX_CXL_H 10 | 11 | #include 12 | #include 13 | 14 | #define MAX_CPUS 1024 15 | #define MAX_TASKS 8192 16 | #define MB_TO_BYTES(mb) ((mb) * 1024 * 1024) 17 | 18 | enum task_type { 19 | TASK_TYPE_UNKNOWN = 0, 20 | TASK_TYPE_MOE_VECTORDB, 21 | TASK_TYPE_KWORKER, 22 | TASK_TYPE_REGULAR, 23 | TASK_TYPE_LATENCY_SENSITIVE, 24 | TASK_TYPE_READ_INTENSIVE, 25 | TASK_TYPE_WRITE_INTENSIVE, 26 | TASK_TYPE_BANDWIDTH_TEST, 27 | }; 28 | 29 | enum io_pattern { 30 | IO_PATTERN_UNKNOWN = 0, 31 | IO_PATTERN_READ_HEAVY, 32 | IO_PATTERN_WRITE_HEAVY, 33 | IO_PATTERN_MIXED, 34 | IO_PATTERN_SEQUENTIAL, 35 | IO_PATTERN_RANDOM, 36 | }; 37 | 38 | struct memory_access_pattern { 39 | uint64_t nr_accesses; 40 | uint64_t avg_access_size; 41 | uint64_t total_access_time; 42 | uint64_t last_access_time; 43 | uint64_t hot_regions; 44 | uint64_t cold_regions; 45 | uint32_t locality_score; 46 | uint32_t working_set_size; 47 | uint64_t read_bytes; 48 | uint64_t write_bytes; 49 | enum io_pattern io_pattern; 50 | }; 51 | 52 | struct cxl_pmu_metrics { 53 | uint64_t memory_bandwidth; 54 | uint64_t cache_hit_rate; 55 | uint64_t memory_latency; 56 | uint64_t cxl_utilization; 57 | uint64_t read_bandwidth; 58 | uint64_t write_bandwidth; 59 | uint64_t last_update_time; 60 | }; 61 | 62 | struct bandwidth_control { 63 | uint64_t max_read_bandwidth_mb; 64 | uint64_t max_write_bandwidth_mb; 65 | uint64_t token_bucket_size; 66 | uint64_t refill_interval_ns; 67 | bool enabled; 68 | }; 69 | 70 | struct damon_config { 71 | const char *sysfs_path; 72 | uint64_t sample_interval_ns; 73 | uint32_t min_nr_regions; 74 | uint32_t max_nr_regions; 75 | bool enabled; 76 | }; 77 | 78 | struct scheduler_features { 79 | bool enable_damon; 80 | 
bool enable_cxl_aware; 81 | bool enable_bandwidth_control; 82 | bool enable_vectordb_optimization; 83 | bool enable_kworker_promotion; 84 | bool verbose; 85 | }; 86 | 87 | struct scheduler_stats { 88 | uint64_t total_enqueues; 89 | uint64_t total_dispatches; 90 | uint64_t vectordb_tasks; 91 | uint64_t bandwidth_limited_tasks; 92 | uint64_t damon_updates; 93 | uint64_t cxl_migrations; 94 | }; 95 | 96 | #endif /* __SCX_CXL_H */ -------------------------------------------------------------------------------- /scheduler/custom_schedulers/scx_pmu.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | struct scx_stats { 3 | int seq; 4 | pid_t pid; 5 | __u64 enqueue; 6 | __u64 exit; 7 | __u64 init; 8 | __u64 select_busy_cpu; 9 | __u64 select_idle_cpu; 10 | }; 11 | -------------------------------------------------------------------------------- /scheduler/custom_schedulers/scx_simple_cxl_pmu.bpf.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * A simple FIFO scheduler. 4 | * 5 | * This scheduler implements simple FIFO (First-In-First-Out) scheduling 6 | * where tasks are scheduled in the order they arrive, without considering 7 | * task weights or priorities. 8 | * 9 | * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 10 | * Copyright (c) 2022 Tejun Heo <tj@kernel.org> 11 | * Copyright (c) 2022 David Vernet <dvernet@meta.com> 12 | */ 13 | #include <scx/common.bpf.h> 14 | 15 | char _license[] SEC("license") = "GPL"; 16 | 17 | UEI_DEFINE(uei); 18 | 19 | #define SHARED_DSQ 0 20 | 21 | s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) 22 | { 23 | bool is_idle = false; 24 | s32 cpu; 25 | 26 | cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle); 27 | if (is_idle) { 28 | scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); 29 | } 30 | 31 | return cpu; 32 | } 33 | 34 | void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) 35 | { 36 | scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags); 37 | } 38 | 39 | void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev) 40 | { 41 | scx_bpf_dsq_move_to_local(SHARED_DSQ); 42 | } 43 | 44 | s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init) 45 | { 46 | return scx_bpf_create_dsq(SHARED_DSQ, -1); 47 | } 48 | 49 | void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei) 50 | { 51 | UEI_RECORD(uei, ei); 52 | } 53 | 54 | SCX_OPS_DEFINE(simple_ops, 55 | .select_cpu = (void *)simple_select_cpu, 56 | .enqueue = (void *)simple_enqueue, 57 | .dispatch = (void *)simple_dispatch, 58 | .init = (void *)simple_init, 59 | .exit = (void *)simple_exit, 60 | .name = "simple"); -------------------------------------------------------------------------------- /scheduler/custom_schedulers/scx_simple_cxl_pmu.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * Simple CXL PMU-aware scheduler - Header file 4 | * 5 | * Common definitions shared between BPF and userspace. 
6 | */ 7 | #ifndef __SCX_SIMPLE_CXL_PMU_H 8 | #define __SCX_SIMPLE_CXL_PMU_H 9 | 10 | /* Dispatch queue IDs */ 11 | #define SHARED_DSQ 0 12 | #define READ_DSQ 1 13 | #define WRITE_DSQ 2 14 | 15 | /* Task context for tracking read/write patterns */ 16 | struct task_ctx { 17 | bool is_reader; 18 | bool is_writer; 19 | __u64 last_update; 20 | }; 21 | 22 | #endif /* __SCX_SIMPLE_CXL_PMU_H */ -------------------------------------------------------------------------------- /scheduler/ml-scheduler/.gitignore: -------------------------------------------------------------------------------- 1 | # Rust build artifacts 2 | /target/ 3 | Cargo.lock 4 | 5 | # Python artifacts 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | *.so 10 | .Python 11 | 12 | # TensorFlow models 13 | *.h5 14 | *.pb 15 | *.tflite 16 | model_path/ 17 | model/ 18 | *.tar.gz 19 | 20 | # IDE 21 | .vscode/ 22 | .idea/ 23 | *.swp 24 | *.swo 25 | *~ 26 | 27 | # OS 28 | .DS_Store 29 | Thumbs.db 30 | 31 | # Logs 32 | *.log 33 | 34 | # Temp cloned repo 35 | /scx-ml-temp/ -------------------------------------------------------------------------------- /scheduler/ml-scheduler/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ml-scheduler" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | anyhow = "1.0" 8 | clap = { version = "4.4", features = ["derive", "env", "unicode", "wrap_help"] } 9 | ctrlc = { version = "3.4", features = ["termination"] } 10 | futures = "0.3" 11 | libc = "0.2" 12 | log = "0.4" 13 | ordered-float = "3.4" 14 | scx_utils = "1.0.6" 15 | simplelog = "0.12" 16 | tokio = { version = "1.40", features = ["full"] } 17 | tensorflow = "0.21.0" 18 | 19 | [[bin]] 20 | name = "ml-scheduler" 21 | path = "src/main.rs" 22 | 23 | [[bin]] 24 | name = "test-ml" 25 | path = "src/test_ml.rs" -------------------------------------------------------------------------------- /scheduler/ml-scheduler/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all build test clean run model 2 | 3 | all: build 4 | 5 | build: 6 | cargo build --release 7 | 8 | test: 9 | cargo test 10 | 11 | clean: 12 | cargo clean 13 | rm -f model.pb model.tflite 14 | rm -rf model/ 15 | 16 | model: 17 | cd src/model_dir && python3 create_model.py 18 | cd src/model_dir && python3 transfer.py 19 | 20 | run: build 21 | ./target/release/ml-scheduler 22 | 23 | install-deps: 24 | pip3 install tensorflow numpy 25 | 26 | help: 27 | @echo "ML Scheduler Build Targets:" 28 | @echo " make build - Build the ML scheduler" 29 | @echo " make test - Run tests" 30 | @echo " make clean - Clean build artifacts" 31 | @echo " make model - Generate sample ML model" 32 | @echo " make run - Run the ML scheduler" 33 | @echo " make install-deps - Install Python dependencies for model training" -------------------------------------------------------------------------------- /scheduler/ml-scheduler/schedcp_workloads.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": {}, 3 | "history": [] 4 | } -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "scx_rusty" 3 | version = "0.5.7" 4 | authors = ["Dan Schatzberg <dschatzberg@meta.com>", "Meta"] 5 | edition = "2021" 6 | description = "A multi-domain, BPF / user space hybrid scheduler used within 
sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main" 7 | license = "GPL-2.0-only" 8 | 9 | [dependencies] 10 | anyhow = "1.0.65" 11 | clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] } 12 | ctrlc = { version = "3.1", features = ["termination"] } 13 | fb_procfs = "0.7.0" 14 | libbpf-rs = "0.23" 15 | libc = "0.2.137" 16 | log = "0.4.17" 17 | ordered-float = "3.4.0" 18 | scx_utils = { path = "../../../rust/scx_utils", version = "0.8.1" } 19 | simplelog = "0.12.0" 20 | sorted-vec = "0.8.3" 21 | static_assertions = "1.1.0" 22 | tensorflow = "0.21.0" 23 | 24 | [build-dependencies] 25 | scx_utils = { path = "../../../rust/scx_utils", version = "0.8.1" } 26 | 27 | [features] 28 | enable_backtrace = [] 29 | -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/LICENSE: -------------------------------------------------------------------------------- 1 | ../../../LICENSE -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/README.md: -------------------------------------------------------------------------------- 1 | # scx_rusty 2 | 3 | This is a single user-defined scheduler used within [sched_ext](https://github.com/sched-ext/scx/tree/main), which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. [Read more about sched_ext](https://github.com/sched-ext/scx/tree/main). 4 | 5 | ## Overview 6 | 7 | A multi-domain, BPF / user space hybrid scheduler. The BPF portion of the 8 | scheduler does a simple round robin in each domain, and the user space portion 9 | (written in Rust) calculates the load factor of each domain, and informs BPF of 10 | how tasks should be load balanced accordingly. 11 | 12 | ## How To Install 13 | 14 | Available as a [Rust crate](https://crates.io/crates/scx_rusty): `cargo add scx_rusty` 15 | 16 | ## Typical Use Case 17 | 18 | Rusty is designed to be flexible, and accommodate different architectures and 19 | workloads. Various load balancing thresholds (e.g. greediness, frequency, etc.), 20 | as well as how Rusty should partition the system into scheduling domains, can 21 | be tuned to achieve the optimal configuration for any given system or workload. 22 | 23 | ## Production Ready? 24 | 25 | Yes. If tuned correctly, rusty should be performant across various CPU 26 | architectures and workloads. Rusty by default creates a separate scheduling 27 | domain per-LLC, so its default configuration may be performant as well. Note 28 | however that scx_rusty does not yet disambiguate between LLCs in different NUMA 29 | nodes, so it may perform better on multi-CCX machines where all the LLCs share 30 | the same socket, as opposed to multi-socket machines. 31 | 32 | Note as well that you may run into an issue with infeasible weights, where a 33 | task with a very high weight may cause the scheduler to incorrectly leave cores 34 | idle because it thinks they're necessary to accommodate the compute for a 35 | single task. This can also happen in CFS, and should soon be addressed for 36 | scx_rusty. 37 | -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/build.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. 
and affiliates. 2 | // 3 | // This software may be used and distributed according to the terms of the 4 | // GNU General Public License version 2. 5 | 6 | fn main() { 7 | scx_utils::BpfBuilder::new() 8 | .unwrap() 9 | .enable_intf("src/bpf/intf.h", "bpf_intf.rs") 10 | .enable_skel("src/bpf/main.bpf.c", "bpf") 11 | .build() 12 | .unwrap(); 13 | } 14 | -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/meson.build: -------------------------------------------------------------------------------- 1 | sched = custom_target('scx_rusty', 2 | output: '@PLAINNAME@.__PHONY__', 3 | input: 'Cargo.toml', 4 | command: [cargo, 'build', '--manifest-path=@INPUT@', '--target-dir=@OUTDIR@', 5 | cargo_build_args], 6 | env: cargo_env, 7 | depends: [libbpf, bpftool_target, sched], 8 | build_always_stale: true) 9 | -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/rustfmt.toml: -------------------------------------------------------------------------------- 1 | # Get help on options with `rustfmt --help=config` 2 | # Please keep these in alphabetical order. 3 | edition = "2021" 4 | group_imports = "StdExternalCrate" 5 | imports_granularity = "Item" 6 | merge_derives = false 7 | use_field_init_shorthand = true 8 | version = "Two" 9 | -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/src/bpf_intf.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | // This software may be used and distributed according to the terms of the 4 | // GNU General Public License version 2. 5 | #![allow(non_upper_case_globals)] 6 | #![allow(non_camel_case_types)] 7 | #![allow(non_snake_case)] 8 | #![allow(dead_code)] 9 | 10 | include!(concat!(env!("OUT_DIR"), "/bpf_intf.rs")); 11 | -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/src/bpf_skel.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | // This software may be used and distributed according to the terms of the 4 | // GNU General Public License version 2. 5 | 6 | // We can't directly include the generated skeleton in main.rs as it may 7 | // contain compiler attributes that can't be `include!()`ed via macro and we 8 | // can't use the `#[path = "..."]` because `concat!(env!("OUT_DIR"), 9 | // "/bpf.skel.rs")` does not work inside the path attribute yet (see 10 | // https://github.com/rust-lang/rust/pull/83366). 
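// Illustration (not part of the original file): the attribute form mentioned
// above, which does not currently compile, would look roughly like
//
//     #[path = concat!(env!("OUT_DIR"), "/bpf_skel.rs")]
//     mod bpf_skel;
//
// hence the include! fallback below.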
11 | 12 | include!(concat!(env!("OUT_DIR"), "/bpf_skel.rs")); 13 | -------------------------------------------------------------------------------- /scheduler/ml-scheduler/scx_rusty_ml/src/model_dir/transfer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow import keras 3 | model = keras.models.load_model('model_stress_model.h5') 4 | model.export('model_path') -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/bpf_interface.rs: -------------------------------------------------------------------------------- 1 | use std::mem::size_of; 2 | 3 | #[repr(C)] 4 | #[derive(Debug, Clone, Copy)] 5 | pub struct TaskContext { 6 | pub pid: u32, 7 | pub cpu: i32, 8 | pub dom_id: u32, 9 | pub src_dom_load: f64, 10 | pub dst_dom_load: f64, 11 | pub cpu_idle: i32, 12 | pub cpu_not_idle: i32, 13 | } 14 | 15 | impl Default for TaskContext { 16 | fn default() -> Self { 17 | Self { 18 | pid: 0, 19 | cpu: 0, 20 | dom_id: 0, 21 | src_dom_load: 0.0, 22 | dst_dom_load: 0.0, 23 | cpu_idle: 0, 24 | cpu_not_idle: 0, 25 | } 26 | } 27 | } 28 | 29 | #[repr(C)] 30 | #[derive(Debug, Clone, Copy)] 31 | pub struct MigrationDecision { 32 | pub should_migrate: bool, 33 | pub target_dom: u32, 34 | pub confidence: f64, 35 | } 36 | 37 | impl Default for MigrationDecision { 38 | fn default() -> Self { 39 | Self { 40 | should_migrate: false, 41 | target_dom: 0, 42 | confidence: 0.0, 43 | } 44 | } 45 | } 46 | 47 | pub const TASK_CONTEXT_SIZE: usize = size_of::<TaskContext>(); 48 | pub const MIGRATION_DECISION_SIZE: usize = size_of::<MigrationDecision>(); -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/exact_ml_implementation.rs: -------------------------------------------------------------------------------- 1 | // Exact ML implementation from the scx_rusty fork 2 | extern crate tensorflow; 3 | 4 | use tensorflow::Graph; 5 | use tensorflow::Session; 6 | use tensorflow::SessionOptions; 7 | use tensorflow::SessionRunArgs; 8 | use tensorflow::Tensor; 9 | use std::error::Error; 10 | 11 | struct TensorFlowModel { 12 | graph: Graph, 13 | session: Session, 14 | } 15 | 16 | impl TensorFlowModel { 17 | fn new(model_dir: &str) -> Result<Self, Box<dyn Error>> { 18 | let mut graph = Graph::new(); 19 | let bundle = tensorflow::SavedModelBundle::load( 20 | &SessionOptions::new(), 21 | &["serve"], 22 | &mut graph, 23 | model_dir, 24 | )?; 25 | let session = bundle.session; 26 | 27 | Ok(TensorFlowModel { graph, session }) 28 | } 29 | 30 | fn predict(&self, input_data: Vec<f64>) -> Result<bool, Box<dyn Error>> { 31 | let input_tensor = Tensor::new(&[input_data.len() as u64]).with_values(&input_data)?; 32 | 33 | let input_op = self.graph.operation_by_name_required("serving_default_input")?; 34 | let output_op = self.graph.operation_by_name_required("StatefulPartitionedCall")?; 35 | 36 | let mut args = SessionRunArgs::new(); 37 | args.add_feed(&input_op, 0, &input_tensor); 38 | let output_token = args.request_fetch(&output_op, 0); 39 | 40 | self.session.run(&mut args)?; 41 | 42 | let output_tensor: Tensor<f64> = args.fetch(output_token).unwrap(); 43 | let output_value = output_tensor[0]; 44 | Ok(output_value == 1.0) 45 | } 46 | } 47 | 48 | pub struct MLScheduler { 49 | inference_model: TensorFlowModel, 50 | } 51 | 52 | impl MLScheduler { 53 | pub fn new(model_path: &str) -> Result<Self, Box<dyn Error>> { 54 | Ok(Self { 55 | inference_model: TensorFlowModel::new(model_path)?, 56 | }) 57 | } 58 | 59 | pub fn migrate_inference(&self, cpu: &i32, cpu_idle: 
&i32, cpu_not_idle: &i32, src_dom_load: &f64, dst_dom_load: &f64) -> bool { 60 | let input_vec = vec![f64::from(*cpu), f64::from(*cpu_idle), f64::from(*cpu_not_idle), *src_dom_load, *dst_dom_load]; 61 | self.inference_model.predict(input_vec).unwrap() 62 | } 63 | } -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use clap::Parser; 3 | use log::info; 4 | use ml_scheduler::{MLScheduler, MigrationFeatures}; 5 | use simplelog::{Config, LevelFilter, SimpleLogger}; 6 | use std::sync::Arc; 7 | use tokio::sync::Mutex; 8 | 9 | #[derive(Debug, Parser)] 10 | #[clap(author, version, about, long_about = None)] 11 | struct Args { 12 | #[clap(short, long, default_value = "src/model_dir/model_path")] 13 | model_path: String, 14 | 15 | #[clap(short, long, action = clap::ArgAction::SetTrue)] 16 | verbose: bool, 17 | } 18 | 19 | #[tokio::main] 20 | async fn main() -> Result<()> { 21 | let args = Args::parse(); 22 | 23 | let log_level = if args.verbose { 24 | LevelFilter::Debug 25 | } else { 26 | LevelFilter::Info 27 | }; 28 | 29 | SimpleLogger::init(log_level, Config::default())?; 30 | 31 | info!("Initializing ML Scheduler with model: {}", args.model_path); 32 | 33 | let scheduler = Arc::new(Mutex::new(MLScheduler::new(&args.model_path)?)); 34 | 35 | info!("ML Scheduler started successfully"); 36 | 37 | // Example usage - in production this would be integrated with the BPF scheduler 38 | let features = MigrationFeatures { 39 | cpu: 172, 40 | cpu_idle: 1, 41 | cpu_not_idle: 171, 42 | src_dom_load: 0.5, 43 | dst_dom_load: 0.5, 44 | }; 45 | 46 | let scheduler_guard = scheduler.lock().await; 47 | match scheduler_guard.should_migrate(&features) { 48 | Ok(should_migrate) => { 49 | info!("Migration decision for {:?}: {}", features, should_migrate); 50 | } 51 | Err(e) => { 52 | log::error!("Error making migration decision: {}", e); 53 | } 54 | } 55 | 56 | // Keep the scheduler running 57 | let (tx, mut rx) = tokio::sync::mpsc::channel::<()>(1); 58 | 59 | ctrlc::set_handler(move || { 60 | let _ = tx.try_send(()); 61 | })?; 62 | 63 | info!("ML Scheduler running. 
Press Ctrl+C to exit."); 64 | rx.recv().await; 65 | 66 | info!("Shutting down ML Scheduler"); 67 | Ok(()) 68 | } -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/model_dir/convert_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # Force CPU-only 4 | import tensorflow as tf 5 | from tensorflow import keras 6 | 7 | # Load the Keras model 8 | model = keras.models.load_model('model_stress_model.h5') 9 | 10 | # Create a concrete function for the model 11 | @tf.function 12 | def serving_default(inputs): 13 | return model(inputs, training=False) 14 | 15 | # Get the concrete function with specified input shape 16 | concrete_func = serving_default.get_concrete_function( 17 | tf.TensorSpec(shape=[None, 5], dtype=tf.float32, name='serving_default_input') 18 | ) 19 | 20 | # Save the model using the lower-level API 21 | tf.saved_model.save( 22 | model, 23 | 'model_path', 24 | signatures={'serving_default': concrete_func} 25 | ) 26 | 27 | print("Model successfully converted to SavedModel format at 'model_path'") 28 | 29 | # Verify the saved model 30 | loaded = tf.saved_model.load('model_path') 31 | print("Model loaded successfully!") 32 | print("Available signatures:", list(loaded.signatures.keys())) -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/model_dir/convert_to_savedmodel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import shutil 4 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 5 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | 10 | # Remove old model directory 11 | if os.path.exists('model_path'): 12 | shutil.rmtree('model_path') 13 | 14 | # Load the existing H5 model (which expects 5 features) 15 | original_model = tf.keras.models.load_model('model_stress_model.h5') 16 | 17 | # Create a new model that accepts 10 features but uses only first 5 18 | input_layer = tf.keras.layers.Input(shape=(10,), name='input') 19 | # Take only first 5 features 20 | sliced = tf.keras.layers.Lambda(lambda x: x[:, :5])(input_layer) 21 | # Pass through the original model 22 | output = original_model(sliced) 23 | # Create the wrapper model 24 | wrapper_model = tf.keras.Model(inputs=input_layer, outputs=output) 25 | 26 | # Use TensorFlow's export method 27 | wrapper_model.export('model_path') 28 | 29 | print("Model saved successfully to model_path/") 30 | 31 | # Verify the saved model 32 | print("\nVerifying saved model...") 33 | loaded = tf.saved_model.load('model_path') 34 | 35 | # Test with realistic scheduler data 36 | test_cases = [ 37 | ([32, 16, 16, 0.8, 0.3, 0, 0, 0, 0, 0], "High load imbalance"), 38 | ([64, 32, 32, 0.5, 0.5, 0, 0, 0, 0, 0], "Balanced load"), 39 | ([16, 48, 16, 0.2, 0.2, 0, 0, 0, 0, 0], "Low load system"), 40 | ] 41 | 42 | print("\nTest predictions:") 43 | for features, desc in test_cases: 44 | test_input = tf.constant([features], dtype=tf.float32) 45 | pred = wrapper_model.predict(test_input, verbose=0)[0][0] 46 | print(f"{desc:20} -> {pred:.4f} -> {'MIGRATE' if pred > 0.5 else 'STAY'}") 47 | 48 | print("\n✓ Model ready for use with ML scheduler!") -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/model_dir/tf_convert.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' 4 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 5 | import tensorflow as tf 6 | 7 | # Suppress warnings 8 | import warnings 9 | warnings.filterwarnings('ignore') 10 | 11 | # Load the model 12 | print("Loading model...") 13 | model = tf.keras.models.load_model('model_stress_model.h5') 14 | 15 | # Convert to TensorFlow Lite as a workaround 16 | converter = tf.lite.TFLiteConverter.from_keras_model(model) 17 | tflite_model = converter.convert() 18 | 19 | # Save the TFLite model 20 | with open('model.tflite', 'wb') as f: 21 | f.write(tflite_model) 22 | print("Model converted to TFLite format: model.tflite") 23 | 24 | # For now, let's use the .h5 file directly in our Rust code 25 | print("Note: Using .h5 file directly for TensorFlow Rust bindings") -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/model_dir/transfer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import tensorflow as tf 3 | from tensorflow import keras 4 | 5 | # Load the Keras model 6 | model = keras.models.load_model('model_stress_model.h5') 7 | 8 | # Export to SavedModel format 9 | tf.saved_model.save(model, 'model_path') -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/model_export.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Script to export a trained Keras model to TensorFlow SavedModel format 4 | for use with the ML scheduler. 5 | """ 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | from tensorflow import keras 10 | 11 | def create_sample_model(): 12 | """Create a sample neural network model for task migration decisions.""" 13 | model = keras.Sequential([ 14 | keras.layers.Input(shape=(5,)), # 5 input features 15 | keras.layers.Dense(16, activation='relu'), 16 | keras.layers.Dense(8, activation='relu'), 17 | keras.layers.Dense(1, activation='sigmoid') # Binary output 18 | ]) 19 | 20 | model.compile( 21 | optimizer='adam', 22 | loss='binary_crossentropy', 23 | metrics=['accuracy'] 24 | ) 25 | 26 | return model 27 | 28 | def generate_sample_data(n_samples=1000): 29 | """Generate synthetic training data for demonstration.""" 30 | # Features: cpu, cpu_idle, cpu_not_idle, src_dom_load, dst_dom_load 31 | X = np.random.rand(n_samples, 5) 32 | 33 | # Simple rule: migrate if destination domain has significantly lower load 34 | y = (X[:, 4] < X[:, 3] - 0.3).astype(np.float32) 35 | 36 | return X, y 37 | 38 | def train_and_export_model(model_path='model'): 39 | """Train the model and export it to SavedModel format.""" 40 | # Create and train model 41 | model = create_sample_model() 42 | X_train, y_train = generate_sample_data() 43 | 44 | print("Training model...") 45 | model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2) 46 | 47 | # Export to SavedModel format 48 | print(f"Exporting model to {model_path}...") 49 | tf.saved_model.save(model, model_path) 50 | 51 | # Also export a TFLite copy for easier loading 52 | converter = tf.lite.TFLiteConverter.from_saved_model(model_path) 53 | tflite_model = converter.convert() 54 | 55 | with open(f"{model_path}.tflite", "wb") as f: 56 | f.write(tflite_model) 57 | 58 | print("Model exported successfully!") 59 | 60 | if __name__ == "__main__": 61 | 
train_and_export_model() -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/scheduler_integration.rs: -------------------------------------------------------------------------------- 1 | use crate::bpf_interface::{TaskContext, MigrationDecision}; 2 | use crate::{MLScheduler, MigrationFeatures}; 3 | use anyhow::{Result, anyhow}; 4 | use std::sync::Arc; 5 | use tokio::sync::Mutex; 6 | 7 | pub struct SchedulerIntegration { 8 | ml_scheduler: Arc<Mutex<MLScheduler>>, 9 | } 10 | 11 | impl SchedulerIntegration { 12 | pub fn new(ml_scheduler: Arc<Mutex<MLScheduler>>) -> Self { 13 | Self { ml_scheduler } 14 | } 15 | 16 | pub async fn process_migration_request(&self, task: &TaskContext) -> Result<MigrationDecision> { 17 | let features = MigrationFeatures { 18 | cpu: task.cpu, 19 | cpu_idle: task.cpu_idle, 20 | cpu_not_idle: task.cpu_not_idle, 21 | src_dom_load: task.src_dom_load, 22 | dst_dom_load: task.dst_dom_load, 23 | }; 24 | 25 | let scheduler = self.ml_scheduler.lock().await; 26 | let should_migrate = scheduler.should_migrate(&features) 27 | .map_err(|e| anyhow!("ML prediction failed: {}", e))?; 28 | 29 | Ok(MigrationDecision { 30 | should_migrate, 31 | target_dom: if should_migrate { 32 | // Simple logic: migrate to next domain 33 | (task.dom_id + 1) % 4 34 | } else { 35 | task.dom_id 36 | }, 37 | confidence: if should_migrate { 0.8 } else { 0.2 }, 38 | }) 39 | } 40 | 41 | pub fn update_model_metrics(&self, decision: &MigrationDecision, actual_improvement: f64) { 42 | // In a real implementation, this would collect metrics for model retraining 43 | log::debug!( 44 | "Migration decision - Should migrate: {}, Confidence: {:.2}, Actual improvement: {:.2}%", 45 | decision.should_migrate, 46 | decision.confidence, 47 | actual_improvement * 100.0 48 | ); 49 | } 50 | } -------------------------------------------------------------------------------- /scheduler/ml-scheduler/src/test_ml.rs: -------------------------------------------------------------------------------- 1 | use ml_scheduler::ml_scheduler_exact::{MLLoadBalancer, TaskCtx}; 2 | 3 | fn main() -> Result<(), Box<dyn std::error::Error>> { 4 | println!("Testing ML Scheduler (Exact Implementation)"); 5 | 6 | // Initialize the ML scheduler with the model path 7 | let model_path = "src/model_dir/model_path"; 8 | println!("Loading model from: {}", model_path); 9 | 10 | let ml_balancer = MLLoadBalancer::new(model_path)?; 11 | println!("Model loaded successfully!"); 12 | 13 | // Test case 1: High source load, low destination load (should migrate) 14 | let task1 = TaskCtx { 15 | pid: 1234, 16 | dom_id: 0, 17 | cpu: 32, 18 | cpu_idle: 16, 19 | cpu_not_idle: 16, 20 | src_dom_load: 80, // 80% load 21 | dst_dom_load: 20, // 20% load 22 | }; 23 | 24 | let should_migrate1 = ml_balancer.should_migrate_task(&task1); 25 | println!("\nTest 1 - High load imbalance:"); 26 | println!(" Source domain load: {}%", task1.src_dom_load); 27 | println!(" Destination domain load: {}%", task1.dst_dom_load); 28 | println!(" Migration decision: {}", should_migrate1); 29 | 30 | // Test case 2: Balanced load (should not migrate) 31 | let task2 = TaskCtx { 32 | pid: 5678, 33 | dom_id: 1, 34 | cpu: 32, 35 | cpu_idle: 16, 36 | cpu_not_idle: 16, 37 | src_dom_load: 50, // 50% load 38 | dst_dom_load: 48, // 48% load 39 | }; 40 | 41 | let should_migrate2 = ml_balancer.should_migrate_task(&task2); 42 | println!("\nTest 2 - Balanced load:"); 43 | println!(" Source domain load: {}%", task2.src_dom_load); 44 | println!(" Destination domain load: {}%", task2.dst_dom_load); 45 | println!(" Migration decision: 
{}", should_migrate2); 46 | 47 | // Test the raw migrate_inference function 48 | println!("\nTest 3 - Raw inference function:"); 49 | let cpu = 64; 50 | let cpu_idle = 32; 51 | let cpu_not_idle = 32; 52 | let src_load = 0.75; 53 | let dst_load = 0.25; 54 | 55 | let migrate = ml_balancer.migrate_inference(&cpu, &cpu_idle, &cpu_not_idle, &src_load, &dst_load); 56 | println!(" CPU: {}, Idle: {}, Not Idle: {}", cpu, cpu_idle, cpu_not_idle); 57 | println!(" Source load: {:.2}, Destination load: {:.2}", src_load, dst_load); 58 | println!(" Migration decision: {}", migrate); 59 | 60 | Ok(()) 61 | } -------------------------------------------------------------------------------- /scheduler/prompt/select.md: -------------------------------------------------------------------------------- 1 | # Role 2 | You are a Linux kernel scheduling assistant. Given: 3 | 1. A list of schedulers and their configuration metadata 4 | 2. A system + workload profile snapshot 5 | 6 | Your job is to: 7 | - Choose the best-fit SCX scheduler (must be `production_ready: true`) 8 | - Tune its parameters based on system capabilities and workload requirements 9 | - Output shell command(s) to enable that scheduler and apply the parameters 10 | - Add one brief comment explaining your choice 11 | 12 | # Scheduler catalog 13 | {{SCHEDULER_CATALOG}} 14 | 15 | # Combined system + workload profile 16 | {{SYSTEM_AND_WORKLOAD}} 17 | 18 | # Instructions 19 | 1. Use the JSON to understand the machine (CPU layout, load, latency) and the workload (tags, type, SLA). 20 | 2. Pick the best scheduler from the catalog that matches goals and hardware. 21 | 3. Adjust its parameters based on workload (e.g., slice_us, latency weights, etc.). 22 | 4. Output shell commands that: 23 | - Enable the scheduler (`echo > /sys/kernel/debug/sched_ext/scheduler`) 24 | - Set all necessary tuning knobs via sysfs or config tools 25 | 5. Add one final comment line explaining your reasoning (max 1 sentence). 26 | 6. Do NOT output explanations, just the commands + comment. 27 | 28 | # Output format (strictly) 29 | 30 | 31 | ... 32 | # short explanation here 33 | -------------------------------------------------------------------------------- /scheduler/sche_description/scx_central.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_central", 4 | "production_ready": false, 5 | "description": "A central scheduler where all scheduling decisions are made from a single CPU, allowing other cores to run with infinite slices without timer ticks. This design reduces scheduling overhead by concentrating all scheduling logic on one designated CPU while worker CPUs simply execute tasks. 
Tasks are preempted every 20ms via timer callback.", 6 | "use_cases": [ 7 | "virtualization", 8 | "low_latency", 9 | "vm_workloads" 10 | ], 11 | "algorithm": "central_dispatch", 12 | "characteristics": "single cpu dispatch, infinite slices, timer tick reduction, fifo ordering, 20ms preemption timer", 13 | "tuning_parameters": { 14 | "central_cpu": { 15 | "type": "integer", 16 | "description": "CPU ID to use for central scheduling", 17 | "default": 0, 18 | "range": [ 19 | 0, 20 | 255 21 | ] 22 | }, 23 | "slice_us": { 24 | "type": "integer", 25 | "description": "Override slice duration in microseconds", 26 | "default": 20000, 27 | "range": [ 28 | 1000, 29 | 100000 30 | ] 31 | } 32 | }, 33 | "limitations": "Not production ready, no priority mechanism, fixed 20ms preemption", 34 | "performance_profile": "reduced scheduling overhead on worker CPUs" 35 | } 36 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_flatcg.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_flatcg", 4 | "production_ready": true, 5 | "description": "A high-performance cgroup-aware scheduler that flattens the cgroup hierarchy for better performance. It implements hierarchical weight-based cgroup CPU control by compounding active weight shares at each level into a single flat structure, eliminating tree traversal overhead during scheduling decisions.", 6 | "use_cases": [ 7 | "container_workloads", 8 | "resource_management", 9 | "multi_tenant_systems" 10 | ], 11 | "algorithm": "flattened_cgroup_hierarchy", 12 | "characteristics": "cgroup hierarchy flattening, weight compounding, performance optimization, dual mode operation (vtime/fifo), low scheduling overhead", 13 | "tuning_parameters": { 14 | "slice_us": { 15 | "type": "integer", 16 | "description": "Override slice duration in microseconds", 17 | "default": 20000, 18 | "range": [ 19 | 1000, 20 | 100000 21 | ] 22 | }, 23 | "interval": { 24 | "type": "integer", 25 | "description": "Report interval in seconds", 26 | "default": 1 27 | }, 28 | "fifo": { 29 | "type": "boolean", 30 | "description": "Use FIFO scheduling instead of weighted vtime", 31 | "default": false 32 | } 33 | }, 34 | "limitations": "Limited to cgroup use cases, thundering herd issues possible", 35 | "performance_profile": "optimized for low overhead cgroup scheduling" 36 | } 37 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_mitosis.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_mitosis", 4 | "production_ready": false, 5 | "description": "A dynamic affinity scheduler implementing cell-based scheduling architecture. Dynamically assigns cgroups to 'cells' which are mapped to specific CPU sets. 
Cells can be merged or split based on system conditions, with vtime-based scheduling within each cell.", 6 | "use_cases": [ 7 | "dynamic_affinity", 8 | "cgroup_isolation", 9 | "experimental" 10 | ], 11 | "algorithm": "cell_based_vtime", 12 | "characteristics": "dynamic cell management, adaptive cpu assignment, cgroup awareness, vtime scheduling, numa awareness, cell splitting/merging", 13 | "tuning_parameters": { 14 | "reconfiguration_interval_s": { 15 | "type": "integer", 16 | "description": "Interval to consider reconfiguring cells", 17 | "default": 10 18 | }, 19 | "rebalance_cpus_interval_s": { 20 | "type": "integer", 21 | "description": "Interval to rebalance CPUs to cells", 22 | "default": 5 23 | }, 24 | "monitor_interval_s": { 25 | "type": "integer", 26 | "description": "Interval to report monitoring information", 27 | "default": 1 28 | } 29 | }, 30 | "limitations": "Experimental scheduler, not production ready", 31 | "performance_profile": "experimental dynamic affinity management" 32 | } 33 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_nest.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_nest", 4 | "production_ready": true, 5 | "description": "A frequency-optimized scheduler that keeps tasks on warm cores to maintain high CPU frequencies. Based on the Inria-Paris Nest paper, it divides cores into primary and reserve nests, concentrating work on a subset of cores to maximize boost frequencies. Best suited for single CCX/socket systems with low to moderate CPU utilization.", 6 | "use_cases": [ 7 | "low_cpu_utilization", 8 | "frequency_sensitive", 9 | "latency_sensitive", 10 | "power_efficiency" 11 | ], 12 | "algorithm": "warm_core_clustering", 13 | "characteristics": "warm core prioritization, frequency optimization, single ccx limitation, cache locality awareness, dynamic nest sizing, hyperthreading awareness", 14 | "tuning_parameters": { 15 | "delay_us": { 16 | "type": "integer", 17 | "description": "Delay before removing idle core from primary nest", 18 | "default": 2000, 19 | "range": [ 20 | 100, 21 | 100000 22 | ] 23 | }, 24 | "r_max": { 25 | "type": "integer", 26 | "description": "Maximum number of cores in reserve nest", 27 | "default": 5, 28 | "range": [ 29 | 1, 30 | 64 31 | ] 32 | }, 33 | "iters": { 34 | "type": "integer", 35 | "description": "Placement failures before aggressive expansion", 36 | "default": 2, 37 | "range": [ 38 | 0, 39 | 10 40 | ] 41 | }, 42 | "slice_us": { 43 | "type": "integer", 44 | "description": "Override slice duration", 45 | "default": 20000 46 | }, 47 | "prefer_idle": { 48 | "type": "boolean", 49 | "description": "Prefer fully idle cores over siblings", 50 | "default": false 51 | } 52 | }, 53 | "limitations": "Limited to single CCX, not good for high CPU utilization", 54 | "performance_profile": "optimized for frequency boost and power efficiency" 55 | } 56 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_pair.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_pair", 4 | "production_ready": false, 5 | "description": "A sibling scheduler ensuring tasks only co-locate on a physical core if they're in the same cgroup. Demonstrates how to implement security mitigations for CPU bugs like L1TF by enforcing strict cgroup isolation on SMT siblings. 
Uses scx_bpf_kick_cpu() to preempt siblings when constraints are violated.", 6 | "use_cases": [ 7 | "security_mitigation", 8 | "smt_isolation", 9 | "educational" 10 | ], 11 | "algorithm": "cgroup_based_pairing", 12 | "characteristics": "strict cgroup isolation, smt aware scheduling, cpu sibling pairing, security focused, educational demonstration", 13 | "tuning_parameters": { 14 | "stride": { 15 | "type": "integer", 16 | "description": "Override CPU pair stride", 17 | "default": -1 18 | } 19 | }, 20 | "limitations": "Not production ready, demonstration scheduler only", 21 | "performance_profile": "security focused, not performance optimized" 22 | } 23 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_prev.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_prev", 4 | "production_ready": false, 5 | "description": "A variation on scx_simple that prioritizes selecting an idle previous CPU over finding fully idle cores. This optimization leverages CPU cache locality by preferring the previous CPU when possible, reducing cache misses. Particularly effective for OLTP workloads on simple topology systems.", 6 | "use_cases": [ 7 | "oltp_workloads", 8 | "cache_sensitive", 9 | "simple_topology" 10 | ], 11 | "algorithm": "previous_cpu_priority", 12 | "characteristics": "previous cpu priority, cache locality optimization, simple and efficient, statistics tracking", 13 | "tuning_parameters": { 14 | "interval": { 15 | "type": "integer", 16 | "description": "Sampling interval for statistics in seconds", 17 | "default": 1 18 | } 19 | }, 20 | "limitations": "Not extensively tested in production, best for simple topology", 21 | "performance_profile": "optimized for cache locality" 22 | } 23 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_qmap.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_qmap", 4 | "production_ready": false, 5 | "description": "A simple five-level FIFO queue scheduler demonstrating fundamental sched_ext features. Tasks are distributed across five priority queues based on their compound weight, with weighted dispatch giving more opportunities to higher priority queues. 
Includes core scheduling support and CPU performance scaling.", 6 | "use_cases": [ 7 | "demonstration", 8 | "testing", 9 | "educational" 10 | ], 11 | "algorithm": "five_level_fifo", 12 | "characteristics": "five level priority queuing, weighted dispatch, sleepable task storage, core scheduling support, cpu performance scaling, bpf map demonstration", 13 | "tuning_parameters": { 14 | "slice_us": { 15 | "type": "integer", 16 | "description": "Override slice duration", 17 | "default": 20000 18 | }, 19 | "batch_count": { 20 | "type": "integer", 21 | "description": "Dispatch up to COUNT tasks together", 22 | "default": 1 23 | }, 24 | "boost_nice": { 25 | "type": "boolean", 26 | "description": "Boost nice -20 tasks", 27 | "default": false 28 | } 29 | }, 30 | "limitations": "Not production ready, simplified design for demonstration", 31 | "performance_profile": "demonstration scheduler, not optimized" 32 | } 33 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_rlfifo.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_rlfifo", 4 | "production_ready": false, 5 | "description": "A simple Round-Robin scheduler running in user-space based on scx_rustland_core framework. Dequeues tasks in FIFO order and assigns dynamic time slices to achieve basic Round-Robin behavior. Provided as a template for testing more complex scheduling policies.", 6 | "use_cases": [ 7 | "template", 8 | "testing", 9 | "educational" 10 | ], 11 | "algorithm": "user_space_round_robin", 12 | "characteristics": "user space scheduling, fifo ordering, dynamic time slices, rustland core based, simple template", 13 | "tuning_parameters": {}, 14 | "limitations": "Not production ready, use kernel SCHED_FIFO for real-time needs", 15 | "performance_profile": "basic round-robin, not performance optimized" 16 | } 17 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_rlfifo.md: -------------------------------------------------------------------------------- 1 | # scx_rlfifo 2 | 3 | This is a single user-defined scheduler used within [sched_ext](https://github.com/sched-ext/scx/tree/main), which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. [Read more about sched_ext](https://github.com/sched-ext/scx/tree/main). 4 | 5 | ## Overview 6 | 7 | scx_rlfifo is a simple Round-Robin scheduler that runs in user-space, based on the 8 | scx_rustland_core framework. 9 | It dequeues tasks in FIFO order and assigns dynamic time slices, preempting and 10 | re-enqueuing tasks to achieve basic Round-Robin behavior. 11 | 12 | ## Typical Use Case 13 | 14 | This scheduler is provided as a simple template that can be used as a baseline 15 | to test more complex scheduling policies. 16 | 17 | ## Production Ready? 18 | 19 | Definitely not. Using this scheduler in a production environment is not 20 | recommended, unless there are specific requirements that necessitate a basic 21 | FIFO scheduling approach. Even then, it's still recommended to use the kernel's 22 | SCHED_FIFO real-time class. 
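For reference, the supported way to get FIFO semantics in production is the kernel's own real-time class, e.g. via `chrt` (the binary name below is a placeholder):

```bash
# Run a task under SCHED_FIFO at real-time priority 10
sudo chrt -f 10 ./my_realtime_task
```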
23 | 24 | ## Command Line Options 25 | 26 | ``` 27 | 28 | ************************************************************************** 29 | 30 | WARNING: The purpose of scx_rlfifo is to provide a simple scheduler 31 | implementation based on scx_rustland_core, and it is not intended for 32 | use in production environments. If you want to run a scheduler that makes 33 | decisions in user space, it is recommended to use *scx_rustland* instead. 34 | 35 | Please do not open GitHub issues in the event of poor performance, or 36 | scheduler eviction due to a runnable task timeout. However, if running this 37 | scheduler results in a system crash or the entire system becoming unresponsive, 38 | please open a GitHub issue. 39 | 40 | ************************************************************************** 41 | Error: another sched_ext scheduler is already running 42 | ``` 43 | -------------------------------------------------------------------------------- /scheduler/sche_description/scx_rustland.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_rustland", 4 | "production_ready": true, 5 | "description": "A user-space scheduler written in Rust that prioritizes interactive workloads. Scheduling decisions are made entirely in user-space using a deadline-based algorithm (vruntime + exec_runtime). Tasks are stored in a BTreeSet and dispatched from lowest to highest deadline, favoring latency-sensitive tasks that frequently sleep.", 6 | "use_cases": [ 7 | "gaming", 8 | "video_conferencing", 9 | "live_streaming", 10 | "interactive_applications" 11 | ], 12 | "algorithm": "user_space_deadline", 13 | "characteristics": "full user space implementation, deadline based ordering, vruntime tracking, exec runtime accounting, interactive prioritization, rust implementation", 14 | "tuning_parameters": { 15 | "slice_us": { 16 | "type": "integer", 17 | "description": "Scheduling slice duration", 18 | "default": 20000, 19 | "range": [ 20 | 1000, 21 | 100000 22 | ] 23 | }, 24 | "slice_us_min": { 25 | "type": "integer", 26 | "description": "Minimum slice duration", 27 | "default": 1000, 28 | "range": [ 29 | 100, 30 | 10000 31 | ] 32 | }, 33 | "percpu_local": { 34 | "type": "boolean", 35 | "description": "Dispatch per-CPU tasks directly", 36 | "default": false 37 | }, 38 | "partial": { 39 | "type": "boolean", 40 | "description": "Only switch SCHED_EXT tasks", 41 | "default": false 42 | } 43 | }, 44 | "limitations": "User-space overhead, not ideal for performance-critical scenarios", 45 | "performance_profile": "optimized for interactivity over raw performance" 46 | } 47 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_sdt.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_sdt", 4 | "production_ready": false, 5 | "description": "A simple demonstration scheduler showcasing BPF arena usage for per-task data management. 
Implements basic FIFO scheduling while tracking detailed statistics about task lifecycle and scheduling decisions using the BPF arena allocator system.", 6 | "use_cases": [ 7 | "bpf_arena_demo", 8 | "educational", 9 | "testing" 10 | ], 11 | "algorithm": "simple_fifo_with_stats", 12 | "characteristics": "bpf arena demonstration, per task data management, statistics collection, simple fifo scheduling, arena allocation showcase", 13 | "tuning_parameters": {}, 14 | "limitations": "Educational scheduler only, minimal scheduling logic", 15 | "performance_profile": "demonstration scheduler, not performance focused" 16 | } 17 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_simple.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_simple", 4 | "production_ready": true, 5 | "description": "A simple scheduler with minimal complexity supporting both weighted vtime and FIFO modes. Optimized for single socket systems with uniform L3 cache. Provides a straightforward scheduling implementation suitable for systems where simplicity is preferred over advanced features.", 6 | "use_cases": [ 7 | "single_socket", 8 | "uniform_l3_cache", 9 | "simple_workloads" 10 | ], 11 | "algorithm": "simple_vtime_or_fifo", 12 | "characteristics": "dual mode operation, minimal complexity, single socket optimized, starvation risk in fifo, straightforward implementation", 13 | "tuning_parameters": { 14 | "fifo": { 15 | "type": "boolean", 16 | "description": "Use FIFO mode instead of vtime", 17 | "default": false 18 | }, 19 | "slice_us": { 20 | "type": "integer", 21 | "description": "Time slice duration", 22 | "default": 20000, 23 | "range": [ 24 | 1000, 25 | 100000 26 | ] 27 | } 28 | }, 29 | "limitations": "Limited scalability, may not optimize complex workloads", 30 | "performance_profile": "simple and predictable performance" 31 | } 32 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_tickless.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_tickless", 4 | "production_ready": false, 5 | "description": "An experimental server-oriented scheduler designed to reduce OS noise by minimizing timer ticks. Routes all scheduling events through a pool of primary CPUs, allowing other CPUs to run tickless. Requires kernel booted with nohz_full for full effectiveness. 
Designed for cloud computing, virtualization, and HPC workloads.", 6 | "use_cases": [ 7 | "cloud_computing", 8 | "virtualization", 9 | "hpc", 10 | "server_workloads" 11 | ], 12 | "algorithm": "primary_cpu_distribution", 13 | "characteristics": "tick reduction, primary cpu pool, os noise reduction, global task queue, configurable frequency, nohz full requirement, ipc based preemption", 14 | "tuning_parameters": { 15 | "primary_domain": { 16 | "type": "string", 17 | "description": "CPU mask for primary scheduling CPUs (hex)", 18 | "default": "0x1" 19 | }, 20 | "slice_us": { 21 | "type": "integer", 22 | "description": "Maximum scheduling slice duration", 23 | "default": 20000 24 | }, 25 | "frequency": { 26 | "type": "integer", 27 | "description": "Scheduling frequency (0 = CONFIG_HZ)", 28 | "default": 0 29 | }, 30 | "prefer_same_cpu": { 31 | "type": "boolean", 32 | "description": "Try to keep tasks on same CPU", 33 | "default": false 34 | }, 35 | "nosmt": { 36 | "type": "boolean", 37 | "description": "Disable SMT topology awareness", 38 | "default": false 39 | } 40 | }, 41 | "requirements": "Linux kernel with nohz_full support", 42 | "limitations": "Not production ready, experimental, syscall overhead with nohz_full", 43 | "performance_profile": "optimized for reduced OS noise, not latency" 44 | } 45 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_userland.json: -------------------------------------------------------------------------------- 1 | { 2 | "scheduler": { 3 | "name": "scx_userland", 4 | "production_ready": false, 5 | "description": "A fully user-space scheduler for educational purposes demonstrating user-space scheduling concepts. All scheduling decisions are made in user-space with an ordered list scheduling approach. While offering development advantages, it has inherent performance overhead and deadlock risks.", 6 | "use_cases": [ 7 | "demonstration", 8 | "educational", 9 | "prototyping" 10 | ], 11 | "algorithm": "user_space_vtime", 12 | "characteristics": "full user space implementation, ordered list scheduling, development advantages, deadlock risk, performance overhead, educational focus", 13 | "tuning_parameters": { 14 | "slice_us": { 15 | "type": "integer", 16 | "description": "Time slice duration", 17 | "default": 20000 18 | }, 19 | "batch": { 20 | "type": "integer", 21 | "description": "Number of tasks to batch when dispatching", 22 | "default": 8, 23 | "range": [1, 32] 24 | } 25 | }, 26 | "limitations": "Not production ready, performance overhead, potential deadlocks", 27 | "performance_profile": "educational scheduler with significant overhead" 28 | } 29 | } -------------------------------------------------------------------------------- /scheduler/sche_description/scx_userland.md: -------------------------------------------------------------------------------- 1 | # scx_userland 2 | 3 | 4 | ### Overview 5 | 6 | A simple weighted vtime scheduler where all scheduling decisions take place in 7 | user space. This is in contrast to Rusty, where load balancing lives in user 8 | space, but scheduling decisions are still made in the kernel. 9 | 10 | ### Typical Use Case 11 | 12 | There are many advantages to writing schedulers in user space. For example, you 13 | can use a debugger, you can write the scheduler in Rust, and you can use data 14 | structures bundled with your favorite library. 15 | 16 | On the other hand, user space scheduling can be hard to get right. 
You can 17 | potentially deadlock due to not scheduling a task that's required for the 18 | scheduler itself to make forward progress (though the sched_ext watchdog will 19 | protect the system by unloading your scheduler after a timeout if that 20 | happens). You also have to bootstrap some communication protocol between the 21 | kernel and user space. 22 | 23 | A more robust solution to this would be building a user space scheduling 24 | framework that abstracts much of this complexity away from you. 25 | 26 | ### Production Ready? 27 | 28 | No. This scheduler uses an ordered list for vtime scheduling, and is strictly 29 | less performant than just using something like `scx_simple`. It is purely 30 | meant to illustrate that it's possible to build a user space scheduler on 31 | top of sched_ext. 32 | 33 | ## Command Line Options 34 | 35 | ``` 36 | /root/yunwei37/ai-os/scheduler/sche_bin/scx_userland: invalid option -- '-' 37 | A minimal userland sched_ext scheduler. 38 | 39 | See the top-level comment in .bpf.c for more details. 40 | 41 | Try to reduce `sysctl kernel.pid_max` if this program triggers OOMs. 42 | 43 | Usage: scx_userland [-b BATCH] 44 | 45 | -b BATCH The number of tasks to batch when dispatching (default: 8) 46 | -v Print libbpf debug messages 47 | -h Display this help and exit 48 | ``` 49 | -------------------------------------------------------------------------------- /scheduler/template/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.bpf.skel.h 3 | *.bpf.o 4 | *.bpf.l1o 5 | *.bpf.l2o 6 | *.bpf.l3o 7 | scx_simple 8 | 9 | loader 10 | -------------------------------------------------------------------------------- /scheduler/template/cxl_schedulers/meson.build: -------------------------------------------------------------------------------- 1 | c_scheds = ['scx_simple', 'scx_qmap', 'scx_central', 'scx_userland', 'scx_nest', 2 | 'scx_flatcg', 'scx_pair', 'scx_prev'] 3 | 4 | c_scheds_lib = ['scx_sdt'] 5 | 6 | thread_dep = dependency('threads') 7 | 8 | foreach sched: c_scheds 9 | bpf_o = gen_bpf_o.process(sched + '.bpf.c') 10 | bpf_skel = gen_bpf_skel.process(bpf_o) 11 | executable(sched, [bpf_skel, sched + '.c'], 12 | dependencies: [kernel_dep, libbpf_dep, thread_dep, user_c_dep], 13 | install: true) 14 | endforeach 15 | 16 | foreach sched: c_scheds_lib 17 | bpf_o = gen_bpf_o.process(sched + '.bpf.c') 18 | bpf_skel = gen_bpf_skel_lib.process(bpf_o) 19 | executable(sched, [bpf_skel, sched + '.c'], 20 | dependencies: [kernel_dep, libbpf_dep, thread_dep, user_c_dep], 21 | install: true) 22 | endforeach 23 | -------------------------------------------------------------------------------- /scheduler/template/cxl_schedulers/scx_flatcg.h: -------------------------------------------------------------------------------- 1 | #ifndef __SCX_EXAMPLE_FLATCG_H 2 | #define __SCX_EXAMPLE_FLATCG_H 3 | 4 | enum { 5 | FCG_HWEIGHT_ONE = 1LLU << 16, 6 | }; 7 | 8 | enum fcg_stat_idx { 9 | FCG_STAT_ACT, 10 | FCG_STAT_DEACT, 11 | FCG_STAT_LOCAL, 12 | FCG_STAT_GLOBAL, 13 | 14 | FCG_STAT_HWT_UPDATES, 15 | FCG_STAT_HWT_CACHE, 16 | FCG_STAT_HWT_SKIP, 17 | FCG_STAT_HWT_RACE, 18 | 19 | FCG_STAT_ENQ_SKIP, 20 | FCG_STAT_ENQ_RACE, 21 | 22 | FCG_STAT_CNS_KEEP, 23 | FCG_STAT_CNS_EXPIRE, 24 | FCG_STAT_CNS_EMPTY, 25 | FCG_STAT_CNS_GONE, 26 | 27 | FCG_STAT_PNC_NO_CGRP, 28 | FCG_STAT_PNC_NEXT, 29 | FCG_STAT_PNC_EMPTY, 30 | FCG_STAT_PNC_GONE, 31 | FCG_STAT_PNC_RACE, 32 | FCG_STAT_PNC_FAIL, 33 | 34 | FCG_STAT_BAD_REMOVAL, 35 | 36 | FCG_NR_STATS, 37 | }; 38 
| 39 | struct fcg_cgrp_ctx { 40 | u32 nr_active; 41 | u32 nr_runnable; 42 | u32 queued; 43 | u32 weight; 44 | u32 hweight; 45 | u64 child_weight_sum; 46 | u64 hweight_gen; 47 | s64 cvtime_delta; 48 | u64 tvtime_now; 49 | }; 50 | 51 | #endif /* __SCX_EXAMPLE_FLATCG_H */ 52 | -------------------------------------------------------------------------------- /scheduler/template/cxl_schedulers/scx_nest.h: -------------------------------------------------------------------------------- 1 | #ifndef __SCX_NEST_H 2 | #define __SCX_NEST_H 3 | 4 | enum nest_stat_group { 5 | STAT_GRP_WAKEUP, 6 | STAT_GRP_NEST, 7 | STAT_GRP_CONSUME, 8 | }; 9 | 10 | #define NEST_STAT(__stat) BPFSTAT_##__stat 11 | #define NEST_ST(__stat, __grp, __desc) NEST_STAT(__stat), 12 | enum nest_stat_idx { 13 | #include "scx_nest_stats_table.h" 14 | NEST_ST(NR, 0, 0) 15 | }; 16 | #undef NEST_ST 17 | 18 | #endif /* __SCX_NEST_H */ 19 | -------------------------------------------------------------------------------- /scheduler/template/cxl_schedulers/scx_nest_stats_table.h: -------------------------------------------------------------------------------- 1 | NEST_ST(WAKEUP_ATTACHED, STAT_GRP_WAKEUP, "Attached CPU was idle, and in primary nest") 2 | NEST_ST(WAKEUP_PREV_PRIMARY, STAT_GRP_WAKEUP, "Previous CPU was idle, and in primary nest") 3 | NEST_ST(WAKEUP_FULLY_IDLE_PRIMARY, STAT_GRP_WAKEUP, "Woken up to fully idle primary nest core") 4 | NEST_ST(WAKEUP_ANY_IDLE_PRIMARY, STAT_GRP_WAKEUP, "Woken up to idle logical primary nest core") 5 | NEST_ST(WAKEUP_FULLY_IDLE_RESERVE, STAT_GRP_WAKEUP, "Woken up to fully idle reserve nest core") 6 | NEST_ST(WAKEUP_ANY_IDLE_RESERVE, STAT_GRP_WAKEUP, "Woken up to idle logical reserve nest core") 7 | NEST_ST(WAKEUP_IDLE_OTHER, STAT_GRP_WAKEUP, "Woken to any idle logical core in p->cpus_ptr") 8 | 9 | NEST_ST(TASK_IMPATIENT, STAT_GRP_NEST, "A task was found to be impatient") 10 | NEST_ST(PROMOTED_TO_PRIMARY, STAT_GRP_NEST, "A core was promoted into the primary nest") 11 | NEST_ST(PROMOTED_TO_RESERVED, STAT_GRP_NEST, "A core was promoted into the reserve nest") 12 | NEST_ST(DEMOTED_TO_RESERVED, STAT_GRP_NEST, "A core was demoted into the reserve nest") 13 | NEST_ST(RESERVED_AT_CAPACITY, STAT_GRP_NEST, "Reserved nest was at capacity") 14 | NEST_ST(SCHEDULED_COMPACTION, STAT_GRP_NEST, "Scheduled a primary core to be compacted") 15 | NEST_ST(CANCELLED_COMPACTION, STAT_GRP_NEST, "Cancelled a primary core from being compacted at task wakeup time") 16 | NEST_ST(EAGERLY_COMPACTED, STAT_GRP_NEST, "A core was compacted in ops.dispatch()") 17 | NEST_ST(CALLBACK_COMPACTED, STAT_GRP_NEST, "A core was compacted in the scheduled timer callback") 18 | 19 | NEST_ST(CONSUMED, STAT_GRP_CONSUME, "A task was consumed from the global DSQ") 20 | NEST_ST(NOT_CONSUMED, STAT_GRP_CONSUME, "There was no task in the global DSQ") 21 | -------------------------------------------------------------------------------- /scheduler/template/cxl_schedulers/scx_prev.bpf.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * A variation on scx_simple with CPU selection that prioritizes an idle 4 | * previous CPU over finding a fully idle core (as is done in scx_simple and 5 | * scx_rusty). 6 | * 7 | * Outperforms the in-kernel fair class (v6.12), scx_simple, and scx_rusty on 8 | * OLTP workloads run on systems with simple topology (i.e. non-NUMA, single 9 | * LLC). 10 | * 11 | * Copyright (c) 2025, Oracle and/or its affiliates. 
12 | * Copyright (c) 2025, Daniel Jordan 13 | */ 14 | #include <scx/common.bpf.h> 15 | 16 | char _license[] SEC("license") = "GPL"; 17 | 18 | UEI_DEFINE(uei); 19 | 20 | struct { 21 | __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 22 | __uint(key_size, sizeof(u32)); 23 | __uint(value_size, sizeof(u64)); 24 | __uint(max_entries, 4); /* [local, select_fail, prev_cpu, idle_cpu] */ 25 | } stats SEC(".maps"); 26 | 27 | static void stat_inc(u32 idx) 28 | { 29 | u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx); 30 | if (cnt_p) 31 | (*cnt_p)++; 32 | } 33 | 34 | s32 BPF_STRUCT_OPS(prev_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) 35 | { 36 | s32 cpu; 37 | 38 | if (scx_bpf_test_and_clear_cpu_idle(prev_cpu)) { 39 | stat_inc(2); /* prev_cpu */ 40 | cpu = prev_cpu; 41 | goto insert; 42 | } 43 | 44 | cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0); 45 | if (cpu >= 0) { 46 | stat_inc(3); /* idle_cpu */ 47 | goto insert; 48 | } 49 | 50 | stat_inc(1); /* select_fail */ 51 | 52 | return prev_cpu; 53 | 54 | insert: 55 | stat_inc(0); /* local */ 56 | scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); 57 | 58 | return cpu; 59 | } 60 | 61 | void BPF_STRUCT_OPS(prev_exit, struct scx_exit_info *ei) 62 | { 63 | UEI_RECORD(uei, ei); 64 | } 65 | 66 | SCX_OPS_DEFINE(prev_ops, 67 | .select_cpu = (void *)prev_select_cpu, 68 | .exit = (void *)prev_exit, 69 | .name = "prev" 70 | ); 71 | -------------------------------------------------------------------------------- /scheduler/template/cxl_schedulers/scx_userland.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* Copyright (c) 2022 Meta, Inc */ 3 | 4 | #ifndef __SCX_USERLAND_COMMON_H 5 | #define __SCX_USERLAND_COMMON_H 6 | 7 | /* 8 | * An instance of a task that has been enqueued by the kernel for consumption 9 | * by a user space global scheduler thread. 10 | */ 11 | struct scx_userland_enqueued_task { 12 | __s32 pid; 13 | u64 sum_exec_runtime; 14 | u64 weight; 15 | }; 16 | 17 | #endif // __SCX_USERLAND_COMMON_H 18 | -------------------------------------------------------------------------------- /scheduler/template/fifo.bpf.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * A simple FIFO scheduler. 4 | * 5 | * This scheduler implements simple FIFO (First-In-First-Out) scheduling 6 | * where tasks are scheduled in the order they arrive, without considering 7 | * task weights or priorities. 8 | * 9 | * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
10 | * Copyright (c) 2022 Tejun Heo 11 | * Copyright (c) 2022 David Vernet 12 | */ 13 | #include <scx/common.bpf.h> 14 | 15 | char _license[] SEC("license") = "GPL"; 16 | 17 | UEI_DEFINE(uei); 18 | 19 | #define SHARED_DSQ 0 20 | 21 | s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) 22 | { 23 | bool is_idle = false; 24 | s32 cpu; 25 | 26 | cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle); 27 | if (is_idle) { 28 | scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); 29 | } 30 | 31 | return cpu; 32 | } 33 | 34 | void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) 35 | { 36 | scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags); 37 | } 38 | 39 | void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev) 40 | { 41 | scx_bpf_dsq_move_to_local(SHARED_DSQ); 42 | } 43 | 44 | s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init) 45 | { 46 | return scx_bpf_create_dsq(SHARED_DSQ, -1); 47 | } 48 | 49 | void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei) 50 | { 51 | UEI_RECORD(uei, ei); 52 | } 53 | 54 | SCX_OPS_DEFINE(simple_ops, 55 | .select_cpu = (void *)simple_select_cpu, 56 | .enqueue = (void *)simple_enqueue, 57 | .dispatch = (void *)simple_dispatch, 58 | .init = (void *)simple_init, 59 | .exit = (void *)simple_exit, 60 | .name = "simple"); -------------------------------------------------------------------------------- /scheduler/template/vruntime.bpf.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * A simple weighted vruntime scheduler. 4 | * 5 | * This scheduler implements weighted vruntime scheduling where tasks are 6 | * scheduled based on their virtual runtime, providing fairness across 7 | * different task weights. 8 | * 9 | * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
10 | * Copyright (c) 2022 Tejun Heo 11 | * Copyright (c) 2022 David Vernet 12 | */ 13 | #include <scx/common.bpf.h> 14 | 15 | char _license[] SEC("license") = "GPL"; 16 | 17 | static u64 vtime_now; 18 | UEI_DEFINE(uei); 19 | 20 | #define SHARED_DSQ 0 21 | 22 | s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) 23 | { 24 | bool is_idle = false; 25 | s32 cpu; 26 | 27 | cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle); 28 | if (is_idle) { 29 | scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); 30 | } 31 | 32 | return cpu; 33 | } 34 | 35 | void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) 36 | { 37 | u64 vtime = p->scx.dsq_vtime; 38 | 39 | if (time_before(vtime, vtime_now - SCX_SLICE_DFL)) 40 | vtime = vtime_now - SCX_SLICE_DFL; 41 | 42 | scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, enq_flags); 43 | } 44 | 45 | void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev) 46 | { 47 | scx_bpf_dsq_move_to_local(SHARED_DSQ); 48 | } 49 | 50 | void BPF_STRUCT_OPS(simple_running, struct task_struct *p) 51 | { 52 | if (time_before(vtime_now, p->scx.dsq_vtime)) 53 | vtime_now = p->scx.dsq_vtime; 54 | } 55 | 56 | void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable) 57 | { 58 | p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight; 59 | } 60 | 61 | void BPF_STRUCT_OPS(simple_enable, struct task_struct *p) 62 | { 63 | p->scx.dsq_vtime = vtime_now; 64 | } 65 | 66 | s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init) 67 | { 68 | return scx_bpf_create_dsq(SHARED_DSQ, -1); 69 | } 70 | 71 | void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei) 72 | { 73 | UEI_RECORD(uei, ei); 74 | } 75 | 76 | SCX_OPS_DEFINE(simple_ops, 77 | .select_cpu = (void *)simple_select_cpu, 78 | .enqueue = (void *)simple_enqueue, 79 | .dispatch = (void *)simple_dispatch, 80 | .running = (void *)simple_running, 81 | .stopping = (void *)simple_stopping, 82 | .enable = (void *)simple_enable, 83 | .init = (void *)simple_init, 84 | .exit = (void *)simple_exit, 85 | .name = "simple"); -------------------------------------------------------------------------------- /scheduler/update_scheduler_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script to update scheduler documentation with help output 4 | 5 | SCHE_BIN_DIR="/root/yunwei37/ai-os/scheduler/sche_bin" 6 | SCHE_DESC_DIR="/root/yunwei37/ai-os/scheduler/sche_description" 7 | 8 | echo "Updating scheduler documentation with help output..." 9 | 10 | # Get unique scheduler names (removing hash suffixes) 11 | schedulers=$(ls "$SCHE_BIN_DIR" | grep -E '^scx_[a-z_]+$' | sort -u) 12 | 13 | for scheduler in $schedulers; do 14 | echo "Processing $scheduler..." 15 | 16 | # Skip certain binaries that aren't actual schedulers 17 | if [[ "$scheduler" == "scx_loader" || "$scheduler" == "scx_lib_selftests" ]]; then 18 | echo " Skipping $scheduler (not a scheduler)" 19 | continue 20 | fi 21 | 22 | desc_file="$SCHE_DESC_DIR/$scheduler.md" 23 | 24 | # Check if description file exists 25 | if [[ !
-f "$desc_file" ]]; then 26 | echo " Warning: No description file found for $scheduler" 27 | continue 28 | fi 29 | 30 | # Try --help first, then -h 31 | help_output="" 32 | if $SCHE_BIN_DIR/$scheduler --help 2>&1 | grep -q "^Usage:"; then 33 | help_output=$($SCHE_BIN_DIR/$scheduler --help 2>&1) 34 | else 35 | # Try -h instead 36 | help_output=$($SCHE_BIN_DIR/$scheduler -h 2>&1) 37 | fi 38 | 39 | # Check if we got valid help output 40 | if [[ -z "$help_output" ]] || [[ "$help_output" == *"invalid option"* && ! "$help_output" == *"Usage:"* ]]; then 41 | echo " Warning: Could not get help output for $scheduler" 42 | continue 43 | fi 44 | 45 | # Check if help section already exists 46 | if grep -q "^## Command Line Options" "$desc_file"; then 47 | echo " Help section already exists in $desc_file, skipping..." 48 | continue 49 | fi 50 | 51 | # Append help output to the description file 52 | cat >> "$desc_file" << EOF 53 | 54 | ## Command Line Options 55 | 56 | \`\`\` 57 | $help_output 58 | \`\`\` 59 | EOF 60 | 61 | echo " Updated $desc_file" 62 | done 63 | 64 | echo "Documentation update complete!" -------------------------------------------------------------------------------- /workloads/basic/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for building workloads/basic benchmarks 2 | 3 | .PHONY: all clean schbench stress-ng cachyos-benchmarker help 4 | 5 | # Default target 6 | all: schbench stress-ng cachyos-benchmarker 7 | @echo "All benchmarks built successfully" 8 | 9 | # Build schbench 10 | schbench: 11 | @echo "Building schbench..." 12 | @$(MAKE) -C schbench 13 | @echo "schbench built successfully" 14 | 15 | # Build stress-ng 16 | stress-ng: 17 | @echo "Building stress-ng..." 18 | @$(MAKE) -C stress-ng 19 | @echo "stress-ng built successfully" 20 | 21 | # Build cachyos-benchmarker 22 | cachyos-benchmarker: 23 | @echo "Building cachyos-benchmarker..." 24 | @if [ -f cachyos-benchmarker/Makefile ]; then \ 25 | $(MAKE) -C cachyos-benchmarker; \ 26 | elif [ -f cachyos-benchmarker/build.sh ]; then \ 27 | cd cachyos-benchmarker && ./build.sh; \ 28 | else \ 29 | echo "Warning: No build system found for cachyos-benchmarker"; \ 30 | fi 31 | @echo "cachyos-benchmarker build complete" 32 | 33 | # Clean all builds 34 | clean: 35 | @echo "Cleaning schbench..." 36 | @$(MAKE) -C schbench clean 2>/dev/null || true 37 | @echo "Cleaning stress-ng..." 38 | @$(MAKE) -C stress-ng clean 2>/dev/null || true 39 | @echo "Cleaning cachyos-benchmarker..." 
40 | @if [ -f cachyos-benchmarker/Makefile ]; then \ 41 | $(MAKE) -C cachyos-benchmarker clean 2>/dev/null || true; \ 42 | fi 43 | @echo "All benchmarks cleaned" 44 | 45 | # Help target 46 | help: 47 | @echo "Available targets:" 48 | @echo " all - Build all benchmarks" 49 | @echo " schbench - Build schbench (scheduler benchmark)" 50 | @echo " stress-ng - Build stress-ng (comprehensive stress testing)" 51 | @echo " cachyos-benchmarker - Build cachyos-benchmarker (system benchmarking suite)" 52 | @echo " clean - Clean all build artifacts" 53 | @echo " help - Show this help message" -------------------------------------------------------------------------------- /workloads/basic/schbench_test/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=3.5.0 2 | numpy>=1.20.0 -------------------------------------------------------------------------------- /workloads/basic/schbench_test/results/schbench_performance_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/basic/schbench_test/results/schbench_performance_comparison.png -------------------------------------------------------------------------------- /workloads/basic/schbench_test/results/scheduler_performance_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/basic/schbench_test/results/scheduler_performance_comparison.png -------------------------------------------------------------------------------- /workloads/basic/schbench_test/schbench_bench_start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Scheduler Performance Testing Script using schbench 4 | Tests all available schedulers in the schedcp project and generates performance figures. 
5 | """ 6 | 7 | import os 8 | import sys 9 | import argparse 10 | 11 | # Add the scheduler module to the path 12 | sys.path.insert(0, '/home/yunwei37/ai-os') 13 | 14 | from schbench_tester import SchbenchTester 15 | 16 | def main(): 17 | parser = argparse.ArgumentParser(description="Test schedulers with schbench") 18 | parser.add_argument("--schbench-path", default="../schbench/schbench", 19 | help="Path to schbench binary") 20 | parser.add_argument("--results-dir", default="results", 21 | help="Directory to store results") 22 | parser.add_argument("--production-only", action="store_true", 23 | help="Test only production schedulers") 24 | parser.add_argument("--runtime", type=int, default=30, 25 | help="Test runtime in seconds") 26 | parser.add_argument("--message-threads", type=int, default=2, 27 | help="Number of message threads") 28 | parser.add_argument("--message-groups", type=int, default=4, 29 | help="Number of message groups") 30 | 31 | args = parser.parse_args() 32 | 33 | # Create tester instance 34 | tester = SchbenchTester(args.schbench_path, args.results_dir) 35 | 36 | # Update test parameters 37 | tester.set_test_params( 38 | runtime=args.runtime, 39 | message_threads=args.message_threads, 40 | message_groups=args.message_groups, 41 | ) 42 | 43 | # Check if schbench exists 44 | if not os.path.exists(args.schbench_path): 45 | print(f"Error: schbench not found at {args.schbench_path}") 46 | print("Please build schbench first or specify correct path with --schbench-path") 47 | sys.exit(1) 48 | 49 | # Run tests 50 | print("Starting scheduler performance tests...") 51 | results = tester.run_all_schbench_tests(production_only=args.production_only) 52 | 53 | # Generate figures 54 | tester.generate_performance_figures(results) 55 | 56 | print("\nTesting complete!") 57 | 58 | if __name__ == "__main__": 59 | main() -------------------------------------------------------------------------------- /workloads/basic/schbench_test/schedcp_workloads.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": {}, 3 | "history": [] 4 | } -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "timestamp": "2025-08-03T12:26:49.694603", 3 | "num_runs": 3, 4 | "runs": [ 5 | { 6 | "run_number": 1, 7 | "metrics": { 8 | "throughput": 858.7, 9 | "50th_percentile_us": 4104, 10 | "95th_percentile_us": 20000, 11 | "99th_percentile_us": 39744, 12 | "99.9th_percentile_us": 61760 13 | } 14 | }, 15 | { 16 | "run_number": 2, 17 | "metrics": { 18 | "throughput": 874.1, 19 | "50th_percentile_us": 3972, 20 | "95th_percentile_us": 20064, 21 | "99th_percentile_us": 40768, 22 | "99.9th_percentile_us": 60480 23 | } 24 | }, 25 | { 26 | "run_number": 3, 27 | "metrics": { 28 | "throughput": 998.37, 29 | "50th_percentile_us": 3436, 30 | "95th_percentile_us": 19040, 31 | "99th_percentile_us": 40256, 32 | "99.9th_percentile_us": 61632 33 | } 34 | } 35 | ], 36 | "averages": { 37 | "throughput": 910.39, 38 | "50th_percentile_us": 3837.3333333333335, 39 | "95th_percentile_us": 19701.333333333332, 40 | "99th_percentile_us": 40256.0, 41 | "99.9th_percentile_us": 61290.666666666664 42 | } 43 | } -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/prompt.md: -------------------------------------------------------------------------------- 1 | 
time claude '/home/yunwei37/ai-os/workloads/basic/schbench' analyze it and create a workload profile, update the workload profile in the tool, and try to give a plan for selecting scheduler and optimization, try to give a few options for scheduler optimization choice. then, try to run '/home/yunwei37/ai-os/workloads/basic/schbench_test/schbench_simple_collect.py' on default cpu and rename the output json to default.json, update execution history, and try to run 3 times on different scheduler you selected based on the plan rank with different config, collect the '/home/yunwei37/ai-os/workloads/basic/schbench_test/schbench_simple_collect.py' data into multiple different json, update history. after 3 times, get the history and give me the best results 2 | 3 | -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/schbench_performance_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/basic/schbench_test/test-record/schbench_performance_comparison.pdf -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/schbench_performance_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/basic/schbench_test/test-record/schbench_performance_comparison.png -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/schedcp_workloads.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "3fc66918-2cc2-49b1-adc1-c27915e23880": { 4 | "id": "3fc66918-2cc2-49b1-adc1-c27915e23880", 5 | "description": "schbench - High-performance scheduler benchmark simulating web server workloads. This workload creates a producer-consumer pattern with message threads distributing work to worker threads. Key characteristics: (1) Mixed CPU-intensive and I/O simulation with alternating compute and sleep phases, (2) Each request involves 100μs sleep (simulating I/O), followed by 5 matrix multiplications (64x64 matrices, ~256KB cache footprint), then 10μs response sleep, (3) Uses per-CPU spinlocks to model preemption costs, (4) Default configuration runs 2 message threads with 4 workers each for 30 seconds, (5) Critical metrics are wakeup latency (time from wake to execution), request latency (end-to-end), and throughput (requests/sec). The workload is particularly sensitive to scheduler wakeup efficiency, CPU affinity decisions, and timeslice allocation.
Optimal schedulers should minimize wakeup latency while maintaining high CPU utilization and avoiding unnecessary thread migrations that disrupt cache locality.", 6 | "created_at": 1754196049, 7 | "updated_at": 1754196049 8 | } 9 | }, 10 | "history": [] 11 | } -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/scx_bpfland_aggressive.json: -------------------------------------------------------------------------------- 1 | { 2 | "timestamp": "2025-08-03T13:04:55.734674", 3 | "scheduler": "scx_bpfland_aggressive", 4 | "num_runs": 3, 5 | "runs": [ 6 | { 7 | "run_number": 1, 8 | "metrics": { 9 | "throughput": 743.43, 10 | "50th_percentile_us": 7944, 11 | "95th_percentile_us": 19616, 12 | "99th_percentile_us": 46144, 13 | "99.9th_percentile_us": 74880 14 | } 15 | }, 16 | { 17 | "run_number": 2, 18 | "metrics": { 19 | "throughput": 754.3, 20 | "50th_percentile_us": 7912, 21 | "95th_percentile_us": 19232, 22 | "99th_percentile_us": 44352, 23 | "99.9th_percentile_us": 76928 24 | } 25 | }, 26 | { 27 | "run_number": 3, 28 | "metrics": { 29 | "throughput": 726.2, 30 | "50th_percentile_us": 7976, 31 | "95th_percentile_us": 20064, 32 | "99th_percentile_us": 47680, 33 | "99.9th_percentile_us": 88960 34 | } 35 | } 36 | ], 37 | "averages": { 38 | "throughput": 741.31, 39 | "50th_percentile_us": 7944.0, 40 | "95th_percentile_us": 19637.333333333332, 41 | "99th_percentile_us": 46058.666666666664, 42 | "99.9th_percentile_us": 80256.0 43 | } 44 | } -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/scx_flash_latency.json: -------------------------------------------------------------------------------- 1 | { 2 | "timestamp": "2025-08-03T13:06:56.611021", 3 | "scheduler": "scx_flash_latency", 4 | "num_runs": 3, 5 | "runs": [ 6 | { 7 | "run_number": 1, 8 | "metrics": { 9 | "throughput": 810.43, 10 | "50th_percentile_us": 7128, 11 | "95th_percentile_us": 18336, 12 | "99th_percentile_us": 39104, 13 | "99.9th_percentile_us": 63680 14 | } 15 | }, 16 | { 17 | "run_number": 2, 18 | "metrics": { 19 | "throughput": 858.53, 20 | "50th_percentile_us": 6920, 21 | "95th_percentile_us": 16864, 22 | "99th_percentile_us": 36544, 23 | "99.9th_percentile_us": 58560 24 | } 25 | }, 26 | { 27 | "run_number": 3, 28 | "metrics": { 29 | "throughput": 862.47, 30 | "50th_percentile_us": 6840, 31 | "95th_percentile_us": 16800, 32 | "99th_percentile_us": 36800, 33 | "99.9th_percentile_us": 68480 34 | } 35 | } 36 | ], 37 | "averages": { 38 | "throughput": 843.81, 39 | "50th_percentile_us": 6962.666666666667, 40 | "95th_percentile_us": 17333.333333333332, 41 | "99th_percentile_us": 37482.666666666664, 42 | "99.9th_percentile_us": 63573.333333333336 43 | } 44 | } -------------------------------------------------------------------------------- /workloads/basic/schbench_test/test-record/scx_rusty_lowlat.json: -------------------------------------------------------------------------------- 1 | { 2 | "timestamp": "2025-08-03T13:08:53.119292", 3 | "scheduler": "scx_rusty_lowlat", 4 | "num_runs": 3, 5 | "runs": [ 6 | { 7 | "run_number": 1, 8 | "metrics": { 9 | "throughput": 1495.93, 10 | "50th_percentile_us": 3148, 11 | "95th_percentile_us": 15632, 12 | "99th_percentile_us": 16016, 13 | "99.9th_percentile_us": 25632 14 | } 15 | }, 16 | { 17 | "run_number": 2, 18 | "metrics": { 19 | "throughput": 1433.7, 20 | "50th_percentile_us": 3148, 21 | "95th_percentile_us": 15632, 22 | "99th_percentile_us": 19424, 
23 | "99.9th_percentile_us": 29664 24 | } 25 | }, 26 | { 27 | "run_number": 3, 28 | "metrics": { 29 | "throughput": 1426.87, 30 | "50th_percentile_us": 3028, 31 | "95th_percentile_us": 15632, 32 | "99th_percentile_us": 21856, 33 | "99.9th_percentile_us": 32352 34 | } 35 | } 36 | ], 37 | "averages": { 38 | "throughput": 1452.1666666666667, 39 | "50th_percentile_us": 3108.0, 40 | "95th_percentile_us": 15632.0, 41 | "99th_percentile_us": 19098.666666666668, 42 | "99.9th_percentile_us": 29216.0 43 | } 44 | } -------------------------------------------------------------------------------- /workloads/basic/stress-ng_test/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=3.5.0 2 | numpy>=1.21.0 -------------------------------------------------------------------------------- /workloads/basic/stress-ng_test/results/stress_ng_normalized_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/basic/stress-ng_test/results/stress_ng_normalized_performance.png -------------------------------------------------------------------------------- /workloads/basic/stress-ng_test/results/stress_ng_performance_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/basic/stress-ng_test/results/stress_ng_performance_comparison.png -------------------------------------------------------------------------------- /workloads/basic/stress-ng_test/stress_ng_bench_start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import sys 4 | import os 5 | 6 | # Add the scheduler module to the path 7 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'scheduler'))) 8 | 9 | from stress_ng_tester import StressNgTester 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description='Run stress-ng benchmark tests with different schedulers') 13 | parser.add_argument('--duration', type=int, default=30, help='Test duration for each scheduler in seconds (default: 30)') 14 | parser.add_argument('--cpu-workers', type=int, default=0, help='Number of CPU workers (default: 0 = auto)') 15 | parser.add_argument('--vm-workers', type=int, default=2, help='Number of VM workers (default: 2)') 16 | parser.add_argument('--io-workers', type=int, default=2, help='Number of IO workers (default: 2)') 17 | parser.add_argument('--stress-tests', nargs='+', default=['cpu', 'vm', 'io'], 18 | help='Stress tests to run (default: cpu vm io)') 19 | parser.add_argument('--output', type=str, default='results/stress_ng_results.json', 20 | help='Output file for results (default: results/stress_ng_results.json)') 21 | parser.add_argument('--skip-baseline', action='store_true', help='Skip baseline test without scheduler') 22 | parser.add_argument('--schedulers', nargs='+', help='Specific schedulers to test (default: all)') 23 | 24 | args = parser.parse_args() 25 | 26 | # Create tester instance 27 | tester = StressNgTester( 28 | duration=args.duration, 29 | cpu_workers=args.cpu_workers, 30 | vm_workers=args.vm_workers, 31 | io_workers=args.io_workers, 32 | stress_tests=args.stress_tests, 33 | output_file=args.output 34 | ) 35 | 36 | # Run tests 37 | print(f"Starting stress-ng benchmark 
tests...") 38 | print(f"Duration: {args.duration}s per scheduler") 39 | print(f"Stress tests: {', '.join(args.stress_tests)}") 40 | print(f"Workers - CPU: {args.cpu_workers or 'auto'}, VM: {args.vm_workers}, IO: {args.io_workers}") 41 | print("-" * 60) 42 | 43 | tester.run_all_tests(skip_baseline=args.skip_baseline, specific_schedulers=args.schedulers) 44 | 45 | # Generate performance figures 46 | print("\nGenerating performance comparison figures...") 47 | tester.generate_performance_figures() 48 | 49 | print(f"\nResults saved to: {args.output}") 50 | print(f"Performance figures saved to: results/") 51 | 52 | if __name__ == "__main__": 53 | main() -------------------------------------------------------------------------------- /workloads/cxl-micro/.gitignore: -------------------------------------------------------------------------------- 1 | double_bandwidth 2 | test.log 3 | -------------------------------------------------------------------------------- /workloads/cxl-micro/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for CXL double bandwidth microbenchmark 2 | 3 | # Compiler and flags 4 | CXX = g++ 5 | CXXFLAGS = -std=c++17 -O2 -Wall -Wextra -pthread 6 | LDFLAGS = -lnuma -pthread 7 | 8 | # Source and target 9 | SOURCE = double_bandwidth.cpp 10 | TARGET = double_bandwidth 11 | 12 | # Default target 13 | all: $(TARGET) 14 | 15 | # Build the binary 16 | $(TARGET): $(SOURCE) 17 | $(CXX) $(CXXFLAGS) -o $(TARGET) $(SOURCE) $(LDFLAGS) 18 | 19 | # Clean build artifacts 20 | clean: 21 | rm -f $(TARGET) 22 | 23 | # Install target (optional) 24 | install: $(TARGET) 25 | cp $(TARGET) /usr/local/bin/ 26 | 27 | # Phony targets 28 | .PHONY: all clean install 29 | 30 | # Debug build 31 | debug: CXXFLAGS += -g -DDEBUG 32 | debug: $(TARGET) 33 | 34 | # Help target 35 | help: 36 | @echo "Available targets:" 37 | @echo " all - Build the double_bandwidth binary (default)" 38 | @echo " debug - Build with debug symbols and DEBUG flag" 39 | @echo " clean - Remove build artifacts" 40 | @echo " install - Install binary to /usr/local/bin/" 41 | @echo " help - Show this help message" -------------------------------------------------------------------------------- /workloads/cxl-micro/README.md: -------------------------------------------------------------------------------- 1 | # command 2 | 3 | 4 | numactl --interleave=0,1,2 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_micro_bench_start.py 5 | 6 | numactl --interleave=0,1,2 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_micro_bench_start.py --parameter-sweep 7 | 8 | numactl --interleave=3 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_micro_bench_start.py --parameter-sweep --production-only 9 | 10 | 11 | -------------------------------------------------------------------------------- /workloads/cxl-micro/numa_results/bandwidth_vs_read_ratio_combined.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/numa_results/bandwidth_vs_read_ratio_combined.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/numa_results/bandwidth_vs_read_ratio_comparison.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/numa_results/bandwidth_vs_read_ratio_comparison.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/numa_results/bandwidth_vs_read_ratio_datasize_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/numa_results/bandwidth_vs_read_ratio_datasize_comparison.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/numa_results/default_random_vs_seq_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/numa_results/default_random_vs_seq_comparison.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/numa_results/default_random_vs_seq_comparison.txt: -------------------------------------------------------------------------------- 1 | ============================================================ 2 | Random vs Sequential Access Comparison Report 3 | Scheduler: default 4 | Configuration: 172 threads, 32GB 5 | ============================================================ 6 | 7 | RANDOM ACCESS STATISTICS: 8 | Average Bandwidth: 50436.22 MB/s 9 | Max Bandwidth: 62907.60 MB/s 10 | Min Bandwidth: 36177.90 MB/s 11 | Read Ratio 0.0 (100% Write): 36177.90 MB/s 12 | Read Ratio 0.5 (50% Read/Write): 57840.70 MB/s 13 | Read Ratio 1.0 (100% Read): 48821.80 MB/s 14 | Optimal Read Ratio: 0.65 (Bandwidth: 62907.60 MB/s) 15 | Bandwidth Range: 36177.90 - 62907.60 MB/s 16 | Max Improvement from Min: 73.9% 17 | 18 | SEQUENTIAL ACCESS STATISTICS: 19 | Average Bandwidth: 107545.74 MB/s 20 | Max Bandwidth: 197014.00 MB/s 21 | Min Bandwidth: 58014.20 MB/s 22 | Read Ratio 0.0 (100% Write): 59039.70 MB/s 23 | Read Ratio 0.5 (50% Read/Write): 72158.10 MB/s 24 | Read Ratio 1.0 (100% Read): 186591.00 MB/s 25 | Optimal Read Ratio: 0.95 (Bandwidth: 197014.00 MB/s) 26 | Bandwidth Range: 58014.20 - 197014.00 MB/s 27 | Max Improvement from Min: 239.6% 28 | 29 | SEQUENTIAL vs RANDOM COMPARISON: 30 | Average Sequential Improvement over Random: 113.2% 31 | Read Ratio 0.0 Improvement: 63.2% (Seq: 59040 vs Random: 36178 MB/s) 32 | Read Ratio 0.5 Improvement: 24.8% (Seq: 72158 vs Random: 57841 MB/s) 33 | Read Ratio 1.0 Improvement: 282.2% (Seq: 186591 vs Random: 48822 MB/s) 34 | Peak Performance Improvement: 213.2% 35 | Sequential Peak: 197014 MB/s at read ratio 0.95 36 | Random Peak: 62908 MB/s at read ratio 0.65 37 | 38 | Random Access Write/Read Ratio: 0.74 (Write is 0.7x of Read) 39 | Sequential Access Write/Read Ratio: 0.32 (Write is 0.3x of Read) 40 | 41 | ============================================================ -------------------------------------------------------------------------------- /workloads/cxl-micro/numa_results/numa_interleave_01_vs_23_fixed_172_threads.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/numa_results/numa_interleave_01_vs_23_fixed_172_threads.pdf -------------------------------------------------------------------------------- 
/workloads/cxl-micro/numa_results/numa_interleave_01_vs_23_fixed_64gb_buffer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/numa_results/numa_interleave_01_vs_23_fixed_64gb_buffer.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/numa_results/runnuma.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | mkdir -p numa_results 4 | 5 | # python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_perf_bandwidth_bench.py --parameter-sweep > numa_results/numa_results_numact_none.log 2>&1 6 | # cp /root/yunwei37/ai-os/workloads/cxl-micro/results/cxl_perf_parameter_sweep.csv numa_results/cxl_perf_parameter_sweep_numact_none.csv 7 | 8 | # numactl --interleave=2 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_perf_bandwidth_bench.py --parameter-sweep > numa_results/numa_results_numactl2_seq.log 2>&1 9 | # cp /root/yunwei37/ai-os/workloads/cxl-micro/results/cxl_perf_parameter_sweep.csv numa_results/cxl_perf_parameter_sweep_numactl2_seq.csv 10 | 11 | # numactl --interleave=3 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_perf_bandwidth_bench.py --parameter-sweep > numa_results/numa_results_numactl3_seq.log 2>&1 12 | # cp /root/yunwei37/ai-os/workloads/cxl-micro/results/cxl_perf_parameter_sweep.csv numa_results/cxl_perf_parameter_sweep_numactl3_seq.csv 13 | 14 | numactl --interleave=0 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_perf_bandwidth_bench.py --parameter-sweep > numa_results/numa_results_numactl0_seq.log 2>&1 15 | cp /root/yunwei37/ai-os/workloads/cxl-micro/results/cxl_perf_parameter_sweep.csv numa_results/cxl_perf_parameter_sweep_numactl0_seq.csv 16 | 17 | numactl --interleave=0,1 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_perf_bandwidth_bench.py --parameter-sweep > numa_results/numa_results_numactl01_seq.log 2>&1 18 | cp /root/yunwei37/ai-os/workloads/cxl-micro/results/cxl_perf_parameter_sweep.csv numa_results/cxl_perf_parameter_sweep_numactl01_seq.csv 19 | 20 | numactl --interleave=2,3 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_perf_bandwidth_bench.py --parameter-sweep > numa_results/numa_results_numactl23_seq.log 2>&1 21 | cp /root/yunwei37/ai-os/workloads/cxl-micro/results/cxl_perf_parameter_sweep.csv numa_results/cxl_perf_parameter_sweep_numactl23_seq.csv 22 | 23 | # numactl --interleave=0,1,2,3 python /root/yunwei37/ai-os/workloads/cxl-micro/cxl_perf_bandwidth_bench.py --parameter-sweep > numa_results/numa_results_numactl0123.log 2>&1 24 | # cp /root/yunwei37/ai-os/workloads/cxl-micro/results/cxl_perf_parameter_sweep.csv numa_results/cxl_perf_parameter_sweep_numactl0123.csv 25 | 26 | -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/cxl_scheduler_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/cxl_results/cxl_scheduler_performance.png -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/mini-perf/cxl_perf_parameter_sweep_numactl0.csv: -------------------------------------------------------------------------------- 1 | 
buffer_size_gb,buffer_size_bytes,threads,read_ratio,duration,app_total_bandwidth_mbps,app_read_bandwidth_mbps,app_write_bandwidth_mbps,app_total_iops,app_execution_time,perf_cache_references_bandwidth_mbps,perf_cache_misses_bandwidth_mbps,perf_dram_read_bandwidth_mbps,perf_dram_write_bandwidth_mbps,perf_mem_loads_bandwidth_mbps,perf_mem_stores_bandwidth_mbps,perf_offcore_data_read_bandwidth_mbps,perf_offcore_rfo_bandwidth_mbps,perf_cache_miss_rate_pct,perf_l3_hit_rate_pct,perf_l3_miss_rate_pct,perf_instructions_per_cycle,status 2 | 64.0,68719476736,64,0.0,20,169138,0,169138,43299200.0,20.0002,2650.460344668753,1932.3589589880462,0,0,39497.143518686884,23526.87235803895,1385.896211989264,40164.94045757102,72.90654104200706,47.66222198005809,52.337778019941915,0.0826300742908492,success 3 | 64.0,68719476736,64,0.5,20,183801,69659,114142,47053100.0,20.0002,26577.883897048032,25784.161223047973,0,0,41390.23641153766,24546.18594995406,18502.328220388674,27137.75590786706,97.01359718074389,10.059850359987724,89.94014964001228,0.08675409113891962,success 4 | 64.0,68719476736,64,1.0,20,169353,169353,0,43354400.0,20.0001,42755.98374612283,41905.84631575236,0,0,39707.20722402198,23639.232363168347,41823.01571457202,861.5805217216842,98.01165274217888,4.635937303783635,95.36406269621637,0.08597764778406772,success 5 | -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/mini-perf/cxl_perf_parameter_sweep_numactl01.csv: -------------------------------------------------------------------------------- 1 | buffer_size_gb,buffer_size_bytes,threads,read_ratio,duration,app_total_bandwidth_mbps,app_read_bandwidth_mbps,app_write_bandwidth_mbps,app_total_iops,app_execution_time,perf_cache_references_bandwidth_mbps,perf_cache_misses_bandwidth_mbps,perf_dram_read_bandwidth_mbps,perf_dram_write_bandwidth_mbps,perf_mem_loads_bandwidth_mbps,perf_mem_stores_bandwidth_mbps,perf_offcore_data_read_bandwidth_mbps,perf_offcore_rfo_bandwidth_mbps,perf_cache_miss_rate_pct,perf_l3_hit_rate_pct,perf_l3_miss_rate_pct,perf_instructions_per_cycle,status 2 | 64.0,68719476736,64,0.0,20,310741,0.0,310741.0,79549800.0,20.0002,5015.186946227079,3104.222243955105,0,0,51335.75806202103,29702.37503815841,2444.436611738169,71114.0388320129,61.896441293986236,52.779666568146574,47.220333431853426,0.11163039109938458,success 3 | 64.0,68719476736,64,0.5,20,153224,82810.8,70412.9,39225300.0,20.0001,36509.12547334266,35785.32410630548,0,0,35809.79889048454,21779.671769839053,20903.5470815141,16571.115918918913,98.01747821221937,8.138612390858798,91.8613876091412,0.07756817725661193,success 4 | 64.0,68719476736,64,1.0,20,180830,180830.0,0.0,46292600.0,20.0001,44042.54677439209,42741.864979534235,0,0,38193.10785369971,22952.22390188799,43125.41646057173,833.0223626559093,97.04676071181669,5.636396760053477,94.36360323994653,0.08722045782636746,success 5 | -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/mini-perf/cxl_perf_parameter_sweep_numactl012.csv: -------------------------------------------------------------------------------- 1 | 
buffer_size_gb,buffer_size_bytes,threads,read_ratio,duration,app_total_bandwidth_mbps,app_read_bandwidth_mbps,app_write_bandwidth_mbps,app_total_iops,app_execution_time,perf_cache_references_bandwidth_mbps,perf_cache_misses_bandwidth_mbps,perf_dram_read_bandwidth_mbps,perf_dram_write_bandwidth_mbps,perf_mem_loads_bandwidth_mbps,perf_mem_stores_bandwidth_mbps,perf_offcore_data_read_bandwidth_mbps,perf_offcore_rfo_bandwidth_mbps,perf_cache_miss_rate_pct,perf_l3_hit_rate_pct,perf_l3_miss_rate_pct,perf_instructions_per_cycle,status 2 | 64.0,68719476736,64,0.0,20,66470.8,0.0,66470.8,17016500.0,20.0002,1375.4326753861792,1146.4055073789407,0,0,24395.004109599857,15707.263239068274,499.85085779883406,14796.368181781538,83.34871839925317,34.58707149856349,65.41292850143651,0.05552603199007588,success 3 | 64.0,68719476736,64,0.5,20,84671.9,38972.4,45699.5,21676000.0,20.0001,13016.962657050846,12652.596570775779,0,0,25966.514860098232,16492.428906674304,8962.241384994422,10434.42454503933,97.20083635580146,10.462833231636328,89.53716676836368,0.060551941241764586,success 4 | 64.0,68719476736,64,1.0,20,72391.7,72391.7,0.0,18532300.0,20.0002,17083.22052848807,16704.840030378058,0,0,25190.212583415512,16176.308348881617,16315.734453968327,748.6192168928842,97.78507514154592,3.801752827337586,96.1982471726624,0.058750930415527536,success 5 | -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/mini-perf/cxl_perf_parameter_sweep_numactl1.csv: -------------------------------------------------------------------------------- 1 | buffer_size_gb,buffer_size_bytes,threads,read_ratio,duration,app_total_bandwidth_mbps,app_read_bandwidth_mbps,app_write_bandwidth_mbps,app_total_iops,app_execution_time,perf_cache_references_bandwidth_mbps,perf_cache_misses_bandwidth_mbps,perf_dram_read_bandwidth_mbps,perf_dram_write_bandwidth_mbps,perf_mem_loads_bandwidth_mbps,perf_mem_stores_bandwidth_mbps,perf_offcore_data_read_bandwidth_mbps,perf_offcore_rfo_bandwidth_mbps,perf_cache_miss_rate_pct,perf_l3_hit_rate_pct,perf_l3_miss_rate_pct,perf_instructions_per_cycle,status 2 | 64.0,68719476736,64,0.0,20,267768,0.0,267768.0,68548700.0,20.0007,3805.411544942221,2125.9145575289926,0,0,46501.10388626973,27057.953739525798,2132.6202283799958,61787.626282131634,55.86556230309833,56.297932983397494,43.702067016602506,0.10055938980236596,success 3 | 64.0,68719476736,64,0.5,20,161967,82734.7,79232.4,41463600.0,20.0001,36235.74541707754,35072.19884544453,0,0,36401.116039594766,21946.696286512375,21066.529855348985,18900.9451288997,96.78895367477485,10.596639630150872,89.40336036984912,0.0779437434576719,success 4 | 64.0,68719476736,64,1.0,20,180463,180463.0,0.0,46198500.0,20.0006,45596.404506104365,44239.165211446736,0,0,38728.57510879131,23175.775351500637,44654.46386479529,865.3857785902662,97.02336333454554,5.77289353954836,94.22710646045164,0.08359840305412554,success 5 | -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/mini-perf/cxl_perf_parameter_sweep_numactl3.csv: -------------------------------------------------------------------------------- 1 | 
buffer_size_gb,buffer_size_bytes,threads,read_ratio,duration,app_total_bandwidth_mbps,app_read_bandwidth_mbps,app_write_bandwidth_mbps,app_total_iops,app_execution_time,perf_cache_references_bandwidth_mbps,perf_cache_misses_bandwidth_mbps,perf_dram_read_bandwidth_mbps,perf_dram_write_bandwidth_mbps,perf_mem_loads_bandwidth_mbps,perf_mem_stores_bandwidth_mbps,perf_offcore_data_read_bandwidth_mbps,perf_offcore_rfo_bandwidth_mbps,perf_cache_miss_rate_pct,perf_l3_hit_rate_pct,perf_l3_miss_rate_pct,perf_instructions_per_cycle,status 2 | 64.0,68719476736,64,0.0,20,35834.1,0.0,35834.1,9173520.0,20.0001,932.2960551529062,810.9680915897305,0,0,18229.057937914677,11986.961857585196,236.4377876488462,7266.030270598565,86.98611209469543,28.784611220916045,71.21538877908395,0.046497905871540714,success 3 | 64.0,68719476736,64,0.5,20,52174.2,25654.8,26519.4,13356600.0,20.0001,8298.424685741335,8066.59487669846,0,0,20215.013629581696,13081.37957055188,5263.730154342175,5865.383800733436,97.20633954247711,6.252293211413116,93.74770678858688,0.0510924052731855,success 4 | 64.0,68719476736,64,1.0,20,47958.1,47958.1,0.0,12277300.0,20.0001,9940.219178162006,9752.744253048053,0,0,19021.699544068764,12355.52911929013,9279.758976250847,630.4004791592496,98.11397594204138,3.5307404674055647,96.46925953259444,0.050415635051218835,success 5 | -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/no_conf_results/cxl_scheduler_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/cxl_results/no_conf_results/cxl_scheduler_performance.png -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_results/numactl_results/cxl_scheduler_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/cxl_results/numactl_results/cxl_scheduler_performance.png -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_scheduler_performance.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/cxl_scheduler_performance.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_scheduler_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/cxl_scheduler_performance.png -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_scheduler_performance_t172_s64gb_r0.50_seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/cxl_scheduler_performance_t172_s64gb_r0.50_seq.png -------------------------------------------------------------------------------- /workloads/cxl-micro/results/cxl_scheduler_performance_t256_s16gb_r0.50_random.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/cxl_scheduler_performance_t256_s16gb_r0.50_random.png -------------------------------------------------------------------------------- /workloads/cxl-micro/results/default_random_vs_seq_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/default_random_vs_seq_comparison.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/results/default_random_vs_seq_comparison.txt: -------------------------------------------------------------------------------- 1 | ============================================================ 2 | Random vs Sequential Access Comparison Report 3 | Scheduler: default 4 | Configuration: 172 threads, 32GB 5 | ============================================================ 6 | 7 | RANDOM ACCESS STATISTICS: 8 | Average Bandwidth: 48216.06 MB/s 9 | Max Bandwidth: 59057.60 MB/s 10 | Min Bandwidth: 36633.30 MB/s 11 | Read Ratio 0.0 (100% Write): 36633.30 MB/s 12 | Read Ratio 0.5 (50% Read/Write): 59057.60 MB/s 13 | Read Ratio 1.0 (100% Read): 49523.80 MB/s 14 | Optimal Read Ratio: 0.50 (Bandwidth: 59057.60 MB/s) 15 | Bandwidth Range: 36633.30 - 59057.60 MB/s 16 | Max Improvement from Min: 61.2% 17 | 18 | SEQUENTIAL ACCESS STATISTICS: 19 | Average Bandwidth: 107545.74 MB/s 20 | Max Bandwidth: 197014.00 MB/s 21 | Min Bandwidth: 58014.20 MB/s 22 | Read Ratio 0.0 (100% Write): 59039.70 MB/s 23 | Read Ratio 0.5 (50% Read/Write): 72158.10 MB/s 24 | Read Ratio 1.0 (100% Read): 186591.00 MB/s 25 | Optimal Read Ratio: 0.95 (Bandwidth: 197014.00 MB/s) 26 | Bandwidth Range: 58014.20 - 197014.00 MB/s 27 | Max Improvement from Min: 239.6% 28 | 29 | SEQUENTIAL vs RANDOM COMPARISON: 30 | Average Sequential Improvement over Random: 123.0% 31 | Read Ratio 0.0 Improvement: 61.2% (Seq: 59040 vs Random: 36633 MB/s) 32 | Read Ratio 0.5 Improvement: 22.2% (Seq: 72158 vs Random: 59058 MB/s) 33 | Read Ratio 1.0 Improvement: 276.8% (Seq: 186591 vs Random: 49524 MB/s) 34 | Peak Performance Improvement: 233.6% 35 | Sequential Peak: 197014 MB/s at read ratio 0.95 36 | Random Peak: 59058 MB/s at read ratio 0.50 37 | 38 | Random Access Write/Read Ratio: 0.74 (Write is 0.7x of Read) 39 | Sequential Access Write/Read Ratio: 0.32 (Write is 0.3x of Read) 40 | 41 | ============================================================ -------------------------------------------------------------------------------- /workloads/cxl-micro/results/random_schedulers_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/random_schedulers_comparison.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/results/raw_schedulers_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/raw_schedulers_comparison.pdf -------------------------------------------------------------------------------- 
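The improvement percentages in the comparison report above follow directly from the quoted bandwidths, as (sequential / random - 1) * 100. A minimal Python sketch (a sanity check using only values copied from the report; it is not part of the original workload scripts) reproduces them:

    # Bandwidth figures (MB/s) copied from default_random_vs_seq_comparison.txt
    seq = {0.0: 59039.70, 0.5: 72158.10, 1.0: 186591.00}  # sequential access
    rnd = {0.0: 36633.30, 0.5: 59057.60, 1.0: 49523.80}   # random access

    for ratio in seq:
        gain = (seq[ratio] / rnd[ratio] - 1) * 100
        print(f"Read ratio {ratio}: {gain:.1f}% sequential improvement")
    # -> 61.2%, 22.2%, 276.8%, matching the report

    # Peak improvement compares each mode's best operating point
    print(f"Peak: {(197014.00 / 59057.60 - 1) * 100:.1f}%")  # -> 233.6%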
/workloads/cxl-micro/results/rustland_vs_default_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/rustland_vs_default_comparison.pdf -------------------------------------------------------------------------------- /workloads/cxl-micro/results/sequential_schedulers_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/cxl-micro/results/sequential_schedulers_comparison.pdf --------------------------------------------------------------------------------
/workloads/ktransformers/optimized_local_chat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Optimized local chat script that runs with LD_PRELOAD=./liba.so 4 | This script wraps the ktransformers local_chat functionality with a preloaded optimization library. 5 | """ 6 | 7 | import os 8 | import sys 9 | import subprocess 10 | import argparse 11 | 12 | def main(): 13 | """Main function to run optimized local chat with LD_PRELOAD.""" 14 | parser = argparse.ArgumentParser(description='Run optimized local chat with LD_PRELOAD') 15 | parser.add_argument('--model_path', type=str, default='unsloth/DeepSeek-R1', 16 | help='Path to the model') 17 | parser.add_argument('--gguf_path', type=str, default='/root/deepseek-gguf/', 18 | help='Path to GGUF files') 19 | parser.add_argument('--optimize_config_path', type=str, 20 | default='optimize/optimize_rules/DeepSeek-V3-Chat-int8-fast.yaml', 21 | help='Path to optimization config') 22 | parser.add_argument('--batch_size', type=int, default=32, 23 | help='Batch size for processing') 24 | parser.add_argument('--speculative_length', type=int, default=6, 25 | help='Speculative decoding length') 26 | 27 | args = parser.parse_args() 28 | 29 | # Prepare the environment with LD_PRELOAD 30 | env = os.environ.copy() 31 | env['LD_PRELOAD'] = './liba.so' 32 | 33 | # Build the command; invoke local_chat as a module so this wrapper does not re-run itself recursively 34 | cmd = [ 35 | sys.executable, 36 | '-m', 'ktransformers.local_chat', 37 | f'--model_path={args.model_path}', 38 | f'--gguf_path={args.gguf_path}', 39 | f'--optimize_config_path={args.optimize_config_path}', 40 | f'--batch_size={args.batch_size}', 41 | f'--speculative_length={args.speculative_length}' 42 | ] 43 | 44 | print(f"Running command with LD_PRELOAD=./liba.so:") 45 | print(f"Command: {' '.join(cmd)}") 46 | print(f"Environment: LD_PRELOAD={env.get('LD_PRELOAD', 'Not set')}") 47 | 48 | # Execute the command 49 | try: 50 | result = subprocess.run(cmd, env=env, check=True) 51 | return result.returncode 52 | except subprocess.CalledProcessError as e: 53 | print(f"Error running command: {e}") 54 | return e.returncode 55 | except FileNotFoundError: 56 | print("Error: ktransformers local_chat module could not be launched. Please ensure ktransformers is installed.") 57 | return 1 58 | 59 | if __name__ == "__main__": 60 | sys.exit(main()) --------------------------------------------------------------------------------
/workloads/ktransformers/run_optimized_chat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Shell script to run optimized local chat with LD_PRELOAD=./liba.so 4 | # 5 | 6 | # Default parameters - can be overridden by command line arguments 7 | MODEL_PATH="${1:-unsloth/DeepSeek-R1}" 8 | GGUF_PATH="${2:-/root/deepseek-gguf/}" 9 | OPTIMIZE_CONFIG_PATH="${3:-optimize/optimize_rules/DeepSeek-V3-Chat-int8-fast.yaml}" 10 | BATCH_SIZE="${4:-32}" 11 | SPECULATIVE_LENGTH="${5:-6}" 12 | 13 | echo "Running optimized local chat with LD_PRELOAD=./liba.so" 14 | echo "Model path: $MODEL_PATH" 15 | echo "GGUF path: $GGUF_PATH" 16 | echo "Optimize config: $OPTIMIZE_CONFIG_PATH" 17 | echo "Batch size: $BATCH_SIZE" 18 | echo "Speculative length: $SPECULATIVE_LENGTH" 19 | echo "" 20 | 21 | # Run the command with LD_PRELOAD 22 | LD_PRELOAD=./liba.so python optimized_local_chat.py \ 23 | --model_path="$MODEL_PATH" \ 24 | --gguf_path="$GGUF_PATH" \ 25 | --optimize_config_path="$OPTIMIZE_CONFIG_PATH" \ 26 | --batch_size="$BATCH_SIZE" \ 27 | --speculative_length="$SPECULATIVE_LENGTH" --------------------------------------------------------------------------------
/workloads/linux-build-bench/.gitignore: -------------------------------------------------------------------------------- 1 | # Linux kernel source 2 | linux/ 3 | 4 | # Build artifacts 5 | *.o 6 | *.ko 7 | *.mod 8 | *.mod.c 9 | modules.order 10 | Module.symvers 11 | build_metrics* 12 | # Python cache 13 | __pycache__/ 14 | *.pyc 15 | *.pyo 16 | 17 | # Temporary files 18 | *.tmp 19 | *.swp 20 | *~ --------------------------------------------------------------------------------
/workloads/linux-build-bench/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Linux build benchmark 2 | LINUX_DIR := linux 3 | CONFIG := defconfig 4 | JOBS ?= $(shell nproc) 5 | 6 | .PHONY: all config tinyconfig allnoconfig clean distclean 7 | 8 | all: config 9 | @echo "Linux kernel configured. Ready for benchmarking." 10 | 11 | config: 12 | @echo "Configuring Linux kernel with $(CONFIG)..." 13 | @cd $(LINUX_DIR) && make $(CONFIG) 14 | @echo "Configuration complete." 15 | 16 | # Configure with minimal config for faster builds 17 | tinyconfig: 18 | @echo "Configuring Linux kernel with tinyconfig..." 19 | @cd $(LINUX_DIR) && make tinyconfig 20 | @echo "Tiny configuration complete." 21 | 22 | # Configure with allnoconfig for minimal build 23 | allnoconfig: 24 | @echo "Configuring Linux kernel with allnoconfig..." 25 | @cd $(LINUX_DIR) && make allnoconfig 26 | @echo "Minimal configuration complete." 27 | 28 | # Clean build artifacts 29 | clean: 30 | @echo "Cleaning Linux build artifacts..." 31 | @cd $(LINUX_DIR) && make clean 32 | @echo "Clean complete." 33 | 34 | # Deep clean including config 35 | distclean: 36 | @echo "Deep cleaning Linux build..." 37 | @cd $(LINUX_DIR) && make distclean 38 | @echo "Distclean complete."
39 | -------------------------------------------------------------------------------- /workloads/linux-build-bench/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=3.5.0 2 | numpy>=1.21.0 -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/defconfig/linux_build_normalized_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/defconfig/linux_build_normalized_performance.png -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/defconfig/linux_build_speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/defconfig/linux_build_speedup.png -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/defconfig/linux_build_time_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/defconfig/linux_build_time_comparison.png -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/linux_build_normalized_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/linux_build_normalized_performance.png -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/linux_build_speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/linux_build_speedup.png -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/linux_build_time_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/linux_build_time_comparison.png -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/tiny-config/linux_build_normalized_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/tiny-config/linux_build_normalized_performance.png -------------------------------------------------------------------------------- /workloads/linux-build-bench/results/tiny-config/linux_build_speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/tiny-config/linux_build_speedup.png 
-------------------------------------------------------------------------------- /workloads/linux-build-bench/results/tiny-config/linux_build_time_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/results/tiny-config/linux_build_time_comparison.png --------------------------------------------------------------------------------
/workloads/linux-build-bench/schedcp_workloads.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": {}, 3 | "history": [] 4 | } --------------------------------------------------------------------------------
/workloads/linux-build-bench/test-record/Linux_build_benchmark_results.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/test-record/Linux_build_benchmark_results.pdf --------------------------------------------------------------------------------
/workloads/linux-build-bench/test-record/Linux_build_benchmark_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/linux-build-bench/test-record/Linux_build_benchmark_results.png --------------------------------------------------------------------------------
/workloads/linux-build-bench/test-record/README.md: -------------------------------------------------------------------------------- 1 | 1. default (baseline) 2 | 3 | real 0m13.584s 4 | user 2m12.253s 5 | sys 0m32.766s 6 | 7 | real 0m13.510s 8 | user 2m6.144s 9 | sys 0m29.817s 10 | 11 | real 0m13.626s 12 | user 2m5.930s 13 | sys 0m30.427s 14 | 15 | 2. scx_rusty (first attempt) 16 | 17 | real 0m8.489s 18 | user 2m4.411s 19 | sys 0m28.399s 20 | 21 | real 0m8.094s 22 | user 2m4.165s 23 | sys 0m28.092s 24 | 25 | real 0m8.350s 26 | user 2m4.240s 27 | sys 0m28.456s 28 | 29 | 3. scx_layered (after 3 attempts) 30 | 31 | real 0m7.657s 32 | user 2m14.901s 33 | sys 0m31.439s 34 | 35 | real 0m7.505s 36 | user 2m0.235s 37 | sys 0m24.908s 38 | 39 | real 0m7.650s 40 | user 2m0.696s 41 | sys 0m24.910s 42 | 43 | 4.
basic RL scheduler 44 | 45 | real 0m13.782s 46 | user 2m21.649s 47 | sys 0m36.960s 48 | 49 | real 0m13.797s 50 | user 2m22.337s 51 | sys 0m37.028s 52 | 53 | real 0m13.806s 54 | user 2m21.671s 55 | sys 0m36.791s 56 | -------------------------------------------------------------------------------- /workloads/llama.cpp/.gitignore: -------------------------------------------------------------------------------- 1 | # Build directories 2 | build/ 3 | build-*/ 4 | bin/ 5 | lib/ 6 | /schedcp_workloads.json 7 | # CMake generated files 8 | CMakeCache.txt 9 | CMakeFiles/ 10 | cmake_install.cmake 11 | Makefile 12 | *.cmake 13 | 14 | # Compiled binaries 15 | llama-cli 16 | llama-server 17 | llama-bench 18 | *.exe 19 | *.dll 20 | *.so 21 | *.dylib 22 | 23 | # Dataset files 24 | datasets/ 25 | *.gguf 26 | *.ggml 27 | *.bin 28 | 29 | # Generated documentation 30 | *.html.gz.hpp 31 | *.html.hpp 32 | 33 | # Python cache 34 | __pycache__/ 35 | *.py[cod] 36 | *$py.class 37 | 38 | # Virtual environments 39 | venv/ 40 | env/ 41 | ENV/ 42 | 43 | # IDE files 44 | .vscode/ 45 | .idea/ 46 | *.swp 47 | *.swo 48 | *~ 49 | 50 | # Logs 51 | *.log 52 | server_logs.stderr 53 | server_logs.stdout 54 | **/server_logs.stderr 55 | **/server_logs.stdout 56 | 57 | # Temporary files 58 | *.tmp 59 | *.temp 60 | .DS_Store -------------------------------------------------------------------------------- /workloads/llama.cpp/download_test_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Download a small test model for llama.cpp 4 | """ 5 | 6 | import os 7 | import requests 8 | from tqdm import tqdm 9 | 10 | def download_file(url, filename): 11 | """Download file with progress bar""" 12 | response = requests.get(url, stream=True) 13 | response.raise_for_status() 14 | 15 | total_size = int(response.headers.get('content-length', 0)) 16 | 17 | with open(filename, 'wb') as f: 18 | with tqdm(total=total_size, unit='B', unit_scale=True, desc=filename) as pbar: 19 | for chunk in response.iter_content(chunk_size=8192): 20 | f.write(chunk) 21 | pbar.update(len(chunk)) 22 | 23 | print(f"Downloaded: {filename}") 24 | 25 | def main(): 26 | # Create models directory 27 | os.makedirs("models", exist_ok=True) 28 | 29 | # Download a small model (TinyLlama 1.1B Q4_K_M - about 668MB) 30 | model_url = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" 31 | model_file = "models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" 32 | 33 | if os.path.exists(model_file): 34 | print(f"Model already exists: {model_file}") 35 | else: 36 | print("Downloading TinyLlama 1.1B model (Q4_K_M quantization)...") 37 | download_file(model_url, model_file) 38 | 39 | print(f"\nModel ready at: {model_file}") 40 | print("You can now use this model with llama.cpp server") 41 | 42 | if __name__ == "__main__": 43 | main() -------------------------------------------------------------------------------- /workloads/llama.cpp/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | requests 3 | huggingface-hub 4 | aiohttp 5 | numpy 6 | pandas 7 | matplotlib 8 | 9 | psutil 10 | -------------------------------------------------------------------------------- /workloads/llama.cpp/results/benchmark_results.csv: -------------------------------------------------------------------------------- 1 | threads,batch_size,pp_tps,tg_tps,pp_time,tg_time 2 | 1,128,46.946144,20.118349,5585.229857792,814.420272384 
3 | 1,256,50.926974,21.088531,5147.490369024,776.94147776 4 | 1,512,42.784833,19.296257,6127.8053632,849.268664704 5 | 1,1024,47.716045,20.182781,5496.69020928,811.861528192 6 | 1,2048,47.316266,20.243785,5540.279517696,809.401171584 7 | 2,128,65.482796,25.884717,4008.449976832,633.305642368 8 | 2,256,62.197321,25.595825,4217.585441792,640.427235456 9 | 2,512,59.116022,25.041311,4435.306324992,654.59884672 10 | 2,1024,57.666258,24.829757,4548.563547136,660.011338368 11 | 2,2048,53.472478,24.442625,4902.622410752,670.666999808 12 | 4,128,82.575521,30.697374,3175.224666624,534.16364544 13 | 4,256,81.100283,32.103331,3233.024544768,511.4153984 14 | 4,512,89.571721,31.325155,2928.343160832,523.891639808 15 | 4,1024,85.631906,31.043462,3061.413276672,531.085870336 16 | 4,2048,85.364767,29.591674,3071.094437376,554.81013952 17 | 8,128,122.004041,37.370464,2150.5011328,438.787144448 18 | 8,256,115.466591,35.908966,2283.545278976,457.13321024 19 | 8,512,134.687886,34.8967,1946.393945088,469.580281728 20 | 8,1024,130.377877,36.966781,2011.424838144,443.37595456 21 | 8,2048,133.416807,37.726025,1965.785145856,434.347098368 22 | 16,128,124.811859,21.722971,2105.094501888,755.997562112 23 | 16,256,129.518528,21.535008,2024.120807424,766.972993536 24 | 16,512,141.478163,23.389872,1852.914226176,702.251191808 25 | 16,1024,140.878359,23.706075,1860.944474624,692.019770112 26 | 16,2048,138.093335,22.877279,1900.711935488,717.898787712 27 | -------------------------------------------------------------------------------- /workloads/llama.cpp/results/duplex_scheduling_analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/duplex_scheduling_analysis.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/llama_benchmark_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/llama_benchmark_results.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/llama_scheduler_performance_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/llama_scheduler_performance_128.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/roofline_analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/roofline_analysis.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/roofline_analysis_results.csv: -------------------------------------------------------------------------------- 1 | scheduler,batch_size,threads,arithmetic_intensity,achieved_tflops,memory_bound_tflops,compute_bound_tflops,bottleneck,pp_tps,tg_tps,measured_bandwidth_gbps,bandwidth_utilization 2 | default,32,8,2274.875269888723,563.4974003295011,0.44456022927113825,20.0,compute,198.2313,39.234289,0.19542180406791457,0.07101082996653872 3 | 
default,32,16,2274.875269888723,882.1136524336819,0.6804679197968087,20.0,compute,289.367218,61.418388,0.2991231777863998,0.10869301518401156 4 | default,32,32,2274.875269888723,1183.7590545071678,0.8767645852597755,20.0,compute,353.800864,82.420868,0.3854121572575991,0.140048022259302 5 | default,64,8,2673.8943875061004,1140.0761914596387,0.4950854587244834,20.0,compute,164.170484,39.689694,0.18515520322634801,0.06728023373050437 6 | default,64,16,2673.8943875061004,1790.6176438690225,0.822960207583971,20.0,compute,293.977064,62.33712,0.3077758835312614,0.11183716698083626 7 | default,64,32,2673.8943875061004,2349.2508683227584,1.1062762579141652,20.0,compute,381.036184,81.784927,0.41373221885026346,0.15033874231477598 8 | default,128,8,2930.9418498903333,2277.55411517193,0.26573159562488285,20.0,compute,158.191872,39.644467,0.09066423328556508,0.032944852211324524 9 | default,128,16,2930.9418498903333,3604.215999950284,0.7445291372420232,20.0,compute,194.088106,62.737136,0.2540238515035299,0.0923051785986664 10 | default,128,32,2930.9418498903333,4723.510128344404,1.2133161761776368,20.0,compute,393.923253,82.220238,0.41396801380519893,0.1504244236210752 11 | default,256,8,3078.934277445278,4614.245463793548,0.5600135014607242,20.0,compute,164.432523,40.159156,0.18188550030544692,0.06609211493657229 12 | default,256,16,3078.934277445278,7016.20396310256,0.7773025032837639,20.0,compute,191.068775,61.064118,0.25245829668333314,0.09173629966690885 13 | default,256,32,3078.934277445278,9265.570317312247,0.9792243131159143,20.0,compute,245.550968,80.641025,0.3180400180313099,0.11556686701719109 14 | default,512,8,3158.6800997391215,9136.095614193639,0.6482976298463262,20.0,compute,189.005934,39.757084,0.2052432058250754,0.07457965327946055 15 | default,512,16,3158.6800997391215,14320.787783161652,0.8182391065057576,20.0,compute,194.747995,62.319046,0.2590446264480334,0.09412958809884935 16 | default,512,32,3158.6800997391215,18826.759815776284,0.9956425621394838,20.0,compute,243.408202,81.927456,0.3152084195616121,0.11453794315465557 17 | -------------------------------------------------------------------------------- /workloads/llama.cpp/results/scheduler_comparison.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/scheduler_comparison.pdf -------------------------------------------------------------------------------- /workloads/llama.cpp/results/sharegpt_llama_server_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/sharegpt_llama_server_performance.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/sharegpt_vicuna_s1000_c64_20250719_160620/sharegpt_llama_server_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/sharegpt_vicuna_s1000_c64_20250719_160620/sharegpt_llama_server_performance.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/sharegpt_vicuna_s100_c128_20250816_130207/sharegpt_llama_server_performance.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/sharegpt_vicuna_s100_c128_20250816_130207/sharegpt_llama_server_performance.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/sharegpt_vicuna_s30_c4_20250805_112829/sharegpt_llama_server_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/sharegpt_vicuna_s30_c4_20250805_112829/sharegpt_llama_server_performance.png -------------------------------------------------------------------------------- /workloads/llama.cpp/results/sharegpt_vicuna_s5_c2_20250719_153411/server_logs.stdout: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/sharegpt_vicuna_s5_c2_20250719_153411/server_logs.stdout -------------------------------------------------------------------------------- /workloads/llama.cpp/results/sharegpt_vicuna_s5_c2_20250719_153411/sharegpt_llama_server_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/llama.cpp/results/sharegpt_vicuna_s5_c2_20250719_153411/sharegpt_llama_server_performance.png -------------------------------------------------------------------------------- /workloads/llama.cpp/run_roofline_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Roofline Analysis Test Script for Duplex Scheduling 4 | # This script runs the roofline analysis and generates performance graphs 5 | 6 | set -e 7 | 8 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 9 | cd "$SCRIPT_DIR" 10 | 11 | echo "=========================================" 12 | echo "Roofline Analysis for Duplex Scheduling" 13 | echo "=========================================" 14 | 15 | # Check if llama-bench exists 16 | if [ ! -f "build/bin/llama-bench" ]; then 17 | echo "Error: llama-bench not found. Building llama.cpp..." 18 | ./build_llama.sh 19 | fi 20 | 21 | # Check if model exists 22 | MODEL_PATH="models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" 23 | if [ ! -f "$MODEL_PATH" ]; then 24 | echo "Downloading test model..." 25 | python3 download_test_model.py 26 | fi 27 | 28 | # Install required Python packages if needed 29 | echo "Checking Python dependencies..." 30 | pip install -q psutil GPUtil pandas matplotlib numpy seaborn 31 | 32 | # Run roofline analysis with different configurations 33 | echo "" 34 | echo "Running roofline analysis tests..." 35 | echo "This will test performance with and without duplex scheduling" 36 | echo "" 37 | 38 | # Test with different batch sizes to show memory bandwidth impact 39 | python3 roofline_analysis.py \ 40 | --batch-sizes 32 64 128 256 512 \ 41 | --thread-counts 8 16 32 \ 42 | --schedulers default scx_rusty scx_lavd scx_bpfland 43 | 44 | echo "" 45 | echo "=========================================" 46 | echo "Analysis Complete!" 
47 | echo "=========================================" 48 | echo "" 49 | echo "Results saved in ./results/ directory:" 50 | echo " - roofline_analysis_results.csv: Raw performance data" 51 | echo " - roofline_analysis.png: Roofline model visualization" 52 | echo " - duplex_scheduling_analysis.png: Duplex scheduling improvements" 53 | echo "" 54 | echo "Key findings should show:" 55 | echo " • Arithmetic intensity increase from 0.18 to 0.27 FLOPS/byte" 56 | echo " • Performance improvement from 2.4 to 3.9 TFLOPS" 57 | echo " • Memory bandwidth utilization from 58% to 91%" 58 | echo " • Bottleneck shift from memory-bound to compute-bound" -------------------------------------------------------------------------------- /workloads/nginx/.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | nginx/ 3 | wrk2/ 4 | nginx-install/ 5 | nginx-bin 6 | nginx-mime.types 7 | nginx-local.conf 8 | nginx.pid 9 | 10 | # Generated content 11 | html/ 12 | 13 | # Results and logs 14 | results/ 15 | *.json 16 | *.log 17 | 18 | # Temporary files 19 | simple_test.py -------------------------------------------------------------------------------- /workloads/nginx/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |

<html><body>Hello World</body></html>
4 | 5 | -------------------------------------------------------------------------------- /workloads/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | daemon on; 2 | user root; 3 | worker_processes auto; 4 | worker_rlimit_nofile 65535; 5 | 6 | events { 7 | worker_connections 4096; 8 | use epoll; 9 | multi_accept on; 10 | } 11 | 12 | http { 13 | include mime.types; 14 | default_type application/octet-stream; 15 | 16 | # Logging 17 | access_log off; 18 | error_log stderr error; 19 | 20 | # Performance optimizations 21 | sendfile on; 22 | tcp_nopush on; 23 | tcp_nodelay on; 24 | keepalive_timeout 65; 25 | keepalive_requests 1000; 26 | 27 | # Buffer settings 28 | client_body_buffer_size 16K; 29 | client_header_buffer_size 1k; 30 | client_max_body_size 8m; 31 | large_client_header_buffers 4 8k; 32 | 33 | # File cache settings 34 | open_file_cache max=2000 inactive=20s; 35 | open_file_cache_valid 30s; 36 | open_file_cache_min_uses 2; 37 | open_file_cache_errors on; 38 | 39 | # Gzip compression 40 | gzip on; 41 | gzip_vary on; 42 | gzip_min_length 1024; 43 | gzip_comp_level 6; 44 | gzip_types 45 | text/plain 46 | text/css 47 | text/xml 48 | text/javascript 49 | application/json 50 | application/javascript 51 | application/xml+rss 52 | application/atom+xml; 53 | 54 | server { 55 | listen 8080 default_server; 56 | listen [::]:8080 default_server; 57 | server_name _; 58 | 59 | root /root/yunwei37/ai-os/workloads/nginx/html; 60 | index index.html; 61 | 62 | location / { 63 | try_files $uri $uri/ =404; 64 | } 65 | 66 | location /nginx_status { 67 | stub_status; 68 | allow 127.0.0.1; 69 | deny all; 70 | } 71 | 72 | # Static file serving optimization 73 | location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg)$ { 74 | expires 1y; 75 | add_header Cache-Control "public, immutable"; 76 | } 77 | } 78 | } -------------------------------------------------------------------------------- /workloads/nginx/requirements.txt: -------------------------------------------------------------------------------- 1 | psutil>=5.8.0 -------------------------------------------------------------------------------- /workloads/processing/.gitignore: -------------------------------------------------------------------------------- 1 | /file{1..99}.txt 2 | large.bin 3 | assets/short 4 | assets/long 5 | *.mp4 6 | libgit2 7 | -------------------------------------------------------------------------------- /workloads/processing/assets/compression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | File compression utility for parallel compression test. 4 | Compresses a file using gzip with maximum compression level. 
5 | """ 6 | 7 | import sys 8 | import gzip 9 | import os 10 | import shutil 11 | import time 12 | 13 | def compress_file(input_path, compression_level=9): 14 | """Compress a file using gzip with specified compression level.""" 15 | output_path = input_path + '.gz' 16 | 17 | # Read and compress the file 18 | with open(input_path, 'rb') as f_in: 19 | with gzip.open(output_path, 'wb', compresslevel=compression_level) as f_out: 20 | # Copy in chunks to handle large files efficiently 21 | shutil.copyfileobj(f_in, f_out, length=65536) 22 | 23 | return output_path 24 | 25 | def main(): 26 | if len(sys.argv) < 2: 27 | print(f"Usage: {sys.argv[0]} <file> [compression_level]", file=sys.stderr) 28 | print("compression_level: 1-9 (default: 9 for maximum compression)", file=sys.stderr) 29 | sys.exit(1) 30 | 31 | filepath = sys.argv[1] 32 | compression_level = 9 # Default to maximum compression like pigz -9 33 | 34 | if len(sys.argv) > 2: 35 | try: 36 | compression_level = int(sys.argv[2]) 37 | if not 1 <= compression_level <= 9: 38 | raise ValueError 39 | except ValueError: 40 | print(f"Error: Compression level must be between 1 and 9", file=sys.stderr) 41 | sys.exit(1) 42 | 43 | # Check if file exists 44 | if not os.path.exists(filepath): 45 | print(f"Error: File '{filepath}' not found", file=sys.stderr) 46 | sys.exit(1) 47 | 48 | # Get file size for reporting 49 | file_size = os.path.getsize(filepath) 50 | 51 | # Compress the file 52 | start_time = time.time() 53 | try: 54 | output_path = compress_file(filepath, compression_level) 55 | end_time = time.time() 56 | 57 | # Get compressed file size 58 | compressed_size = os.path.getsize(output_path) 59 | compression_ratio = (1 - compressed_size / file_size) * 100 60 | 61 | # Output similar to pigz 62 | print(f"Compressed {filepath} -> {output_path}") 63 | print(f"Original: {file_size:,} bytes, Compressed: {compressed_size:,} bytes") 64 | print(f"Compression ratio: {compression_ratio:.1f}%") 65 | 66 | except Exception as e: 67 | print(f"Error compressing file: {e}", file=sys.stderr) 68 | sys.exit(1) 69 | 70 | if __name__ == "__main__": 71 | main() --------------------------------------------------------------------------------
/workloads/processing/assets/dask_groupby_prepare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Prepare data for dask_groupby_test.py 4 | Creates CSV files with skewed customer data 5 | """ 6 | 7 | import argparse 8 | import random 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser(description='Prepare data for Dask groupby test') 12 | parser.add_argument('--regular-size', type=int, default=1000, 13 | help='Number of transactions per regular customer (default: 1000)') 14 | parser.add_argument('--hot-size', type=int, default=100000, 15 | help='Number of transactions for hot customer (default: 100000)') 16 | parser.add_argument('--num-customers', type=int, default=99, 17 | help='Number of regular customers (default: 99)') 18 | parser.add_argument('--output', type=str, default='dask_data.csv', 19 | help='Output CSV file') 20 | 21 | args = parser.parse_args() 22 | 23 | print(f"Generating dataset for Dask groupby test...") 24 | print(f" Regular customers: {args.num_customers} with ~{args.regular_size} transactions each") 25 | print(f" Hot customer: 1 with {args.hot_size} transactions") 26 | 27 | total_rows = 0 28 | with open(args.output, 'w') as f: 29 | # Regular customers 30 | for customer_id in range(args.num_customers): 31 | # Add some
variance (±10%) 32 | num_transactions = random.randint( 33 | int(args.regular_size * 0.9), 34 | int(args.regular_size * 1.1) 35 | ) 36 | for _ in range(num_transactions): 37 | value = random.randint(100, 500) 38 | f.write(f"customer_{customer_id},{value}\n") 39 | total_rows += num_transactions 40 | 41 | # Hot customer (ID 999) 42 | for _ in range(args.hot_size): 43 | value = random.randint(100, 500) 44 | f.write(f"customer_999,{value}\n") 45 | total_rows += args.hot_size 46 | 47 | skew_ratio = args.hot_size / args.regular_size if args.regular_size > 0 else float('inf') 48 | print(f"Generated {total_rows} rows with {skew_ratio:.1f}x skew") 49 | print(f"Data saved to {args.output}") -------------------------------------------------------------------------------- /workloads/processing/assets/dask_groupby_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Dask-like groupby simulation without Dask dependency 4 | Simulates customer analytics with power-law distribution 5 | """ 6 | 7 | import pandas as pd 8 | import time 9 | import argparse 10 | 11 | if __name__ == '__main__': 12 | parser = argparse.ArgumentParser(description='Dask-like groupby test') 13 | parser.add_argument('data_file', type=str, help='Path to CSV data file') 14 | 15 | args = parser.parse_args() 16 | 17 | print(f"Processing groupby on {args.data_file}...") 18 | start_time = time.time() 19 | 20 | # Load data from file 21 | data = pd.read_csv(args.data_file, names=['k', 'v']) 22 | print(f"Loaded {len(data)} rows") 23 | 24 | # Perform multiple groupby operations to simulate complex analytics 25 | # Customer analytics: sum, mean, count, std 26 | result_sum = data.groupby('k')['v'].sum() 27 | result_mean = data.groupby('k')['v'].mean() 28 | result_count = data.groupby('k')['v'].count() 29 | result_std = data.groupby('k')['v'].std() 30 | 31 | # Additional aggregations 32 | result_min = data.groupby('k')['v'].min() 33 | result_max = data.groupby('k')['v'].max() 34 | 35 | # Combine all results 36 | final_result = pd.DataFrame({ 37 | 'sum': result_sum, 38 | 'mean': result_mean, 39 | 'count': result_count, 40 | 'std': result_std, 41 | 'min': result_min, 42 | 'max': result_max 43 | }) 44 | 45 | # Sort by sum descending 46 | final_result = final_result.sort_values('sum', ascending=False) 47 | 48 | end_time = time.time() 49 | 50 | print(f"Groupby complete in {end_time - start_time:.2f}s") 51 | print(f"Total groups: {len(final_result)}") 52 | print(f"Total sum: {result_sum.sum()}") 53 | print(f"Top 5 groups by sum:") 54 | print(final_result.head()) -------------------------------------------------------------------------------- /workloads/processing/assets/file_checksum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | File checksum calculator for parallel file system operations test. 4 | Calculates SHA256 checksum of a specified file. 
5 | """ 6 | 7 | import sys 8 | import hashlib 9 | import os 10 | import time 11 | 12 | def calculate_sha256(filepath): 13 | """Calculate SHA256 checksum of a file.""" 14 | sha256_hash = hashlib.sha256() 15 | 16 | # Read file in chunks to handle large files efficiently 17 | with open(filepath, "rb") as f: 18 | # Read in 64KB chunks 19 | for chunk in iter(lambda: f.read(65536), b""): 20 | sha256_hash.update(chunk) 21 | 22 | return sha256_hash.hexdigest() 23 | 24 | def main(): 25 | if len(sys.argv) != 2: 26 | print(f"Usage: {sys.argv[0]} <file>", file=sys.stderr) 27 | sys.exit(1) 28 | 29 | filepath = sys.argv[1] 30 | 31 | # Check if file exists 32 | if not os.path.exists(filepath): 33 | print(f"Error: File '{filepath}' not found", file=sys.stderr) 34 | sys.exit(1) 35 | 36 | # Get file size for reporting 37 | file_size = os.path.getsize(filepath) 38 | 39 | # Calculate the checksum twice to double the per-file I/O and CPU work 40 | start_time = time.time() 41 | checksum1 = calculate_sha256(filepath) 42 | checksum2 = calculate_sha256(filepath) 43 | end_time = time.time() 44 | 45 | # Output both checksums followed by the path (sha256sum-like format) 46 | print(f"{checksum1} {checksum2} {filepath}") 47 | 48 | # Report processing time to stderr (optional, for debugging) 49 | # print(f"# Processed {file_size / (1024*1024):.2f} MB in {end_time - start_time:.2f} seconds", file=sys.stderr) 50 | 51 | if __name__ == "__main__": 52 | main() --------------------------------------------------------------------------------
/workloads/processing/assets/flink_join_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Flink-like join simulation without Flink dependency 4 | Simulates retail analytics with popular items creating join skew 5 | """ 6 | 7 | import pandas as pd 8 | import time 9 | import argparse 10 | 11 | if __name__ == '__main__': 12 | parser = argparse.ArgumentParser(description='Flink-like join test') 13 | parser.add_argument('data_file', type=str, help='Path to CSV data file') 14 | 15 | args = parser.parse_args() 16 | 17 | print(f"Processing join on {args.data_file}...") 18 | start_time = time.time() 19 | 20 | # Load transaction data 21 | df = pd.read_csv(args.data_file, names=['product_id', 'price', 'timestamp']) 22 | print(f"Loaded {len(df)} transactions") 23 | 24 | # Simulate product dimension table 25 | unique_products = df['product_id'].unique() 26 | product_dim = pd.DataFrame({ 27 | 'product_id': unique_products, 28 | 'category': ['electronics' if 'product_999' in str(p) else 'general' for p in unique_products], 29 | 'discount': [0.1 if 'product_999' in str(p) else 0.05 for p in unique_products] 30 | }) 31 | 32 | # Perform join operation 33 | result = df.merge(product_dim, on='product_id', how='inner') 34 | 35 | # Calculate aggregations after join 36 | # 1. Total sales by product 37 | product_sales = result.groupby('product_id').agg({ 38 | 'price': ['sum', 'mean', 'count'], 39 | 'discount': 'first', 40 | 'category': 'first' 41 | }) 42 | 43 | # 2. Sales by category 44 | category_sales = result.groupby('category')['price'].sum() 45 | 46 | # 3. Hourly sales (convert timestamp to hour) 47 | result['hour'] = pd.to_datetime(result['timestamp'], unit='s', errors='coerce').dt.hour 48 | hourly_sales = result.groupby('hour')['price'].sum() 49 | 50 | # 4.
Apply discounts and calculate final revenue 51 | result['discounted_price'] = result['price'] * (1 - result['discount']) 52 | total_revenue = result['discounted_price'].sum() 53 | 54 | end_time = time.time() 55 | 56 | print(f"Join complete in {end_time - start_time:.2f}s") 57 | print(f"Total transactions joined: {len(result)}") 58 | print(f"Total products: {len(unique_products)}") 59 | print(f"Total revenue (after discounts): ${total_revenue:.2f}") 60 | print(f"Sales by category: {dict(category_sales)}") 61 | 62 | # Show top products 63 | top_products = product_sales.sort_values(('price', 'sum'), ascending=False).head() 64 | print(f"Top 5 products by revenue:") 65 | print(top_products[('price', 'sum')]) --------------------------------------------------------------------------------
/workloads/processing/assets/long.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <math.h> 4 | 5 | int main() { 6 | double sum = 0; 7 | int n = 1250000000; 8 | while (n--) { 9 | double x = n * 0.0001; 10 | sum += sin(x) * cos(x) * sqrt(x + 1); 11 | } 12 | printf("Long: %f\n", sum); 13 | return 0; 14 | } --------------------------------------------------------------------------------
/workloads/processing/assets/short.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <math.h> 3 | 4 | int main() { 5 | double sum = 0; 6 | int n = 210000000; 7 | while (n--) { 8 | sum += sin(n * 0.001) * cos(n * 0.001); 9 | } 10 | printf("Short: %f\n", sum); 11 | return 0; 12 | } --------------------------------------------------------------------------------
/workloads/processing/assets/spark_skew_prepare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Prepare data for spark_skew_test.py 4 | Creates CSV files with key-value pairs showing partition skew 5 | """ 6 | 7 | import argparse 8 | import random 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser(description='Prepare data for Spark shuffle test') 12 | parser.add_argument('--regular-keys', type=int, default=1000, 13 | help='Number of values per regular key (default: 1000)') 14 | parser.add_argument('--hot-keys', type=int, default=1000000, 15 | help='Number of values for hot key (default: 1000000)') 16 | parser.add_argument('--num-partitions', type=int, default=99, 17 | help='Number of regular partitions/keys (default: 99)') 18 | parser.add_argument('--output', type=str, default='spark_data.csv', 19 | help='Output CSV file') 20 | 21 | args = parser.parse_args() 22 | 23 | print(f"Generating data for Spark shuffle test...") 24 | print(f" Regular keys: {args.num_partitions} with ~{args.regular_keys} values each") 25 | print(f" Hot key: 1 with {args.hot_keys} values") 26 | 27 | total_rows = 0 28 | with open(args.output, 'w') as f: 29 | # Regular keys/partitions 30 | for key_id in range(args.num_partitions): 31 | # Add some variance (±10%) 32 | num_values = random.randint( 33 | int(args.regular_keys * 0.9), 34 | int(args.regular_keys * 1.1) 35 | ) 36 | for i in range(num_values): 37 | value = random.randint(1, 1000) 38 | f.write(f"{key_id},{value}\n") 39 | total_rows += num_values 40 | 41 | # Hot key (ID 999) - simulating data skew in shuffle 42 | for i in range(args.hot_keys): 43 | value = random.randint(1, 1000) 44 | f.write(f"999,{value}\n") 45 | total_rows += args.hot_keys 46 | 47 | skew_ratio = args.hot_keys / args.regular_keys if args.regular_keys > 0 else
float('inf') 48 | print(f"Generated {total_rows} key-value pairs with {skew_ratio:.1f}x skew") 49 | print(f"Data saved to {args.output}") -------------------------------------------------------------------------------- /workloads/processing/assets/spark_skew_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Spark-like skewed workload simulation without Spark dependency 4 | Simulates the hot key problem in distributed processing 5 | """ 6 | 7 | import pandas as pd 8 | import time 9 | import argparse 10 | import math 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser(description='Spark-like shuffle test') 14 | parser.add_argument('data_file', type=str, help='Path to data file') 15 | 16 | args = parser.parse_args() 17 | 18 | print(f"Processing shuffle on {args.data_file}...") 19 | start_time = time.time() 20 | 21 | # Load data 22 | df = pd.read_csv(args.data_file, names=['key', 'value']) 23 | print(f"Loaded {len(df)} records") 24 | 25 | # Simulate shuffle operation with multiple transformations 26 | # 1. Map phase: apply transformations 27 | df['mapped_value'] = df['value'].apply(lambda x: x * 2 + 1) 28 | df['sqrt_value'] = df['value'].apply(lambda x: math.sqrt(abs(x))) 29 | df['log_value'] = df['value'].apply(lambda x: math.log(abs(x) + 1)) 30 | 31 | # 2. Shuffle phase: group by key and aggregate 32 | shuffled = df.groupby('key').agg({ 33 | 'mapped_value': ['sum', 'mean', 'count', 'std'], 34 | 'sqrt_value': ['sum', 'mean'], 35 | 'log_value': ['sum', 'mean'], 36 | 'value': ['min', 'max', 'median'] 37 | }) 38 | 39 | # 3. Reduce phase: final aggregations 40 | # Calculate percentiles 41 | percentiles = df.groupby('key')['value'].quantile([0.25, 0.5, 0.75, 0.95]) 42 | 43 | # Calculate unique values per key 44 | unique_counts = df.groupby('key')['value'].nunique() 45 | 46 | # Sort by total sum to find hot keys 47 | key_totals = shuffled[('mapped_value', 'sum')].sort_values(ascending=False) 48 | 49 | end_time = time.time() 50 | 51 | print(f"Shuffle complete in {end_time - start_time:.2f}s") 52 | print(f"Total keys: {len(shuffled)}") 53 | print(f"Total values processed: {df['mapped_value'].sum()}") 54 | 55 | # Show hot keys 56 | print(f"\nTop 5 keys by sum:") 57 | for key, total in key_totals.head().items(): 58 | count = shuffled.loc[key, ('mapped_value', 'count')] 59 | print(f" Key {key}: sum={total:.0f}, count={count}") 60 | 61 | # Show skew ratio 62 | if len(key_totals) > 1: 63 | max_count = shuffled[('mapped_value', 'count')].max() 64 | avg_count = shuffled[('mapped_value', 'count')].mean() 65 | print(f"\nSkew ratio: {max_count/avg_count:.1f}x (max/avg)") -------------------------------------------------------------------------------- /workloads/processing/schedulers/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.bpf.skel.h 3 | *.bpf.o 4 | *.bpf.l1o 5 | *.bpf.l2o 6 | *.bpf.l3o 7 | scx_simple 8 | loader 9 | example 10 | -------------------------------------------------------------------------------- /workloads/processing/schedulers/compression/time: -------------------------------------------------------------------------------- 1 | real 2m17.468s 2 | user 0m45.762s 3 | sys 0m3.304s -------------------------------------------------------------------------------- /workloads/processing/schedulers/ctest_suite/time: -------------------------------------------------------------------------------- 1 | real 2m14.375s 2 | user 0m45.944s 3 | sys 
0m3.846s -------------------------------------------------------------------------------- /workloads/processing/schedulers/ddos_log_analysis/time: -------------------------------------------------------------------------------- 1 | real 2m32.886s 2 | user 0m55.461s 3 | sys 0m4.010s 4 | --------------------------------------------------------------------------------
/workloads/processing/schedulers/fifo.bpf.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | /* 3 | * A simple FIFO scheduler. 4 | * 5 | * This scheduler implements simple FIFO (First-In-First-Out) scheduling 6 | * where tasks are scheduled in the order they arrive, without considering 7 | * task weights or priorities. 8 | * 9 | * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 10 | * Copyright (c) 2022 Tejun Heo 11 | * Copyright (c) 2022 David Vernet 12 | */ 13 | #include <scx/common.bpf.h> 14 | 15 | char _license[] SEC("license") = "GPL"; 16 | 17 | UEI_DEFINE(uei); 18 | 19 | #define SHARED_DSQ 0 20 | 21 | s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) 22 | { 23 | bool is_idle = false; 24 | s32 cpu; 25 | 26 | cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle); 27 | if (is_idle) { 28 | scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); 29 | } 30 | 31 | return cpu; 32 | } 33 | 34 | void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) 35 | { 36 | scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags); 37 | } 38 | 39 | void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev) 40 | { 41 | scx_bpf_dsq_move_to_local(SHARED_DSQ); 42 | } 43 | 44 | s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init) 45 | { 46 | return scx_bpf_create_dsq(SHARED_DSQ, -1); 47 | } 48 | 49 | void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei) 50 | { 51 | UEI_RECORD(uei, ei); 52 | } 53 | 54 | SCX_OPS_DEFINE(simple_ops, 55 | .select_cpu = (void *)simple_select_cpu, 56 | .enqueue = (void *)simple_enqueue, 57 | .dispatch = (void *)simple_dispatch, 58 | .init = (void *)simple_init, 59 | .exit = (void *)simple_exit, 60 | .name = "simple"); --------------------------------------------------------------------------------
/workloads/processing/schedulers/file_checksum/time: -------------------------------------------------------------------------------- 1 | real 2m16.111s 2 | user 0m50.266s 3 | sys 0m3.197s 4 | --------------------------------------------------------------------------------
/workloads/processing/schedulers/git_add_different/time: -------------------------------------------------------------------------------- 1 | real 2m27.562s 2 | user 0m54.134s 3 | sys 0m3.528s 4 | --------------------------------------------------------------------------------
/workloads/processing/schedulers/hotkey_aggregation/time: -------------------------------------------------------------------------------- 1 | real 2m28.875s 2 | user 0m57.257s 3 | sys 0m3.669s 4 | --------------------------------------------------------------------------------
/workloads/processing/schedulers/video_transcode/time: -------------------------------------------------------------------------------- 1 | real 2m29.681s 2 | user 0m39.921s 3 | sys 0m2.143s 4 | --------------------------------------------------------------------------------
/workloads/processing/schedulers/viral_product_analytics/time: -------------------------------------------------------------------------------- 1 | real 2m26.409s 2 | user 0m33.631s 3 | sys 0m1.892s
--------------------------------------------------------------------------------
/workloads/processing/schedulers/file_checksum/time:
--------------------------------------------------------------------------------
1 | real	2m16.111s
2 | user	0m50.266s
3 | sys	0m3.197s
4 | 
--------------------------------------------------------------------------------
/workloads/processing/schedulers/git_add_different/time:
--------------------------------------------------------------------------------
1 | real	2m27.562s
2 | user	0m54.134s
3 | sys	0m3.528s
4 | 
--------------------------------------------------------------------------------
/workloads/processing/schedulers/hotkey_aggregation/time:
--------------------------------------------------------------------------------
1 | real	2m28.875s
2 | user	0m57.257s
3 | sys	0m3.669s
4 | 
--------------------------------------------------------------------------------
/workloads/processing/schedulers/video_transcode/time:
--------------------------------------------------------------------------------
1 | real	2m29.681s
2 | user	0m39.921s
3 | sys	0m2.143s
4 | 
--------------------------------------------------------------------------------
/workloads/processing/schedulers/viral_product_analytics/time:
--------------------------------------------------------------------------------
1 | real	2m26.409s
2 | user	0m33.631s
3 | sys	0m1.892s
4 | 
--------------------------------------------------------------------------------
/workloads/processing/schedulers/vruntime.bpf.c:
--------------------------------------------------------------------------------
1 | /* SPDX-License-Identifier: GPL-2.0 */
2 | /*
3 |  * A simple weighted vruntime scheduler.
4 |  *
5 |  * This scheduler implements weighted vruntime scheduling where tasks are
6 |  * scheduled based on their virtual runtime, providing fairness across
7 |  * different task weights.
8 |  *
9 |  * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
10 |  * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
11 |  * Copyright (c) 2022 David Vernet <dvernet@meta.com>
12 |  */
13 | #include <scx/common.bpf.h>
14 | 
15 | char _license[] SEC("license") = "GPL";
16 | 
17 | static u64 vtime_now;
18 | UEI_DEFINE(uei);
19 | 
20 | #define SHARED_DSQ 0
21 | 
22 | s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
23 | {
24 | 	bool is_idle = false;
25 | 	s32 cpu;
26 | 
27 | 	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
28 | 	if (is_idle) {
29 | 		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
30 | 	}
31 | 
32 | 	return cpu;
33 | }
34 | 
35 | void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
36 | {
37 | 	u64 vtime = p->scx.dsq_vtime;
38 | 
39 | 	if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
40 | 		vtime = vtime_now - SCX_SLICE_DFL;
41 | 
42 | 	scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, enq_flags);
43 | }
44 | 
45 | void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev)
46 | {
47 | 	scx_bpf_dsq_move_to_local(SHARED_DSQ);
48 | }
49 | 
50 | void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
51 | {
52 | 	if (time_before(vtime_now, p->scx.dsq_vtime))
53 | 		vtime_now = p->scx.dsq_vtime;
54 | }
55 | 
56 | void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable)
57 | {
58 | 	p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
59 | }
60 | 
61 | void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
62 | {
63 | 	p->scx.dsq_vtime = vtime_now;
64 | }
65 | 
66 | s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
67 | {
68 | 	return scx_bpf_create_dsq(SHARED_DSQ, -1);
69 | }
70 | 
71 | void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
72 | {
73 | 	UEI_RECORD(uei, ei);
74 | }
75 | 
76 | SCX_OPS_DEFINE(simple_ops,
77 | 	       .select_cpu = (void *)simple_select_cpu,
78 | 	       .enqueue = (void *)simple_enqueue,
79 | 	       .dispatch = (void *)simple_dispatch,
80 | 	       .running = (void *)simple_running,
81 | 	       .stopping = (void *)simple_stopping,
82 | 	       .enable = (void *)simple_enable,
83 | 	       .init = (void *)simple_init,
84 | 	       .exit = (void *)simple_exit,
85 | 	       .name = "simple");
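Note on the weighting in simple_stopping() above: when a task stops running, it is charged virtual time for the slice it actually consumed, scaled inversely by its weight (100 is the default weight, corresponding to nice 0). Written out:

```latex
\Delta v = \frac{\left(\texttt{SCX\_SLICE\_DFL} - \texttt{p->scx.slice}\right) \times 100}{\texttt{p->scx.weight}}
```

So a weight-200 task that burns a full slice is charged only half a slice of vtime, re-sorts into SHARED_DSQ earlier, and ends up with roughly twice the CPU share of a weight-100 task. The clamp in simple_enqueue() caps a waking task's vtime at one slice behind vtime_now, so a task that slept for a long time cannot accumulate credit and then monopolize the CPU.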
--------------------------------------------------------------------------------
/workloads/processing/scripts/desc_result/time:
--------------------------------------------------------------------------------
1 | # time for all files
2 | 
3 | 395s
4 | 
5 | 
--------------------------------------------------------------------------------
/workloads/processing/scripts/install_deps.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Install dependencies for long-tail workload tests
3 | 
4 | set -e
5 | 
6 | echo "Installing dependencies for long-tail workload tests..."
7 | 
8 | # Update package lists
9 | echo "Updating package lists..."
10 | sudo apt-get update -qq
11 | 
12 | # Core utilities (usually already installed)
13 | echo "Installing core utilities..."
14 | sudo apt-get install -y \
15 |     coreutils \
16 |     git \
17 |     parallel \
18 |     gzip
19 | 
20 | # Media processing
21 | echo "Installing media processing tools..."
22 | sudo apt-get install -y \
23 |     ffmpeg \
24 |     pigz
25 | 
26 | # Compression tools
27 | echo "Installing compression tools..."
28 | sudo apt-get install -y \
29 |     zstd
30 | 
31 | # Python and testing tools
32 | echo "Installing Python and testing tools..."
33 | sudo apt-get install -y \
34 |     python3 \
35 |     python3-pip \
36 |     python3-pytest \
37 |     python3-pytest-xdist \
38 |     python3-pandas \
39 |     python3-numpy
40 | 
41 | # Development tools
42 | echo "Installing development tools..."
43 | sudo apt-get install -y \
44 |     g++ \
45 |     make \
46 |     libavcodec-dev \
47 |     libavformat-dev \
48 |     libavutil-dev \
49 |     libswscale-dev \
50 |     libgit2-dev
51 | 
52 | # Optional: DuckDB (if available in repos)
53 | echo "Attempting to install DuckDB..."
54 | if apt-cache search duckdb | grep -q duckdb; then
55 |     sudo apt-get install -y duckdb
56 | else
57 |     echo "DuckDB not available in repositories, skipping..."
58 | fi
59 | 
60 | # Optional: Install missing Python packages via pip
61 | echo "Installing additional Python packages..."
62 | python3 -m pip install --user --upgrade \
63 |     psutil \
64 |     multiprocess || echo "Some pip packages failed to install, continuing..."
65 | 
66 | echo ""
67 | echo "Dependency installation complete!"
68 | echo ""
69 | echo "Installed packages:"
70 | echo "✓ Core utilities: coreutils, git, parallel, gzip"
71 | echo "✓ Media processing: ffmpeg, pigz"
72 | echo "✓ Compression: zstd"
73 | echo "✓ Python: python3, pytest, pandas, numpy"
74 | echo "✓ Development: g++, make, FFmpeg libraries (libavcodec-dev, libavformat-dev, libavutil-dev, libswscale-dev), libgit2-dev"
75 | echo ""
76 | echo "Optional packages attempted:"
77 | echo "- DuckDB (may not be available)"
78 | echo ""
79 | echo "You can now run the test framework with:"
80 | echo "  python3 evaluate_workloads.py"
--------------------------------------------------------------------------------
/workloads/processing/scripts/scheduler_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/processing/scripts/scheduler_comparison.png
--------------------------------------------------------------------------------
/workloads/processing/scripts/scheduler_performance_comparison.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/processing/scripts/scheduler_performance_comparison.pdf
--------------------------------------------------------------------------------
/workloads/pyvsag/Makefile:
--------------------------------------------------------------------------------
1 | # PyVSAG ANN Benchmark Makefile
2 | 
3 | .PHONY: all install test benchmark clean help
4 | 
5 | # Default target
6 | all: install
7 | 
8 | # Install dependencies
9 | install:
10 | 	@echo "Installing PyVSAG benchmark dependencies..."
11 | 	pip install -r requirements.txt
12 | 
13 | # Run basic benchmark
14 | benchmark:
15 | 	@echo "Running PyVSAG scheduler benchmark..."
16 | 	python pyvsag_bench_start.py
17 | 
18 | # Run benchmark with production schedulers only
19 | benchmark-prod:
20 | 	@echo "Running PyVSAG benchmark (production schedulers only)..."
21 | 	python pyvsag_bench_start.py --production-only
22 | 
23 | # Test specific scheduler
24 | test-scheduler:
25 | 	@echo "Testing specific scheduler (use SCHEDULER=name)..."
26 | 	python pyvsag_bench_start.py --scheduler $(SCHEDULER)
27 | 
28 | # Run small-scale test
29 | test:
30 | 	@echo "Running quick test with reduced parameters..."
31 | 	python pyvsag_bench_start.py \
32 | 		--num-elements 5000 \
33 | 		--num-queries 100 \
34 | 		--timeout 60
35 | 
36 | # Check PyVSAG installation
37 | check:
38 | 	@echo "Checking PyVSAG installation..."
39 | 	@python -c "import pyvsag; print(f'PyVSAG version: {pyvsag.__version__}')" || \
40 | 		echo "PyVSAG not installed. Run 'make install' first."
41 | 
42 | # Clean results
43 | clean:
44 | 	@echo "Cleaning benchmark results..."
45 | 	rm -rf results/
46 | 	find . -name "*.pyc" -delete
47 | 	find . -name "__pycache__" -delete
48 | 
49 | # Show help
50 | help:
51 | 	@echo "PyVSAG ANN Benchmark Makefile"
52 | 	@echo ""
53 | 	@echo "Available targets:"
54 | 	@echo "  install        - Install dependencies"
55 | 	@echo "  benchmark      - Run full benchmark"
56 | 	@echo "  benchmark-prod - Run benchmark with production schedulers only"
57 | 	@echo "  test-scheduler - Test specific scheduler (set SCHEDULER=name)"
58 | 	@echo "  test           - Run quick test"
59 | 	@echo "  check          - Check PyVSAG installation"
60 | 	@echo "  clean          - Clean results and cache files"
61 | 	@echo "  help           - Show this help"
62 | 	@echo ""
63 | 	@echo "Examples:"
64 | 	@echo "  make install"
65 | 	@echo "  make benchmark"
66 | 	@echo "  make test-scheduler SCHEDULER=scx_rusty"
67 | 	@echo "  make test"
--------------------------------------------------------------------------------
/workloads/pyvsag/requirements.txt:
--------------------------------------------------------------------------------
1 | pyvsag>=0.15.0
2 | numpy>=1.20.0
3 | pandas>=1.3.0
4 | matplotlib>=3.5.0
5 | seaborn>=0.11.0
--------------------------------------------------------------------------------
/workloads/pyvsag/results/pyvsag_scheduler_comparison.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/pyvsag/results/pyvsag_scheduler_comparison.pdf
--------------------------------------------------------------------------------
/workloads/pyvsag/results_summary.txt:
--------------------------------------------------------------------------------
1 | PyVSAG Vector Database Benchmark Methodology
2 | 
3 | Experimental Setup:
4 | - Vector Database: PyVSAG (Python bindings for VSAG)
5 | - Index Type: HNSW (Hierarchical Navigable Small World)
6 | - Vector Dimension: 128
7 | - Dataset Size: 50,000 vectors
8 | - Query Set: 1,000 random queries
9 | - k-NN Search: k=10 nearest neighbors
10 | 
11 | Index Configuration:
12 | - HNSW max_degree: 16 (max connections per node)
13 | - HNSW ef_construction: 200 (expansion factor during construction)
14 | - HNSW ef_search: 100 (expansion factor during search)
15 | - Distance Metric: L2 (Euclidean distance)
16 | 
17 | Measurement Process:
18 | 1. Data Generation: Random float32 vectors with fixed seed for reproducibility
19 | 2. Index Building: Timed construction of HNSW index structure
20 | 3. Warmup Phase: 10 initial queries to stabilize performance
21 | 4. Search Benchmark:
22 |    - Individual timing for each of 1,000 k-NN queries
23 |    - Records per-query latency for percentile analysis
24 | 5. Metrics Collection:
25 |    - Throughput: Queries Per Second (QPS)
26 |    - Latency: Average, P95, P99 query times (milliseconds)
27 |    - Recall: Accuracy of nearest neighbor retrieval
28 | 
29 | Performance Results:
30 | Throughput (QPS):
31 |   default: 3607.09
32 |   DuplexOS: 3934.57
33 |   Improvement: 9.1%
34 | 
35 | Average Query Latency (ms):
36 |   default: 0.276432
37 |   DuplexOS: 0.253372
38 |   Improvement: 8.3% (lower is better)
39 | 
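The two improvement figures in the summary follow directly from the raw numbers above (a consistency check on the reported values, not additional measurements):

```latex
\frac{3934.57}{3607.09} - 1 \approx 0.091 \;(9.1\%\ \text{QPS gain}),
\qquad
1 - \frac{0.253372}{0.276432} \approx 0.083 \;(8.3\%\ \text{latency reduction})
```

They are also mutually consistent with serial query issue: average latency times throughput gives 0.276432 ms × 3607.09 QPS ≈ 0.997, i.e. roughly one query in flight at a time, which is why the throughput gain tracks the latency gain almost one-for-one.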
--------------------------------------------------------------------------------
/workloads/redis/.gitignore:
--------------------------------------------------------------------------------
1 | redis.log
2 | redis_config_*.conf
3 | dump.rdb
4 | temp*.rdb
5 | redis_test.log
6 | memtier_test.log
7 | 
--------------------------------------------------------------------------------
/workloads/redis/requirements.txt:
--------------------------------------------------------------------------------
1 | psutil>=5.8.0
--------------------------------------------------------------------------------
/workloads/redis/results/memtier_detailed_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/memtier_detailed_comparison.png
--------------------------------------------------------------------------------
/workloads/redis/results/memtier_latency_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/memtier_latency_comparison.png
--------------------------------------------------------------------------------
/workloads/redis/results/memtier_scheduler_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/memtier_scheduler_comparison.png
--------------------------------------------------------------------------------
/workloads/redis/results/memtier_throughput_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/memtier_throughput_comparison.png
--------------------------------------------------------------------------------
/workloads/redis/results/redis_combined_performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/redis_combined_performance.png
--------------------------------------------------------------------------------
/workloads/redis/results/redis_comparison.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/redis_comparison.pdf
--------------------------------------------------------------------------------
/workloads/redis/results/redis_data_size_sweep.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/redis_data_size_sweep.png
--------------------------------------------------------------------------------
/workloads/redis/results/redis_latency_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/redis_latency_comparison.png
--------------------------------------------------------------------------------
/workloads/redis/results/redis_throughput_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/redis/results/redis_throughput_comparison.png
--------------------------------------------------------------------------------
/workloads/rocksdb/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all clone build configure benchmark clean install deps
2 | 
3 | # RocksDB configuration
4 | ROCKSDB_REPO = https://github.com/facebook/rocksdb.git
5 | ROCKSDB_DIR = rocksdb
6 | ROCKSDB_BRANCH = main
7 | BUILD_THREADS = $(shell nproc)
8 | 
9 | all: deps clone build configure
10 | 
11 | deps:
12 | 	@echo "Installing RocksDB dependencies..."
13 | 	sudo apt-get update
14 | 	sudo apt-get install -y build-essential libgflags-dev libsnappy-dev \
15 | 		zlib1g-dev libbz2-dev liblz4-dev libzstd-dev cmake \
16 | 		libgtest-dev libtbb-dev
17 | 
18 | clone:
19 | 	@echo "Cloning RocksDB repository..."
20 | 	if [ ! -d $(ROCKSDB_DIR) ]; then \
21 | 		git clone --depth=1 --branch=$(ROCKSDB_BRANCH) $(ROCKSDB_REPO) $(ROCKSDB_DIR); \
22 | 	fi
23 | 
24 | build: clone
25 | 	@echo "Building RocksDB..."
26 | 	cd $(ROCKSDB_DIR) && make -j$(BUILD_THREADS) static_lib
27 | 	cd $(ROCKSDB_DIR) && make -j$(BUILD_THREADS) db_bench
28 | 
29 | configure:
30 | 	@echo "Configuring RocksDB..."
31 | 	mkdir -p /tmp/rocksdb_data
32 | 
33 | benchmark:
34 | 	@echo "Running RocksDB benchmark..."
35 | 	python3 rocksdb_benchmark.py
36 | 
37 | quick-benchmark:
38 | 	@echo "Running quick RocksDB benchmark..."
39 | 	cd $(ROCKSDB_DIR) && ./db_bench \
40 | 		--benchmarks=fillseq,readrandom \
41 | 		--num=100000 \
42 | 		--db=/tmp/rocksdb_data \
43 | 		--disable_wal=true \
44 | 		--statistics=false
45 | 
46 | clean:
47 | 	@echo "Cleaning up..."
48 | 	cd $(ROCKSDB_DIR) && make clean || true
49 | 	rm -rf /tmp/rocksdb_data
50 | 	rm -rf $(ROCKSDB_DIR)
51 | 
52 | install: build
53 | 	@echo "Installing RocksDB..."
54 | 	cd $(ROCKSDB_DIR) && sudo make install
55 | 
56 | test: build
57 | 	@echo "Running RocksDB tests..."
58 | 	cd $(ROCKSDB_DIR) && make -j$(BUILD_THREADS) check
--------------------------------------------------------------------------------
/workloads/rocksdb/requirements.txt:
--------------------------------------------------------------------------------
1 | psutil>=5.8.0
--------------------------------------------------------------------------------
/workloads/vllm/.gitignore:
--------------------------------------------------------------------------------
1 | # Datasets
2 | datasets/
3 | *.json
4 | *.csv
5 | *.txt
6 | !benchmarks/sonnet.txt
7 | !requirements/*.txt
8 | 
9 | # Build artifacts
10 | build/
11 | dist/
12 | *.egg-info/
13 | *.so
14 | __pycache__/
15 | *.pyc
16 | *.pyo
17 | 
18 | # Virtual environments
19 | venv/
20 | env/
21 | .env
22 | vllm_env/
23 | 
24 | # IDE
25 | .vscode/
26 | .idea/
27 | *.swp
28 | *.swo
29 | 
30 | # Logs
31 | *.log
32 | logs/
33 | results/
34 | 
35 | # Cache
36 | .cache/
37 | *.cache
38 | 
39 | # Temporary files
40 | *.tmp
41 | temp/
42 | tmp/
43 | 
44 | # Model weights
45 | models/
46 | checkpoints/
47 | *.bin
48 | *.pt
49 | *.pth
50 | *.safetensors
51 | 
52 | # Benchmark outputs
53 | benchmark_results/
54 | *.out
55 | *.err
--------------------------------------------------------------------------------
/workloads/vllm/Makefile:
--------------------------------------------------------------------------------
1 | # Simplified Makefile for vLLM benchmarks
2 | 
3 | VENV = ~/workspace/.venv/bin/activate
4 | 
5 | .PHONY: bench download-datasets
6 | 
7 | # Run vLLM benchmark using CLI
8 | bench:
9 | 	@. $(VENV) && vllm bench serve \
10 | 		--model Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8 \
11 | 		--dataset-name sharegpt \
12 | 		--num-prompts 1000 \
13 | 		--dataset-path /home/yunwei37/workspace/schedcp/workloads/vllm/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
14 | 
15 | # Download benchmark datasets
16 | download-datasets:
17 | 	@mkdir -p datasets
18 | 	@if [ ! -f "datasets/ShareGPT_V3_unfiltered_cleaned_split.json" ]; then \
-f "datasets/ShareGPT_V3_unfiltered_cleaned_split.json" ]; then \ 19 | echo "Downloading ShareGPT dataset..."; \ 20 | wget -P datasets https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json; \ 21 | fi 22 | @echo "Dataset ready in ./datasets/" 23 | -------------------------------------------------------------------------------- /workloads/vllm/llama.md: -------------------------------------------------------------------------------- 1 | # llama 2 | 3 | ## gpt-oss-120b 4 | 5 | nsys profile --trace=cuda,nvtx,osrt ~/workspace/llama.cpp/build/bin/llama-server -hf unsloth/gpt-oss-120b-GGUF:Q4_K_M -ncmoe 64 6 | 7 | nsys profile --trace=cuda,cuda-hw --cuda-event-trace=true --cuda-graph=graph ~/workspace/llama.cpp/build/bin/llama-cli -hf unsloth/gpt-oss-120b-GGUF:Q4_K_M -ncmoe 64 8 | 9 | nsys profile --trace=cuda,cuda-hw --cuda-event-trace=true --cuda-graph=node ~/workspace/llama.cpp/build/bin/llama-cli -m /home/yunwei37/.cache/llama.cpp/mradermacher_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER-i1-GGUF_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER.i1-Q4_K_M.gguf 10 | 11 | i1-Q6_K.gguf 12 | 13 | nsys profile --trace=cuda,cuda-hw --cuda-event-trace=true --cuda-graph=node ~/workspace/llama.cpp/build/bin/llama-cli -m /home/yunwei37/.cache/llama.cpp/mradermacher_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER-i1-GGUF_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER.i1-Q6_K.gguf 14 | 15 | 16 | nsys profile --trace=cuda,nvtx,osrt --cuda-graph=node llama.cpp/build/bin/llama-server -hf unsloth/gpt-oss-120b-GGUF:Q4_K_M -ncmoe 64 17 | 18 | nsys profile --trace=cuda,nvtx,osrt llama.cpp/build/bin/llama-server -m /home/yunwei37/.cache/llama.cpp/mradermacher_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER-i1-GGUF_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER.i1-Q4_K_M.gguf -c 40000 19 | 20 | nsys profile --trace=cuda,nvtx,osrt --cuda-graph=node llama.cpp/build/bin/llama-server -m /home/yunwei37/.cache/llama.cpp/mradermacher_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER-i1-GGUF_Qwen3-42B-A3B-2507-Thinking-Abliterated-uncensored-TOTAL-RECALL-v2-Medium-MASTER-CODER.i1-Q4_K_M.gguf -c 40000 21 | 22 | 23 | 24 | ~/workspace/llama.cpp/build/bin/llama-server --gpt-oss-120b-default -ncmoe 32 -------------------------------------------------------------------------------- /workloads/vllm/vllm_latency_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eunomia-bpf/schedcp/45d6ed8fdc4066282cc65e40e5772d2f5a8eaf45/workloads/vllm/vllm_latency_comparison.png --------------------------------------------------------------------------------