├── .github └── workflows │ └── main.yml ├── .gitignore ├── ChangeLog.md ├── Dockerfile ├── LICENSE.BSD ├── LICENSE.GPLv2 ├── NOTICE ├── README.md ├── README_zh.md ├── boundary ├── __init__.py ├── analyze.py ├── collect.py └── extract.py ├── cli.py ├── configs ├── 3.10 │ ├── boundary.yaml │ ├── dynamic_springboard.patch │ ├── post_extract.patch │ └── pre_extract.patch ├── 4.19 │ ├── boundary.yaml │ ├── dynamic_springboard.patch │ └── post_extract.patch └── 5.10 │ ├── boundary.yaml │ ├── dynamic_springboard.patch │ ├── dynamic_springboard_2.patch │ └── post_extract.patch ├── docs ├── Advanced-Features.md ├── Support-various-Linux-distros.md └── Working-without-rpm-or-srpm.md ├── examples └── rpm_test_example.diff ├── module-contrib ├── hotfix_conflict_check ├── plugsched.service ├── scheduler-installer ├── scheduler.spec └── version ├── src ├── .gitignore ├── Makefile ├── Makefile.plugsched ├── export_jump.h ├── head_jump.h ├── helper.h ├── main.c ├── mempool.h ├── sched_rebuild.c ├── scheduler.lds ├── stack_check.h └── tainted_functions.h ├── tests ├── build_case ├── bundles │ ├── ci │ ├── reboot │ └── release ├── libs │ ├── catch_error │ └── working_dir ├── prep_env ├── run_test ├── test_bare_performance │ ├── assert │ └── patch.diff ├── test_cpu_throttle │ ├── assert │ └── patch.diff ├── test_domain_rebuild │ ├── assert │ └── patch.diff ├── test_mem_pressure │ ├── assert │ └── patch.diff ├── test_public_var │ ├── assert │ └── patch.diff ├── test_reboot │ ├── assert │ ├── patch.diff │ └── runonce ├── test_sched_syscall │ ├── assert │ └── patch.diff ├── test_schedule │ ├── assert │ └── patch.diff └── test_var_uniformity │ ├── assert │ └── patch.diff └── tools ├── compile_and_create_rpm.sh ├── springboard_search.sh ├── symbol_resolve ├── Makefile ├── symbol_resolve.cpp └── undefined_functions.h └── yaml-diff.py /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: TagWhenVersionBump 2 | 3 | on: 4 | 
push: 5 | paths: 6 | - 'module-contrib/version' 7 | 8 | jobs: 9 | tag: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Read version 14 | run: | 15 | export VERSION=$(cat ./module-contrib/version | awk '{print $NF}') 16 | echo "VERSION=v$VERSION" >> $GITHUB_ENV 17 | - name: Create tag 18 | uses: rickstaa/action-create-tag@v1 19 | with: 20 | tag: ${{ env.VERSION }} 21 | message: "" 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.rej 2 | /rpmbuild 3 | tools/symbol_resolve/symbol_resolve 4 | *.o 5 | *.pyc 6 | *.swp 7 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 | Release 1.3.0 2 | --- 3 | 4 | ## Features 5 | 6 | - Move sidecar files to independent dir under mod/. 7 | - Add conflict check with plugbpf. 8 | - Improve Dockerfile and dependent libraries. 9 | - Standardized spec of scheduler package. 10 | 11 | ## Bugfix 12 | 13 | - Fix a bug when extern var inside a function. 14 | - Prevent compiler optimization of static functions in sidecar. 15 | - Fix a bug when building with multiple sidecar files. 16 | - Fix a bug when handling multiple lines of EXPORT_SYMBOL in macro. 17 | - Use the right rule to reference symbols in module. 18 | - Fixed a bug with __mod_ symbol reference count. 19 | - Set the default value for the new configs. 20 | - Don't collect syscalls from sidecars. 21 | - Don't keep inline function's code if it's not optimized. 22 | - Fix the bug that tasks enqueue between clear_sched_state() and rebuild_sched_state(). 23 | 24 | ## Docs 25 | 26 | - Add SPDX to all source files. 27 | - Update README about the location information of the RPM package storage and the 28 | system versions that plugsched supports by default. 
29 | 30 | ## Tests 31 | 32 | - Rebuild the container image when testing. 33 | - Adapt to use colorlog (instead of coloredlogs) with main programs. 34 | 35 | Release 1.2.0 36 | --- 37 | 38 | ## Features 39 | 40 | - Migrate from Python2 to Python3. 41 | - Add tool compile_and_create_rpm.sh to make creating rpm from patch easier. 42 | - Bump AnolisOS base docker image from 8.4 to 8.6. 43 | - Speed up plugsched-cli init by optimizing the data collection. 44 | - Allow more code to be customized by modularizing header files too. 45 | - Generalize "boundary/" to be reused by the scheduler, eBPF, etc. 46 | - Move sidecar to the init stage, where it takes part in modularization, and improve its user experience. 47 | - Support the kernel 5.10. 48 | - Support the kernel 3.10. (x86_64 only) 49 | - Reduce unnecessary callbacks by migrating sched_class for every task during update and rollback. 50 | - Add yield and sched_exec to interface function set. 51 | - Print logging to stdio for better user experience when building the scheduler. 52 | - Improve stability of extraction, avoiding nasty compiling errors. (tested with kernel/{bpf,sched} and 5.10, 4.19) 53 | - Improve readability of extracted code by deleting __init function in unused macro branches. 54 | - Support explicitly referring to the module's and vmlinux's symbols with the __mod_ and __orig_ prefix respectively. 55 | - Add conflict detection of ftrace or others hooked functions' header. 56 | 57 | 58 | # Bugfix 59 | 60 | - Fix a compiling error of stack_trace_save_tsk function passing a wrong type of argument. 61 | - Fix the mempool error that the functions or variables in headers should be static to avoid conflicts with kernel code. 62 | - Fix the unknown symbol error when installing module. Add the __used attribute to function pointers. 63 | - Fix the bug of extraction of va_list and "..." parameter in export_jump.h. 64 | - Fix the bug that some internal functions may be exported functions. Remove them outside. 
65 | - Fix removing cgroup file twice in syscall test case, that can cause test to fail. 66 | - Fix panic and compiling bugs of stack-protector for aarch64. 67 | - Fix test/ bug catch_error print error when exitcode=0 and ENOPERM bug when chrt in test_sched_syscall. 68 | - Fix tainted_functions that includes __init functions. 69 | - Fix the backslashes cannot be deleted when deleting code in extraction stage. 70 | - Fix some bugs of the unnamed unions, enums and structures, that can cause the building errors. 71 | - Fix the bug where undefined function sets contained optimized functions, which can cause installation failure. 72 | - Fix some bugs of plugsched service. Keep service active and remove daemon-reload after starting service. 73 | - Fix the bug that some variables only used in __init function will be removed, because scheduler may use them. 74 | - Fix some static interface functions been optimized that will be removed. 75 | - Fix both strong and weak symbol existing, and treat overridden weak symbols as outsiders. 76 | - Fix two race conditions between redirecting and state rebuilding, that may cause panic. 77 | - Fix the bug that num_online_cpus may be changed between parallel_state_check_init and stop_machine, which may cause a system hang. 78 | - Fix a panic bug that stack check exits too early when insmod/rmmod. 79 | - Fix the stack checker not checking the __sched functions, which may cause panic. 80 | - Fix the bug about container that after installing some packages, containers refuse "podman exec -it". 81 | - Forbid redirecting mangled functions. 82 | - Fix the rebuilding of dying task that cannot be found in init_tasklist, which can cause panic. 83 | 84 | # Docs 85 | 86 | - Fix some typos. 87 | - Update documentation for new sidecar implementation. 88 | 89 | # Tests 90 | 91 | - Remove test case for sched_rt_runtime_us. 92 | - Add test case for stack pivot. 93 | - Add test case for memory pressure. 94 | - Add test case for reboot. 
95 | - Add test case for bare package run-time performance. 96 | 97 | # Others 98 | 99 | - Using inner function __sched_setscheduler() to replace the interface function sched_setscheduler(). 100 | 101 | Release 1.1.1 102 | --- 103 | 104 | # BugFixes 105 | 106 | - Fix wrong list of optimized functions caused by the bug `makefile` and `module.symvers` get overwritten when `cli init`. 107 | 108 | Release 1.1.0 109 | --- 110 | 111 | ## Features 112 | 113 | - Add a test framework. And integrate 5 automated test cases into CI. 114 | - Add full support for aarch64. 115 | - Add full support for AnolisOS 8. 116 | - Add the per-cpu mempool infrastructure. It could be used like Linux kernel's per-cpu variable, as an extension to the existing mempool. 117 | - Support installing multiple versions of scheduler in multiple kernel versions system. 118 | 119 | ## BugFixes 120 | 121 | - Fix installing failure bug in the Quick Start, that it used to break once cloud OS images upgrade. 122 | - Fix installing failure bug by ignoring conflicts among hotfixes themselves, instead of with Plugsched. 123 | - Fix installing failure bug that after rebooting, scheduler won't be loaded. 124 | - Fix memory leak when using mempool. 125 | - Fix user-unfriendly bug that experimental BAD scheduler.rpm (maybe poorly programmed by user) couldn't be erased. 126 | - Fix potential panic bug caused by bottom-half of \_\_schedule by adding callees of the bottom-half as interfaces. 127 | - Fix potential panic bug caused by GCC mangling (.isra, .constprop, .cold, etc.). 128 | - Fix potential panic bug caused by kernel modules (mainly KVM), because modules didn't participate in modularization. 129 | - Fix conflict checking with hotfixes. Now we check it each time scheduler is loaded, rather than only when installing rpm. 130 | - Fix some warnings with no harm. 
131 | 132 | ## Docs 133 | 134 | - Clarify how Plugsched deals with data-upgrade (Rebuild or inherit or reinitialize), now this technique is illustrated clearly. 135 | - Clarify that users shouldn't modify the size and semantic of data structure or its fields. 136 | - Enrich the Quick Start, let users get hands on with sched-feature too. 137 | - Update the architecture description figure. 138 | 139 | # Other improvements 140 | 141 | - Simplify sched\_boundary.yaml by removing useless keys (sched\_outsider & force\_outsider). Now users won't be confused about them. 142 | - Improve user experience when debugging by outputting `make` result to the screen. Now users locate compiling errors more easily. 143 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | From openanolis/anolisos:latest 4 | 5 | RUN yum install epel-aliyuncs-release -y && \ 6 | yum install python3 python3-pip gcc gcc-c++ libyaml-devel -y && \ 7 | yum install python3-sh python3-docopt python3-pyyaml python3-colorlog -y 8 | RUN yum install make bison flex \ 9 | gcc-plugin-devel \ 10 | systemd git \ 11 | elfutils-libelf-devel openssl openssl-devel \ 12 | elfutils-devel-static \ 13 | glibc-static zlib-static \ 14 | libstdc++-static \ 15 | platform-python-devel \ 16 | rpm-build rsync bc perl -y && \ 17 | yum install gcc-python-plugin --enablerepo=Plus -y && \ 18 | yum clean all 19 | 20 | COPY . /usr/local/lib/plugsched/ 21 | RUN ln -s /usr/local/lib/plugsched/cli.py /usr/local/bin/plugsched-cli 22 | -------------------------------------------------------------------------------- /LICENSE.BSD: -------------------------------------------------------------------------------- 1 | Copyright (c) Individual contributors. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of PyCA Cryptography nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | ======================================================== 2 | plugsched is a linux kernel hotplugable scheduler SDK 3 | Copyright (c) 2019-2022, Alibaba Group Holding Limited. 4 | Licensed under the GPLv2+ License or BSD-3-Clause License. 
5 | ======================================================== 6 | This product reads and patches kernel/ folder linux kernel. 7 | All files in kernel/ are licensed under GPLv2 License. 8 | ======================================================== 9 | This product contains various third-party components under other open source licenses. 10 | This section summarizes those components and their licenses. 11 | ---------------------------------- 12 | elfutils/libelf 13 | Licensed under GPLv2+/LGPLv3+ License 14 | ---------------------------------- 15 | sh 16 | Copyright (C) 2011-2012 by Andrew Moffat 17 | Licensed under the MIT License 18 | ---------------------------------- 19 | docopt 20 | Copyright (c) 2012 Vladimir Keleshev, 21 | Licensed under the MIT License 22 | ---------------------------------- 23 | gcc-python-plugin 24 | Copyright 2011-2019 David Malcolm 25 | Copyright 2011-2019 Red Hat, Inc. 26 | Licensed under the GPLv3+ License 27 | ---------------------------------- 28 | PyYAML 29 | Copyright (c) 2017-2021 Ingy döt Net 30 | Copyright (c) 2006-2016 Kirill Simonov 31 | Licensed under the MIT License 32 | ---------------------------------- 33 | coloredlogs 34 | Copyright (c) 2020 Peter Odding 35 | Licensed under the MIT License 36 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 | ## Plugsched: Linux 内核调度器子系统热升级 2 | plugsched 是 Linux 内核调度器子系统热升级的 SDK,它可以实现在不重启系统、应用的情况下动态替换调度器子系统,毫秒级 downtime 。plugsched 可以对生产环境中的内核调度特性动态的进行增、删、改,以满足不同场景或应用的需求,且支持回滚。 3 | 4 | ## Motivation 5 | * **应用场景不同,最佳调度策略不同:** 在云场景下,调度策略的优化比较复杂,不存在“一劳永逸”的策略。因此,允许用户定制调度器用于不同的场景是必要的。 6 | * **调度器迭代慢:** Linux 内核经过很多年长时间的更新迭代,它的代码变得越来越繁重,而调度器是内核最核心的子系统之一,它的结构复杂,与其它子系统紧密耦合,这使得开发和调试变得越发困难。Linux 很少增加新的调度类,尤其是不太可能接受非通用或场景针对型的调度器。plugsched 可以让调度器与内核解耦 ,开发人员可以只关注调度器的迭代开发。 7 | * **内核升级困难:** 
调度器内嵌在内核中,因此应用调度器的修改需要更新内核。内核发布周期通常是数月之久,这将导致新的调度器无法及时应用在系统中。再者,要在集群中升级新内核,涉及迁移业务和停机升级,对业务方来说代价昂贵。 8 | * **无法升级子系统:** kpatch 和 livepatch 是函数粒度的热升级方案,可表达能力较弱,不能实现复杂的代码改动;对于 eBPF,当前调度器还不支持 ebpf hook,将来即使支持,也只是局部策略的修改。 9 | 10 | ## How it works 11 | 调度器子系统在内核中并非是一个独立的模块,而是内嵌在内核中,与内核其它部分紧密相连。plugsched 采用“模块化”的思想:它提供了边界划分程序,确定调度器子系统的边界,把调度器从内核代码中提取到独立的目录中,开发人员可对提取出的调度器代码进行修改,然后编译成新的调度器内核模块,动态替换内核中旧的调度器。 12 | 13 | 对于函数而言,它对外呈现了一些接口函数。通过替换内核中的这些函数,内核就可以绕过原有的执行逻辑进入新的调度器模块中执行,即可完成函数的升级。在模块中的函数要么是接口函数,要么是内部函数,其它函数都是外部函数。 14 | 15 | 对于数据而言,plugsched 将数据分为私有数据和共享数据两类。私有数据在模块内部独立分配内存空间,而共享数据在模块和内核之间共享内存。对于全局变量,既可以通过重新定义的方式将它转化为私有数据,也可以通过声明的方式转化为共享数据。默认情况下,static 全局变量被标记为私有数据,non-static 全局变量被标记为共享数据;但为了让系统更好的工作,我们在边界配置文件中手动调整了部分全局变量的分类。 16 | 17 | 在热升级过程中,数据状态同步是一个核心问题。根据数据的状态是否需要重建,将数据分为核心数据和非核心数据。核心数据包括:rq, cfs_rq, rt_rq, dl_rq, cfs_bandwidth, sched_class, sysfs, debugfs, sched_features, timer;其余为非核心数据,包括: sched_domain_topology, task_group_cache, 以及调度器相关的 sysctls, tracepoint 和 cpumask 等数据。Plugsched 采用 sched rebuild 技术重建调度器的核心数据状态;对于非核心的数据,私有数据不需要同步状态,共享数据自动继承状态,都不需要额外处理。通用的状态重建方案,巧妙的解决了状态同步问题。 18 | 19 | | | 核心数据 | 非核心数据 | 20 | |--------|:----------:|:------------:| 21 | | 私有 | 重建 | 重新初始化 | 22 | | 共享 | 重建 | 继承 | 23 | 24 | **值得注意的是,不能随意修改该结构体的大小和结构体成员的语义,如果需要添加新的成员,建议使用结构体中预先定义的保留字段。** 25 | 26 | ### 边界提取 27 | 调度器本身并不是模块,因此需要明确调度器的边界才能将它模块化,边界划分程序根据边界配置信息从内核源代码中将调度器模块的代码提取出来。边界配置信息主要包含代码文件、接口函数等信息。最终将边界内的代码提取到独立的目录中,主要分为以下过程: 28 | * 信息收集 29 | 30 | 在 Linux Kernel 编译过程中,使用 gcc-python-plugin 收集边界划分相关的信息,比如符号名、位置信息、符号属性及函数调用关系等; 31 | * 边界分析 32 | 33 | 对收集的信息进行分析,根据边界配置文件,计算调度器模块的代码和数据的边界,明确哪些函数、数据在调度器边界内部; 34 | * 代码提取 35 | 36 | 再次使用 gcc-python-plugin 将边界内的代码提取到 kernel/sched/mod 目录作为调度器模块的 code base。 37 | 38 | ### 调度器模块开发 39 | 边界提取之后,调度器模块的代码被放到了独立的目录中,开发人员可修改目录中的调度器代码,根据场景定制调度器,开发过程的注意事项请看 [Limitations](#limitations) 小结。 40 | 41 | ### 编译及安装调度器 42 | 开发过程结束后,调度器模块代码与加载/卸载及其它相关功能的程序编译成内核模块,并生成调度器rpm包。安装后将会替换掉内核中原有的调度器,安装过程会经历以下几个关键过程: 43 | * **符号重定位:** 对模块中的 undefined 符号进行重定位; 44 | * 
**栈安全检查:** 类似于 kpatch,函数替换前必须进行栈安全检查,否则会出现宕机的风险。plugsched 对栈安全检查进行了并行优化,提升了栈安全检查的效率,降低了停机时间; 45 | * **接口函数替换:** 用模块中的接口函数动态替换内核中的函数; 46 | * **调度器状态重建:** 采用通用方案自动同步新旧调度器的状态,极大的简化数据状态的一致性维护工作。 47 | 48 | ![architecture_zh](https://user-images.githubusercontent.com/33253760/161361451-e1bdae1e-d7b4-4811-a002-9f1f0e005281.png) 49 | 50 | ## Use Cases 51 | 1. 快速开发、验证、上线新特性,稳定后放入内核主线; 52 | 2. 针对不同业务场景做定制优化,以 RPM 包的形式发布和维护非通用调度器特性; 53 | 3. 统一管理调度器热补丁,避免多个热补丁之间的冲突而引发故障; 54 | 55 | ## Quick Start 56 | 目前,plugsched 默认支持 Anolis OS 8.6 ANCK 系统发行版,其它系统需要[调整边界配置](./docs/Support-various-Linux-distros.md)。为了减轻搭建运行环境的复杂度,我们提供了的容器镜像和 Dockerfile,开发人员不需要自己去搭建开发环境。为了方便演示,这里购买了一台阿里云 ECS(64CPU + 128GB),并安装 Anolis OS 8.6 ANCK 系统发行版,我们将会对内核调度器进行热升级。 57 | 58 | 1. 登陆云服务器后,先安装一些必要的基础软件包: 59 | ```shell 60 | # yum install anolis-repos -y 61 | # yum install yum-utils podman kernel-debuginfo-$(uname -r) kernel-devel-$(uname -r) --enablerepo=Plus-debuginfo --enablerepo=Plus -y 62 | ``` 63 | 64 | 2. 创建临时工作目录,下载系统内核的 SRPM 包: 65 | ```shell 66 | # mkdir /tmp/work && cd /tmp/work 67 | # yumdownloader --source kernel-$(uname -r) --enablerepo=Plus 68 | ``` 69 | 70 | 3. 启动并进入容器: 71 | ```shell 72 | # podman run -itd --name=plugsched -v /tmp/work:/tmp/work -v /usr/src/kernels:/usr/src/kernels -v /usr/lib/debug/lib/modules:/usr/lib/debug/lib/modules plugsched-registry.cn-hangzhou.cr.aliyuncs.com/plugsched/plugsched-sdk 73 | # podman exec -it plugsched bash 74 | # cd /tmp/work 75 | ``` 76 | 77 | 4. 提取内核源码: 78 | ```shell 79 | # uname_r=$(uname -r) 80 | # plugsched-cli extract_src kernel-${uname_r%.*}.src.rpm ./kernel 81 | ``` 82 | 83 | 5. 进行边界划分与提取: 84 | ```shell 85 | # plugsched-cli init $(uname -r) ./kernel ./scheduler 86 | ``` 87 | 88 | 6. 
提取后的调度器模块代码在 ./scheduler/kernel/sched/mod 中, 新增一个 sched_feature 并打包生成 rpm 包: 89 | ```diff 90 | diff --git a/scheduler/kernel/sched/mod/core.c b/scheduler/kernel/sched/mod/core.c 91 | index 9f16b72..21262fd 100644 92 | --- a/scheduler/kernel/sched/mod/core.c 93 | +++ b/scheduler/kernel/sched/mod/core.c 94 | @@ -3234,6 +3234,9 @@ static void __sched notrace __schedule(bool preempt) 95 | struct rq *rq; 96 | int cpu; 97 | 98 | + if (sched_feat(PLUGSCHED_TEST)) 99 | + printk_once("I am the new scheduler: __schedule\n"); 100 | + 101 | cpu = smp_processor_id(); 102 | rq = cpu_rq(cpu); 103 | prev = rq->curr; 104 | diff --git a/scheduler/kernel/sched/mod/features.h b/scheduler/kernel/sched/mod/features.h 105 | index 4c40fac..8d1eafd 100644 106 | --- a/scheduler/kernel/sched/mod/features.h 107 | +++ b/scheduler/kernel/sched/mod/features.h 108 | @@ -1,4 +1,6 @@ 109 | /* SPDX-License-Identifier: GPL-2.0 */ 110 | +SCHED_FEAT(PLUGSCHED_TEST, false) 111 | + 112 | /* 113 | * Only give sleepers 50% of their service deficit. This allows 114 | * them to run sooner, but does not allow tons of sleepers to 115 | ``` 116 | ```shell 117 | # plugsched-cli build /tmp/work/scheduler 118 | ``` 119 | 120 | 7. 将生成的 rpm 包拷贝到宿主机,退出容器,查看当前 sched_features: 121 | ```text 122 | # uname_r=$(uname -r) 123 | # cp /tmp/work/scheduler/working/rpmbuild/RPMS/x86_64/scheduler-xxx-${uname_r%.*}.yyy.x86_64.rpm /tmp/work/scheduler-xxx.rpm 124 | # exit 125 | exit 126 | # cat /sys/kernel/debug/sched_features 127 | GENTLE_FAIR_SLEEPERS START_DEBIT NO_NEXT_BUDDY LAST_BUDDY CACHE_HOT_BUDDY WAKEUP_PREEMPTION NO_HRTICK NO_DOUBLE_TICK NONTASK_CAPACITY TTWU_QUEUE NO_SIS_AVG_CPU SIS_PROP NO_WARN_DOUBLE_CLOCK RT_PUSH_IPI RT_RUNTIME_SHARE NO_LB_MIN ATTACH_AGE_LOAD WA_IDLE WA_WEIGHT WA_BIAS NO_WA_STATIC_WEIGHT UTIL_EST ID_IDLE_AVG ID_RESCUE_EXPELLEE NO_ID_EXPELLEE_NEVER_HOT NO_ID_LOOSE_EXPEL ID_LAST_HIGHCLASS_STAY 128 | ``` 129 | 130 | 8. 
安装调度器包,且新增了一个 PLUGSCHED_TEST sched_feature(关闭状态): 131 | ```text 132 | # rpm -ivh /tmp/work/scheduler-xxx.rpm 133 | # lsmod | grep scheduler 134 | scheduler 503808 1 135 | # dmesg | tail -n 10 136 | [ 2186.213916] cni-podman0: port 1(vethfe1a04fa) entered forwarding state 137 | [ 6092.916180] Hi, scheduler mod is installing! 138 | [ 6092.923037] scheduler: total initialization time is 6855921 ns 139 | [ 6092.923038] scheduler module is loading 140 | [ 6092.924136] scheduler load: current cpu number is 64 141 | [ 6092.924137] scheduler load: current thread number is 667 142 | [ 6092.924138] scheduler load: stop machine time is 249471 ns 143 | [ 6092.924138] scheduler load: stop handler time is 160616 ns 144 | [ 6092.924138] scheduler load: stack check time is 85916 ns 145 | [ 6092.924139] scheduler load: all the time is 1097321 ns 146 | # cat /sys/kernel/debug/sched_features 147 | NO_PLUGSCHED_TEST GENTLE_FAIR_SLEEPERS START_DEBIT NO_NEXT_BUDDY LAST_BUDDY CACHE_HOT_BUDDY WAKEUP_PREEMPTION NO_HRTICK NO_DOUBLE_TICK NONTASK_CAPACITY TTWU_QUEUE NO_SIS_AVG_CPU SIS_PROP NO_WARN_DOUBLE_CLOCK RT_PUSH_IPI RT_RUNTIME_SHARE NO_LB_MIN ATTACH_AGE_LOAD WA_IDLE WA_WEIGHT WA_BIAS NO_WA_STATIC_WEIGHT UTIL_EST ID_IDLE_AVG ID_RESCUE_EXPELLEE NO_ID_EXPELLEE_NEVER_HOT NO_ID_LOOSE_EXPEL ID_LAST_HIGHCLASS_STAY 148 | ``` 149 | 150 | 9. 打开新的 sched_feature,“I am the new scheduler: __schedule” 信息出现在 dmesg 日志中: 151 | ```text 152 | # echo PLUGSCHED_TEST > /sys/kernel/debug/sched_features 153 | # dmesg | tail -n 5 154 | [ 6092.924138] scheduler load: stop machine time is 249471 ns 155 | [ 6092.924138] scheduler load: stop handler time is 160616 ns 156 | [ 6092.924138] scheduler load: stack check time is 85916 ns 157 | [ 6092.924139] scheduler load: all the time is 1097321 ns 158 | [ 6512.539300] I am the new scheduler: __schedule 159 | ``` 160 | 161 | 10. 
卸载调度器包后,新的 sched_feature 被删除: 162 | ```text 163 | # rpm -e scheduler-xxx 164 | # dmesg | tail -n 8 165 | [ 6717.794923] scheduler module is unloading 166 | [ 6717.809110] scheduler unload: current cpu number is 64 167 | [ 6717.809111] scheduler unload: current thread number is 670 168 | [ 6717.809112] scheduler unload: stop machine time is 321757 ns 169 | [ 6717.809112] scheduler unload: stop handler time is 142844 ns 170 | [ 6717.809113] scheduler unload: stack check time is 74938 ns 171 | [ 6717.809113] scheduler unload: all the time is 14185493 ns 172 | [ 6717.810189] Bye, scheduler mod has be removed! 173 | # 174 | # cat /sys/kernel/debug/sched_features 175 | GENTLE_FAIR_SLEEPERS START_DEBIT NO_NEXT_BUDDY LAST_BUDDY CACHE_HOT_BUDDY WAKEUP_PREEMPTION NO_HRTICK NO_DOUBLE_TICK NONTASK_CAPACITY TTWU_QUEUE NO_SIS_AVG_CPU SIS_PROP NO_WARN_DOUBLE_CLOCK RT_PUSH_IPI RT_RUNTIME_SHARE NO_LB_MIN ATTACH_AGE_LOAD WA_IDLE WA_WEIGHT WA_BIAS NO_WA_STATIC_WEIGHT UTIL_EST ID_IDLE_AVG ID_RESCUE_EXPELLEE NO_ID_EXPELLEE_NEVER_HOT NO_ID_LOOSE_EXPEL ID_LAST_HIGHCLASS_STAY 176 | ``` 177 | **注意:不可以用“rmmod”命令直接卸载调度器模块,应使用“rpm 或 yum”标准命令卸载调度器包。** 178 | 179 | ## FAQ 180 | **Q: 默认边界配置下, 边界划分后的调度器模块里面有什么东西?** 181 | 182 | 包含以下内容: 183 | 184 | - [ ] autogroup 185 | - [ ] cpuacct 186 | - [ ] cputime 187 | - [X] sched debug 188 | - [X] sched stats 189 | - [X] cfs rt deadline idle stop sched class 190 | - [X] sched domain topology 191 | - [X] sched tick 192 | - [X] scheduler core 193 | 194 | **Q: 调度器热升级可以修改哪些函数?** 195 | 196 | 边界提取结束后,kernel/sched/mod 目录里的文件中定义的函数都是可以修改的,比如,quick start 示例中,调度器模块可修改的范围包含 1k+ 个函数。但是有些需要注意的地方,请看 [Limitations](#limitations) 章节。 197 | 198 | **Q:调度器模块的边界可以修改吗?** 199 | 200 | 可以修改,通过修改边界配置文件可修改调度器边界,比如修改代码文件、接口函数等,请参考[这里](./docs/Support-various-Linux-distros.md)。注意,若调整了调度器边界,上线前需要做严格的测试。 201 | 202 | **Q:plugsched 支持哪些内核版本?** 203 | 204 | 理论上 plugsched 是与内核版本解耦的,我们测试过的内核版本有 3.10 和 4.19,其它版本需开发人员自行适配与测试。 205 | 206 | **Q:可以修改头文件中的函数吗?** 207 | 208 | 
可以。我们对头文件中的函数进行了边界划分,kernel/sched/mod 目录中的头文件不可修改的函数已被加上 "DON'T MODIFY INLINE EXTERNAL FUNCTION" 的注释,其它函数可以修改。 209 | 210 | **Q:可以修改结构体吗?** 211 | 212 | 不可以随意修改结构体的大小和成员语义,如果结构体中存在预留字段,则可以修改这些预留字段。 213 | 214 | **Q:内核调度器被替换后会有性能回退吗?** 215 | 216 | 调度器模块本身的 overhead 很小,可以被忽略。其次,还取决于开发人员对调度器的修改。经过 benchmark 测试,如果不加任何修改,是没有性能影响的。 217 | 218 | **Q:加载模块时停机时间长吗?有多少?** 219 | 220 | 这取决于当前系统的负载及进程数量,进程数量越多,负载越重,downtime 越长。在我们的测试中,104 核 CPU 下 10k+ 的进程数量,downtime 不到 10 ms。 221 | 222 | **Q:这和 kpatch 有什么区别?是 kpatch 的一种优化吗?** 223 | 224 | kpatch 是函数粒度的热升级,plugsched 是子系统范围的热升级,有些功能和实现是无法通过 kpatch 的优化做到的,比如 kpatch 无法修改 __schedule 函数、无法同时修改上千个函数等。 225 | 226 | **Q:和 kpatch 的热升级有冲突吗?** 227 | 228 | 有冲突,如果 kpatch 和 plugsched 修改的范围有交集,重叠的部分会被 plugsched 覆盖掉。不过我们设计了可用于生产环境的冲突检测机制。 229 | 230 | **Q:可以修改调度器边界之外的函数吗?** 231 | 232 | 可以,我们提供了 [sidecar](./docs/Advanced-Features.md) 机制可以同时修改边界之外的函数。比如,有些 hotfix 既修改了调度器,又修改了 cpuacct 中的内容,可以使用 sidecar 机制升级 cpuacct 中的内容。 233 | 234 | ## Supported Architectures 235 | - [X] x86-64 236 | - [X] aarch64 237 | 238 | ## Limitations 239 | * 不可修改 init 函数,因为 init 函数在系统重启后被释放掉,需要初始化的过程请在加载模块时执行; 240 | * 不可修改接口函数的签名,也不可删除接口函数,如果要删除,可以将函数修改为空函数; 241 | * 不可修改任何带有 "DON'T MODIFY INLINE EXTERNAL FUNCTION" 注释的函数; 242 | * 不可随意修改结构体及成员的语义,需要修改时请参考 working/boundary_doc.yaml 文档进行; 243 | * 加载调度器模块后,不可直接 hook 内核中属于调度器模块范围内的函数,比如 perf 或者 ftrace 等工具,需要时请指定 scheduler.ko 模块; 244 | 245 | ## License 246 | plugsched is a linux kernel hotpluggable scheduler SDK developed by Alibaba and licensed under the GPLv3+ License or BSD-3-Clause License. This product contains various third-party components under other open source licenses. See the NOTICE file for more information. 
247 | -------------------------------------------------------------------------------- /boundary/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

# --------------------------------------------------------------------------------
# /boundary/collect.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
"""Use GCC Python Plugin to collect source code information"""

import re
import os
import json
from collections import defaultdict
from itertools import groupby as _groupby
from yaml import load, resolver, CLoader as Loader

# Use set as the default sequencer for yaml
Loader.add_constructor(
    resolver.BaseResolver.DEFAULT_SEQUENCE_TAG,
    lambda loader, node: set(loader.construct_sequence(node)))


class GccBugs(object):
    """String fix-ups applied to declaration text printed by the GCC plugin.

    Every ``(decl, str)`` helper takes the gcc declaration object and the
    textual form of its type/declaration, and returns the corrected text.
    """

    array_ptr_re = re.compile(r'(.*)\[([0-9]*)\] \*\s*([^,\);]*)')

    @staticmethod
    def array_pointer(decl, str):
        """struct cpumask[1] *doms_cur -> struct cpumask (*doms_cur)[1]"""
        return GccBugs.array_ptr_re.sub(r'\1 (*\3)[\2]', str)

    @staticmethod
    def typedef(decl, str):
        """Drop the 'struct ' prefix when the base type is a typedef name."""
        t = decl.type

        # Strip pointer/array layers to reach the base type
        while isinstance(t, (gcc.PointerType, gcc.ArrayType)):
            t = t.dereference

        if isinstance(t.name, gcc.TypeDecl):
            name = t.name.name
            return str.replace('struct ' + name, name)
        return str

    @staticmethod
    def enum_type_name(decl, str):
        """Insert the 'enum ' keyword in front of an enum type name."""
        if not isinstance(decl.type, gcc.EnumeralType):
            return str

        # Anonymous enums have no name to prefix
        type_name = decl.type.name
        if type_name is None:
            return str

        i = str.find(type_name.name)
        # find() returns -1 when the name is absent; slicing with -1 would
        # insert 'enum ' before the last character and corrupt the text.
        if i < 0:
            return str
        return str[:i] + 'enum ' + str[i:]

    @staticmethod
    def is_val_list(arg):
        """Whether arg is a pointer to the builtin '__va_list_tag' record."""
        return (isinstance(arg.type, gcc.PointerType)
                and isinstance(arg.type.dereference, gcc.RecordType)
                and isinstance(arg.type.dereference.name, gcc.Declaration)
                and arg.type.dereference.name.is_builtin
                and arg.type.dereference.name.name == '__va_list_tag')

    @staticmethod
    def va_list(decl, str):
        """Spell the builtin va_list pointer type as 'va_list'."""
        if GccBugs.is_val_list(decl):
            return str.replace('struct *', 'va_list')
        return str

    @staticmethod
    def array_size(decl, str):
        """extern type array[<unknown>] -> extern type array[]"""
        # NOTE(review): this copy of the file had the pattern reduced to
        # '[]' -> '[]' (a no-op); the '<unknown>' token looks stripped like
        # the other <...> tokens in this dump. Confirm against upstream.
        return str.replace('[<unknown>]', '[]')

    @staticmethod
    def fix(decl, str):
        """Apply every fix-up in order and return the corrected text."""
        for bugfix in [
                GccBugs.array_pointer, GccBugs.enum_type_name,
                GccBugs.array_size, GccBugs.typedef, GccBugs.va_list
        ]:
            str = bugfix(decl, str)
        return str

    @staticmethod
    def variadic_function(decl, signature):
        """Append ', ...' to signature['params'] for variadic functions."""
        if decl.str_decl.find('...') >= 0:
            signature['params'] += ', ...'

    @staticmethod
    def var_decl_start_loc(decl):
        """Return the location where the variable declaration starts.

        For a variable whose base type is an unnamed enum/struct the
        location of the type stub is returned, otherwise decl.location.
        """
        base_type = decl.type
        while isinstance(base_type, (gcc.PointerType, gcc.ArrayType)):
            base_type = base_type.type
        if base_type.name is None and isinstance(
                base_type, (gcc.EnumeralType, gcc.RecordType)):
            return base_type.main_variant.stub.location
        return decl.location


class Collection(object):
    """Collect function/variable/struct/call-graph data of one source file."""

    def __init__(self, tmp_dir):
        """Load boundary.yaml from tmp_dir and reset all property lists."""
        with open(tmp_dir + 'boundary.yaml') as f:
            self.config = load(f, Loader)

        self.fn_prop = []
        self.cb_prop = []
        self.var_prop = []
        self.intf_prop = []
        self.edge_prop = []
        self.struct_prop = {}
        self.mod_files = self.config['mod_files']
        self.mod_hdrs = [f for f in self.mod_files if f.endswith('.h')]
        self.mod_srcs = [f for f in self.mod_files if f.endswith('.c')]
        self.sdcr = self.config['sidecar'] or set()
        self.sdcr_srcs = [f[1] for f in self.sdcr]

    def relpath(self, decl):
        """Get relative path from declaration object"""
        return os.path.relpath(decl.location.file)

    def decl_sig(self, decl):
        """Get function signature from declaration object"""
        # Declarations without a body get '?' as the file part
        if decl.function is None:
            return (decl.name, '?')
        return (decl.name, self.relpath(decl))

    def decl_in_section(self, decl, section):
        """Whether declaration is in a specific text section"""
        for name, val in decl.attributes.items():
            # Canonicalized name "section" since gcc-8.1.0, and
            # uncanonicalized legacy name "__section__" before 8.1.0
            if name in ('section', '__section__'):
                assert len(val) == 1
                return val[0].constant == section
        return False

    def decl_is_weak(self, decl):
        """Whether declaration is weak"""
        return '__weak__' in decl.attributes or 'weak' in decl.attributes

    def collect_fn(self):
        """Collect all funtion properties, including interface functions"""
        src_f = gcc.get_main_input_filename()

        # These only depend on the compiled file, evaluate them once
        is_mod_src = src_f in self.mod_srcs
        is_scoped_src = is_mod_src or src_f in self.sdcr_srcs
        interface = self.config['function']['interface']
        syscall = self.config['interface_prefix']

        for node in gcc.get_callgraph_nodes():
            decl = node.decl
            if not isinstance(decl.context, gcc.TranslationUnitDecl):
                continue
            # Ignore alias function for now ??
            if decl.function is None:
                continue

            l_loc = decl.function.start
            r_loc = decl.function.end
            name_loc = decl.location

            # Locations are stored 0-based
            properties = {
                'name': decl.name,
                'init': self.decl_in_section(decl, '.init.text'),
                'file': self.relpath(decl),
                'l_brace_loc': (l_loc.line - 1, l_loc.column - 1),
                'r_brace_loc': (r_loc.line - 1, r_loc.column - 1),
                'name_loc': (name_loc.line - 1, name_loc.column - 1),
                'external': decl.external,
                'public': decl.public,
                'static': decl.static,
                'inline': decl.inline or 'always_inline' in decl.attributes,
                'weak': self.decl_is_weak(decl),
                'signature': self.decl_sig(decl),
                'decl_str': None,
            }
            self.fn_prop.append(properties)

            # interface candidates must belongs to module source files
            if is_scoped_src:
                if decl.arguments:
                    params = ', '.join(
                        GccBugs.fix(arg, arg.type.str_no_uid)
                        for arg in decl.arguments)
                else:
                    params = 'void'

                decl_str = {
                    'fn': decl.name,
                    'ret': GccBugs.fix(decl.result,
                                       decl.result.type.str_no_uid),
                    'params': params,
                }

                GccBugs.variadic_function(decl, decl_str)
                properties['decl_str'] = decl_str

            # sidecars shouln't treat syscall funtions as interfaces
            if is_mod_src and (
                    decl.name in interface or any(
                        decl.name.startswith(prefix) for prefix in syscall)):
                self.intf_prop.append(list(self.decl_sig(decl)))

    def collect_var(self):
        """Collect properties of all global variables"""
        scoped_srcs = set(self.mod_srcs) | set(self.sdcr_srcs)

        for var in gcc.get_variables():
            decl = var.decl
            if not decl.location:
                continue
            if not isinstance(decl.context, gcc.TranslationUnitDecl):
                continue

            properties = {
                'name': decl.name,
                'file': self.relpath(decl),
                'name_loc': (decl.location.line - 1, decl.location.column - 1),
                'decl_start_line': GccBugs.var_decl_start_loc(decl).line - 1,
                'external': decl.external,
                'public': decl.public,
                'static': decl.static,
                'decl_str': None,
            }

            # tricky skill to get right str_decl
            if decl.location.file in scoped_srcs:
                # Keep only the part before the initializer, then turn the
                # definition into an extern declaration
                decl_str = decl.str_decl.split('=')[0].strip(' ;') + ';'
                decl_str = decl_str.replace('static ', 'extern ')
                properties['decl_str'] = GccBugs.fix(decl, decl_str)

            self.var_prop.append(properties)

    def collect_callback(self):
        """Collect all callback functions of the current source file"""

        # return True means we stop walk subtree
        def mark_callback(op, caller):
            if (isinstance(op, gcc.FunctionDecl)
                    and not self.decl_in_section(op, '.init.text')):
                self.cb_prop.append(list(self.decl_sig(op)))

        # Find callbacks in function body
        for node in gcc.get_callgraph_nodes():
            # Ignore alias, it's insignificant at all
            if node.decl.function is None:
                continue
            for stmt in self.each_stmt(node):
                if isinstance(stmt, gcc.GimpleCall):
                    # Ignore direct calls
                    for rhs in stmt.rhs[1:]:
                        if rhs:
                            rhs.walk_tree(mark_callback, node.decl)
                else:
                    stmt.walk_tree(mark_callback, node.decl)

        # Find callbacks in variable init value
        for var in gcc.get_variables():
            decl = var.decl
            type_name = '' if not decl.type.name else decl.type.name.name

            # struct sched_class is purely private
            if (decl.initial and type_name != 'sched_class' and
                    not self.decl_in_section(decl, '.discard.addressable')):
                decl.initial.walk_tree(mark_callback, decl)

    def collect_struct(self):
        """Collect all struct definition information"""
        public_fields = defaultdict(set)

        def mark_public_field(op, node, parent_component_ref):
            if isinstance(op, gcc.ComponentRef):
                if isinstance(op.target, gcc.ComponentRef):
                    parent_component_ref[op.target] = op

                context = op.field.context
                # Climb to the enclosing reference while the field is unnamed
                while op.field.name is None and op in parent_component_ref:
                    op = parent_component_ref[op]

                loc_file = self.relpath(context.stub)
                if loc_file in self.mod_hdrs and context.name is not None:
                    # When acecssing 2 32bit fields at one time, the AST
                    # ancestor is BitFieldRef. And op.field.name is None
                    field = op.field.name or ''
                    public_fields[context].add((node.decl, field))

        for node in gcc.get_callgraph_nodes():
            # Ignore alias, it's insignificant at all
            if node.decl.function is None:
                continue
            for stmt in self.each_stmt(node):
                stmt.walk_tree(mark_public_field, node, {})

        def groupby(it, grouper, selector):
            # itertools.groupby only merges adjacent items, so sort first
            sorted_list = sorted(it, key=grouper)
            return dict((k, list(map(selector, v)))
                        for k, v in _groupby(sorted_list, grouper))

        for struct, user_fields in public_fields.items():
            self.struct_prop[struct.name.name] = {
                'all_fields': [f.name for f in struct.fields if f.name],
                'public_fields': groupby(
                    user_fields,
                    grouper=lambda user_field: user_field[1],
                    selector=lambda user_field: self.decl_sig(user_field[0]))
            }

    def collect_edge(self):
        """Collect all edges of the call graph"""
        for node in gcc.get_callgraph_nodes():
            if self.decl_in_section(node.decl, '.init.text'):
                continue

            # alias function
            if node.decl.function is None:
                alias = node.decl.attributes['alias'][0]
                real_name = alias.str_no_uid.replace('"', '')
                properties = {
                    "from": self.decl_sig(node.decl),
                    "to": (real_name, "?"),
                }
                self.edge_prop.append(properties)
                continue

            for stmt in self.each_call_stmt(node):
                if not stmt.fndecl:
                    continue
                assert node.decl.function
                properties = {
                    'from': self.decl_sig(node.decl),
                    'to': self.decl_sig(stmt.fndecl),
                }
                self.edge_prop.append(properties)

    def each_stmt(self, node):
        """Iterate each statement of call graph node"""
        for bb in node.decl.function.cfg.basic_blocks:
            if bb.gimple:
                for stmt in bb.gimple:
                    yield stmt

    def each_call_stmt(self, node):
        """Iterate each call statement of call graph node"""
        for stmt in self.each_stmt(node):
            if isinstance(stmt, gcc.GimpleCall):
                yield stmt

    def collect_info(self, p, _):
        """Collect information about the current source file"""
        # Only act when the '*free_lang_data' pass is executed
        if p.name != '*free_lang_data':
            return

        self.collect_fn()
        self.collect_callback()
        self.collect_struct()
        self.collect_edge()
        self.collect_var()

        collection = {
            'fn': self.fn_prop,
            'var': self.var_prop,
            'edge': self.edge_prop,
            'callback': self.cb_prop,
            'interface': self.intf_prop,
            'struct': self.struct_prop
        }

        with open(gcc.get_main_input_filename() + '.boundary', 'w') as f:
            json.dump(collection, f, indent=4)

    def register_cbs(self):
        """Register GCC Python Plugin callback"""
        gcc.register_callback(gcc.PLUGIN_PASS_EXECUTION, self.collect_info)


if __name__ == '__main__':
    import gcc

    # tmp directory to store middle files
    tmp_dir = gcc.argument_dict['tmpdir']
    collect = Collection(tmp_dir)
    collect.register_cbs()

# --------------------------------------------------------------------------------
# /boundary/extract.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
"""Extract module code according to boundary information"""

import json
import re
import os
import sys
from yaml import load, resolver, CLoader as Loader

# Use set as the default sequencer for yaml
Loader.add_constructor(
    resolver.BaseResolver.DEFAULT_SEQUENCE_TAG,
    lambda loader, node: set(loader.construct_sequence(node)))


class Extraction(object):
    """Rewrite one kernel source file into its scheduler-module version."""

    # A line matching this ends the upward merge of a declaration
    _terminator_re = re.compile(r';|}|#|//|\*/|^\n$')
    # Lines matching this are deleted by fix_up()
    _delete_re = re.compile(
        r'initcall|early_param|__init |__initdata |__setup')

    def __init__(self, src_file, tmp_dir, mod_dir):
        """Load the extract config and the symbol metadata of src_file.

        src_file: path of the source file to extract
        tmp_dir:  directory holding boundary_extract.yaml (and
                  header_symbol.json for module headers)
        mod_dir:  directory the extracted file is written to
        """
        with open(tmp_dir + 'boundary_extract.yaml') as f:
            self.config = load(f, Loader)

        self.src_file = src_file
        self.mod_dir = mod_dir
        self.mod_files = self.config['mod_files']
        self.mod_srcs = {f for f in self.mod_files if f.endswith('.c')}
        self.mod_hdrs = self.mod_files - self.mod_srcs
        # 'sidecar' may be empty (None) in the yaml
        self.sdcr = self.config['sidecar'] or set()
        self.sdcr_srcs = [f[1] for f in self.sdcr]
        self.fn_list = []
        self.callback_list = []
        self.interface_list = []
        self.sidecar_list = []
        self.shared_var_list = []
        self.static_var_list = []

        if src_file in self.sdcr_srcs:
            # sidecar files keep their relative path below mod_dir
            self.dst_file = self.mod_dir + src_file
            os.makedirs(os.path.dirname(self.dst_file), exist_ok=True)
        else:
            self.dst_file = self.mod_dir + os.path.basename(src_file)

        if src_file in self.mod_hdrs:
            file_name = tmp_dir + 'header_symbol.json'
        else:
            file_name = src_file + '.boundary'

        with open(file_name) as f:
            metas = json.load(f)
            self.meta_fn = metas['fn']
            self.meta_var = metas['var']

    def function_location(self):
        """Get the source code location of border functions"""
        unique = set()
        fn_config = self.config['function']

        for fn in self.meta_fn:
            # filter out *.h in *.c
            if fn['file'] != self.src_file:
                continue

            # remove duplicated function
            obj = tuple(fn['signature'])
            if obj in unique:
                continue
            unique.add(obj)

            # __init function will be deleted during post extract fix_up()
            if obj in fn_config['init']:
                continue

            if (obj in fn_config['sched_outsider'] or
                    obj in fn_config['sdcr_out']):
                self.fn_list.append(fn)
            elif obj in fn_config['callback']:
                self.callback_list.append(fn)
            elif obj in fn_config['interface']:
                self.interface_list.append(fn)
            elif obj in self.sdcr:
                self.sidecar_list.append(fn)

    def var_location(self):
        """Get the source code location of shared global variables"""
        var_config = self.config['global_var']

        # Only variables defined (not merely declared) in this file
        meta_var = [var for var in self.meta_var
                    if var['file'] == self.src_file and not var['external']]

        # sidecar shares all global variables with vmlinux
        if self.src_file in self.sdcr_srcs:
            self.shared_var_list = meta_var
            return

        for var in meta_var:
            # static variables are treated as private by default
            if not (var['public'] or
                    var['name'] in var_config['extra_public']):
                self.static_var_list.append(var)

            # share public varialbes by default
            elif var['name'] not in var_config['force_private']:
                self.shared_var_list.append(var)

    def merge_up_lines(self, lines, curr):
        """Merge up multi-lines-function-declaration into one line

        Returns the index of the line that now holds the merged text.
        """
        merged = lines[curr].strip()

        while curr >= 1:
            line = lines[curr - 1]
            if self._terminator_re.search(line):
                break
            merged = line.strip() + ' ' + merged
            lines[curr] = ''
            curr -= 1

        lines[curr] = merged.replace(' ;', ';') + '\n'
        return curr

    def function_extract(self, lines):
        """Generate function code for new module"""
        warn = "/* DON'T MODIFY INLINE EXTERNAL FUNCTION {} */\n"
        cb_warn = "/* DON'T MODIFY SIGNATURE OF CALLBACK FUNCTION {} */\n"
        if_warn = "/* DON'T MODIFY SIGNATURE OF INTERFACE FUNCTION {} */\n"
        decl_fmt = "extern {ret} {fn}({params});\n"

        for fn in self.fn_list:
            name = fn['name']
            (row_end, _) = fn['r_brace_loc']
            (row_start, col_start) = fn['l_brace_loc']

            if tuple(fn['signature']) in self.config['function']['outsider_opt']:
                lines[row_end] += warn.format(name)
            else:
                # convert function body "{}" to ";"
                # only handle normal kernel function definition
                lines[row_start] = lines[row_start][:col_start] + ";\n"
                self.merge_up_lines(lines, row_start)
                for i in range(row_start + 1, row_end + 1):
                    lines[i] = ''

        for fn in self.callback_list:
            name, decl_str = fn['name'], fn['decl_str']
            (row_start, _) = fn['name_loc']
            (row_end, _) = fn['r_brace_loc']
            new_name = '__cb_' + name
            used_name = '__used ' + new_name

            lines[row_start] = lines[row_start].replace(name, used_name)
            lines[row_end] += ('\n' + cb_warn.format(new_name) +
                               decl_fmt.format(**decl_str))

        for fn in self.interface_list + self.sidecar_list:
            name, public = fn['name'], fn['public']
            (row_start, _), (row_end, _) = fn['name_loc'], fn['r_brace_loc']
            used_name = '__used ' + name

            # everyone know that syscall ABI should be consistent
            if any(name.startswith(prefix)
                   for prefix in self.config['interface_prefix']):
                continue

            # prevent static interface functions from being optimized.
            if not public:
                lines[row_start] = lines[row_start].replace(name, used_name)
            lines[row_end] += if_warn.format(name)

    def merge_down_var(self, lines, curr):
        """Merge down multi-lines-var-definition into one line

        Returns the index of the last line consumed.
        """
        merged = ''
        start = curr

        # Stop on the last line even without ';' so lines[curr] below
        # can never index past the end of the file.
        while curr + 1 < len(lines) and ';' not in lines[curr]:
            merged += lines[curr].strip() + ' '
            lines[curr] = ''
            curr += 1

        merged += lines[curr]
        lines[curr] = ''
        lines[start] = merged
        return curr

    def var_extract(self, lines):
        """Generate data declarition code for new module"""
        # prevent gcc from removing unused variables
        for var in self.static_var_list:
            (row, _) = var['name_loc']
            lines[row] = lines[row].replace('static ', 'static __used ')

        replace_list = [
            ('DEFINE_PER_CPU', 'DECLARE_PER_CPU'),
            ('DEFINE_STATIC_KEY', 'DECLARE_STATIC_KEY'),
        ]

        # General handling all shared variables
        # (iterate over a copy, entries are removed inside the loop)
        for var in list(self.shared_var_list):
            row_start = var['decl_start_line']
            (row_name, _) = var['name_loc']

            # Fixed variable name not on first line, e.g. nohz
            for i in range(row_start + 1, row_name):
                lines[i] = ''

            self.merge_down_var(lines, row_start)

            # Specially handling shared per_cpu and static_key variables
            # to improve readability
            line = lines[row_start]

            for (p, repl) in replace_list:
                if p in line:
                    line = line.replace(p, repl).replace('static ', '')
                    lines[row_start] = line
                    self.shared_var_list.remove(var)
                    break

        # delete data definition
        for var in self.shared_var_list:
            row_start = var['decl_start_line']
            lines[row_start] = ''

        # convert data definition to declarition
        for var in self.shared_var_list:
            row_start = var['decl_start_line']
            lines[row_start] += var['decl_str'] + '\n'

    def fix_include(self, line):
        """Fix header file path, assume one include per line"""
        old_header = line.split('"')[1]
        rel_header = os.path.join(os.path.dirname(self.src_file), old_header)
        rel_header = os.path.relpath(rel_header)

        # module header file is already extracted to the right place
        if rel_header in self.mod_files:
            return line

        dst_d = os.path.dirname(self.dst_file)
        new_header = os.path.relpath(rel_header, dst_d)
        return line.replace(old_header, new_header)

    def merge_down_fn(self, lines, curr):
        """Merge down multi-lines-function-definition into one line

        Returns the index of the last line consumed.
        """
        merged = ''
        start = curr
        l_brace = lines[curr].count('{')
        r_brace = lines[curr].count('}')

        # Continue until the first '{' was seen and all braces are closed;
        # the bound check avoids an IndexError on unbalanced input.
        while (l_brace == 0 or l_brace > r_brace) and curr + 1 < len(lines):
            merged += lines[curr].strip() + ' '
            lines[curr] = ''
            curr += 1
            l_brace += lines[curr].count('{')
            r_brace += lines[curr].count('}')

        merged += lines[curr]
        lines[curr] = ''
        lines[start] = merged
        return curr

    def fix_up(self, lines):
        """Post fix trival code adaption"""
        replace_list = [
            (re.compile(r'struct atomic_t'), r'atomic_t'),
            (re.compile(r'^(?!extern ).*struct sched_class ((stop|dl|rt|fair|idle)_sched_class)'),
             r'struct sched_class shadow_\1'),
        ]

        for (i, line) in enumerate(lines):
            if '#include "' in line:
                lines[i] = self.fix_include(line)
                continue

            if self._delete_re.search(line):
                # skip extern __init sched_tick_offload_init(void);
                if '__init ' in line and ';' not in line:
                    self.merge_down_fn(lines, i)
                lines[i] = ''
                continue

            for (p, repl) in replace_list:
                if p.search(line):
                    lines[i] = p.sub(repl, line)
                    break

    def extract_file(self):
        """Generate module source code"""
        self.function_location()
        self.var_location()

        src_f = self.src_file
        dst_f = self.dst_file

        with open(src_f) as in_f, open(dst_f, 'w') as out_f:
            lines = in_f.readlines()
            self.function_extract(lines)
            self.var_extract(lines)
            self.fix_up(lines)
            out_f.writelines(lines)


if __name__ == '__main__':

    src_file = sys.argv[1]
    # tmp directory to store middle files
    tmp_dir = sys.argv[2]
    # directory to store schedule module source code
    mod_dir = sys.argv[3]
    extract = Extraction(src_file, tmp_dir, mod_dir)
    extract.extract_file()

# --------------------------------------------------------------------------------
# /cli.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

"""cli.py - A command line interface for plugsched

Usage:
  plugsched-cli init <release_kernel> <kernel_src> <work_dir>
  plugsched-cli dev_init <kernel_src> <work_dir>
  plugsched-cli extract_src <kernel_src_rpm> <target_dir>
  plugsched-cli build <work_dir>
  plugsched-cli (-h | --help)

Options:
  -h --help     Show help.

Available subcommands:
  init          Initialize a scheduler module for a specific kernel release and product
  dev_init      Initialize plugsched development environment from kernel source code
  extract_src   extract kernel source code from kernel-src rpm
  build         Build a scheduler module rpm package for a specific kernel release and product

Subcommand arguments:
  release_kernel    `uname -r` of target kernel to be hotpluged
  kernel_src        kernel source code directory
  kernel_src_rpm    path of kernel source rpm
  work_dir          target working directory to develop new scheduler module
  target_dir        directory to place kernel source code
"""

import sys
from yaml import load, dump
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    print("WARNING: YAML CLoader is not presented, it can be slow.")
    from yaml import Loader, Dumper
from docopt import docopt
import sh
from sh import rsync, cp, glob as _glob
from multiprocessing import cpu_count
from tempfile import mkdtemp
import colorlog
import logging
import uuid
import stat
import os
import re


def glob(pattern, _cwd='.'):
    """Expand pattern relative to _cwd with sh.glob."""
    return _glob(os.path.join(_cwd, pattern))


class ShutdownHandler(logging.StreamHandler):
    """Logging handler that aborts the program on CRITICAL records."""

    def emit(self, record):
        if record.levelno >= logging.CRITICAL:
            raise Exception("Fatal")


handler = colorlog.StreamHandler()
handler.setFormatter(colorlog.ColoredFormatter(
    '%(cyan)s%(asctime)s%(reset)s %(log_color)s%(levelname)s %(white)s%(message)s%(reset)s',
    datefmt='%Y-%m-%d %H:%M:%S'))

logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(handler)
logging.getLogger().addHandler(ShutdownHandler())


class Plugsched(object):
    """Drive the init and build workflows of a scheduler module."""

    def __init__(self, work_dir, vmlinux, makefile):
        """Resolve paths, detect the kernel version and load boundary.yaml.

        work_dir: working directory of the scheduler module
        vmlinux:  path of the vmlinux image
        makefile: kernel top-level Makefile the version is parsed from
        """
        self.plugsched_path = os.path.dirname(os.path.realpath(__file__))
        self.work_dir = os.path.abspath(work_dir)
        self.vmlinux = os.path.abspath(vmlinux)
        self.makefile = os.path.abspath(makefile)
        self.mod_path = os.path.join(self.work_dir, 'kernel/sched/mod/')
        self.tmp_dir = os.path.join(self.work_dir, 'working/')
        plugsched_sh = sh(_cwd=self.plugsched_path)
        mod_sh = sh(_cwd=self.work_dir)
        self.plugsched_sh, self.mod_sh = plugsched_sh, mod_sh
        self.get_kernel_version(self.makefile)
        self.get_config_dir()
        self.search_springboard = sh.Command(self.plugsched_path + '/tools/springboard_search.sh')

        with open(os.path.join(self.config_dir, 'boundary.yaml')) as f:
            self.config = load(f, Loader)

        # source pattern (relative to plugsched_path) -> destination
        self.file_mapping = {
            self.config_dir + '/*': self.tmp_dir,
            'boundary/*.py': self.tmp_dir,
            'tools/symbol_resolve': self.tmp_dir,
            'tools/springboard_search.sh': self.tmp_dir,
            'src/Makefile.plugsched': self.tmp_dir,
            'module-contrib/*': self.tmp_dir,
            'src/*.[ch]': self.mod_path,
            'src/Makefile': self.mod_path,
            'src/scheduler.lds': self.mod_path,
            'src/.gitignore': './',
        }
        self.threads = cpu_count()
        self.mod_files = self.config['mod_files']
        self.mod_srcs = [f for f in self.mod_files if f.endswith('.c')]
        self.mod_hdrs = [f for f in self.mod_files if f.endswith('.h')]
        self.sdcr = [] if self.config['sidecar'] is None else self.config['sidecar']
        self.sdcr_srcs = [f[1] for f in self.sdcr]
        self.sdcr_objs = [f.replace('.c', '.o') for f in self.sdcr_srcs]
        self.mod_objs = [f+'.extract' for f in self.mod_files + self.sdcr_srcs]

    def get_kernel_version(self, makefile):
        """Parse KVER, KREL, major and uname_r from the kernel Makefile."""
        VERSION = self.plugsched_sh.awk('-F=', '/^VERSION/{print $2}', makefile).strip()
        PATCHLEVEL = self.plugsched_sh.awk('-F=', '/^PATCHLEVEL/{print $2}', makefile).strip()
        SUBLEVEL = self.plugsched_sh.awk('-F=', '/^SUBLEVEL/{print $2}', makefile).strip()
        self.KVER = '%s.%s.%s' % (VERSION, PATCHLEVEL, SUBLEVEL)

        KREL = self.plugsched_sh.awk('-F=', '/^EXTRAVERSION/{print $2}', makefile).strip(' \n-')
        if len(KREL) == 0:
            logging.fatal('''Maybe you are using plugsched on non-released kernel,
                          please set EXTRAVERSION in Makefile (%s) before build kernel''',
                          os.path.join(self.work_dir, 'Makefile'))

        self.major = '%s.%s' % (VERSION, PATCHLEVEL)
        self.uname_r = '%s-%s' % (self.KVER, KREL)

        # strip ARCH; keep the whole release when it carries no arch suffix
        # so that self.KREL is always defined for cmd_build()
        self.KREL = KREL
        for arch in ['.x86_64', '.aarch64']:
            idx = KREL.find(arch)
            if idx != -1:
                self.KREL = KREL[:idx]

    def get_config_dir(self):
        """Pick the configs/ sub-directory that best matches uname_r."""
        def common_prefix_len(s1, s2):
            # commonprefix() compares character by character
            return len(os.path.commonprefix([s1, s2]))

        candidates = list(map(os.path.basename, glob('%s/configs/%s*' % (self.plugsched_path, self.major))))
        if len(candidates) == 0:
            logging.fatal('''Can't find config directory, please add config for kernel %s''', self.KVER)

        candidates.sort(reverse=True)
        # Longest common prefix wins; ties go to the later candidate
        _, idx = max((common_prefix_len(self.uname_r, t), i) for i, t in enumerate(candidates))

        logging.info("Choose config dir %s/" % candidates[idx])
        self.config_dir = os.path.join(self.plugsched_path, 'configs/', candidates[idx])

    def apply_patch(self, f, **kwargs):
        """Apply patch f from tmp_dir to work_dir if the file exists."""
        path = os.path.join(self.tmp_dir, f)
        if os.path.exists(path):
            self.mod_sh.patch(input=path, strip=1, _out=sys.stdout, _err=sys.stderr, **kwargs)

    def make(self, stage, objs=(), **kwargs):
        """Run a Makefile.plugsched stage; kwargs become KEY=VALUE args."""
        self.mod_sh.make(stage,
                         'objs=%s' % ' '.join(objs),
                         *['%s=%s' % i for i in kwargs.items()],
                         file=os.path.join(self.tmp_dir, 'Makefile.plugsched'),
                         jobs=self.threads,
                         _out=sys.stdout,
                         _err=sys.stderr)

    def extract(self):
        """Run the collect, analyze and extract stages in order."""
        logging.info('Extracting scheduler module objs: %s', ' '.join(self.mod_objs))
        self.mod_sh.make('olddefconfig')
        self.make(stage = 'collect', plugsched_tmpdir = self.tmp_dir, plugsched_modpath = self.mod_path)
        self.make(stage = 'analyze', plugsched_tmpdir = self.tmp_dir, plugsched_modpath = self.mod_path)
        self.make(stage = 'extract', plugsched_tmpdir = self.tmp_dir, plugsched_modpath = self.mod_path,
                  objs = self.mod_objs)

    def create_sandbox(self, kernel_src):
        """Sync kernel_src into work_dir and copy the plugsched files."""
        logging.info('Creating mod build directory structure')
        rsync(kernel_src + '/', self.work_dir, archive=True, verbose=True, delete=True, exclude='.git', filter=':- .gitignore')
        self.mod_sh.mkdir(self.mod_path, parents=True)
        self.mod_sh.mkdir(self.tmp_dir, parents=True)

        for f, t in self.file_mapping.items():
            self.mod_sh.cp(glob(f, _cwd=self.plugsched_path), t, recursive=True, dereference=True)

    def find_old_springboard(self):
        """Return False if a 'no-omit-frame-pointer' line in mod/core.c is
        directly followed by a line containing '__schedule', else True."""
        with open(os.path.join(self.work_dir, 'kernel/sched/mod/core.c'), 'r') as f:
            lines = f.readlines()

        for line, next_line in zip(lines, lines[1:]):
            if 'no-omit-frame-pointer' in line and '__schedule' in next_line:
                return False

        return True

    def cmd_init(self, kernel_src, sym_vers, kernel_config):
        """Create the sandbox, extract the module and apply the patches."""
        self.create_sandbox(kernel_src)
        self.plugsched_sh.cp(sym_vers, self.work_dir, force=True)
        self.plugsched_sh.cp(kernel_config, self.work_dir + '/.config', force=True)
        self.plugsched_sh.cp(self.makefile, self.work_dir, force=True)
        self.plugsched_sh.cp(self.vmlinux, self.work_dir, force=True)

        logging.info('Patching kernel with pre_extract patch')
        self.apply_patch('pre_extract.patch')
        self.extract()
        logging.info('Patching extracted scheduler module with post_extractd patch')
        self.apply_patch('post_extract.patch')
        logging.info('Patching dynamic springboard')
        self.apply_patch('dynamic_springboard.patch')
        # For old version in ANCK 5.10, we need to apply part 2 patch
        if self.find_old_springboard():
            self.apply_patch('dynamic_springboard_2.patch')

        with open(os.path.join(self.mod_path, 'Makefile'), 'a') as f:
            self.search_springboard('init', self.vmlinux, kernel_config, _out=f)

        logging.info("Succeed!")

    # when python3 working with rpmbuild, the /usr/local/python* path
    # won't be in included in sys/path which results in some modules
    # can't be find. So we need to add the PYTHONPATH manually.
    # The detail about this can be find in
    # https://fedoraproject.org/wiki/Changes/Making_sudo_pip_safe
    def add_python_path(self):
        """Point PYTHONPATH at the /usr/local site-packages directories."""
        # sys.version[0:3] would give "3.1" on Python 3.10 and later
        py_ver = '%d.%d' % sys.version_info[:2]
        python_path = '/usr/local/lib64/python' + py_ver + '/site-packages'
        python_path += os.pathsep
        python_path += '/usr/local/lib/python' + py_ver + '/site-packages'
        os.environ["PYTHONPATH"] = python_path

    def cmd_build(self):
        """Build the scheduler rpm below working/rpmbuild."""
        if not os.path.exists(self.work_dir):
            logging.fatal("plugsched: Can't find %s", self.work_dir)
        self.add_python_path()
        logging.info("Preparing rpmbuild environment")
        rpmbuild_root = os.path.join(self.tmp_dir, 'rpmbuild')
        self.mod_sh.rm(rpmbuild_root, recursive=True, force=True)
        self.mod_sh.mkdir(rpmbuild_root)
        rpmbase_sh = sh(_cwd=rpmbuild_root)
        rpmbase_sh.mkdir(['BUILD','RPMS','SOURCES','SPECS','SRPMS'])

        self.mod_sh.cp('working/scheduler.spec', os.path.join(rpmbuild_root, 'SPECS'), force=True)
        rpmbase_sh.rpmbuild('--define', '%%_topdir %s' % os.path.realpath(rpmbuild_root),
                            '--define', '%%_builddir %s' % self.work_dir,
                            '--define', '%%_sdcrobjs "%s"' % ' '.join(self.sdcr_objs),
                            '--define', '%%KVER %s' % self.KVER,
                            '--define', '%%KREL %s' % self.KREL,
                            '-bb', 'SPECS/scheduler.spec',
                            _out=sys.stdout,
                            _err=sys.stderr)
        logging.info("Succeed!")


if __name__ == '__main__':
    arguments = docopt(__doc__)

    # docopt keys positional arguments by their '<name>' in the usage text
    if arguments['extract_src']:
        kernel_src_rpm = arguments['<kernel_src_rpm>']
        target_dir = arguments['<target_dir>']

        rpmbuild_root = mkdtemp()
        sh.rpmbuild('--define', '%%_topdir %s' % rpmbuild_root,
                    '--define', '%%__python %s' % '/usr/bin/python3',
                    '-rp', '--nodeps', kernel_src_rpm)

        src = glob('kernel*/linux*', rpmbuild_root + '/BUILD/')

        if len(src) != 1:
            logging.fatal("find multi kernel source, fuzz ...")

        rsync(src[0] + '/', target_dir + '/', archive=True, verbose=True, delete=True)

        # certificates for CONFIG_MODULE_SIG_KEY & CONFIG_SYSTEM_TRUSTED_KEYS
        for pem in glob('*.pem', rpmbuild_root + '/SOURCES/'):
            sh.cp(pem, target_dir + '/certs', force=True)

        sh.rm(rpmbuild_root, recursive=True, force=True)

    elif arguments['init']:
        release_kernel = arguments['<release_kernel>']
        kernel_src = arguments['<kernel_src>']
        work_dir = arguments['<work_dir>']

        vmlinux = '/usr/lib/debug/lib/modules/' + release_kernel + '/vmlinux'
        if not os.path.exists(vmlinux):
            logging.fatal("%s not found, please install kernel-debuginfo-%s.rpm", vmlinux, release_kernel)

        sym_vers = '/usr/src/kernels/' + release_kernel + '/Module.symvers'
        kernel_config = '/usr/src/kernels/' + release_kernel + '/.config'
        makefile = '/usr/src/kernels/' + release_kernel + '/Makefile'

        if not os.path.exists(kernel_config):
            logging.fatal("%s not found, please install kernel-devel-%s.rpm", kernel_config, release_kernel)

        plugsched = Plugsched(work_dir, vmlinux, makefile)
        plugsched.cmd_init(kernel_src, sym_vers, kernel_config)

    elif arguments['dev_init']:
        kernel_src = arguments['<kernel_src>']
        work_dir = arguments['<work_dir>']

        if not os.path.exists(kernel_src):
            logging.fatal("Kernel source directory not exists")

        vmlinux = os.path.join(kernel_src, 'vmlinux')
        if not os.path.exists(vmlinux):
            logging.fatal("%s not found, please execute `make -j %s` firstly", vmlinux, cpu_count())

        sym_vers = os.path.join(kernel_src, 'Module.symvers')
        kernel_config = os.path.join(kernel_src, '.config')
        makefile = os.path.join(kernel_src, 'Makefile')

        if not os.path.exists(kernel_config):
            logging.fatal("kernel config %s not found", kernel_config)

        plugsched = Plugsched(work_dir, vmlinux, makefile)
        plugsched.cmd_init(kernel_src, sym_vers, kernel_config)

    elif arguments['build']:
        work_dir = arguments['<work_dir>']

        vmlinux = os.path.join(work_dir, 'vmlinux')
        makefile = os.path.join(work_dir, 'Makefile')
        plugsched = Plugsched(work_dir, vmlinux, makefile)
        plugsched.cmd_build()

# --------------------------------------------------------------------------------
# /configs/3.10/boundary.yaml:
# --------------------------------------------------------------------------------
# Copyright 2019-2022 Alibaba Group Holding Limited.
2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | mod_files: 5 | - kernel/sched/core.c 6 | - kernel/sched/fair.c 7 | - kernel/sched/idle_task.c 8 | - kernel/sched/rt.c 9 | - kernel/sched/cpudeadline.c 10 | - kernel/sched/deadline.c 11 | - kernel/sched/cpupri.c 12 | - kernel/sched/debug.c 13 | - kernel/sched/stats.c 14 | - kernel/sched/stop_task.c 15 | - kernel/sched/cpudeadline.h 16 | - kernel/sched/cpupri.h 17 | - kernel/sched/sched.h 18 | - kernel/sched/stats.h 19 | - kernel/sched/features.h 20 | interface_prefix: 21 | - SyS_ 22 | - sys_ 23 | function: 24 | interface: 25 | - yield_to 26 | - __balance_callback 27 | - do_set_cpus_allowed 28 | - set_user_nice 29 | - __sched_setscheduler 30 | - sched_setscheduler_nocheck 31 | - __set_cpus_allowed_ptr 32 | - schedule_tail 33 | - scheduler_tick 34 | - sched_fork 35 | - scheduler_ipi 36 | - resched_cpu 37 | - get_nohz_timer_target 38 | - nohz_balance_enter_idle 39 | - sched_ttwu_pending 40 | - wake_up_if_idle 41 | - try_to_wake_up 42 | - wake_up_new_task 43 | - wake_up_nohz_cpu 44 | - rt_mutex_setprio 45 | - idle_cpu 46 | - partition_sched_domains 47 | - sched_set_stop_task 48 | - task_numa_group_id 49 | - should_numa_migrate_memory 50 | - task_numa_free 51 | - task_numa_fault 52 | - proc_sched_show_task 53 | - proc_sched_set_task 54 | - init_idle 55 | - in_sched_functions 56 | - task_sched_runtime 57 | - cpuset_cpumask_can_shrink 58 | - task_can_attach 59 | - normalize_rt_tasks 60 | - sysrq_sched_debug_show 61 | - sched_move_task 62 | - sched_group_set_shares 63 | - sched_offline_group 64 | - sched_destroy_group 65 | - sched_create_group 66 | - sched_online_group 67 | - finish_task_switch 68 | - sched_exec 69 | - yield 70 | - try_to_wake_up_local 71 | - deactivate_task 72 | - activate_task 73 | - check_preempt_curr 74 | global_var: 75 | extra_public: 76 | - cpu_idle_force_poll 77 | - num_cpus_frozen 78 | - __cfs_bandwidth_used 79 | - max_load_balance_interval 80 | - sched_domains_curr_level 81 | - 
sched_domains_numa_masks 82 | - default_relax_domain_level 83 | - sched_domain_topology 84 | - sched_domains_numa_distance 85 | - sched_domains_numa_levels 86 | - fallback_doms 87 | - ndoms_cur 88 | - doms_cur 89 | - dattr_cur 90 | - default_topology 91 | - local_cpu_mask 92 | - rt_pull_head 93 | - rt_push_head 94 | - dl_push_head 95 | - local_cpu_mask_dl 96 | - dl_pull_head 97 | - nohz 98 | - cfs_constraints_mutex 99 | - shares_mutex 100 | force_private: 101 | - sysctl_sched_features 102 | - sched_feat_keys 103 | - stop_sched_class 104 | - dl_sched_class 105 | - rt_sched_class 106 | - fair_sched_class 107 | - idle_sched_class 108 | sidecar: 109 | -------------------------------------------------------------------------------- /configs/3.10/dynamic_springboard.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/plugsched/1dafa53781de36af3798c1cd0b016c0136799cd7/configs/3.10/dynamic_springboard.patch -------------------------------------------------------------------------------- /configs/3.10/post_extract.patch: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2023 Alibaba Group Holding Limited. 
2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | diff --git a/kernel/sched/mod/Makefile b/kernel/sched/mod/Makefile 5 | index 38dbf6d..31c6d91 100644 6 | --- a/kernel/sched/mod/Makefile 7 | +++ b/kernel/sched/mod/Makefile 8 | @@ -15,9 +15,9 @@ CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer 9 | endif 10 | 11 | objs-y += core.o 12 | -objs-y += idle.o fair.o rt.o deadline.o 13 | +objs-y += idle_task.o fair.o rt.o deadline.o 14 | 15 | -objs-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o 16 | +objs-$(CONFIG_SMP) += cpupri.o cpudeadline.o stop_task.o 17 | objs-$(CONFIG_SCHEDSTATS) += stats.o 18 | objs-$(CONFIG_SCHED_DEBUG) += debug.o 19 | 20 | diff --git a/kernel/sched/mod/core.c b/kernel/sched/mod/core.c 21 | index e5236bd..1970dcf 100644 22 | --- a/kernel/sched/mod/core.c 23 | +++ b/kernel/sched/mod/core.c 24 | @@ -57,6 +57,9 @@ 25 | #include 26 | #include 27 | #include 28 | + 29 | +#undef CONFIG_FTRACE_SYSCALLS 30 | + 31 | #include 32 | #include 33 | #include 34 | @@ -89,7 +92,6 @@ 35 | #include "../../workqueue_internal.h" 36 | #include "../../smpboot.h" 37 | 38 | -#define CREATE_TRACE_POINTS 39 | #include 40 | 41 | #ifdef smp_mb__before_atomic 42 | @@ -7530,9 +7532,13 @@ void sched_cpu_deactivate(unsigned int cpu); 43 | #else 44 | #endif /* CONFIG_SMP */ 45 | 46 | +extern char __module_sched_start[], __module_sched_end[]; 47 | + 48 | int in_sched_functions(unsigned long addr) 49 | { 50 | return in_lock_functions(addr) || 51 | + (addr >= (unsigned long)__module_sched_start 52 | + && addr < (unsigned long)__module_sched_end) || 53 | (addr >= (unsigned long)__sched_text_start 54 | && addr < (unsigned long)__sched_text_end); 55 | } 56 | diff --git a/kernel/sched/mod/main.c b/kernel/sched/mod/main.c 57 | index 8e08642..c1d1604 100644 58 | --- a/kernel/sched/mod/main.c 59 | +++ b/kernel/sched/mod/main.c 60 | @@ -12,19 +12,30 @@ 61 | #include 62 | #include 63 | #include 64 | +#include 65 | +#include 66 | #include "sched.h" 67 | 
#include "helper.h" 68 | #include "mempool.h" 69 | #include "head_jump.h" 70 | #include "stack_check.h" 71 | 72 | -#define CHECK_STACK_LAYOUT() \ 73 | - BUILD_BUG_ON_MSG(MODULE_FRAME_POINTER != VMLINUX_FRAME_POINTER, \ 74 | - "stack layout of __schedule can not match to it in vmlinux") 75 | - 76 | #define MAX_CPU_NR 1024 77 | 78 | -extern void __orig___schedule(bool); 79 | +#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ 80 | + typeof(ptr) __PTR = (ptr); \ 81 | + typeof(*ptr) VAL; \ 82 | + for (;;) { \ 83 | + VAL = READ_ONCE(*__PTR); \ 84 | + if (cond_expr) \ 85 | + break; \ 86 | + cpu_relax(); \ 87 | + } \ 88 | + VAL; \ 89 | +}) 90 | +#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) 91 | + 92 | +extern void __orig___schedule(void); 93 | int process_id[MAX_CPU_NR]; 94 | atomic_t cpu_finished; 95 | atomic_t clear_finished; 96 | @@ -53,10 +64,9 @@ extern struct percpu_rw_semaphore cpuset_rwsem; 97 | percpu_up_write(&cpuset_rwsem) 98 | #endif 99 | 100 | -extern cpumask_var_t sd_sysctl_cpus; 101 | extern const struct file_operations __mod_sched_feat_fops; 102 | -extern const struct seq_operations __mod_sched_debug_sops; 103 | -extern const struct seq_operations __mod_schedstat_sops; 104 | +extern const struct file_operations __mod_sched_debug_fops; 105 | +extern const struct file_operations __mod_proc_schedstat_operations; 106 | 107 | static struct dentry *sched_features_dir; 108 | static s64 stop_time; 109 | @@ -265,8 +275,25 @@ static int sync_sched_mod(void *func) 110 | } 111 | 112 | #ifdef CONFIG_SCHED_DEBUG 113 | +extern void __mod_register_sched_domain_sysctl(void); 114 | +extern void __mod_unregister_sched_domain_sysctl(void); 115 | + 116 | +extern struct ctl_table_header *__orig_sd_sysctl_header; 117 | +extern struct ctl_table __orig_sd_ctl_dir[]; 118 | +extern void __orig_sd_free_ctl_entry(struct ctl_table **tablep); 119 | + 120 | extern void __orig_register_sched_domain_sysctl(void); 121 | -extern void 
__orig_unregister_sched_domain_sysctl(void); 122 | +static void __orig_unregister_sched_domain_sysctl(void) 123 | +{ 124 | + if (__orig_sd_sysctl_header) 125 | + unregister_sysctl_table(__orig_sd_sysctl_header); 126 | + 127 | + __orig_sd_sysctl_header = NULL; 128 | + 129 | + if (__orig_sd_ctl_dir[0].child) 130 | + __orig_sd_free_ctl_entry(&__orig_sd_ctl_dir[0].child); 131 | +} 132 | +/* DON'T MODIFY INLINE EXTERNAL FUNCTION unregister_sched_domain_sysctl */ 133 | 134 | static inline void install_sched_domain_sysctl(void) 135 | { 136 | @@ -274,7 +301,7 @@ static inline void install_sched_domain_sysctl(void) 137 | plugsched_cpuset_lock(); 138 | 139 | __orig_unregister_sched_domain_sysctl(); 140 | - register_sched_domain_sysctl(); 141 | + __mod_register_sched_domain_sysctl(); 142 | 143 | plugsched_cpuset_unlock(); 144 | mutex_unlock(&cgroup_mutex); 145 | @@ -285,8 +312,7 @@ static inline void restore_sched_domain_sysctl(void) 146 | mutex_lock(&cgroup_mutex); 147 | plugsched_cpuset_lock(); 148 | 149 | - unregister_sched_domain_sysctl(); 150 | - cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); 151 | + __mod_unregister_sched_domain_sysctl(); 152 | __orig_register_sched_domain_sysctl(); 153 | 154 | plugsched_cpuset_unlock(); 155 | @@ -312,7 +338,7 @@ void install_sched_debugfs(void) 156 | } 157 | 158 | extern struct file_operations __orig_sched_feat_fops; 159 | -extern struct seq_operations __orig_sched_debug_sops; 160 | +extern struct file_operations __orig_sched_debug_fops; 161 | 162 | void restore_sched_debugfs(void) 163 | { 164 | @@ -325,7 +351,7 @@ int install_sched_debug_procfs(void) 165 | { 166 | remove_proc_entry("sched_debug", NULL); 167 | 168 | - if (!proc_create_seq("sched_debug", 0444, NULL, &__mod_sched_debug_sops)) 169 | + if (!proc_create("sched_debug", 0444, NULL, &__mod_sched_debug_fops)) 170 | return -ENOMEM; 171 | 172 | return 0; 173 | @@ -335,7 +361,7 @@ int restore_sched_debug_procfs(void) 174 | { 175 | remove_proc_entry("sched_debug", NULL); 176 | 
177 | - if (!proc_create_seq("sched_debug", 0444, NULL, &__orig_sched_debug_sops)) 178 | + if (!proc_create("sched_debug", 0444, NULL, &__orig_sched_debug_fops)) 179 | return -ENOMEM; 180 | 181 | return 0; 182 | @@ -343,14 +369,14 @@ int restore_sched_debug_procfs(void) 183 | #endif 184 | 185 | #ifdef CONFIG_SCHEDSTATS 186 | -extern struct seq_operations __orig_schedstat_sops; 187 | +extern struct file_operations __orig_proc_schedstat_operations; 188 | 189 | /* schedstat interface in proc */ 190 | int install_proc_schedstat(void) 191 | { 192 | remove_proc_entry("schedstat", NULL); 193 | 194 | - if (!proc_create_seq("schedstat", 0444, NULL, &__mod_schedstat_sops)) 195 | + if (!proc_create("schedstat", 0444, NULL, &__mod_proc_schedstat_operations)) 196 | return -ENOMEM; 197 | 198 | return 0; 199 | @@ -360,7 +386,7 @@ int restore_proc_schedstat(void) 200 | { 201 | remove_proc_entry("schedstat", NULL); 202 | 203 | - if (!proc_create_seq("schedstat", 0444, NULL, &__orig_schedstat_sops)) 204 | + if (!proc_create("schedstat", 0444, NULL, &__orig_proc_schedstat_operations)) 205 | return -ENOMEM; 206 | 207 | return 0; 208 | @@ -586,8 +612,6 @@ static int __init sched_mod_init(void) 209 | { 210 | int ret; 211 | 212 | - CHECK_STACK_LAYOUT(); 213 | - 214 | printk("Hi, scheduler mod is installing!\n"); 215 | init_start = ktime_get(); 216 | 217 | diff --git a/kernel/sched/mod/sched_rebuild.c b/kernel/sched/mod/sched_rebuild.c 218 | index 20e8f4c..d2b3343 100644 219 | --- a/kernel/sched/mod/sched_rebuild.c 220 | +++ b/kernel/sched/mod/sched_rebuild.c 221 | @@ -8,8 +8,6 @@ 222 | #include "sched.h" 223 | #include "helper.h" 224 | 225 | -extern void __orig_set_rq_offline(struct rq*); 226 | -extern void __orig_set_rq_online(struct rq*); 227 | extern unsigned int process_id[]; 228 | 229 | extern struct sched_class __orig_stop_sched_class; 230 | @@ -44,12 +42,47 @@ DEFINE_PER_CPU(struct list_head, dying_task_list); 231 | #define NR_SCHED_CLASS 5 232 | struct sched_class 
bak_class[NR_SCHED_CLASS]; 233 | 234 | +extern void __mod_set_rq_offline(struct rq*); 235 | +extern void __mod_set_rq_online(struct rq*); 236 | + 237 | +static void __orig_set_rq_online(struct rq *rq) 238 | +{ 239 | + if (!rq->online) { 240 | + const struct sched_class *class; 241 | + 242 | + cpumask_set_cpu(rq->cpu, rq->rd->online); 243 | + rq->online = 1; 244 | + 245 | + for_each_class(class) { 246 | + if (class->rq_online) 247 | + class->rq_online(rq); 248 | + } 249 | + } 250 | +} 251 | +/* DON'T MODIFY INLINE EXTERNAL FUNCTION __orig_set_rq_online */ 252 | + 253 | +static void __orig_set_rq_offline(struct rq *rq) 254 | +{ 255 | + if (rq->online) { 256 | + const struct sched_class *class; 257 | + 258 | + for_each_class(class) { 259 | + if (class->rq_offline) 260 | + class->rq_offline(rq); 261 | + } 262 | + 263 | + cpumask_clear_cpu(rq->cpu, rq->rd->online); 264 | + rq->online = 0; 265 | + } 266 | +} 267 | +/* DON'T MODIFY INLINE EXTERNAL FUNCTION __orig_set_rq_offline */ 268 | + 269 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) 270 | 271 | extern struct task_struct __orig_fake_task; 272 | 273 | #define pick_next_task_rq(class, rf) \ 274 | - (class)->pick_next_task(rq, &__orig_fake_task, &(rf)) 275 | + (class)->pick_next_task(rq, &__orig_fake_task) 276 | 277 | #else 278 | #define pick_next_task_rq(class, rf) \ 279 | @@ -83,15 +116,14 @@ void clear_sched_state(bool mod) 280 | { 281 | struct task_struct *g, *p; 282 | struct rq *rq = this_rq(); 283 | - struct rq_flags rf; 284 | - int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; 285 | + int queue_flags = DEQUEUE_SAVE; 286 | int cpu = smp_processor_id(); 287 | 288 | - rq_lock(rq, &rf); 289 | + raw_spin_lock(&rq->lock); 290 | 291 | if (mod) { 292 | update_rq_clock(rq); 293 | - set_rq_offline(rq); 294 | + __mod_set_rq_offline(rq); 295 | } else { 296 | __orig_update_rq_clock(rq); 297 | __orig_set_rq_offline(rq); 298 | @@ -120,7 +152,7 @@ void clear_sched_state(bool mod) 299 | break; 300 | 301 | 
for_each_class(class) { 302 | - next = pick_next_task_rq(class, rf); 303 | + next = pick_next_task_rq(class, NULL); 304 | if (next) { 305 | next->sched_class->put_prev_task(rq, next); 306 | next->sched_class->dequeue_task(rq, p, queue_flags); 307 | @@ -129,7 +161,7 @@ void clear_sched_state(bool mod) 308 | } 309 | } 310 | } 311 | - rq_unlock(rq, &rf); 312 | + raw_spin_unlock(&rq->lock); 313 | } 314 | 315 | void rebuild_sched_state(bool mod) 316 | @@ -137,15 +169,14 @@ void rebuild_sched_state(bool mod) 317 | struct task_struct *g, *p; 318 | struct task_group *tg; 319 | struct rq *rq = this_rq(); 320 | - struct rq_flags rf; 321 | - int queue_flags = ENQUEUE_RESTORE | ENQUEUE_MOVE | ENQUEUE_NOCLOCK; 322 | + int queue_flags = ENQUEUE_RESTORE; 323 | int cpu = smp_processor_id(); 324 | 325 | - rq_lock(rq, &rf); 326 | + raw_spin_lock(&rq->lock); 327 | 328 | if (mod) { 329 | update_rq_clock(rq); 330 | - set_rq_online(rq); 331 | + __mod_set_rq_online(rq); 332 | } else { 333 | __orig_update_rq_clock(rq); 334 | __orig_set_rq_online(rq); 335 | @@ -166,7 +197,7 @@ void rebuild_sched_state(bool mod) 336 | p->sched_class->enqueue_task(rq, p, queue_flags); 337 | list_del_init(&p->tasks); 338 | } 339 | - rq_unlock(rq, &rf); 340 | + raw_spin_unlock(&rq->lock); 341 | 342 | if (process_id[cpu]) 343 | return; 344 | @@ -176,12 +207,12 @@ void rebuild_sched_state(bool mod) 345 | if (tg == &root_task_group) 346 | continue; 347 | 348 | - if (tg->cfs_bandwidth.period_active) { 349 | + if (hrtimer_active(&tg->cfs_bandwidth.period_timer)) { 350 | hrtimer_restart(&tg->cfs_bandwidth.period_timer); 351 | hrtimer_restart(&tg->cfs_bandwidth.slack_timer); 352 | } 353 | #ifdef CONFIG_RT_GROUP_SCHED 354 | - if (tg->rt_bandwidth.rt_period_active) 355 | + if (hrtimer_active(&tg->rt_bandwidth.rt_period_timer)) 356 | hrtimer_restart(&tg->rt_bandwidth.rt_period_timer); 357 | #endif 358 | } 359 | diff --git a/kernel/sched/mod/stack_check.h b/kernel/sched/mod/stack_check.h 360 | index f83c463..2517230 
100644 361 | --- a/kernel/sched/mod/stack_check.h 362 | +++ b/kernel/sched/mod/stack_check.h 363 | @@ -24,7 +24,6 @@ static void stack_check_init(void) 364 | #undef EXPORT_PLUGSCHED 365 | #undef EXPORT_CALLBACK 366 | 367 | - vm_func_size[NR___schedule] = 0; 368 | addr_sort(vm_func_addr, vm_func_size, NR_INTERFACE_FN); 369 | 370 | #define EXPORT_CALLBACK(fn, ...) \ 371 | @@ -41,7 +40,6 @@ static void stack_check_init(void) 372 | #undef EXPORT_PLUGSCHED 373 | #undef EXPORT_CALLBACK 374 | 375 | - mod_func_size[NR___schedule] = 0; 376 | addr_sort(mod_func_addr, mod_func_size, NR_INTERFACE_FN); 377 | } 378 | 379 | @@ -143,11 +141,7 @@ static unsigned int get_stack_trace(struct task_struct *tsk, 380 | trace.max_entries = MAX_STACK_ENTRIES; 381 | trace.entries = store; 382 | 383 | - if (!try_get_task_stack(tsk)) 384 | - return 0; 385 | - 386 | save_stack(&trace, tsk); 387 | - put_task_stack(tsk); 388 | return trace.nr_entries; 389 | } 390 | #endif 391 | -------------------------------------------------------------------------------- /configs/4.19/boundary.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | mod_files: 5 | - kernel/sched/core.c 6 | - kernel/sched/fair.c 7 | - kernel/sched/topology.c 8 | - kernel/sched/idle.c 9 | - kernel/sched/rt.c 10 | - kernel/sched/cpudeadline.c 11 | - kernel/sched/deadline.c 12 | - kernel/sched/cpupri.c 13 | - kernel/sched/debug.c 14 | - kernel/sched/stats.c 15 | - kernel/sched/pelt.c 16 | - kernel/sched/stop_task.c 17 | - kernel/sched/cpudeadline.h 18 | - kernel/sched/cpupri.h 19 | - kernel/sched/pelt.h 20 | - kernel/sched/sched.h 21 | - kernel/sched/sched-pelt.h 22 | - kernel/sched/stats.h 23 | - kernel/sched/features.h 24 | interface_prefix: 25 | - __ia32_sys_ 26 | - __x64_sys_ 27 | - __x32_compat_ 28 | - __ia32_compat_sys_ 29 | - __arm64_sys_ 30 | - __arm64_compat_sys_ 31 | function: 32 | interface: 33 | - yield_to 34 | - wake_up_idle_ht 35 | - __balance_callback 36 | - do_set_cpus_allowed 37 | - set_user_nice 38 | - __sched_setscheduler 39 | - sched_setscheduler_nocheck 40 | - __set_cpus_allowed_ptr 41 | - schedule_tail 42 | - scheduler_tick 43 | - sched_fork 44 | - scheduler_ipi 45 | - __schedule 46 | - resched_cpu 47 | - task_rq_lock 48 | - get_nohz_timer_target 49 | - nohz_balance_enter_idle 50 | - sched_ttwu_pending 51 | - wake_up_if_idle 52 | - try_to_wake_up 53 | - wake_up_new_task 54 | - wake_up_nohz_cpu 55 | - rt_mutex_setprio 56 | - idle_cpu 57 | - partition_sched_domains 58 | - sched_set_stop_task 59 | - task_numa_group_id 60 | - should_numa_migrate_memory 61 | - task_numa_free 62 | - task_numa_fault 63 | - proc_sched_show_task 64 | - proc_sched_set_task 65 | - init_idle 66 | - release_task_reserve 67 | - init_task_reserve 68 | - in_sched_functions 69 | - task_sched_runtime 70 | - cpuset_cpumask_can_shrink 71 | - task_can_attach 72 | - normalize_rt_tasks 73 | - sysrq_sched_debug_show 74 | - sched_move_task 75 | - sched_group_set_shares 76 | - sched_offline_group 77 | - sched_destroy_group 78 | - sched_create_group 79 | - sched_online_group 80 | - 
id_nr_invalid 81 | - finish_task_switch 82 | - sched_exec 83 | - yield 84 | global_var: 85 | extra_public: 86 | - cpu_idle_force_poll 87 | - task_group_cache 88 | - preempt_notifier_key 89 | - tick_work_cpu 90 | - num_cpus_frozen 91 | - __cfs_bandwidth_used 92 | - max_load_balance_interval 93 | - sched_domains_curr_level 94 | - sched_domains_numa_masks 95 | - default_relax_domain_level 96 | - sched_domain_topology 97 | - sched_domains_numa_distance 98 | - sched_domains_numa_levels 99 | - fallback_doms 100 | - ndoms_cur 101 | - doms_cur 102 | - dattr_cur 103 | - default_topology 104 | - local_cpu_mask 105 | - rt_pull_head 106 | - rt_push_head 107 | - dl_push_head 108 | - local_cpu_mask_dl 109 | - dl_pull_head 110 | - sd_sysctl_cpus 111 | - nohz 112 | - cfs_constraints_mutex 113 | - shares_mutex 114 | force_private: 115 | - sysctl_sched_features 116 | - sched_feat_keys 117 | - stop_sched_class 118 | - dl_sched_class 119 | - rt_sched_class 120 | - fair_sched_class 121 | - idle_sched_class 122 | sidecar: 123 | -------------------------------------------------------------------------------- /configs/4.19/dynamic_springboard.patch: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2022 Alibaba Group Holding Limited. 
2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | From d627ff8250fdb7a092a75729b02c90cc75191640 Mon Sep 17 00:00:00 2001 5 | From: Yihao Wu 6 | Date: Sun, 28 Mar 2021 01:59:28 +0800 7 | Subject: [PATCH] Dynamic __schedule springboard 8 | 9 | With this springboard, we don't have to customize the kernel's __schedule 10 | 11 | Signed-off-by: Yihao Wu 12 | --- 13 | kernel/sched/mod/core.c | 12 +++++++++++- 14 | 1 file changed, 10 insertions(+), 1 deletion(-) 15 | 16 | diff --git a/kernel/sched/mod/core.c b/kernel/sched/mod/core.c 17 | index 5ec2ca5..3b8925a 100644 18 | --- a/kernel/sched/mod/core.c 19 | +++ b/kernel/sched/mod/core.c 20 | @@ -2590,6 +2590,8 @@ context_switch(struct rq *rq, struct task_struct *prev, 21 | /* 22 | * context_switch - switch to the new MM and the new thread's register state. 23 | */ 24 | +extern unsigned long sched_springboard; 25 | + 26 | static __always_inline struct rq * 27 | context_switch(struct rq *rq, struct task_struct *prev, 28 | struct task_struct *next, struct rq_flags *rf) 29 | @@ -2634,3 +2636,37 @@ context_switch(struct rq *rq, struct task_struct *prev, 30 | 31 | /* Here we just switch the register state and the stack. 
*/ 32 | - switch_to(prev, next, prev); 33 | +#ifdef CONFIG_X86_64 34 | + prepare_switch_to(next); 35 | + __asm__("add %0,%%rsp\n\t" 36 | + "jmp *%1\n\t" 37 | + : 38 | + :"i"(STACKSIZE_MOD - STACKSIZE_VMLINUX), "r"(sched_springboard), "D"(prev), "S"(next) 39 | + :"rbx","r12","r13","r14","r15" 40 | + ); 41 | +#endif 42 | +#ifdef CONFIG_ARM64 43 | + __asm__( 44 | + "ldp x29,x30,[sp, #0]\n\t" 45 | + "ldp x19,x20,[sp, #16]\n\t" 46 | + "ldp x21,x22,[sp, #32]\n\t" 47 | + "ldp x23,x24,[sp, #48]\n\t" 48 | + "ldp x25,x26,[sp, #64]\n\t" 49 | + "ldp x27,x28,[sp, #80]\n\t" 50 | + "sub sp,x29,%0\n\t" 51 | + "stp x29,x30,[sp, #0]\n\t" 52 | + "stp x19,x20,[sp, #16]\n\t" 53 | + "stp x21,x22,[sp, #32]\n\t" 54 | + "stp x23,x24,[sp, #48]\n\t" 55 | + "stp x25,x26,[sp, #64]\n\t" 56 | + "stp x27,x28,[sp, #80]\n\t" 57 | + "mov x0,%2\n\t" 58 | + "mov x1,%3\n\t" 59 | + "br %1" 60 | + : 61 | + :"i"(STACKSIZE_VMLINUX), "r"(sched_springboard),"r"(prev),"r"(next) 62 | + :"x19","x20","x21","x22","x23","x24","x25", 63 | + "x26","x27","x28","x30","x0","x1" 64 | + ); 65 | +#endif 66 | + 67 | + /* Below will not be executed, we'll return to vmlinux here */ 68 | @@ -3178,6 +3190,7 @@ again: 69 | * 70 | * WARNING: must be called with preemption disabled! 71 | */ 72 | +__attribute__ ((optimize("no-omit-frame-pointer"))) 73 | static void __sched notrace __used __schedule(bool preempt) 74 | { 75 | struct task_struct *prev, *next; 76 | 2.20.1.2432.ga663e714 77 | 78 | -------------------------------------------------------------------------------- /configs/4.19/post_extract.patch: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2022 Alibaba Group Holding Limited. 
2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | diff --git a/include/linux/preempt.h b/include/linux/preempt.h 5 | index c01813c..574977b 100644 6 | --- a/include/linux/preempt.h 7 | +++ b/include/linux/preempt.h 8 | @@ -255,7 +255,7 @@ do { \ 9 | 10 | #endif /* CONFIG_PREEMPT_COUNT */ 11 | 12 | -#ifdef MODULE 13 | +#if 0 14 | /* 15 | * Modules have no business playing preemption tricks. 16 | */ 17 | diff --git a/kernel/sched/mod/core.c b/kernel/sched/mod/core.c 18 | index 2dcecfd..0e37f1f 100644 19 | --- a/kernel/sched/mod/core.c 20 | +++ b/kernel/sched/mod/core.c 21 | @@ -20,7 +20,6 @@ 22 | 23 | #include "pelt.h" 24 | 25 | -#define CREATE_TRACE_POINTS 26 | #include 27 | 28 | DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 29 | @@ -5296,9 +5295,13 @@ extern int sched_cpu_dying (unsigned int); 30 | } 31 | #endif /* CONFIG_SMP */ 32 | 33 | +extern char __module_sched_start[], __module_sched_end[]; 34 | + 35 | int in_sched_functions(unsigned long addr) 36 | { 37 | return in_lock_functions(addr) || 38 | + (addr >= (unsigned long)__module_sched_start 39 | + && addr < (unsigned long)__module_sched_end) || 40 | (addr >= (unsigned long)__sched_text_start 41 | && addr < (unsigned long)__sched_text_end); 42 | } 43 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h 44 | index 3581697..8f4f532 100644 45 | --- a/kernel/sched/mod/sched.h 46 | +++ b/kernel/sched/mod/sched.h 47 | @@ -62,6 +62,9 @@ 48 | #include 49 | #include 50 | #include 51 | + 52 | +#undef CONFIG_FTRACE_SYSCALLS 53 | + 54 | #include 55 | #include 56 | #include 57 | diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h 58 | index 2dcecfd..0e37f1f 100644 59 | --- a/arch/arm64/include/asm/stackprotector.h 60 | +++ b/arch/arm64/include/asm/stackprotector.h 61 | @@ -16,7 +16,9 @@ 62 | #include 63 | #include 64 | 65 | +#ifndef CONFIG_STACKPROTECTOR_PER_TASK 66 | extern unsigned long __stack_chk_guard; 67 | +#endif 68 | 69 | /* 70 | * Initialize 
the stackprotector canary value. 71 | @@ -34,8 +36,10 @@ 72 | canary &= CANARY_MASK; 73 | 74 | current->stack_canary = canary; 75 | +#ifndef CONFIG_STACKPROTECTOR_PER_TASK 76 | if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK)) 77 | __stack_chk_guard = current->stack_canary; 78 | +#endif 79 | } 80 | 81 | #endif /* _ASM_STACKPROTECTOR_H */ 82 | -------------------------------------------------------------------------------- /configs/5.10/boundary.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | mod_files: 5 | - kernel/sched/core.c 6 | - kernel/sched/fair.c 7 | - kernel/sched/topology.c 8 | - kernel/sched/idle.c 9 | - kernel/sched/rt.c 10 | - kernel/sched/cpudeadline.c 11 | - kernel/sched/deadline.c 12 | - kernel/sched/cpupri.c 13 | - kernel/sched/debug.c 14 | - kernel/sched/stats.c 15 | - kernel/sched/pelt.c 16 | - kernel/sched/stop_task.c 17 | - kernel/sched/cpudeadline.h 18 | - kernel/sched/cpupri.h 19 | - kernel/sched/pelt.h 20 | - kernel/sched/sched.h 21 | - kernel/sched/sched-pelt.h 22 | - kernel/sched/stats.h 23 | - kernel/sched/features.h 24 | interface_prefix: 25 | - __ia32_sys_ 26 | - __x64_sys_ 27 | - __x32_compat_ 28 | - __ia32_compat_sys_ 29 | - __arm64_sys_ 30 | - __arm64_compat_sys_ 31 | function: 32 | interface: 33 | - yield_to 34 | - wake_up_idle_ht 35 | - send_call_function_single_ipi 36 | - do_set_cpus_allowed 37 | - set_user_nice 38 | - __sched_setscheduler 39 | - sched_setscheduler_nocheck 40 | - __set_cpus_allowed_ptr 41 | - schedule_tail 42 | - scheduler_tick 43 | - sched_fork 44 | - sched_post_fork 45 | - __schedule 46 | - resched_cpu 47 | - task_rq_lock 48 | - get_nohz_timer_target 49 | - nohz_balance_enter_idle 50 | - sched_ttwu_pending 51 | - wake_up_if_idle 52 | - try_to_wake_up 53 | - wake_up_new_task 54 | - wake_up_nohz_cpu 55 | - rt_mutex_setprio 56 | - idle_cpu 57 | - 
partition_sched_domains_locked 58 | - sched_set_stop_task 59 | - task_numa_group_id 60 | - should_numa_migrate_memory 61 | - task_numa_free 62 | - task_numa_fault 63 | - proc_sched_show_task 64 | - proc_sched_set_task 65 | - init_task_reserve 66 | - in_sched_functions 67 | - task_sched_runtime 68 | - cpuset_cpumask_can_shrink 69 | - task_can_attach 70 | - normalize_rt_tasks 71 | - sysrq_sched_debug_show 72 | - sched_move_task 73 | - sched_group_set_shares 74 | - sched_offline_group 75 | - sched_destroy_group 76 | - sched_create_group 77 | - sched_online_group 78 | - id_nr_invalid 79 | - finish_task_switch 80 | - sched_exec 81 | - yield 82 | global_var: 83 | extra_public: 84 | - cpu_idle_force_poll 85 | - task_group_cache 86 | - preempt_notifier_key 87 | - tick_work_cpu 88 | - num_cpus_frozen 89 | - __cfs_bandwidth_used 90 | - max_load_balance_interval 91 | - sched_domains_curr_level 92 | - sched_domains_numa_masks 93 | - default_relax_domain_level 94 | - sched_domain_topology 95 | - sched_domains_numa_distance 96 | - sched_domains_numa_levels 97 | - fallback_doms 98 | - ndoms_cur 99 | - doms_cur 100 | - dattr_cur 101 | - default_topology 102 | - local_cpu_mask 103 | - rt_pull_head 104 | - rt_push_head 105 | - dl_push_head 106 | - local_cpu_mask_dl 107 | - dl_pull_head 108 | - sd_sysctl_cpus 109 | - nohz 110 | - cfs_constraints_mutex 111 | - shares_mutex 112 | - normalized_sysctl_sched_latency 113 | - normalized_sysctl_sched_min_granularity 114 | - normalized_sysctl_sched_wakeup_granularity 115 | - sched_domains_tmpmask 116 | - sched_domains_tmpmask2 117 | force_private: 118 | - sysctl_sched_features 119 | - sched_feat_keys 120 | - stop_sched_class 121 | - dl_sched_class 122 | - rt_sched_class 123 | - fair_sched_class 124 | - idle_sched_class 125 | sidecar: 126 | -------------------------------------------------------------------------------- /configs/5.10/dynamic_springboard.patch: -------------------------------------------------------------------------------- 1 | 
// Copyright 2019-2022 Alibaba Group Holding Limited. 2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | From cf0e314e367f00a4beb9e38b2e69feca0138e59c Mon Sep 17 00:00:00 2001 5 | From: Erwei Deng 6 | Date: Thu, 28 Jul 2022 21:32:14 +0800 7 | Subject: [PATCH] Dynamic __schedule springboard 8 | 9 | With this springboard, we don't have to customize the kernel's __schedule. 10 | 11 | Co-developed-by: Yihao Wu 12 | Signed-off-by: Erwei Deng 13 | --- 14 | kernel/sched/mod/core.c | 38 +++++++++++++++++++++++++++++++++++++- 15 | 1 file changed, 37 insertions(+), 1 deletion(-) 16 | 17 | diff --git a/kernel/sched/mod/core.c b/kernel/sched/mod/core.c 18 | index eee32739c..26ccde413 100644 19 | --- a/kernel/sched/mod/core.c 20 | +++ b/kernel/sched/mod/core.c 21 | @@ -3417,6 +3417,8 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) 22 | /* 23 | * context_switch - switch to the new MM and the new thread's register state. 24 | */ 25 | +extern unsigned long sched_springboard; 26 | + 27 | static __always_inline struct rq * 28 | context_switch(struct rq *rq, struct task_struct *prev, 29 | struct task_struct *next, struct rq_flags *rf) 30 | @@ -3469,7 +3471,40 @@ context_switch(struct rq *rq, struct task_struct *prev, 31 | prepare_lock_switch(rq, next, rf); 32 | 33 | /* Here we just switch the register state and the stack. 
*/ 34 | - switch_to(prev, next, prev); 35 | +#ifdef CONFIG_X86_64 36 | + __asm__("add %0,%%rsp\n\t" 37 | + "jmp *%1" 38 | + : 39 | + :"i"(STACKSIZE_MOD - STACKSIZE_VMLINUX), "r"(sched_springboard), "D"(prev), "S"(next) 40 | + :"rbx","r12","r13","r14","r15" 41 | + ); 42 | +#endif 43 | +#ifdef CONFIG_ARM64 44 | + __asm__( 45 | + "ldp x29,x30,[sp, #0]\n\t" 46 | + "ldp x19,x20,[sp, #16]\n\t" 47 | + "ldp x21,x22,[sp, #32]\n\t" 48 | + "ldp x23,x24,[sp, #48]\n\t" 49 | + "ldp x25,x26,[sp, #64]\n\t" 50 | + "ldp x27,x28,[sp, #80]\n\t" 51 | + "sub sp,x29,%0\n\t" 52 | + "stp x29,x30,[sp, #0]\n\t" 53 | + "stp x19,x20,[sp, #16]\n\t" 54 | + "stp x21,x22,[sp, #32]\n\t" 55 | + "stp x23,x24,[sp, #48]\n\t" 56 | + "stp x25,x26,[sp, #64]\n\t" 57 | + "stp x27,x28,[sp, #80]\n\t" 58 | + "mov x0,%2\n\t" 59 | + "mov x1,%3\n\t" 60 | + "br %1" 61 | + : 62 | + :"i"(STACKSIZE_VMLINUX), "r"(sched_springboard),"r"(prev),"r"(next) 63 | + :"x19","x20","x21","x22","x23","x24","x25", 64 | + "x26","x27","x28","x30","x0","x1" 65 | + ); 66 | +#endif 67 | + 68 | + /* Below will not be executed, we'll return to vmlinux here */ 69 | barrier(); 70 | 71 | return finish_task_switch(prev); 72 | -- 73 | 2.27.0 74 | 75 | -------------------------------------------------------------------------------- /configs/5.10/dynamic_springboard_2.patch: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2022 Alibaba Group Holding Limited. 2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | From cf0e314e367f00a4beb9e38b2e69feca0138e59c Mon Sep 17 00:00:00 2001 5 | From: Erwei Deng 6 | Date: Thu, 28 Jul 2022 21:32:14 +0800 7 | Subject: [PATCH] Dynamic __schedule springboard 8 | 9 | With this springboard, we don't have to customize the kernel's __schedule. 
10 | 11 | Co-developed-by: Yihao Wu 12 | Signed-off-by: Erwei Deng 13 | --- 14 | kernel/sched/mod/core.c | 38 +++++++++++++++++++++++++++++++++++++- 15 | 1 file changed, 37 insertions(+), 1 deletion(-) 16 | 17 | diff --git a/kernel/sched/mod/core.c b/kernel/sched/mod/core.c 18 | index eee32739c..26ccde413 100644 19 | --- a/kernel/sched/mod/core.c 20 | +++ b/kernel/sched/mod/core.c 21 | @@ -4066,6 +4101,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) 22 | * 23 | * WARNING: must be called with preemption disabled! 24 | */ 25 | +__attribute__ ((optimize("no-omit-frame-pointer"))) 26 | static void __sched notrace __used __schedule(bool preempt) 27 | { 28 | struct task_struct *prev, *next; 29 | -- 30 | 2.27.0 31 | 32 | -------------------------------------------------------------------------------- /configs/5.10/post_extract.patch: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2023 Alibaba Group Holding Limited. 2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h 5 | index 7263e0bac..e01b6fc93 100644 6 | --- a/arch/arm64/include/asm/stackprotector.h 7 | +++ b/arch/arm64/include/asm/stackprotector.h 8 | @@ -17,7 +17,9 @@ 9 | #include 10 | #include 11 | 12 | +#ifndef CONFIG_STACKPROTECTOR_PER_TASK 13 | extern unsigned long __stack_chk_guard; 14 | +#endif 15 | 16 | /* 17 | * Initialize the stackprotector canary value. 
18 | @@ -36,8 +38,10 @@ static __always_inline void boot_init_stack_canary(void) 19 | canary &= CANARY_MASK; 20 | 21 | current->stack_canary = canary; 22 | +#ifndef CONFIG_STACKPROTECTOR_PER_TASK 23 | if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK)) 24 | __stack_chk_guard = current->stack_canary; 25 | +#endif 26 | #endif 27 | ptrauth_thread_init_kernel(current); 28 | ptrauth_thread_switch_kernel(current); 29 | diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h 30 | index 8c87a2e0b..ac5719391 100644 31 | --- a/arch/x86/include/asm/tlbflush.h 32 | +++ b/arch/x86/include/asm/tlbflush.h 33 | @@ -52,7 +52,7 @@ static inline void cr4_clear_bits(unsigned long mask) 34 | local_irq_restore(flags); 35 | } 36 | 37 | -#ifndef MODULE 38 | +#if 1 39 | /* 40 | * 6 because 6 should be plenty and struct tlb_state will fit in two cache 41 | * lines. 42 | diff --git a/include/linux/preempt.h b/include/linux/preempt.h 43 | index 7d9c1c0e1..d63971e5a 100644 44 | --- a/include/linux/preempt.h 45 | +++ b/include/linux/preempt.h 46 | @@ -252,7 +252,7 @@ do { \ 47 | 48 | #endif /* CONFIG_PREEMPT_COUNT */ 49 | 50 | -#ifdef MODULE 51 | +#if 0 52 | /* 53 | * Modules have no business playing preemption tricks. 
54 | */ 55 | diff --git a/kernel/sched/mod/core.c b/kernel/sched/mod/core.c 56 | index e977763c9..d665eac45 100644 57 | --- a/kernel/sched/mod/core.c 58 | +++ b/kernel/sched/mod/core.c 59 | @@ -6,9 +6,7 @@ 60 | * 61 | * Copyright (C) 1991-2002 Linus Torvalds 62 | */ 63 | -#define CREATE_TRACE_POINTS 64 | #include 65 | -#undef CREATE_TRACE_POINTS 66 | 67 | #include "sched.h" 68 | 69 | @@ -5965,9 +5963,13 @@ extern int sched_cpu_dying(unsigned int); 70 | } 71 | #endif /* CONFIG_SMP */ 72 | 73 | +extern char __module_sched_start[], __module_sched_end[]; 74 | + 75 | int in_sched_functions(unsigned long addr) 76 | { 77 | return in_lock_functions(addr) || 78 | + (addr >= (unsigned long)__module_sched_start 79 | + && addr < (unsigned long)__module_sched_end) || 80 | (addr >= (unsigned long)__sched_text_start 81 | && addr < (unsigned long)__sched_text_end); 82 | } 83 | diff --git a/kernel/sched/mod/sched.h b/kernel/sched/mod/sched.h 84 | index d251ce5c8..1402ceb9f 100644 85 | --- a/kernel/sched/mod/sched.h 86 | +++ b/kernel/sched/mod/sched.h 87 | @@ -62,6 +62,9 @@ 88 | #include 89 | #include 90 | #include 91 | + 92 | +#undef CONFIG_FTRACE_SYSCALLS 93 | + 94 | #include 95 | #include 96 | #include 97 | -------------------------------------------------------------------------------- /docs/Advanced-Features.md: -------------------------------------------------------------------------------- 1 | # Sidecar 2 | Since Linux kernel scheduler is tightly coupled with other subsystems, new features usually come with small modifications to other subsystems. There are some examples. 3 | 4 | - If you want to add some metrics for group-scheduling, you're likely to modify cpuacct.c as well. 5 | - If you want to modify scheduling policies for kernel threads, you're likely to modify kthread.c as well. 6 | - If you want to modify CPU affinity related policies, you may need to modify cpuset.c as well. 
7 | 8 | Just like kpatch or livepatch, sidecar provides a way to live upgrade code at function granularity. With sidecar, developers can modify functions outside of scheduler boundary, such as cpuacct, kthread and cpuset. Sidecar reuses infrastructures of plugsched, so development with sidecar is almost as easy as the core functionality of plugsched. 9 | 10 | ## How it works 11 | Here is an example of how to use sidecar. If developers want to live upgrade function cpuusage_write() and cpuacct_free() in kernel/sched/cpuacct.c, they only need to configure boundary.yaml as below. 12 | 13 | ``` 14 | sidecar: !!pairs 15 | - cpuusage_write: kernel/sched/cpuacct.c 16 | - cpuacct_free: kernel/sched/cpuacct.c 17 | ``` 18 | 19 | After the configuration is complete, you can run init operation. See [Quick Start](../README.md#quick-start). Plugsched will generate a new cpuacct.c file under kernel/sched/mod/ directory automatically. Then you can change code of function cpuusage_write() and cpuacct_free() in new cpuacct.c freely. To make code change easier, some handy mechanisms are provided by plugsched. 20 | 21 | 1. The new cpuacct_free() can reference any functions or variables directly, plugsched will help to fix symbol location automatically; 22 | 2. Inline functions, data struct definition, header file including are reserved in new cpuacct.c, so the new cpuacct_free() can use them directly; 23 | 3. All variable definitions are translated into declarations, so the new cpuacct_free() can share data state with the running system. 24 | 25 | Once the code changes are complete, you can run build operation. Plugsched then compiles all scheduler files and sidecar files, and links the compiled objects into the final module binary file. When you install the scheduler module on the running system, plugsched treats all sidecar functions the same as the interface functions of scheduler.
In other words, plugsched does all steps for sidecar functions mentioned in [Compile and install the scheduler](../README.md#compile-and-install-the-scheduler) except scheduler state rebuild. 26 | -------------------------------------------------------------------------------- /docs/Support-various-Linux-distros.md: -------------------------------------------------------------------------------- 1 | Plugsched supports various Linux distros through config files. Config files are consumed by [Code Extraction](../README.md#boundary-extraction) of plugsched, to generate various scheduler module source code. 2 | 3 | The default config file is designed for [Anolis OS 7 ANCK](https://openanolis.cn/), so plugsched can work on it out-of-the-box. And if you want the scheduler module to be plugged into other Linux distros, you should define the scheduler boundary properly first. 4 | 5 | **========================= NOTE =========================** 6 | 7 | Prior knowledge about [scheduler boundary](../README.md#how-it-works) is required before reading this chapter. 8 | 9 | # Where to add config for a given linux kernel 10 | Config files for Linux kernels are put in `configs` directory. And they're organized in a flat list fashion, 11 | 12 | configs 13 | ├── 3.10 14 | ├── 4.19 15 | ├── 4.19.91 16 | └── your-linux-kernel-version 17 | 18 | The init stage is usually triggered with the following command, please refer to [Quick Start](../README.md#quick-start). 19 | 20 | ```bash 21 | plugsched-cli init $version $kernel_src $sched_mod 22 | ``` 23 | 24 | plugsched then searches in the `configs/` directory, by the `version` given in the command line. Instead of full-text matching search, plugsched does Longest Common Prefix (LCP) search. 25 | 26 | For the example above, 4.19.91-1.x86_64 matches 4.19.91. And 4.19.64-5.x86_64 matches 4.19.
And if you want to add a config for kernel 4.19.91-12.x86_64, you can create a folder such as 4.19.91-12, 27 | 28 | configs 29 | ├── 3.10 30 | ├── 4.19 31 | ├── 4.19.91 32 | └── 4.19.91-12 (*) 33 | 34 | And because 4.19.91-12.x86_64 shares the longest common prefix length with 4.19.91-12, which is 10, plugsched will choose `configs/4.19.91-12` for the kernel 4.19.91-12.x86_64. 35 | 36 | And this is how plugsched does config matching, and how to add config for your own kernel. 37 | 38 | # How to write the config file 39 | boundary.yaml in configs defines a scheduler boundary for a specific linux kernel, 40 | 41 | configs 42 | └── 4.19.91 43 | └── boundary.yaml 44 | 45 | It's structured as the yaml file below. 46 | 47 | (Note that, interface, insider and outsider functions mentioned below are illustrated in [How it works](../README.md#how-it-works). Please refer to it first.) 48 | 49 | ```yaml 50 | # List files in kernel/sched, but only those you concern. 51 | # And they are all the files that will be extracted (See How It Works in README) 52 | mod_files: 53 | - ** 54 | # Usually syscall prefixes. *Don't* modify this unless you know what you're doing. 55 | interface_prefix: 56 | - ** 57 | function: 58 | # List interface functions. Insiders and outsiders will be calculated accordingly. 59 | interface: 60 | - ** 61 | global_var: 62 | # Static variables are private by default. Announce them as public explicitly here. 63 | extra_public: 64 | - ** 65 | # Global variables are public by default. Announce them as private explicitly here. 66 | force_private: 67 | - ** 68 | ``` 69 | 70 | It's recommended to take the default config file as a template, and do your customization over it. Then when you start working on defining scheduler boundary, you should ask yourself several questions, 71 | 72 | - **Does my kernel have some different files from kernels listed in configs/ directory?** 73 | For example, core_sched.c was added to the kernel since version 5.14.
And apparently, it is one scheduler file. You can simply add core_sched.c to `mod_files` to make it part of the scheduler module. 74 | 75 | - **Do I want to inherit some variables from the original kernel?** 76 | For global variables, this is useful in some cases. When you want the scheduler to be *clean*, you don't want the scheduler module to inherit some state (meaning variables) from the original kernel. In this case, you should add these variable names to `force_private`. 77 | On the contrary, static variables are all private by default (This is a flaw that needs to be fixed). However, we usually want to inherit all variables, so all static variables had better be added to `extra_public`. 78 | 79 | - **Which functions must be modifiable in the scheduler module?** 80 | This will guide you in tuning interface functions, because the most important rule for verifying the correctness of the interface function list is: do they cover all functions that you want to modify? 81 | You will go through a few iterations of the workflow below to get a satisfactory interface function list. 82 | 83 | **The workflow to tune boundary.yaml** 84 | 85 | Copy from anolis's template 86 | | 87 | v 88 | modify boundary.yaml <----------------------------------------+ 89 | | | 90 | v | 91 | plugsched init | 92 | | | 93 | v | 94 | Check working/boundary_extract.yaml and kernel/sched/mod/ | 95 | | | 96 | v | 97 | Get the satisfied boundary result -----Y--------------------------)----> Done 98 | |N | 99 | v | 100 | Locate those unexpected sched_outsider/private variable | 101 | | | 102 | v | 103 | Decide why they become so -----------------------------------------+ 104 | 105 | The basic advice for you to define sched boundary 106 | - Functions called by many other functions in other subsystems should be `interface`. 107 | - Variables should all be defined as `public`, unless you know what you're doing.
108 | 109 | # What are other files in config directory 110 | 111 | - `dynamic_springboard.patch` Internal implementation. No need to concern. 112 | - `pre_extract.patch` If your kernel has some strange non-standard code style, plugsched might be confused. This patch is used to refactor code styles. This file mainly serves as workarounds to strange bugs. 113 | - `post_extract.patch` Internal implementation. No need to concern 114 | 115 | dynamic_springboard.patch and post_extract.patch are mentioned as "Internal implementation". But sometimes, due to different kernel code bases, they need to be adjusted to make the patch utility work. 116 | -------------------------------------------------------------------------------- /docs/Working-without-rpm-or-srpm.md: -------------------------------------------------------------------------------- 1 | It's recommended to work with `kernel-devel.rpm`, `kernel-debuginfo.rpm` and `kernel.srpm`. But if you don't have some of them, don't worry. There are some alternative workflows in the table below. Choose among them according to what resource (rpm, srpm, src code) you have. 2 | 3 | | kernel-devel.rpm| kernel-debuginfo.rpm| kernel.srpm|kernel src| | 4 | |---- | -----------------|-----------------------|------------|-------------------| 5 | |✅ | ✅ | ✅ | |[→ Standard Scenario](../README.md#quick-start) | 6 | | ✅ | ✅ | ❌ | ✅ |[→ Scenario 2](#scenario-2---with-rpm-src-code) | 7 | |❌ |❌ |❌ | ✅ |[→ Scenario 3](#scenario-3---with-src-code) | 8 | 9 | # Scenario 2 - With rpm, src code 10 | Some distros don't provide `kernel.srpm` package. You may work with the combination of `kernel-devel.rpm`, `kernel-debuginfo.rpm` and `kernel src`. 11 | 12 | 1. Log into the cloud server, and install some necessary basic software packages. 13 | ```shell 14 | # yum install anolis-repos -y 15 | # yum install podman git kernel-debuginfo-$(uname -r) kernel-devel-$(uname -r) --enablerepo=Plus-debuginfo --enablerepo=Plus -y 16 | ``` 17 | 2.
Create a temporary working directory and download the source code of the kernel. 18 | ```shell 19 | # mkdir /tmp/work 20 | # uname -r 21 | 4.19.91-25.2.an7.x86_64 22 | # cd /tmp/work 23 | # git clone --depth 1 --branch 4.19.91-25.2 https://gitee.com/anolis/cloud-kernel.git kernel 24 | ``` 25 | 3. Start up the container, and spawn a shell. 26 | ```shell 27 | # podman run -itd --name=plugsched -v /tmp/work:/tmp/work -v /usr/src/kernels:/usr/src/kernels -v /usr/lib/debug/lib/modules:/usr/lib/debug/lib/modules docker.io/plugsched/plugsched-sdk 28 | # podman exec -it plugsched bash 29 | # cd /tmp/work 30 | ``` 31 | 4. Boundary analysis and extraction. 32 | ```shell 33 | # plugsched-cli init 4.19.91-25.2.an7.x86_64 ./kernel ./scheduler 34 | ``` 35 | 5. Do some modifications and build scheduler rpm. (Refer to [Quick Start](../README.md#quick-start)) 36 | 6. Copy the scheduler rpm to the host, exit the container, and then install scheduler. (Refer to [Quick Start](../README.md#quick-start)) 37 | 38 | # Scenario 3 - With src code 39 | This usually means you are experimenting on your own development kernel. You have only `kernel src` at hand. 40 | 41 | 1. Log into the cloud server, and install some necessary basic software packages. 42 | ```shell 43 | # yum install anolis-repos -y 44 | ``` 45 | 2. Create a temporary working directory and download the source code of the kernel. 46 | ```shell 47 | # mkdir /tmp/work 48 | # uname -r 49 | 4.19.91-25.2.an7.x86_64 50 | # cd /tmp/work 51 | # git clone --depth 1 --branch 4.19.91-25.2 git@gitee.com:anolis/cloud-kernel.git kernel 52 | ``` 53 | 3. Start up the container, and spawn a shell. 54 | ```shell 55 | # podman run -itd --name=plugsched -v /tmp/work:/tmp/work docker.io/plugsched/plugsched-sdk 56 | # podman exec -it plugsched bash 57 | # cd /tmp/work 58 | ``` 59 | 4.
Build the kernel 60 | ```shell 61 | # pushd kernel 62 | # cp arch/x86/configs/anolis_defconfig .config 63 | # sed 's/EXTRAVERSION =/EXTRAVERSION = -25.2.an7.x86_64/g' -i Makefile 64 | # make -j16 65 | # popd 66 | ``` 67 | 5. Boundary analysis and extraction. 68 | ```shell 69 | # plugsched-cli dev_init /tmp/work/kernel ./scheduler 70 | ``` 71 | 6. Do some modifications and build scheduler rpm. (Refer to [Quick Start](../README.md#quick-start)) 72 | 7. Copy the scheduler rpm to the host, exit the container, and then install scheduler. (Refer to [Quick Start](../README.md#quick-start)) 73 | 8. Install the new kernel and reboot into it. 74 | ```shell 75 | make install 76 | reboot 77 | ``` 78 | -------------------------------------------------------------------------------- /examples/rpm_test_example.diff: -------------------------------------------------------------------------------- 1 | diff --git a/scheduler/kernel/sched/mod/core.c b/scheduler/kernel/sched/mod/core.c 2 | index 9f16b72..21262fd 100644 3 | --- a/scheduler/kernel/sched/mod/core.c 4 | +++ b/scheduler/kernel/sched/mod/core.c 5 | @@ -3234,6 +3234,9 @@ static void __sched notrace __schedule(bool preempt) 6 | struct rq *rq; 7 | int cpu; 8 | 9 | + if (sched_feat(PLUGSCHED_TEST)) 10 | + printk_once("I am the new scheduler: __schedule\n"); 11 | + 12 | cpu = smp_processor_id(); 13 | rq = cpu_rq(cpu); 14 | prev = rq->curr; 15 | diff --git a/scheduler/kernel/sched/mod/features.h b/scheduler/kernel/sched/mod/features.h 16 | index 4c40fac..8d1eafd 100644 17 | --- a/scheduler/kernel/sched/mod/features.h 18 | +++ b/scheduler/kernel/sched/mod/features.h 19 | @@ -1,4 +1,6 @@ 20 | /* SPDX-License-Identifier: GPL-2.0 */ 21 | +SCHED_FEAT(PLUGSCHED_TEST, false) 22 | + 23 | /* 24 | * Only give sleepers 50% of their service deficit.
This allows 25 | * them to run sooner, but does not allow tons of sleepers to 26 | -------------------------------------------------------------------------------- /module-contrib/hotfix_conflict_check: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | # input file format: 6 | # function sympos module 7 | # 8 | # valid e.g: 9 | # pick_next_task 1 vmlinux 10 | # ext4_free_blocks 2 ext4 11 | 12 | if [ "$1" == "" ]; then 13 | echo Error: please input files! 14 | exit 1 15 | elif [ ! -e "$1" ]; then 16 | echo Error: input file is not exist! 17 | exit 1 18 | else 19 | tainted_file=$1 20 | fi 21 | 22 | func_list=$(mktemp) 23 | 24 | # Some hotfix do not provide the sympos of patched function, so use a new set 25 | func_list_nosympos=$(mktemp) 26 | 27 | trap "rm -r $func_list $func_list_nosympos" INT HUP QUIT ABRT ALRM TERM EXIT # ensures it is deleted when script ends 28 | 29 | # deal with kpatch prev-0.4 ABI 30 | find /sys/kernel/kpatch/patches/*/functions -type d -not -path "*/functions" 2>/dev/null | while read path ; do 31 | # /sys/kernel/kpatch/patches/kpatch_D689377/functions/blk_mq_update_queue_map -> blk_mq_update_queue_map 32 | func="${path##*/}" 33 | echo "$func" >> $func_list_nosympos 34 | done 35 | 36 | # deal with kpatch 0.4 ABI, livepatch, plugsched and plugbpf 37 | for subdir in kpatch livepatch plugsched plugbpf; do 38 | find /sys/kernel/$subdir/*/ -type d -path "*,[0-9]" 2>/dev/null | while read path ; do 39 | # /sys/kernel/kpatch/kpatch_5135717/vmlinux/kernfs_find_ns,1 -> kernfs_find_ns,1 40 | func_ver=`echo $path | awk -F / -e '{print $NF}'` 41 | mod=`echo $path | awk -F / -e '{print $(NF-1)}'` 42 | func=`echo $func_ver | awk -F , '{print $1}'` 43 | ver=`echo $func_ver | awk -F , '{print $2}'` 44 | echo "$func $ver $mod" >> $func_list 45 | done 46 | done 47 | 48 | # deal with manual hotfix 
that has sys directory entry 49 | find /sys/kernel/manual_*/ -type d -not -path "*manual_*/" 2>/dev/null | while read path ; do 50 | func="${path##*/}" 51 | echo "$func" >> $func_list_nosympos 52 | done 53 | 54 | # deal with manual hotfix that does not have sys directory entry, i.e, the early days implemenation 55 | for func in `cat /proc/kallsyms | grep '\[kpatch_' | grep -v __kpatch | awk '{print $3}' | grep -v 'patch_'`; do 56 | if [ $(grep "e9_$func" /proc/kallsyms | wc -l) -gt 0 ]; then 57 | echo "$func" >> $func_list_nosympos 58 | fi 59 | done 60 | 61 | if [ "$(awk 'END{print NF}' $tainted_file)" != "3" ]; then 62 | # tainted_file provided by manual_hotfix or kpatch-pre-0.4 that don't have the sympos 63 | conflicts=$(sort <(awk '{print $1}' $tainted_file) <(awk '{print $1}' $func_list | sort | uniq) | uniq -d) 64 | else 65 | # Get the conflict functions 66 | conflicts=$(sort $tainted_file <(awk '{print $1" "$2" "$3}' $func_list | sort | uniq) | uniq -d) 67 | fi 68 | 69 | conflicts_nosympos=$(sort <(awk '{print $1}' $tainted_file) <(awk '{print $1}' $func_list_nosympos | sort | uniq) | uniq -d) 70 | 71 | if [ "$conflicts" != "" -o "$conflicts_nosympos" != "" ]; then 72 | echo Error: confict detected: 73 | if [ "$conflicts" != "" ]; then 74 | echo $(awk '{print $1}' <(echo $conflicts)) 75 | elif [ "$conflicts_nosympos" != "" ]; then 76 | echo $conflicts_nosympos 77 | fi 78 | exit 1 79 | fi 80 | -------------------------------------------------------------------------------- /module-contrib/plugsched.service: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | [Unit] 5 | Description=The plugsched service 6 | ConditionKernelCommandLine=!plugsched.enable=0 7 | 8 | [Service] 9 | Type=oneshot 10 | RemainAfterExit=yes 11 | ExecStart=/usr/bin/bash -c "\ 12 | if [ -d /var/plugsched/$(uname -r) ]; then \ 13 | /var/plugsched/$(uname -r)/scheduler-installer install; \ 14 | else \ 15 | echo \"Scheduler for the current kernel version is not installed. Start service failed!\"; \ 16 | exit 1; \ 17 | fi" 18 | 19 | [Install] 20 | WantedBy=multi-user.target 21 | -------------------------------------------------------------------------------- /module-contrib/scheduler-installer: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | MAX_LOAD_ATTEMPTS=5 6 | RETRY_INTERVAL=2 7 | 8 | cursys=$(uname -r) 9 | modfile=/var/plugsched/$cursys/scheduler.ko 10 | hotfix_conflict_check=/var/plugsched/$cursys/hotfix_conflict_check 11 | tainted_functions=/var/plugsched/$cursys/tainted_functions 12 | enablefile=/sys/kernel/plugsched/plugsched/enable 13 | mod=$(modinfo $modfile | grep vermagic | awk '{print $2}') 14 | 15 | warn() { 16 | echo "scheduler: $*" >&2 17 | } 18 | 19 | install_module() { 20 | local i=0 21 | while true; do 22 | out="$(LC_ALL=C insmod "$1" 2>&1)" 23 | [[ -z "$out" ]] && break 24 | echo "$out" 1>&2 25 | 26 | # Safety check or memory pool allocated failed! Retry in a few seconds. 27 | i=$((i+1)) 28 | if [[ $i -eq $MAX_LOAD_ATTEMPTS ]]; then 29 | warn "load module failed! $1" 30 | exit 1 31 | else 32 | warn "retrying..." 33 | sleep $RETRY_INTERVAL 34 | fi 35 | done 36 | } 37 | 38 | uninstall_module() { 39 | local i=0 40 | while true; do 41 | out="$(export LC_ALL=C; sh -c "echo 0 > $enablefile" 2>&1)" 42 | [[ -z "$out" ]] && break 43 | echo "$out" 1>&2 44 | 45 | # Safety check failed! Retry in a few seconds. 
46 | i=$((i+1)) 47 | if [[ $i -eq $MAX_LOAD_ATTEMPTS ]]; then 48 | warn "disable module failed!" 49 | exit 1 50 | else 51 | warn "retrying..." 52 | sleep $RETRY_INTERVAL 53 | fi 54 | done 55 | rmmod scheduler 56 | } 57 | 58 | if [ "$1" == "install" ]; then 59 | if [ -f "$enablefile" ]; then 60 | echo "scheduler: scheduler module has been installed! Skip..." 61 | exit 62 | fi 63 | 64 | if [ "$cursys" == "$mod" ]; then 65 | $hotfix_conflict_check $tainted_functions || exit 1 66 | /usr/bin/mkdir -p /run/plugsched 67 | /usr/bin/cp $modfile /run/plugsched/scheduler.ko 68 | /var/plugsched/$(uname -r)/symbol_resolve /run/plugsched/scheduler.ko /proc/kallsyms 69 | install_module /run/plugsched/scheduler.ko 70 | else 71 | warn "Error: kernel version is not same as plugsched version!" 72 | exit 1 73 | fi 74 | elif [ "$1" == "uninstall" ]; then 75 | if [ -f "$enablefile" ]; then 76 | uninstall_module 77 | else 78 | echo "scheduler: scheduler module has been removed! Skip ..." 79 | fi 80 | 81 | /usr/bin/rm -rf /run/plugsched 82 | else 83 | warn "Error: Unknown operation" 84 | exit 1 85 | fi 86 | -------------------------------------------------------------------------------- /module-contrib/scheduler.spec: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | %define minor_name xxx 5 | %define release yyy 6 | %define _modpath kernel/sched/mod 7 | 8 | Name: scheduler-%{minor_name} 9 | Version: %{KVER} 10 | Release: %{KREL}.%{release} 11 | Summary: The schedule policy RPM for linux kernel scheduler subsystem 12 | Packager: None 13 | 14 | Group: System Environment/Kernel 15 | License: GPLv2 16 | URL: None 17 | 18 | BuildRequires: make, gcc-c++, bc, bison, flex, openssl, openssl-devel 19 | BuildRequires: glibc-static, zlib-static, libstdc++-static 20 | BuildRequires: elfutils-devel, elfutils-devel-static, elfutils-libelf-devel 21 | 22 | Requires: systemd 23 | Requires: binutils 24 | 25 | %description 26 | The scheduler policy rpm-package. 27 | 28 | %prep 29 | 30 | %build 31 | # Build sched_mod 32 | make KBUILD_MODPOST_WARN=1 \ 33 | plugsched_tmpdir=working \ 34 | plugsched_modpath=%{_modpath} \ 35 | sidecar_objs=%{?_sdcrobjs} \ 36 | -C . -f working/Makefile.plugsched \ 37 | plugsched -j $(nproc) 38 | 39 | # Build symbol resolve tool 40 | make -C working/symbol_resolve 41 | 42 | # Generate the tainted_functions file 43 | awk -F '[(,)]' '$2!=""{print $2" "$3" vmlinux"}' %{_modpath}/tainted_functions.h > working/tainted_functions 44 | 45 | %install 46 | #install tool, module and systemd service 47 | mkdir -p %{buildroot}/usr/lib/systemd/system 48 | mkdir -p %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch} 49 | 50 | install -m 644 working/plugsched.service \ 51 | %{buildroot}/usr/lib/systemd/system/plugsched.service 52 | 53 | install -m 755 working/symbol_resolve/symbol_resolve \ 54 | %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/symbol_resolve 55 | 56 | install -m 755 %{_modpath}/scheduler.ko \ 57 | %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/scheduler.ko 58 | 59 | install -m 444 working/tainted_functions \ 60 | %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/tainted_functions 61 | 62 | 
install -m 444 working/boundary.yaml \ 63 | %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/boundary.yaml 64 | 65 | install -m 755 working/scheduler-installer \ 66 | %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/scheduler-installer 67 | 68 | install -m 755 working/hotfix_conflict_check \ 69 | %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/hotfix_conflict_check 70 | 71 | install -m 444 working/version \ 72 | %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/version 73 | 74 | #install kernel module after install this rpm-package 75 | %post 76 | sync 77 | 78 | if [ "$(uname -r)" != "%{KVER}-%{KREL}.%{_arch}" ]; then 79 | echo "INFO: scheduler does not match current kernel version, skip starting service ..." 80 | exit 0 81 | fi 82 | 83 | echo "Start plugsched.service" 84 | systemctl enable plugsched 85 | systemctl start plugsched 86 | 87 | #uninstall kernel module before remove this rpm-package 88 | %preun 89 | if [ "$(uname -r)" != "%{KVER}-%{KREL}.%{_arch}" ]; then 90 | echo "INFO: scheduler does not match current kernel version, skip unloading module..." 
91 | exit 0 92 | fi 93 | 94 | echo "Stop plugsched.service" 95 | /var/plugsched/$(uname -r)/scheduler-installer uninstall || exit 1 96 | systemctl stop plugsched 97 | 98 | %postun 99 | systemctl reset-failed plugsched 100 | 101 | %files 102 | %dir %{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch} 103 | /usr/lib/systemd/system/plugsched.service 104 | %{_localstatedir}/plugsched/%{KVER}-%{KREL}.%{_arch}/* 105 | 106 | %changelog 107 | -------------------------------------------------------------------------------- /module-contrib/version: -------------------------------------------------------------------------------- 1 | plugsched version: 1.3.0 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.boundary 2 | *.export_jump 3 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | # These files are disabled because they produce non-interesting flaky coverage 5 | # that is not a function of syscall inputs. E.g. involuntary context switches. 6 | KCOV_INSTRUMENT := n 7 | 8 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 9 | # According to Alan Modra , the -fno-omit-frame-pointer is 10 | # needed for x86 only. Why this used to be enabled for all architectures is beyond 11 | # me. I suspect most platforms don't need this, but until we know that for sure 12 | # I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k 13 | # to get a correct value for the wait-channel (WCHAN in ps). 
--davidm 14 | CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer 15 | endif 16 | 17 | objs-y += core.o 18 | objs-y += idle.o fair.o rt.o deadline.o 19 | 20 | objs-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o 21 | objs-$(CONFIG_SCHEDSTATS) += stats.o 22 | objs-$(CONFIG_SCHED_DEBUG) += debug.o 23 | 24 | obj-m += scheduler.o 25 | scheduler-objs := $(objs-y) $(sidecar_objs) main.o sched_rebuild.o 26 | 27 | search_cb := {if (/CALLBACK/) {print "__cb_"$$2} else {print $$2}} 28 | search_und := grep "UND __mod_" | awk '{print substr($$8,7)}' | sort | uniq 29 | search_rdf := grep "UND __mod_" | awk '{print $$8,substr($$8,7)}' 30 | error_msg := access non-existent symbol by using __mod_ prefix 31 | 32 | obj-stub := $(addprefix $(obj)/, $(scheduler-objs:.o=.stub.o)) 33 | und-file := $(obj)/.und 34 | rdf-file = $(dir $@).$(notdir $@).rdf 35 | 36 | cmd_find_sym = \ 37 | awk -F'[(,]' '$(search_cb)' $< > $@; \ 38 | readelf -sW $(obj-stub) | $(search_und) | tee $(und-file) >> $@; \ 39 | count1=$$(cat $(und-file) | wc -l); \ 40 | count2=$$(readelf -sW $(obj-stub) | grep -w -f $(und-file) | \ 41 | grep -v '\.' 
| grep -v UND | wc -l); \ 42 | if [ "$$count1" != "$$count2" ]; then \ 43 | echo -e '\033[31m'$(error_msg)'\033[0m'; \ 44 | exit 1; \ 45 | fi 46 | 47 | CFLAGS_core.stub.o := -DMODULE -DSTACKSIZE_MOD=0 48 | CFLAGS_main.stub.o := -DMODULE_FRAME_POINTER=VMLINUX_FRAME_POINTER 49 | $(obj)/%.stub.o: $(src)/%.c FORCE 50 | $(call cmd,force_checksrc) 51 | $(call if_changed_rule,cc_o_c) 52 | 53 | GET_STACK_SIZE: $(obj)/core.stub.o 54 | $(eval ccflags-y += $(shell bash $(plugsched_tmpdir)/springboard_search.sh build $<)) 55 | 56 | $(obj)/.globalize: $(src)/export_jump.h $(obj-stub) FORCE 57 | $(cmd_find_sym) 58 | 59 | $(obj)/%.o: $(src)/%.c $(obj)/.globalize GET_STACK_SIZE FORCE 60 | $(call cmd,force_checksrc) 61 | $(call if_changed_rule,cc_o_c) 62 | readelf -sW $@ | $(search_rdf) > $(rdf-file) 63 | $(OBJCOPY) --globalize-symbols $(obj)/.globalize \ 64 | --redefine-syms $(rdf-file) $@ 65 | 66 | 67 | ldflags-y += -T $(plugsched_modpath)/scheduler.lds 68 | ccflags-n += -DSCHEDMOD_MEMPOOL 69 | ccflags-y += -Wno-unused-function 70 | ccflags-y += -fkeep-static-functions 71 | ccflags-y += -D__DISABLE_EXPORTS 72 | -------------------------------------------------------------------------------- /src/Makefile.plugsched: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
2 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | include Makefile 5 | 6 | GCC_PLUGIN_FLAGS := -fplugin=/usr/lib64/gcc-python-plugin/python.so \ 7 | -fplugin-arg-python-script=$(plugsched_tmpdir)/collect.py \ 8 | -fplugin-arg-python-tmpdir=$(plugsched_tmpdir) 9 | 10 | PHONY += plugsched collect extract 11 | 12 | plugsched: scripts prepare 13 | $(MAKE) -C $(srctree) M=$(plugsched_modpath) modules 14 | 15 | collect: modules_prepare 16 | $(MAKE) CFLAGS_KERNEL="$(GCC_PLUGIN_FLAGS)" \ 17 | CFLAGS_MODULE="$(GCC_PLUGIN_FLAGS)" $(vmlinux-dirs) 18 | analyze: 19 | find $(srctree)/arch -name "compressed" -type d | xargs -I% find % -name "*.c.boundary" -exec rm -f {} \; 20 | rm -f $(srctree)/drivers/firmware/efi/libstub/*.c.boundary 21 | python3 $(plugsched_tmpdir)/analyze.py ./vmlinux $(plugsched_tmpdir) $(plugsched_modpath) 22 | 23 | extract: $(objs) 24 | 25 | %.extract: % 26 | python3 $(plugsched_tmpdir)/extract.py $^ $(plugsched_tmpdir) $(plugsched_modpath) 27 | -------------------------------------------------------------------------------- /src/export_jump.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019-2022 Alibaba Group Holding Limited. 3 | * SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | */ 5 | 6 | /* file contents will be generated automatically */ 7 | -------------------------------------------------------------------------------- /src/head_jump.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019-2022 Alibaba Group Holding Limited. 3 | * SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | */ 5 | 6 | #ifndef __HEAD_JUMP_H 7 | #define __HEAD_JUMP_H 8 | 9 | #include 10 | #include 11 | 12 | #define EXPORT_SIDECAR(fn, file, ...) EXPORT_PLUGSCHED(fn, __VA_ARGS__) 13 | #define EXPORT_CALLBACK EXPORT_PLUGSCHED 14 | #define EXPORT_PLUGSCHED(fn, ...) 
NR_##fn, 15 | enum { 16 | #include "export_jump.h" 17 | NR_INTERFACE_FN 18 | } nr_inter_fn; 19 | #undef EXPORT_PLUGSCHED 20 | #undef EXPORT_CALLBACK 21 | 22 | static unsigned long vm_func_addr[NR_INTERFACE_FN]; 23 | static unsigned long vm_func_size[NR_INTERFACE_FN]; 24 | static unsigned long mod_func_addr[NR_INTERFACE_FN]; 25 | static unsigned long mod_func_size[NR_INTERFACE_FN]; 26 | 27 | /* Used to declare the extern function set */ 28 | #define EXPORT_CALLBACK(fn, ret, ...) extern ret __cb_##fn(__VA_ARGS__); 29 | #define EXPORT_PLUGSCHED(fn, ret, ...) extern ret fn(__VA_ARGS__); 30 | #include "export_jump.h" 31 | #undef EXPORT_PLUGSCHED 32 | #undef EXPORT_CALLBACK 33 | 34 | /* Used to declare extern functions defined in vmlinux */ 35 | #define EXPORT_CALLBACK(fn, ret, ...) extern ret __orig_##fn(__VA_ARGS__); 36 | #define EXPORT_PLUGSCHED(fn, ret, ...) extern ret __orig_##fn(__VA_ARGS__); 37 | #include "export_jump.h" 38 | #undef EXPORT_PLUGSCHED 39 | #undef EXPORT_CALLBACK 40 | 41 | /* They are completely identical unless specified */ 42 | #define EXPORT_CALLBACK EXPORT_PLUGSCHED 43 | 44 | /* This API set is used to replace a function in vmlinux with another 45 | * function (with the same name) in the module. Usage is as follows: 46 | * 47 | * 1) For just one function: 48 | * 1. DEFINE_JUMP_FUNC(function) //define the useful data 49 | * 2. JUMP_INIT_FUNC(function, prefix) //init the data 50 | * 3. JUMP_INSTALL_FUNC(function) //replace the function 51 | * 4. JUMP_REMOVE_FUNC(function) //restore the function 52 | * 53 | * 2) For a set of functions: 54 | * 1. Add the function to the export_jump.h file 55 | * 2. Call jump_init_all() to init the data of all functions 56 | * 3. Use the JUMP_OPERATION(install) macro to replace the function set 57 | * 4.
Use JUMP_OPERATION(remove) macro to restore the functions set 58 | */ 59 | 60 | #ifdef CONFIG_X86_64 61 | 62 | #define HEAD_LEN 5 63 | 64 | #define DEFINE_JUMP_FUNC(func) \ 65 | static unsigned char store_jump_##func[HEAD_LEN]; \ 66 | static unsigned char store_orig_##func[HEAD_LEN]; \ 67 | static unsigned long orig_##func##_size; \ 68 | static unsigned long mod_##func##_size 69 | 70 | extern void __orig___fentry__(void); 71 | 72 | #define JUMP_INIT_FUNC(func, prefix) do { \ 73 | curr_func = #func; \ 74 | vm_func_addr[NR_##func] = (unsigned long)__orig_##func; \ 75 | mod_func_addr[NR_##func] = (unsigned long)prefix##func; \ 76 | memcpy(store_orig_##func, __orig_##func, HEAD_LEN); \ 77 | store_jump_##func[0] = 0xe9; \ 78 | (*(int *)(store_jump_##func + 1)) = \ 79 | (long)prefix##func - (long)__orig_##func - HEAD_LEN; \ 80 | if (store_orig_##func[0] == 0xe8) { \ 81 | offset = *(int *)(store_orig_##func + 1); \ 82 | target = (void*)__orig_##func + HEAD_LEN + offset; \ 83 | if (target != __orig___fentry__) \ 84 | goto hooked; \ 85 | } \ 86 | if (store_orig_##func[0] == 0xe9) \ 87 | goto hooked; \ 88 | } while(0) 89 | 90 | #define JUMP_INSTALL_FUNC(func) \ 91 | memcpy((unsigned char *)__orig_##func, store_jump_##func, HEAD_LEN) 92 | 93 | #define JUMP_REMOVE_FUNC(func) \ 94 | memcpy((unsigned char *)__orig_##func, store_orig_##func, HEAD_LEN) 95 | 96 | 97 | /* Must be used in stop machine context */ 98 | #define JUMP_OPERATION(ops) do { \ 99 | void *unused = disable_write_protect(NULL); \ 100 | jump_##ops(); \ 101 | enable_write_protect(); \ 102 | } while(0) 103 | 104 | #else /* For ARM64 */ 105 | #define DEFINE_JUMP_FUNC(func) \ 106 | static u32 store_orig_##func; \ 107 | static u32 store_jump_##func; \ 108 | static unsigned long orig_##func##_size; \ 109 | static unsigned long mod_##func##_size 110 | 111 | #define JUMP_INIT_FUNC(func, prefix) do { \ 112 | vm_func_addr[NR_##func] = (unsigned long)__orig_##func; \ 113 | mod_func_addr[NR_##func] = (unsigned 
long)prefix##func; \ 114 | memcpy((void *)&store_orig_##func, __orig_##func, AARCH64_INSN_SIZE); \ 115 | store_jump_##func = aarch64_insn_gen_branch_imm((unsigned long)__orig_##func, \ 116 | (unsigned long)prefix##func, AARCH64_INSN_BRANCH_NOLINK); \ 117 | } while(0) 118 | 119 | #define JUMP_INSTALL_FUNC(func) \ 120 | aarch64_insn_patch_text_nosync(__orig_##func, store_jump_##func) 121 | 122 | #define JUMP_REMOVE_FUNC(func) \ 123 | aarch64_insn_patch_text_nosync(__orig_##func, store_orig_##func) 124 | 125 | #define JUMP_OPERATION(ops) do { \ 126 | jump_##ops(); \ 127 | } while(0) 128 | 129 | #endif /* CONFIG_X86_64 */ 130 | 131 | #define EXPORT_PLUGSCHED(fn, ...) DEFINE_JUMP_FUNC(fn); 132 | #include "export_jump.h" 133 | #undef EXPORT_PLUGSCHED 134 | 135 | #define EXPORT_PLUGSCHED(fn, ...) JUMP_INSTALL_FUNC(fn); 136 | static inline void jump_install(void) 137 | { 138 | #include "export_jump.h" 139 | } 140 | #undef EXPORT_PLUGSCHED 141 | 142 | #define EXPORT_PLUGSCHED(fn, ...) JUMP_REMOVE_FUNC(fn); 143 | static inline void jump_remove(void) 144 | { 145 | #include "export_jump.h" 146 | } 147 | #undef EXPORT_PLUGSCHED 148 | 149 | 150 | #undef EXPORT_CALLBACK 151 | #define EXPORT_CALLBACK(fn, ...) JUMP_INIT_FUNC(fn, __cb_); 152 | #define EXPORT_PLUGSCHED(fn, ...) JUMP_INIT_FUNC(fn, ); 153 | static int __maybe_unused jump_init_all(void) 154 | { 155 | char *curr_func; 156 | int offset; 157 | void* target; 158 | 159 | #include "export_jump.h" 160 | return 0; 161 | hooked: 162 | printk(KBUILD_MODNAME ": Error: function %s is already hooked by someone.\n", curr_func); 163 | return 1; 164 | } 165 | #undef EXPORT_PLUGSCHED 166 | #undef EXPORT_CALLBACK 167 | 168 | #endif 169 | -------------------------------------------------------------------------------- /src/helper.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2022 Alibaba Group Holding Limited. 
2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | 5 | /* 6 | * helper function to communicate with vmlinux 7 | */ 8 | 9 | #ifdef CONFIG_X86_64 10 | static unsigned long orig_cr0; 11 | 12 | static inline void do_write_cr0(unsigned long val) 13 | { 14 | asm volatile("mov %0,%%cr0": "+r" (val) : : "memory"); 15 | } 16 | 17 | static inline void *disable_write_protect(void *addr) 18 | { 19 | BUG_ON(orig_cr0); 20 | 21 | orig_cr0 = read_cr0(); 22 | do_write_cr0(orig_cr0 & 0xfffeffff); 23 | 24 | return (void *)addr; 25 | } 26 | 27 | static inline void enable_write_protect(void) 28 | { 29 | do_write_cr0(orig_cr0); 30 | orig_cr0 = 0; 31 | } 32 | 33 | #else /* ARM64 */ 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | static void *disable_write_protect(void *addr) 41 | { 42 | unsigned long uintaddr = (uintptr_t) addr; 43 | struct page *page; 44 | 45 | page = phys_to_page(__pa_symbol(addr)); 46 | 47 | return (void *)set_fixmap_offset(FIX_TEXT_POKE0, page_to_phys(page) + 48 | (uintaddr & ~PAGE_MASK)); 49 | } 50 | 51 | static inline void enable_write_protect(void) 52 | { 53 | clear_fixmap(FIX_TEXT_POKE0); 54 | } 55 | #endif 56 | 57 | 58 | static inline unsigned long get_ptr_value(unsigned long ptr_addr) 59 | { 60 | unsigned long mid_addr = *((unsigned long *)ptr_addr); 61 | return *((unsigned long *)mid_addr); 62 | } 63 | 64 | static inline void set_ptr_value(unsigned long ptr_addr, unsigned long val) 65 | { 66 | unsigned long mid_addr = *((unsigned long *)ptr_addr); 67 | *((unsigned long *)mid_addr) = val; 68 | } 69 | 70 | static inline unsigned long get_value_long(unsigned long addr) 71 | { 72 | return *((unsigned long *)addr); 73 | } 74 | 75 | static inline void set_value_long(unsigned long addr, unsigned long val) 76 | { 77 | *((unsigned long *)addr) = val; 78 | } 79 | 80 | /* 81 | * binary search method 82 | */ 83 | static int bsearch(unsigned long *arr, int start, int end, unsigned long tar) 84 | { 85 | int mid; 86 | 87 | if (end < start) 
88 | return -1; 89 | if (tar < arr[start]) 90 | return -1; 91 | if (tar >= arr[end]) 92 | return end; 93 | 94 | while(start <= end) { 95 | mid = (start + end) >> 1; 96 | if (tar == arr[mid]) 97 | return mid; 98 | else if (tar < arr[mid]) 99 | end = mid - 1; 100 | else 101 | start = mid + 1; 102 | } 103 | 104 | return end; 105 | } 106 | 107 | static inline void addr_swap(unsigned long *a, unsigned long *b) 108 | { 109 | if (*a ^ *b) { 110 | *a = *a ^ *b; 111 | *b = *b ^ *a; 112 | *a = *a ^ *b; 113 | } 114 | } 115 | 116 | /* 117 | * This sort method is coming from lib/sort.c 118 | */ 119 | static void addr_sort(unsigned long *addr, unsigned long *size, int n) { 120 | int i = n/2 - 1, c, r; 121 | 122 | for ( ; i >= 0; i -= 1) { 123 | for (r = i; r * 2 + 1 < n; r = c) { 124 | c = r * 2 + 1; 125 | if (c < n - 1 && 126 | *(addr + c) < *(addr + c + 1)) 127 | c += 1; 128 | if (*(addr + r) >= *(addr + c)) 129 | break; 130 | addr_swap(addr + r, addr + c); 131 | addr_swap(size + r, size + c); 132 | } 133 | } 134 | 135 | for (i = n - 1; i > 0; i -= 1) { 136 | addr_swap(addr, addr + i); 137 | addr_swap(size, size + i); 138 | for (r = 0; r * 2 + 1 < i; r = c) { 139 | c = r * 2 + 1; 140 | if (c < i - 1 && 141 | *(addr + c) < *(addr + c + 1)) 142 | c += 1; 143 | if (*(addr + r) >= *(addr + c)) 144 | break; 145 | addr_swap(addr + r, addr + c); 146 | addr_swap(size + r, size + c); 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/mempool.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019-2022 Alibaba Group Holding Limited. 
3 | * SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | */ 5 | 6 | #ifdef SCHEDMOD_MEMPOOL 7 | 8 | #include 9 | /* The pool covers the half-open range [vstart, vend): vend is one byte past the pool, so it is excluded. */ 10 | #define is_simple_mempool_addr(smpool, addr) \ 11 | ((unsigned long)(addr) >= (smpool)->vstart && \ 12 | (unsigned long)(addr) < (smpool)->vend) 13 | 14 | struct simple_mempool { 15 | unsigned long vstart; 16 | unsigned long vend; 17 | unsigned long alloc_addr; 18 | unsigned int obj_size; 19 | unsigned int obj_num; 20 | }; 21 | 22 | struct simple_percpu_mempool { 23 | /* The base address of each percpu memory area. */ 24 | unsigned long *percpu_ptr; 25 | /* Record the areas' allocated size. */ 26 | unsigned long allocated_size; 27 | unsigned int obj_size; 28 | /* How many areas are required for the mempool. */ 29 | unsigned int areas; 30 | /* How many objs can be assigned from each area. */ 31 | unsigned int objs_per_area; 32 | /* Used to record which area is allocated from. */ 33 | unsigned int area_id; 34 | }; 35 | 36 | static inline void *simple_mempool_alloc(struct simple_mempool *smpool) 37 | { 38 | void *ret; 39 | /* Bump allocation with no bounds check here; addresses are stored as unsigned long, hence the explicit cast. */ 40 | ret = (void *)smpool->alloc_addr; 41 | smpool->alloc_addr += smpool->obj_size; 42 | 43 | return ret; 44 | } 45 | 46 | static inline struct simple_mempool *simple_mempool_create(int obj_num, int obj_size) 47 | { 48 | struct simple_mempool *smpool; 49 | 50 | smpool = kzalloc_node(sizeof(*smpool), GFP_ATOMIC, 0); 51 | if (!smpool) 52 | return NULL; 53 | 54 | smpool->vstart = (unsigned long)vmalloc_node(obj_num * obj_size, 0); 55 | if (!smpool->vstart) { 56 | kfree(smpool); 57 | return NULL; 58 | } 59 | 60 | smpool->alloc_addr = smpool->vstart; 61 | smpool->vend = smpool->vstart + obj_num * obj_size; 62 | smpool->obj_size = obj_size; 63 | smpool->obj_num = obj_num; 64 | 65 | return smpool; 66 | } 67 | 68 | static inline void simple_mempool_destory(struct simple_mempool *smpool) 69 | { 70 | vfree((void *)smpool->vstart); 71 | kfree(smpool); 72 | } 73 | 74 | static struct simple_percpu_mempool *simple_percpu_mempool_create(int obj_num, 75 | int obj_size) 76 | {
77 | unsigned int areas, objs_per_area, cnt = 0; 78 | struct simple_percpu_mempool *psmpool; 79 | void *ptr; 80 | 81 | psmpool = kzalloc_node(sizeof(*psmpool), GFP_ATOMIC, 0); 82 | if (!psmpool) 83 | return NULL; 84 | 85 | /* Calculate how many percpu areas are required. */ 86 | objs_per_area = PCPU_MIN_UNIT_SIZE / obj_size; 87 | areas = (obj_num + objs_per_area - 1) / objs_per_area; 88 | 89 | psmpool->percpu_ptr = 90 | kzalloc_node(sizeof(unsigned long) * areas, GFP_ATOMIC, 0); 91 | if (!psmpool->percpu_ptr) 92 | goto error; 93 | 94 | for (cnt = 0; cnt < areas; cnt++) { 95 | ptr = __alloc_percpu(PCPU_MIN_UNIT_SIZE, obj_size); 96 | if (!ptr) 97 | goto error; 98 | 99 | psmpool->percpu_ptr[cnt] = (unsigned long)ptr; 100 | } 101 | 102 | psmpool->obj_size = obj_size; 103 | psmpool->objs_per_area = objs_per_area; 104 | psmpool->areas = areas; 105 | 106 | return psmpool; 107 | 108 | error: 109 | while (cnt > 0) 110 | free_percpu((void *)psmpool->percpu_ptr[--cnt]); 111 | 112 | kfree(psmpool->percpu_ptr); 113 | kfree(psmpool); 114 | 115 | return NULL; 116 | } 117 | 118 | static void *simple_percpu_mempool_alloc(struct simple_percpu_mempool *psmpool) 119 | { 120 | unsigned long area_size, ret; 121 | 122 | area_size = psmpool->obj_size * psmpool->objs_per_area; 123 | 124 | if ((psmpool->allocated_size + psmpool->obj_size) > area_size) { 125 | psmpool->area_id++; 126 | psmpool->allocated_size = 0; 127 | } 128 | 129 | ret = psmpool->percpu_ptr[psmpool->area_id] + psmpool->allocated_size; 130 | psmpool->allocated_size += psmpool->obj_size; 131 | 132 | return (void *)ret; 133 | } 134 | 135 | static void simple_percpu_mempool_destory(struct simple_percpu_mempool *psmpool) 136 | { 137 | int i; 138 | 139 | for (i = 0; i < psmpool->areas; i++) 140 | free_percpu((void *)psmpool->percpu_ptr[i]); 141 | 142 | kfree(psmpool->percpu_ptr); 143 | kfree(psmpool); 144 | } 145 | 146 | static inline bool is_simple_percpu_mempool_addr( 147 | struct simple_percpu_mempool *psmpool, void *_addr) 
148 | { 149 | int i; 150 | unsigned long addr, area_size, base; 151 | 152 | addr = (unsigned long)_addr; 153 | area_size = psmpool->obj_size * psmpool->objs_per_area; 154 | 155 | for (i = 0; i < psmpool->areas; i++) { 156 | base = psmpool->percpu_ptr[i]; 157 | if (addr >= base && addr < (base + area_size)) 158 | return true; 159 | } 160 | 161 | return false; 162 | } 163 | 164 | #define FIELD_TYPE(t, f) typeof(((struct t*)0)->f) 165 | #define FIELD_INDIRECT_TYPE(t, f) typeof(*((struct t*)0)->f) 166 | 167 | #define DEFINE_RESERVE(type, field, name, require, max) \ 168 | static struct simple_mempool *name##_smp = NULL; \ 169 | static void release_##name##_reserve(struct type *x) \ 170 | { \ 171 | if (!is_simple_mempool_addr(name##_smp, x->field)) \ 172 | kfree(x->field); \ 173 | x->field = NULL; \ 174 | } \ 175 | static FIELD_TYPE(type, field) alloc_##name##_reserve(void) \ 176 | { \ 177 | return simple_mempool_alloc(name##_smp); \ 178 | } \ 179 | static int create_mempool_##name(void) \ 180 | { \ 181 | name##_smp = simple_mempool_create(max, \ 182 | sizeof(FIELD_INDIRECT_TYPE(type, field))); \ 183 | if (!name##_smp) \ 184 | return -ENOMEM; \ 185 | return 0; \ 186 | } \ 187 | static int recheck_mempool_##name(void) \ 188 | { \ 189 | if (require > name##_smp->obj_num) \ 190 | return -ENOMEM; \ 191 | return 0; \ 192 | } 193 | 194 | #define DEFINE_RESERVE_PERCPU(type, field, name, require, max) \ 195 | static struct simple_percpu_mempool *name##_smp = NULL; \ 196 | static void release_##name##_reserve(struct type *x) \ 197 | { \ 198 | if (!is_simple_percpu_mempool_addr(name##_smp, x->field)) \ 199 | free_percpu((void *)x->field); \ 200 | x->field = NULL; \ 201 | } \ 202 | static FIELD_TYPE(type, field) alloc_##name##_reserve(void) \ 203 | { \ 204 | return simple_percpu_mempool_alloc(name##_smp); \ 205 | } \ 206 | static int create_mempool_##name(void) \ 207 | { \ 208 | name##_smp = simple_percpu_mempool_create(max, \ 209 | sizeof(FIELD_INDIRECT_TYPE(type, field))); \ 210 
| if (!name##_smp) \ 211 | return -ENOMEM; \ 212 | return 0; \ 213 | } \ 214 | static int recheck_mempool_##name(void) \ 215 | { \ 216 | if (require > (name##_smp->areas * name##_smp->objs_per_area)) \ 217 | return -ENOMEM; \ 218 | return 0; \ 219 | } 220 | 221 | /* 222 | * Examples of simple mempool usage 223 | 224 | * #define nr_tgs atomic_read(&cpu_cgrp_subsys.root->nr_cgrps) 225 | * 226 | * DEFINE_RESERVE(sched_statistics, 227 | * bvt, 228 | * se, 229 | * nr_threads + nr_cpu_ids + (nr_tgs - 1) * nr_cpu_ids, 230 | * (nr_threads + nr_cpu_ids + (nr_tgs - 1) * nr_cpu_ids)*2) 231 | * 232 | * DEFINE_RESERVE(rq, // struct rq 233 | * bvt, // struct rq's bvt field 234 | * rq, // name the mempool as rq_smp 235 | * nr_cpu_ids, // we need exactly nr_cpu_ids objects 236 | * nr_cpu_ids); // we alloc nr_cpu_ids objects before stop_machine 237 | * 238 | * DEFINE_RESERVE_PERCPU(task_struct, // struct task_struct 239 | * percpu_var, // task_struct's new percpu_var field 240 | * percpu_var, // name the percpu mempool as percpu_var_smp 241 | * nr_threads + nr_cpu_ids,// we need exactly nr_threads + nr_cpu_ids objects 242 | * nr_threads + nr_cpu_ids)// we alloc nr_threads + nr_cpu_ids objects before stop_machine 243 | */ 244 | 245 | static int sched_mempools_create(void) 246 | { 247 | int err; 248 | 249 | /* 250 | * Examples of mempools create 251 | * if ((err = create_mempool_se())) 252 | * return err; 253 | 254 | * if ((err = create_mempool_rq())) 255 | * return err; 256 | 257 | * if ((err = create_mempool_percpu_var())) 258 | * return err; 259 | */ 260 | 261 | return 0; 262 | } 263 | 264 | static void sched_mempools_destroy(void) 265 | { 266 | /* 267 | * Examples of mempools destroy 268 | * simple_mempool_destory(se_smp); 269 | * simple_mempool_destory(rq_smp); 270 | * simple_percpu_mempool_destory(percpu_var_smp); 271 | */ 272 | } 273 | 274 | static int recheck_smps(void) 275 | { 276 | int err = -ENOMEM; 277 | 278 | /* 279 | * Examples of mempools recheck 280 | * if ((err = recheck_mempool_rq())) 281 | *
return err; 282 | 283 | * if ((err = recheck_mempool_se())) 284 | * return err; 285 | 286 | * if ((err = recheck_mempool_percpu_var())) 287 | * return err; 288 | */ 289 | 290 | return 0; 291 | } 292 | 293 | static void sched_alloc_extrapad(void) 294 | { 295 | /* TODO: Exploit all CPUs */ 296 | 297 | /* 298 | * Examples of alloc extrapad 299 | * struct task_struct *p, *t; 300 | * struct task_group *tg; 301 | * int cpu; 302 | 303 | * for_each_possible_cpu(cpu) { 304 | * cpu_rq(cpu)->bvt = alloc_rq_reserve(); 305 | * idle_task(cpu)->se.statistics.bvt = alloc_se_reserve(); 306 | * } 307 | 308 | * for_each_process_thread(p, t) 309 | * t->se.statistics.bvt = alloc_se_reserve(); 310 | 311 | * list_for_each_entry_rcu(tg, &task_groups, list) { 312 | * if (tg == &root_task_group || task_group_is_autogroup(tg)) 313 | * continue; 314 | * for_each_possible_cpu(cpu) 315 | * tg->se[cpu]->statistics.bvt = alloc_se_reserve(); 316 | * } 317 | 318 | * for_each_process_thread (p, t) 319 | * t->percpu_var = alloc_percpu_var_reserve(); 320 | */ 321 | } 322 | 323 | static void sched_free_extrapad(void) 324 | { 325 | /* TODO: Exploit all CPUs */ 326 | 327 | /* 328 | * Examples of free extrapad 329 | * struct task_struct *p, *t; 330 | * struct task_group *tg; 331 | * int cpu; 332 | 333 | * for_each_possible_cpu(cpu) { 334 | * release_se_reserve(&idle_task(cpu)->se.statistics); 335 | * release_rq_reserve(cpu_rq(cpu)); 336 | * } 337 | * for_each_process_thread (p, t) 338 | * release_se_reserve(&t->se.statistics); 339 | 340 | * list_for_each_entry_rcu(tg, &task_groups, list) { 341 | * if (tg == &root_task_group || task_group_is_autogroup(tg)) 342 | * continue; 343 | * for_each_possible_cpu(cpu) 344 | * release_se_reserve(&tg->se[cpu]->statistics); 345 | * } 346 | 347 | * for_each_process_thread(p, t) 348 | * release_percpu_var_reserve(t); 349 | */ 350 | } 351 | 352 | #else 353 | static inline int recheck_smps(void) { return 0; } 354 | static inline void sched_alloc_extrapad(void) { } 355 | 
static inline void sched_free_extrapad(void) { } 356 | static inline int sched_mempools_create(void) { return 0; } 357 | static inline void sched_mempools_destroy(void) { } /* void, matching the SCHEDMOD_MEMPOOL definition */ 358 | #endif /* SCHEDMOD_MEMPOOL */ 359 | -------------------------------------------------------------------------------- /src/sched_rebuild.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019-2022 Alibaba Group Holding Limited. 3 | * SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | */ 5 | 6 | #include 7 | #include 8 | #include "sched.h" 9 | #include "helper.h" 10 | 11 | extern void __orig_set_rq_offline(struct rq*); 12 | extern void __orig_set_rq_online(struct rq*); 13 | extern void __orig_update_rq_clock(struct rq *rq); 14 | 15 | extern void __mod_set_rq_offline(struct rq*); 16 | extern void __mod_set_rq_online(struct rq*); 17 | extern void __mod_update_rq_clock(struct rq *rq); 18 | 19 | extern unsigned int process_id[]; 20 | 21 | extern struct sched_class __orig_stop_sched_class; 22 | extern struct sched_class __orig_dl_sched_class; 23 | extern struct sched_class __orig_rt_sched_class; 24 | extern struct sched_class __orig_fair_sched_class; 25 | extern struct sched_class __orig_idle_sched_class; 26 | extern struct sched_class shadow_stop_sched_class; 27 | extern struct sched_class shadow_dl_sched_class; 28 | extern struct sched_class shadow_rt_sched_class; 29 | extern struct sched_class shadow_fair_sched_class; 30 | extern struct sched_class shadow_idle_sched_class; 31 | 32 | struct sched_class *orig_class[] = { 33 | &__orig_stop_sched_class, 34 | &__orig_dl_sched_class, 35 | &__orig_rt_sched_class, 36 | &__orig_fair_sched_class, 37 | &__orig_idle_sched_class, 38 | }; 39 | 40 | struct sched_class *mod_class[] = { 41 | &shadow_stop_sched_class, 42 | &shadow_dl_sched_class, 43 | &shadow_rt_sched_class, 44 | &shadow_fair_sched_class, 45 | &shadow_idle_sched_class, 46 | }; 47 | 48 | DEFINE_PER_CPU(struct list_head,
dying_task_list); 49 | 50 | #define NR_SCHED_CLASS 5 51 | struct sched_class bak_class[NR_SCHED_CLASS]; 52 | 53 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) 54 | 55 | extern struct task_struct __orig_fake_task; 56 | 57 | #define pick_next_task_rq(class, rf) \ 58 | (class)->pick_next_task(rq, &__orig_fake_task, &(rf)) 59 | 60 | #else 61 | #define pick_next_task_rq(class, rf) \ 62 | (class)->pick_next_task(rq) 63 | #endif 64 | 65 | void switch_sched_class(bool mod) 66 | { 67 | int i; 68 | int size = sizeof(struct sched_class); 69 | 70 | for (i = 0; i < NR_SCHED_CLASS; i++) { 71 | void *waddr; 72 | 73 | waddr = disable_write_protect(orig_class[i]); 74 | 75 | if (mod) { 76 | memcpy(&bak_class[i], waddr, size); 77 | memcpy(waddr, mod_class[i], size); 78 | } else { 79 | memcpy(waddr, &bak_class[i], size); 80 | } 81 | 82 | enable_write_protect(); 83 | } 84 | } 85 | 86 | void clear_sched_state(bool mod) 87 | { 88 | struct task_struct *g, *p; 89 | struct rq *rq = this_rq(); 90 | struct rq_flags rf; 91 | int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; 92 | int cpu = smp_processor_id(); 93 | 94 | rq_lock(rq, &rf); 95 | 96 | if (mod) { 97 | __mod_update_rq_clock(rq); 98 | __mod_set_rq_offline(rq); 99 | } else { 100 | __orig_update_rq_clock(rq); 101 | __orig_set_rq_offline(rq); 102 | } 103 | 104 | for_each_process_thread(g, p) { 105 | if (rq != task_rq(p)) 106 | continue; 107 | 108 | if (p == rq->stop) 109 | continue; 110 | 111 | if (task_on_rq_queued(p)) 112 | p->sched_class->dequeue_task(rq, p, queue_flags); 113 | } 114 | 115 | INIT_LIST_HEAD(&per_cpu(dying_task_list, cpu)); 116 | 117 | /* This logic comes from sched_cpu_dying to deal with dying tasks. */ 118 | for (;;) { 119 | const struct sched_class *class; 120 | struct task_struct *next; 121 | 122 | /* Now, just the stopper task is running. 
*/ 123 | if (rq->nr_running == 1) 124 | break; 125 | 126 | for_each_class(class) { 127 | next = pick_next_task_rq(class, rf); 128 | if (next) { 129 | next->sched_class->put_prev_task(rq, next); 130 | next->sched_class->dequeue_task(rq, next, queue_flags); /* operate on the picked task; 'p' is only the stale cursor of the loop above */ 131 | list_add_tail_rcu(&next->tasks, &per_cpu(dying_task_list, cpu)); 132 | break; 133 | } 134 | } 135 | } 136 | rq_unlock(rq, &rf); 137 | } 138 | 139 | void rebuild_sched_state(bool mod) 140 | { 141 | struct task_struct *g, *p, *tmp; 142 | struct task_group *tg; 143 | struct rq *rq = this_rq(); 144 | struct rq_flags rf; 145 | int queue_flags = ENQUEUE_RESTORE | ENQUEUE_MOVE | ENQUEUE_NOCLOCK; 146 | int cpu = smp_processor_id(); 147 | 148 | rq_lock(rq, &rf); 149 | 150 | if (mod) { 151 | __mod_update_rq_clock(rq); 152 | __mod_set_rq_online(rq); 153 | } else { 154 | __orig_update_rq_clock(rq); 155 | __orig_set_rq_online(rq); 156 | } 157 | 158 | for_each_process_thread(g, p) { 159 | if (rq != task_rq(p)) 160 | continue; 161 | 162 | if (p == rq->stop) 163 | continue; 164 | 165 | if (task_on_rq_queued(p)) 166 | p->sched_class->enqueue_task(rq, p, queue_flags); 167 | } 168 | /* Entries are unlinked while walking, so the _safe iterator (which caches the next node) is required. */ 169 | list_for_each_entry_safe(p, tmp, &per_cpu(dying_task_list, cpu), tasks) { 170 | p->sched_class->enqueue_task(rq, p, queue_flags); 171 | list_del_init(&p->tasks); 172 | } 173 | rq_unlock(rq, &rf); 174 | 175 | if (process_id[cpu]) 176 | return; 177 | 178 | /* Restart the cfs/rt bandwidth timer */ 179 | list_for_each_entry_rcu(tg, &task_groups, list) { 180 | if (tg == &root_task_group) 181 | continue; 182 | 183 | if (tg->cfs_bandwidth.period_active) { 184 | hrtimer_restart(&tg->cfs_bandwidth.period_timer); 185 | hrtimer_restart(&tg->cfs_bandwidth.slack_timer); 186 | } 187 | #ifdef CONFIG_RT_GROUP_SCHED 188 | if (tg->rt_bandwidth.rt_period_active) 189 | hrtimer_restart(&tg->rt_bandwidth.rt_period_timer); 190 | #endif 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/scheduler.lds:
-------------------------------------------------------------------------------- 1 | /* 2 | * Common module linker script, always used when linking a module. 3 | * Archs are free to supply their own linker scripts. ld will 4 | * combine them automatically. 5 | * 6 | * Copyright 2019-2022 Alibaba Group Holding Limited. 7 | * SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 8 | */ 9 | SECTIONS { 10 | PROVIDE(__module_sched_start = .); 11 | .sched.text 0 : ALIGN(8) { *(.sched.text) } 12 | PROVIDE(__module_sched_end = .); 13 | } 14 | -------------------------------------------------------------------------------- /src/stack_check.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2022 Alibaba Group Holding Limited. 2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define MAX_STACK_ENTRIES 100 9 | 10 | extern const char *get_ksymbol(struct module *, unsigned long, 11 | unsigned long *, unsigned long *); 12 | 13 | extern int process_id[]; 14 | 15 | static void stack_check_init(void) 16 | { 17 | #define EXPORT_CALLBACK EXPORT_PLUGSCHED 18 | #define EXPORT_PLUGSCHED(fn, ...) \ 19 | kallsyms_lookup_size_offset((unsigned long)__orig_##fn, \ 20 | &orig_##fn##_size, NULL); \ 21 | vm_func_size[NR_##fn] = orig_##fn##_size; 22 | 23 | #include "export_jump.h" 24 | #undef EXPORT_PLUGSCHED 25 | #undef EXPORT_CALLBACK 26 | 27 | vm_func_size[NR___schedule] = 0; 28 | addr_sort(vm_func_addr, vm_func_size, NR_INTERFACE_FN); 29 | 30 | #define EXPORT_CALLBACK(fn, ...) \ 31 | kallsyms_lookup_size_offset((unsigned long)__cb_##fn, \ 32 | &mod_##fn##_size, NULL); \ 33 | mod_func_size[NR_##fn] = mod_##fn##_size; 34 | 35 | #define EXPORT_PLUGSCHED(fn, ...) 
\ 36 | kallsyms_lookup_size_offset((unsigned long)fn, \ 37 | &mod_##fn##_size, NULL); \ 38 | mod_func_size[NR_##fn] = mod_##fn##_size; 39 | 40 | #include "export_jump.h" 41 | #undef EXPORT_PLUGSCHED 42 | #undef EXPORT_CALLBACK 43 | 44 | mod_func_size[NR___schedule] = 0; 45 | addr_sort(mod_func_addr, mod_func_size, NR_INTERFACE_FN); 46 | } 47 | 48 | static int stack_check_fn(unsigned long *entries, unsigned int nr_entries, bool install) 49 | { 50 | int i; 51 | unsigned long *func_addr; 52 | unsigned long *func_size; 53 | 54 | if (install) { 55 | func_addr = vm_func_addr; 56 | func_size = vm_func_size; 57 | } else { 58 | func_addr = mod_func_addr; 59 | func_size = mod_func_size; 60 | } 61 | 62 | for (i = 0; i < nr_entries; i++) { 63 | int idx; 64 | unsigned long address = entries[i]; 65 | 66 | idx = bsearch(func_addr, 0, NR_INTERFACE_FN - 1, address); 67 | if (idx == -1) 68 | continue; 69 | if (address < func_addr[idx] + func_size[idx]) 70 | return -EAGAIN; 71 | } 72 | 73 | return 0; 74 | } 75 | 76 | #ifdef CONFIG_ARCH_STACKWALK 77 | struct stacktrace_cookie { 78 | unsigned long *store; 79 | unsigned int size; 80 | unsigned int skip; 81 | unsigned int len; 82 | }; 83 | 84 | static bool stack_trace_consume_entry(void *cookie, unsigned long addr) 85 | { 86 | struct stacktrace_cookie *c = cookie; 87 | 88 | if (c->len >= c->size) 89 | return false; 90 | 91 | if (c->skip > 0) { 92 | c->skip--; 93 | return true; 94 | } 95 | c->store[c->len++] = addr; 96 | return c->len < c->size; 97 | } 98 | 99 | static unsigned int get_stack_trace(struct task_struct *tsk, 100 | unsigned long *store, unsigned int size) 101 | { 102 | struct stacktrace_cookie c = { 103 | .store = store, 104 | .size = size, 105 | .skip = 0 106 | }; 107 | 108 | if (!try_get_task_stack(tsk)) 109 | return 0; 110 | 111 | arch_stack_walk(stack_trace_consume_entry, &c, tsk, NULL); 112 | put_task_stack(tsk); 113 | return c.len; 114 | } 115 | #else 116 | #ifdef CONFIG_X86_64 117 | extern void 
__save_stack_trace(struct stack_trace *, struct task_struct *, 118 | struct pt_regs *, bool); 119 | 120 | static inline void 121 | save_stack(struct stack_trace *trace, struct task_struct *tsk) 122 | { 123 | __save_stack_trace(trace, tsk, NULL, false); 124 | } 125 | #else 126 | extern int __save_stack_trace(struct task_struct *, struct stack_trace *, 127 | unsigned int); 128 | 129 | static inline void 130 | save_stack(struct stack_trace *trace, struct task_struct *tsk) 131 | { 132 | __save_stack_trace(tsk, trace, 0); 133 | } 134 | #endif 135 | 136 | static unsigned int get_stack_trace(struct task_struct *tsk, 137 | unsigned long *store, unsigned int size) 138 | { 139 | struct stack_trace trace; 140 | 141 | trace.skip = 0; 142 | trace.nr_entries = 0; 143 | trace.max_entries = MAX_STACK_ENTRIES; 144 | trace.entries = store; 145 | 146 | if (!try_get_task_stack(tsk)) 147 | return 0; 148 | 149 | save_stack(&trace, tsk); 150 | put_task_stack(tsk); 151 | return trace.nr_entries; 152 | } 153 | #endif 154 | 155 | static int stack_check_task(struct task_struct *task, bool install) 156 | { 157 | unsigned long entries[MAX_STACK_ENTRIES]; 158 | unsigned int nr_entries; 159 | 160 | nr_entries = get_stack_trace(task, entries, MAX_STACK_ENTRIES); 161 | return stack_check_fn(entries, nr_entries, install); 162 | } 163 | 164 | static int stack_check(bool install) 165 | { 166 | struct task_struct *p, *t; 167 | int task_count = 0; 168 | int nr_cpus = num_online_cpus(); 169 | int cpu = smp_processor_id(); 170 | 171 | for_each_process_thread(p, t) { 172 | if ((task_count % nr_cpus) == process_id[cpu]) { 173 | if (stack_check_task(t, install)) 174 | return -EBUSY; 175 | } 176 | task_count++; 177 | } 178 | 179 | t = idle_task(cpu); 180 | if (stack_check_task(t, install)) 181 | return -EBUSY; 182 | 183 | return 0; 184 | } 185 | -------------------------------------------------------------------------------- /src/tainted_functions.h: 
--------------------------------------------------------------------------------
// Copyright 2019-2022 Alibaba Group Holding Limited.
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* file contents will be generated automatically */

--------------------------------------------------------------------------------
/tests/build_case:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# Usage: build_case <test-dir>
# Apply <test-dir>/patch.diff inside the "plugsched" container, build the
# scheduler rpm, revert the patch and copy the rpm to /root in the container.

source "$(dirname "$0")/libs/working_dir"
source libs/catch_error

# Quote the argument so a test directory containing spaces is not word-split.
podman cp "$1/patch.diff" plugsched:/root/patch
podman exec plugsched patch -f -p1 -i patch
podman exec plugsched plugsched-cli build scheduler
podman exec plugsched patch -f -p1 -i patch -R
podman exec plugsched ls /root/scheduler/working/rpmbuild/RPMS/$(uname -i)/
podman exec plugsched bash -c "cp /root/scheduler/working/rpmbuild/RPMS/$(uname -i)/scheduler-xxx-*.rpm /root"

--------------------------------------------------------------------------------
/tests/bundles/ci:
--------------------------------------------------------------------------------
schedule
public_var
var_uniformity
cpu_throttle
domain_rebuild
sched_syscall
mem_pressure

--------------------------------------------------------------------------------
/tests/bundles/reboot:
--------------------------------------------------------------------------------
reboot

--------------------------------------------------------------------------------
/tests/bundles/release:
--------------------------------------------------------------------------------
bare_performance

--------------------------------------------------------------------------------
/tests/libs/catch_error:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# Sourced by the test scripts: abort on the first failing command and report
# which command failed.

set -e
# keep track of the last executed command
trap 'last_command=$current_command; current_command=$BASH_COMMAND' DEBUG
# echo an error message before exiting
trap 'ret=$?; cmd=${last_command}; [ $ret -ne 0 ] && echo "\"${cmd}\" command failed with exit code $ret."' EXIT

--------------------------------------------------------------------------------
/tests/libs/working_dir:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# Sourced by the test scripts: change into the directory of the calling
# script and append that directory to PATH.

# Quoted so that a checkout path containing spaces still works.
TEST_PATH=$(realpath "$(dirname "$0")")
cd "${TEST_PATH}"
export PATH="$PATH:${TEST_PATH}"

--------------------------------------------------------------------------------
/tests/prep_env:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# Prepare the host for the test bundles: install dependencies, (re)create the
# "plugsched" container and initialise the scheduler working tree in /tmp/work.

source "$(dirname "$0")/libs/working_dir"
source libs/catch_error

uname_r=$(uname -r)
# Kernel release without its trailing ".<arch>" component.
uname_noarch=${uname_r%.*}
yum install anolis-repos epel-aliyuncs-release -y
yum install sysstat -y
yum install podman yum-utils kernel-debuginfo-${uname_r} kernel-devel-${uname_r} --enablerepo=Plus-debuginfo --enablerepo=Plus -y
yum install python3-psutil python3-sh python3-pyyaml python3-colorlog -y

echo -e "[registries.search]\nregistries = ['docker.io']" > /etc/containers/registries.conf
systemctl restart podman

# Remove a leftover container from a previous run, if any.
container=$(podman ps -a | awk '$NF=="plugsched"{print $1}')
if [ -n "$container" ]; then
    podman rm -f ${container}
fi
mkdir -p /tmp/work

podman build -t plugsched-sdk:test -f ../Dockerfile ..
podman run -itd --name=plugsched -w /root/ -v /tmp/work:/root -v /usr/src/kernels:/usr/src/kernels -v /usr/lib/debug/lib/modules:/usr/lib/debug/lib/modules localhost/plugsched-sdk:test /bin/bash
# Replace the plugsched copy shipped in the image with the tree under test.
podman exec -it plugsched rm -rf /usr/local/lib/plugsched
podman cp .. plugsched:/usr/local/lib/plugsched
cd /tmp/work
yumdownloader --source kernel-${uname_r} --enablerepo=Plus
podman exec -it plugsched plugsched-cli extract_src kernel-${uname_noarch}.src.rpm ./kernel
podman exec -it plugsched plugsched-cli init ${uname_r} ./kernel ./scheduler

--------------------------------------------------------------------------------
/tests/run_test:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2019-2022 Alibaba Group Holding Limited.
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# Usage: run_test <bundle>
# Run every test listed in bundles/<bundle>: build its rpm with build_case,
# then execute test_<name>/assert and print PASS or FAILED.

source "$(dirname "$0")/libs/working_dir"
source libs/catch_error

export RED="\033[31m"
export GREEN="\033[32m"
export RESET="\033[0m"

tests=$(cat "bundles/$1")
prep_env
# ${tests} is deliberately unquoted: it is split into one word per test name.
for T in ${tests}; do
	build_case "test_$T"
	# NOTE(review): a failing assert is only reported; the script still
	# exits with status 0.  Confirm that callers do not rely on the status.
	if "test_$T/assert"; then
		echo -e "$T test ${GREEN}PASS${RESET}"
	else
		dmesg -c
		echo -e "$T test ${RED}FAILED${RESET}"
	fi
done

--------------------------------------------------------------------------------
/tests/test_bare_performance/assert:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# Copyright 2019-2023 Alibaba Group Holding Limited.
3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | import sys 6 | import os 7 | from glob import glob 8 | from sh import wget, unzip, make, grep, lsmod, rpm, yum, Command 9 | import colorlog 10 | 11 | handler = logging.StreamHandler() 12 | handler.setFormatter(colorlog.ColoredFormatter( 13 | '%(cyan)s%(asctime)s%(reset)s %(log_color)s%(levelname)s %(white)s%(message)s%(reset)s', 14 | datefmt='%Y-%m-%d %H:%M:%S')) 15 | logging.getLogger().setLevel(logging.INFO) 16 | logging.getLogger().addHandler(handler) 17 | 18 | class TestMemPressure: 19 | def setup_class(self): 20 | yum.install(['unzip', 'perl-Time-HiRes', 'hwloc-devel', 'python39'], assumeyes=True) 21 | self.rpm = self.get_rpm() 22 | self.install_unixbench() 23 | self.install_will_it_scale() 24 | 25 | def install_unixbench(self): 26 | wget('https://github.com/kdlucas/byte-unixbench/archive/refs/heads/master.zip', 27 | output_document='unixbench.zip') 28 | unzip('-o', 'unixbench.zip') 29 | make(_cwd='byte-unixbench-master/UnixBench') 30 | 31 | def install_will_it_scale(self): 32 | wget('https://github.com/antonblanchard/will-it-scale/archive/refs/heads/master.zip', 33 | output_document='will-it-scale.zip') 34 | unzip('-o', 'will-it-scale.zip') 35 | make(_cwd='will-it-scale-master') 36 | 37 | def teardown_class(self): 38 | if grep(lsmod(), 'scheduler', word_regexp=True, _ok_code=[0,1]).exit_code == 0: 39 | rpm('scheduler-xxx', erase=True) 40 | 41 | def get_rpm(self): 42 | scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) 43 | if len(scheduler_rpm) != 1: 44 | print("Please check your scheduler rpm"); 45 | sys.exit(1) 46 | return scheduler_rpm 47 | 48 | def __test_all(self): 49 | for case in ['getppid1', 'futex1', 'futex2', 'futex3', 'futex4', 50 | 'lock1', 'posix_semaphore1', 'pthread_mutex1', 51 | 'pthread_mutex2', 'pthread_mutex3', 52 | 'pthread_mutex4', 'sched_yield', 'signal1']: 53 | Command('python3.9')('./runtest.py', case, _cwd='will-it-scale-master', _out=sys.stdout) 54 | 
Command('./Run')(_cwd='byte-unixbench-master/UnixBench', _out=sys.stdout) 55 | 56 | def test_all(self): 57 | self.__test_all() 58 | rpm(self.rpm, install=True) 59 | self.__test_all() 60 | 61 | if __name__ == '__main__': 62 | test_unit = TestMemPressure() 63 | test_unit.setup_class() 64 | test_unit.test_all() 65 | test_unit.teardown_class() 66 | -------------------------------------------------------------------------------- /tests/test_bare_performance/patch.diff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/plugsched/1dafa53781de36af3798c1cd0b016c0136799cd7/tests/test_bare_performance/patch.diff -------------------------------------------------------------------------------- /tests/test_cpu_throttle/assert: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | import subprocess 6 | import sh 7 | import os 8 | import sys 9 | import time 10 | import psutil 11 | from glob import glob 12 | import logging 13 | import colorlog 14 | 15 | handler = logging.StreamHandler() 16 | handler.setFormatter(colorlog.ColoredFormatter( 17 | '%(cyan)s%(asctime)s%(reset)s %(log_color)s%(levelname)s %(white)s%(message)s%(reset)s', 18 | datefmt='%Y-%m-%d %H:%M:%S')) 19 | logging.getLogger().setLevel(logging.INFO) 20 | logging.getLogger().addHandler(handler) 21 | 22 | class TestCPUThrottle: 23 | def setup_class(self): 24 | print("CPU throttle test") 25 | self.cgpath = '/sys/fs/cgroup/cpu/test' 26 | sh.tee(sh.echo(1), '/sys/fs/cgroup/cpu/cgroup.clone_children') 27 | sh.mkdir(self.cgpath) 28 | 29 | def init_cgroup(self): 30 | self.child = sh.bash('-c', 'while :; do :; done', _bg=True, bg_exc=False) 31 | sh.tee(sh.echo(self.child.pid), '%s/cgroup.procs' % self.cgpath) 32 | 33 | def set_cfs_quota(self, t_us): 34 | sh.echo(t_us, 
_out='%s/cpu.cfs_quota_us' % self.cgpath) 35 | 36 | def test_all(self): 37 | self.install_module() 38 | self.init_cgroup() 39 | self.set_cfs_quota('50000') 40 | self.check_le_75() 41 | self.check_after_load() 42 | self.set_cfs_quota('100000') 43 | self.check_gt_75() 44 | self.check_after_unload() 45 | 46 | def check_le_75(self): 47 | cpu_util = self.get_cpu_util(self.child.pid) 48 | self.validate_lt(cpu_util, 75) 49 | 50 | def install_module(self): 51 | scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) 52 | if len(scheduler_rpm) != 1: 53 | print("Please check your scheduler rpm"); 54 | self.teardown_class() 55 | sys.exit(1) 56 | scheduler_rpm = scheduler_rpm[0] 57 | sh.rpm('-ivh', scheduler_rpm) 58 | sh.tee(sh.echo(0), '/sys/kernel/plugsched/plugsched/enable') 59 | 60 | def uninstall_module(self): 61 | sh.rpm('-e', 'scheduler-xxx') 62 | 63 | def check_after_load(self): 64 | sh.tee(sh.echo(1), '/sys/kernel/plugsched/plugsched/enable') 65 | cpu_util = self.get_cpu_util(self.child.pid) 66 | self.validate_lt(cpu_util, 75) 67 | 68 | def check_gt_75(self): 69 | cpu_util = self.get_cpu_util(self.child.pid) 70 | self.validate_gt(cpu_util, 75) 71 | 72 | def check_after_unload(self): 73 | sh.tee(sh.echo(0), '/sys/kernel/plugsched/plugsched/enable') 74 | cpu_util = self.get_cpu_util(self.child.pid) 75 | self.validate_gt(cpu_util, 75) 76 | 77 | def get_cpu_util(self, pid): 78 | return psutil.Process(pid).cpu_percent(interval=2) 79 | 80 | def teardown_class(self): 81 | try: 82 | self.child.kill() 83 | self.child.wait() 84 | except sh.SignalException_SIGKILL: 85 | pass 86 | sh.rmdir(self.cgpath) 87 | if sh.grep(sh.lsmod(), 'scheduler', _ok_code=[0,1]).exit_code == 0: 88 | self.uninstall_module() 89 | 90 | def validate_lt(self, util, bound): 91 | if util >= bound: 92 | self.error_handler('less', util, bound) 93 | 94 | def validate_gt(self, util, bound): 95 | if util <= bound: 96 | self.error_handler('greater', util, bound) 97 | 98 | def error_handler(self, expect, 
util, bound): 99 | err_msg = 'CPU util is {} but should be {} than {}'.format(util, expect, bound) 100 | print(err_msg) 101 | self.teardown_class() 102 | raise 103 | 104 | 105 | if __name__ == '__main__': 106 | test_unit = TestCPUThrottle() 107 | test_unit.setup_class() 108 | test_unit.test_all() 109 | test_unit.teardown_class() 110 | -------------------------------------------------------------------------------- /tests/test_cpu_throttle/patch.diff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/plugsched/1dafa53781de36af3798c1cd0b016c0136799cd7/tests/test_cpu_throttle/patch.diff -------------------------------------------------------------------------------- /tests/test_domain_rebuild/assert: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | import os 6 | import sh 7 | import sys 8 | import subprocess 9 | import time 10 | from glob import glob 11 | 12 | class CPUDomainReuildTest: 13 | def setup_class(self): 14 | print("CPU domain rebuild test") 15 | self.init_cpu_set() 16 | 17 | def init_cpu_set(self): 18 | self.cpu_set = [] 19 | 20 | def add_cpu(num): 21 | cpu_state = "/sys/devices/system/cpu/cpu" + str(num) + "/online" 22 | self.cpu_set.append(cpu_state) 23 | 24 | for i in range(0, 6, 2): 25 | add_cpu(i) 26 | 27 | def test_cpu_rebuild(self): 28 | self.record_orig_state() 29 | self.load_scheduler() 30 | self.record_state_after_load() 31 | if self.load_domain_file != self.orig_domain_file or self.orig_state != self.load_state: 32 | self.error_handler() 33 | self.change_cpu_state() 34 | self.record_after_change_cpu_state() 35 | self.unload_scheduler() 36 | self.record_state_after_unload() 37 | if self.state_after_change_cpu != self.state_after_unload or self.domain_file_after_change_cpu != self.unload_domain_file: 
38 | self.error_handler() 39 | 40 | def record_after_change_cpu_state(self): 41 | self.state_after_change_cpu = {} 42 | self.record_data(self.state_after_change_cpu) 43 | self.domain_file_after_change_cpu = set(sh.find("/proc/sys/kernel/sched_domain").split()) 44 | 45 | def record_state_after_load(self): 46 | self.load_state = {} 47 | self.record_data(self.load_state) 48 | self.load_domain_file = set(sh.find("/proc/sys/kernel/sched_domain").split()) 49 | 50 | def record_state_after_unload(self): 51 | self.state_after_unload = {} 52 | self.record_data(self.state_after_unload) 53 | self.unload_domain_file = set(sh.find("/proc/sys/kernel/sched_domain").split()) 54 | 55 | def record_orig_state(self): 56 | self.orig_state = {} 57 | self.record_data(self.orig_state) 58 | self.orig_domain_file = set(sh.find("/proc/sys/kernel/sched_domain").split()) 59 | 60 | def record_data(self, data_arr): 61 | for item in self.cpu_set: 62 | if not os.path.exists(item): 63 | continue 64 | val = sh.cat(item).split()[0] 65 | data_arr[item] = val 66 | 67 | def load_scheduler(self): 68 | scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) 69 | if len(scheduler_rpm) != 1: 70 | print("Please check your scheduler rpm"); 71 | self.teardown_class() 72 | sys.exit(1) 73 | scheduler_rpm = scheduler_rpm[0] 74 | sh.rpm('-ivh', scheduler_rpm) 75 | 76 | def unload_scheduler(self): 77 | tmp = subprocess.Popen("lsmod | grep scheduler", shell=True, stdout=subprocess.PIPE) 78 | if tmp.stdout.read() != b'': 79 | sh.rpm('-e', 'scheduler-xxx') 80 | 81 | def change_cpu_state(self): 82 | def reverse(val): 83 | return "0" if val == "1" else "1" 84 | 85 | for k, v in self.orig_state.items(): 86 | sh.echo(reverse(v), _out=k) 87 | 88 | def reload_cpu_state(self): 89 | for k, v in self.orig_state.items(): 90 | sh.echo(v, _out=k) 91 | 92 | def error_handler(self): 93 | self.unload_scheduler() 94 | self.reload_cpu_state() 95 | sys.exit(1) 96 | 97 | def teardown_class(self): 98 | self.unload_scheduler() 99 
| self.reload_cpu_state() 100 | 101 | if __name__ == '__main__': 102 | unit_test = CPUDomainReuildTest() 103 | unit_test.setup_class() 104 | unit_test.test_cpu_rebuild() 105 | unit_test.teardown_class() 106 | 107 | 108 | -------------------------------------------------------------------------------- /tests/test_domain_rebuild/patch.diff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/plugsched/1dafa53781de36af3798c1cd0b016c0136799cd7/tests/test_domain_rebuild/patch.diff -------------------------------------------------------------------------------- /tests/test_mem_pressure/assert: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2023 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | import sys 6 | import os 7 | from math import log, ceil 8 | import logging 9 | from glob import glob 10 | from sh import awk, rpm, lsmod, grep, dmesg, mount, rm, dd 11 | 12 | import colorlog 13 | 14 | handler = logging.StreamHandler() 15 | handler.setFormatter(colorlog.ColoredFormatter( 16 | '%(cyan)s%(asctime)s%(reset)s %(log_color)s%(levelname)s %(white)s%(message)s%(reset)s', 17 | datefmt='%Y-%m-%d %H:%M:%S')) 18 | logging.getLogger().setLevel(logging.INFO) 19 | logging.getLogger().addHandler(handler) 20 | 21 | class TestMemPressure: 22 | def setup_class(self): 23 | self.step = 0.4 # Leaving only (step^level)% of the whole memory each level 24 | self.min_mem = 50 * 1024 # The most extreme test case 25 | self.all_available = int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) 26 | self.iterations = ceil(log(1.0 * self.min_mem / self.all_available, self.step)) 27 | self.rpm = self.get_rpm() 28 | mount('-o', 'size=1P', '-o', 'remount', '/dev/shm') 29 | 30 | def teardown_class(self): 31 | rm('/dev/shm/pin', force=True) 32 | if grep(lsmod(), 'scheduler', word_regexp=True, 
_ok_code=[0,1]).exit_code == 0: 33 | rpm('-e', 'scheduler-xxx') 34 | 35 | def get_rpm(self): 36 | scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) 37 | if len(scheduler_rpm) != 1: 38 | print("Please check your scheduler rpm"); 39 | sys.exit(1) 40 | return scheduler_rpm 41 | 42 | def check_oom(self, pin_mem): 43 | exit_if_oom = '|'.join(['rpm', 'insmod']) 44 | if grep(dmesg(), '-P', 'Killed process [0-9]* \((%s)\)' % exit_if_oom, _ok_code=[0,1]).exit_code == 1: 45 | return 46 | logging.warning("Test exited early because oomed when pinning %d kbytes memory" % pin_mem) 47 | self.teardown_class() 48 | os._exit(0) 49 | 50 | def pin_memory(self, target): 51 | left = int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) 52 | if left < target: 53 | return 54 | logging.info("Adjusting available memory from %dKB to %dKB" % (left, target)) 55 | new_anonymous_obj = ceil((left - target) / 2048.0) 56 | dd('if=/dev/zero', 'of=/dev/shm/pin', 'bs=2M', 'count=%d' % new_anonymous_obj, 'oflag=append', 'conv=notrunc') 57 | self.check_oom(target) 58 | 59 | def test_level(self, level): 60 | dmesg(clear=True) 61 | curr = int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) 62 | target = max(self.all_available * self.step ** level, self.min_mem) 63 | self.pin_memory(target) 64 | curr = int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) 65 | logging.info("Installing rpm when available memory = %dKB" % curr) 66 | try: 67 | rpm('-ivh', self.rpm) 68 | grep(lsmod(), 'scheduler', word_regexp=True) 69 | rpm('-e', 'scheduler-xxx') 70 | except Exception: 71 | self.check_oom(target) 72 | raise 73 | 74 | def test_all(self): 75 | for level in range(1, self.iterations + 4): 76 | self.test_level(level) 77 | 78 | if __name__ == '__main__': 79 | test_unit = TestMemPressure() 80 | test_unit.setup_class() 81 | test_unit.test_all() 82 | test_unit.teardown_class() 83 | -------------------------------------------------------------------------------- /tests/test_mem_pressure/patch.diff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/plugsched/1dafa53781de36af3798c1cd0b016c0136799cd7/tests/test_mem_pressure/patch.diff -------------------------------------------------------------------------------- /tests/test_public_var/assert: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | import subprocess 6 | import sh 7 | import yaml 8 | import os 9 | import sys 10 | from glob import glob 11 | 12 | class TestPublicVar: 13 | def setup_class(self): 14 | print("Public vars test") 15 | cur_sys = str(sh.uname('-r')).strip() 16 | scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) 17 | if len(scheduler_rpm) != 1: 18 | print("Please check your scheduler rpm"); 19 | self.teardown_class() 20 | sys.exit(1) 21 | scheduler_rpm = scheduler_rpm[0] 22 | sh.rpm('-ivh', scheduler_rpm) 23 | module = '/var/plugsched/' + cur_sys + '/scheduler.ko' 24 | yaml_file = '/tmp/work/scheduler/working/boundary.yaml' 25 | with open(yaml_file, 'r') as f: 26 | yaml_version = float(yaml.__version__) 27 | if yaml_version >= 5.1: 28 | config = yaml.load(f, Loader=yaml.FullLoader) 29 | else: 30 | config = yaml.load(f) 31 | self.public_vars = config['global_var']['extra_public'] 32 | cmd = "objdump -t " + module 33 | self.symtab = str(subprocess.check_output(cmd, shell=True)).split('\\n') 34 | 35 | def test_syms(self): 36 | for var in self.public_vars: 37 | for cur_line in self.symtab: 38 | if cur_line == '' or cur_line.split()[-1] != var: 39 | continue 40 | if not '*UND*' in cur_line: 41 | self.error_handler(var) 42 | break 43 | 44 | def error_handler(self, var): 45 | print("Public var: " + str(var) + "is not UND") 46 | self.teardown_class() 47 | sys.exit(1) 48 | 49 | def teardown_class(self): 50 | tmp = subprocess.Popen("lsmod | 
grep scheduler", shell=True, stdout=subprocess.PIPE) 51 | if tmp.stdout.read() != b'': 52 | sh.rpm('-e', 'scheduler-xxx') 53 | 54 | if __name__ == '__main__': 55 | test_unit = TestPublicVar() 56 | test_unit.setup_class() 57 | test_unit.test_syms() 58 | test_unit.teardown_class() 59 | -------------------------------------------------------------------------------- /tests/test_public_var/patch.diff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/plugsched/1dafa53781de36af3798c1cd0b016c0136799cd7/tests/test_public_var/patch.diff -------------------------------------------------------------------------------- /tests/test_reboot/assert: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2023 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | from sh import Command 6 | import sys 7 | from glob import glob 8 | import os 9 | from sh import rpm, grep, lsmod, grubby, rpm2cpio, echo, cpio, awk, modinfo, yum, reboot, uname 10 | import logging 11 | import colorlog 12 | 13 | handler = logging.StreamHandler() 14 | handler.setFormatter(colorlog.ColoredFormatter( 15 | '%(cyan)s%(asctime)s%(reset)s %(log_color)s%(levelname)s %(white)s%(message)s%(reset)s', 16 | datefmt='%Y-%m-%d %H:%M:%S')) 17 | logging.getLogger().setLevel(logging.INFO) 18 | logging.getLogger().addHandler(handler) 19 | logging.getLogger().addHandler(logging.FileHandler('/var/log/reboot-test')) 20 | 21 | runonce = Command('test_reboot/runonce') 22 | 23 | class TestReboot: 24 | def setup_class(self, step, alter_ver=''): 25 | self.step = int(step) 26 | self.alter_ver = alter_ver 27 | 28 | def get_rpm(self): 29 | scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) 30 | if len(scheduler_rpm) != 1: 31 | print("Please check your scheduler rpm"); 32 | sys.exit(1) 33 | return scheduler_rpm 34 | 35 | def 
get_kernel_from_rpm(self): 36 | rpm = self.get_rpm() 37 | echo('*/scheduler.ko', _out='pattern.txt') 38 | cpio(rpm2cpio(rpm, _piped=True), 39 | to_stdout=True, extract=True, pattern_file='pattern.txt', 40 | _out='scheduler.ko') 41 | return awk(modinfo('scheduler.ko'), '/vermagic/{print $2}').strip() 42 | 43 | def error_handler(self): 44 | print("Reboot test " + "\033[31mFAILED\033[0m") 45 | self.teardown_class() 46 | raise 47 | 48 | def check_scheduler_ver(self, expected): 49 | curr = uname(kernel_release=True).strip() 50 | if expected != curr: 51 | self.error_handler() 52 | 53 | def install_alternative_kernel(self): 54 | curr = uname(kernel_release=True).strip() 55 | uname_noarch = curr[:curr.rfind('.')] 56 | arch = uname(hardware_platform=True).strip() 57 | 58 | installed_kernel = yum.list.installed('kernel', showduplicates=True, color='never', enablerepo='Plus') 59 | available_kernel = yum.list('kernel', showduplicates=True, color='never', enablerepo='Plus') 60 | installed_vers = awk(installed_kernel, '/^kernel/{print $2}').splitlines() 61 | available_vers = awk(available_kernel, '/^kernel/{print $2}').splitlines() 62 | 63 | if len(installed_vers) >= 2: 64 | installed_vers.remove(uname_noarch) 65 | return '%s.%s' % (installed_vers[0], arch) 66 | else: 67 | available_vers.remove(uname_noarch) 68 | yum.install('kernel-%s' % available_vers[0], assumeyes=True) 69 | return '%s.%s' % (available_vers[0], arch) 70 | 71 | def change_kernel(self, ver): 72 | vmlinuz = '/boot/vmlinuz-%s' % ver 73 | grubby(set_default=vmlinuz) 74 | 75 | def test_all(self): 76 | if self.step == 0: 77 | logging.info("NOTE this test runs in the background, please check /var/log/reboot-test") 78 | # check kernel, install, check ko, change kernel, reboot 79 | logging.info("Running Test Reboot #1") 80 | self.check_scheduler_ver(self.get_kernel_from_rpm()) 81 | rpm(self.get_rpm(), install=True) 82 | grep(lsmod(), 'scheduler', word_regexp=True) 83 | alter_ver = self.install_alternative_kernel() 84 | 
self.change_kernel(alter_ver) 85 | runonce('test_reboot/assert %d %s' % (self.step + 1, alter_ver)) 86 | elif self.step == 1: 87 | # check kernel, remove, install, change back kernel, reboot 88 | logging.info("Running Test Reboot #2") 89 | self.check_scheduler_ver(self.alter_ver) 90 | rpm('scheduler-xxx', erase=True) 91 | rpm(self.get_rpm(), install=True) 92 | self.change_kernel(self.get_kernel_from_rpm()) 93 | runonce('test_reboot/assert %d' % (self.step + 1)) 94 | elif self.step == 2: 95 | # check kernel, check ko, remove 96 | logging.info("Running Test Reboot #3") 97 | self.check_scheduler_ver(self.get_kernel_from_rpm()) 98 | grep(lsmod(), 'scheduler', word_regexp=True) 99 | rpm('scheduler-xxx', erase=True) 100 | return 101 | else: 102 | return 103 | 104 | reboot() 105 | 106 | def teardown_class(self): 107 | if grep(lsmod(), 'scheduler', word_regexp=True, _ok_code=[0,1]).exit_code == 0: 108 | rpm('scheduler-xxx', erase=True) 109 | 110 | if __name__ == '__main__': 111 | test_unit = TestReboot() 112 | if len(sys.argv) == 1: 113 | args = [0] 114 | elif len(sys.argv) >= 2: 115 | args = sys.argv[1:] 116 | 117 | test_unit.setup_class(*args) 118 | test_unit.test_all() 119 | test_unit.teardown_class() 120 | -------------------------------------------------------------------------------- /tests/test_reboot/patch.diff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aliyun/plugsched/1dafa53781de36af3798c1cd0b016c0136799cd7/tests/test_reboot/patch.diff -------------------------------------------------------------------------------- /tests/test_reboot/runonce: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019-2023 Alibaba Group Holding Limited. 3 | # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 4 | 5 | if [[ $# -eq 0 ]]; then 6 | echo "Schedules a command to be run after the next reboot." 
7 | echo "Usage: $(basename $0) " 8 | echo " $(basename $0) -p " 9 | echo " $(basename $0) -r " 10 | else 11 | REMOVE=0 12 | COMMAND=${!#} 13 | SCRIPTPATH=$PATH 14 | 15 | while getopts ":r:p:" optionName; do 16 | case "$optionName" in 17 | r) REMOVE=1; COMMAND=$OPTARG;; 18 | p) SCRIPTPATH=$OPTARG;; 19 | esac 20 | done 21 | 22 | SCRIPT="${HOME}/.$(basename $0)_$(echo $COMMAND | sed 's/[^a-zA-Z0-9_]/_/g')" 23 | 24 | if [[ ! -f $SCRIPT ]]; then 25 | echo "PATH=$SCRIPTPATH" >> $SCRIPT 26 | echo "cd $(pwd)" >> $SCRIPT 27 | echo "logger -t $(basename $0) -p local3.info \"COMMAND=$COMMAND ; USER=\$(whoami) ($(logname)) ; PWD=$(pwd) ; PATH=\$PATH\"" >> $SCRIPT 28 | echo "$0 -r \"$(echo $COMMAND | sed 's/\"/\\\"/g')\"" >> $SCRIPT 29 | echo "$COMMAND | logger -t $(basename $0) -p local3.info" >> $SCRIPT 30 | chmod +x $SCRIPT 31 | fi 32 | 33 | CRONTAB="${HOME}/.$(basename $0)_temp_crontab_$RANDOM" 34 | ENTRY="@reboot $SCRIPT" 35 | 36 | echo "$(crontab -l 2>/dev/null)" | grep -v "$ENTRY" | grep -v "^# DO NOT EDIT THIS FILE - edit the master and reinstall.$" | grep -v "^# ([^ ]* installed on [^)]*)$" | grep -v "^# (Cron version [^$]*\$[^$]*\$)$" > $CRONTAB 37 | 38 | if [[ $REMOVE -eq 0 ]]; then 39 | echo "$ENTRY" >> $CRONTAB 40 | fi 41 | 42 | crontab $CRONTAB 43 | rm $CRONTAB 44 | 45 | if [[ $REMOVE -ne 0 ]]; then 46 | rm $SCRIPT 47 | fi 48 | fi 49 | -------------------------------------------------------------------------------- /tests/test_sched_syscall/assert: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
class TestSchedSyscall:
    """Check cpuset writes and sched_setscheduler (via chrt) with the scheduler RPM.

    The test is driven by the ``__main__`` section of this file:
    ``setup_class`` -> ``test_all`` -> ``teardown_class``.  Any failed check
    goes through ``error_handler``, which cleans up and exits with status 1.
    """

    _CPUSET_ROOT = "/sys/fs/cgroup/cpuset"
    _CPUSET_TEST = "/sys/fs/cgroup/cpuset/test"

    def setup_class(self):
        """Start a busy-looping shell child and remember the RT runtime sysctl."""
        print("Sched syscall test")
        cmd = "while :; do :; done"
        self.child = subprocess.Popen(cmd, shell=True)
        # Saved so that teardown can write the original value back.
        self.rt_runtime_us = int(sh.sysctl('kernel.sched_rt_runtime_us').split()[-1])

    def load_scheduler(self):
        """Install the single scheduler RPM expected under /tmp/work.

        Exits with status 1 (after full cleanup) when zero or several
        candidate packages are found.
        """
        scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm'))
        if len(scheduler_rpm) != 1:
            print("Please check your scheduler rpm")
            # error_handler also removes the test cgroup that test_cpuset
            # created before calling us; plain teardown used to leave it behind.
            self.error_handler()
        sh.rpm('-ivh', scheduler_rpm[0])

    def test_cpuset(self):
        """Copy the root cpuset's mems/cpus into a child cgroup and read them back."""
        fa_mems = sh.cat(self._CPUSET_ROOT + "/cpuset.mems").split()[0]
        fa_cpus = sh.cat(self._CPUSET_ROOT + "/cpuset.cpus").split()[0]
        sh.mkdir(self._CPUSET_TEST)
        # The scheduler is loaded after the cgroup exists and before it is
        # configured, so the writes below run with the module installed.
        self.load_scheduler()
        sh.echo(fa_mems, _out=self._CPUSET_TEST + "/cpuset.mems")
        sh.echo(fa_cpus, _out=self._CPUSET_TEST + "/cpuset.cpus")
        ch_mems = sh.cat(self._CPUSET_TEST + "/cpuset.mems").split()[0]
        ch_cpus = sh.cat(self._CPUSET_TEST + "/cpuset.cpus").split()[0]
        if fa_mems != ch_mems or fa_cpus != ch_cpus:
            self.error_handler()
        self.remove_file()

    def test_policy_and_prio(self):
        """Switch the child to SCHED_FIFO priority 10 and verify it with chrt."""
        # NOTE(review): -1 presumably lifts RT throttling so the busy loop can
        # run as SCHED_FIFO without being throttled - confirm.
        sh.sysctl('-w', 'kernel.sched_rt_runtime_us=-1')
        sh.chrt('-p', '-f', 10, self.child.pid)
        res = sh.chrt('-p', self.child.pid).split('\n')
        # First output line ends with the policy, second with the priority.
        if res[0].split()[-1] != 'SCHED_FIFO' or res[1].split()[-1] != '10':
            logging.info("class=%s prio=%s", res[0], res[1])
            self.error_handler()

    def test_all(self):
        """Run every check in order."""
        self.test_cpuset()
        self.test_policy_and_prio()

    def error_handler(self):
        """Clean up everything the test touched, then exit with status 1."""
        self.teardown_class()
        sys.exit(1)

    def remove_file(self):
        """Remove the test cpuset cgroup if it exists."""
        if os.path.exists(self._CPUSET_TEST):
            sh.rmdir(self._CPUSET_TEST)

    def unload_scheduler(self):
        """Erase the scheduler package when lsmod lists a scheduler module."""
        # subprocess.run waits for the pipeline and closes the pipe; the
        # previous bare Popen was never waited on.
        probe = subprocess.run("lsmod | grep scheduler", shell=True,
                               stdout=subprocess.PIPE)
        if probe.stdout != b'':
            sh.rpm('-e', 'scheduler-xxx')

    def teardown_class(self):
        """Stop the child, drop the test cgroup, unload the module, restore sysctl."""
        self.child.kill()
        self.child.wait()
        # No-op on the success path, where test_cpuset already removed it.
        self.remove_file()
        self.unload_scheduler()
        sh.sysctl('-w', 'kernel.sched_rt_runtime_us={}'.format(self.rt_runtime_us))
dmesg | grep "I am the new scheduler: __schedule"; then 7 | 2>&1 echo "Failed to install the scheduler module" 8 | exit 1 9 | fi 10 | rpm -e scheduler-xxx 11 | -------------------------------------------------------------------------------- /tests/test_schedule/patch.diff: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2022 Alibaba Group Holding Limited. 2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | diff --git a/scheduler/kernel/sched/mod/core.c b/scheduler/kernel/sched/mod/core.c 5 | index 9f16b72..21262fd 100644 6 | --- a/scheduler/kernel/sched/mod/core.c 7 | +++ b/scheduler/kernel/sched/mod/core.c 8 | @@ -3248,6 +3248,10 @@ static void __sched notrace __schedule(bool preempt) 9 | struct rq_flags rf; 10 | struct rq *rq; 11 | int cpu; 12 | + char buf[128]; 13 | + 14 | + sprintf(buf, "%s", "I am the new scheduler: __schedule\n"); 15 | + printk_once(buf); 16 | 17 | cpu = smp_processor_id(); 18 | rq = cpu_rq(cpu); 19 | -------------------------------------------------------------------------------- /tests/test_var_uniformity/assert: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
class TestVarUniformity:
    """Check that scheduler tunables keep their values across module load/unload.

    The test is driven by the ``__main__`` section of this file:
    ``setup_class`` -> ``test_data_uniformity`` -> ``teardown_class``.
    """

    def setup_class(self):
        """Define the list of tunable files that are recorded and compared."""
        print("Var uniformity test")
        self.global_name = [
            "/proc/sys/kernel/sched_child_runs_first",
            "/proc/sys/kernel/sched_min_granularity_ns",
            "/proc/sys/kernel/sched_latency_ns",
            "/proc/sys/kernel/sched_wakeup_granularity_ns",
            "/proc/sys/kernel/sched_tunable_scaling",
            "/proc/sys/kernel/sched_migration_cost_ns",
            "/proc/sys/kernel/sched_nr_migrate",
            "/proc/sys/kernel/sched_schedstats",
            "/proc/sys/kernel/numa_balancing_scan_delay_ms",
            "/proc/sys/kernel/numa_balancing_scan_period_min_ms",
            "/proc/sys/kernel/numa_balancing_scan_period_max_ms",
            "/proc/sys/kernel/numa_balancing_scan_size_mb",
            "/proc/sys/kernel/numa_balancing",
            "/proc/sys/kernel/sched_rt_period_us",
            "/proc/sys/kernel/sched_rr_timeslice_ms",
            "/proc/sys/kernel/sched_autogroup_enabled",
            "/proc/sys/kernel/sched_cfs_bandwidth_slice_us",
            "/sys/kernel/debug/sched_debug",
        ]

    def before_change(self):
        """Snapshot the tunables, install the scheduler, and snapshot again."""
        self.orig_data = {}
        self.record_data(self.orig_data)
        self.load_scheduler()
        self.data_after_load = {}
        self.record_data(self.data_after_load)

    def record_data(self, dict: Dict):
        """Store the stripped content of every existing tunable file into *dict*."""
        for item in self.global_name:
            # Files that do not exist on this kernel are skipped.
            if os.path.exists(item):
                dict[item] = str(sh.cat(item)).strip()

    def load_scheduler(self):
        """Install the single scheduler RPM expected under /tmp/work.

        Exits with status 1 (after teardown) when zero or several candidate
        packages are found.
        """
        scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm'))
        if len(scheduler_rpm) != 1:
            print("Please check your scheduler rpm")
            self.teardown_class()
            sys.exit(1)
        sh.rpm('-ivh', scheduler_rpm[0])

    def after_change_unload(self):
        """Modify the tunables, then snapshot them before and after unloading."""
        self.modify_data()
        self.data_after_modified = {}
        self.record_data(self.data_after_modified)
        sh.rpm('-e', 'scheduler-xxx')
        self.data_after_unload = {}
        self.record_data(self.data_after_unload)

    def modify_data(self):
        """Write a different value to every tunable recorded in ``orig_data``."""
        def reverse(ch):
            # All-digit values: "0" becomes "1", anything else is decremented.
            if ch.isdigit():
                return '1' if ch == '0' else str(int(ch) - 1)
            # Every non-digit value is treated as a Y/N flag and toggled.
            return 'N' if ch == 'Y' else 'Y'

        for k, v in self.orig_data.items():
            sh.echo(reverse(v), _out=k)

    def teardown_class(self):
        """Unload the scheduler if it is still loaded and restore the tunables."""
        # subprocess.run waits for the pipeline and closes the pipe; the
        # previous bare Popen was never waited on.
        probe = subprocess.run("lsmod | grep scheduler", shell=True,
                               stdout=subprocess.PIPE)
        if probe.stdout != b'':
            sh.rpm('-e', 'scheduler-xxx')
        # orig_data only exists once before_change() has started.
        for k, v in getattr(self, 'orig_data', {}).items():
            sh.echo(v, _out=k)

    def test_data_uniformity(self):
        """Fail unless values survive both loading and unloading the module."""
        self.before_change()
        if self.orig_data != self.data_after_load:
            self.error_handler()
        self.after_change_unload()
        if self.data_after_modified != self.data_after_unload:
            self.error_handler()

    def error_handler(self):
        """Clean up and exit with status 1."""
        self.teardown_class()
        sys.exit(1)
# Print the disassembly of __schedule from $object.
#
# In the "init" stage only the [$start_addr, $end_addr) range is
# disassembled, and symbol-header lines and blank lines are dropped.
# In any other stage the whole object is disassembled and the
# "<__schedule>:" header plus the 30 lines that follow it are printed.
get_function_asm()
{
	case "$stage" in
	init)
		objdump -d $object --start-address=$start_addr --stop-address=$end_addr |
			sed -e '/ <.*>:$/d' -e '/^$/d'
		;;
	*)
		objdump -d $object |
			grep -A30 "<__schedule>:"
		;;
	esac
}
# Print the stack frame size of __schedule on aarch64.
#
# Reads the disassembly from the global $schedule_asm, takes the last field
# of the FIRST line matching "stp x29, x30" (for example "#-112]!") and
# strips everything from the "]" onwards and everything up to the "-".
# Exits with status 1 when no such line is found.
function get_stack_size_aarch64()
{
	# "exit" stops awk at the first match, as the x86_64 variant does.
	# The former "quit" is not an awk statement (it is just an empty
	# variable), so every matching line was printed.  [[:space:]] is used
	# instead of the gawk-only "\s".
	stack_size=$(awk '/stp[[:space:]]*x29, x30/ {print $NF; exit}' <<< "$schedule_asm")
	stack_size=${stack_size%]*}
	stack_size=${stack_size#*-}

	if [ -z "${stack_size// }" ]; then
		1>&2 echo "ERROR: stack_size of __schedule not found in target object."
		exit 1
	fi
	echo $stack_size
}
# Set the global flag_stack_protector to Y when the kernel config file
# named by $config has CONFIG_STACKPROTECTOR=y, and to N otherwise.
function read_config()
{
	# Quote the path so that it survives whitespace, and anchor the match
	# at the start of the line so that only a real assignment counts.
	if grep -q '^CONFIG_STACKPROTECTOR=y' "$config"; then
		flag_stack_protector=Y
	else
		flag_stack_protector=N
	fi
}
# Builds the statically linked symbol_resolve tool.
# OUTPUT may be set by the caller to prefix the path of the resulting binary.
SYMBOL_RESOLVE := $(OUTPUT)symbol_resolve
LIBELF_FLAGS := $(shell pkg-config --static libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell pkg-config --static libelf --libs 2>/dev/null || echo -lelf)

all: $(SYMBOL_RESOLVE)

CPP = g++
# LIBELF_FLAGS is appended so that a libelf installed outside the default
# include path is found when compiling; it is empty when pkg-config fails.
CPPFLAGS = -g -static $(LIBELF_FLAGS)
OBJS := symbol_resolve.o

$(SYMBOL_RESOLVE): $(OBJS)
	$(CPP) $(CPPFLAGS) $(LDFLAGS) $(OBJS) -o $(SYMBOL_RESOLVE) $(LIBELF_LIBS)

# Remove the binary where it was actually built as well as the local one.
clean:
	rm -f $(SYMBOL_RESOLVE) symbol_resolve $(OBJS)

.PHONY: all clean
2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | static void ERROR(std::string msg, bool elf_error, std::string extra="") 17 | { 18 | if (elf_error) 19 | std::cerr << msg << ": " << elf_errmsg(-1) << std::endl; 20 | else 21 | std::cerr << msg << ": " << extra << std::endl; 22 | std::abort(); 23 | } 24 | 25 | typedef std::map> kallsym_collection; 26 | typedef std::map sympos_collection; 27 | 28 | static void resolve_ref(const char *fname, kallsym_collection &kallsyms, sympos_collection &symposes) 29 | { 30 | int fd, sympos; 31 | Elf *elf; 32 | GElf_Sym sym; 33 | GElf_Shdr sh; 34 | Elf_Scn *scn = NULL; 35 | Elf_Data *data = NULL; 36 | size_t shstrndx, i; 37 | std::vector kallsym; 38 | char *name, modified = 0; 39 | 40 | if (elf_version(EV_CURRENT) == EV_NONE ) 41 | ERROR("ELF library initialization failed", true); 42 | 43 | fd = open(fname, O_RDWR); 44 | if (fd == -1) 45 | ERROR("open", true); 46 | 47 | elf = elf_begin(fd, ELF_C_RDWR, NULL); 48 | if (!elf) 49 | ERROR("elf_begin", true); 50 | 51 | elf_flagelf(elf, ELF_C_SET, ELF_F_LAYOUT); 52 | 53 | /* Find .symtab */ 54 | if (elf_getshdrstrndx(elf, &shstrndx)) 55 | ERROR("elf_getshdrstrndx", true); 56 | 57 | for (scn = elf_nextscn(elf, scn); scn; scn = elf_nextscn(elf, scn)) { 58 | if (!scn) 59 | ERROR("scn NULL", true); 60 | if (!gelf_getshdr(scn, &sh)) 61 | ERROR("gelf_getshdr", true); 62 | if (!(name = elf_strptr(elf, shstrndx, sh.sh_name))) 63 | ERROR("elf_strptr", true); 64 | if (!(data = elf_getdata(scn, NULL))) 65 | ERROR("elf_getdata", true); 66 | if (!strcmp(name, ".symtab")) 67 | break; 68 | } 69 | 70 | /* Find UND symbols in kallsyms */ 71 | for (i=0; i < sh.sh_size / sh.sh_entsize; i++) { 72 | if (!gelf_getsym(data, i, &sym)) 73 | ERROR("gelf_getsym", true); 74 | if (!(name = elf_strptr(elf, sh.sh_link, sym.st_name))) 75 | 
ERROR("elf_strptr", true); 76 | if (sym.st_shndx != SHN_UNDEF) 77 | continue; 78 | /* 79 | * Filter out the "__orig_" prefix, which represents interface 80 | * or callback functions defined in vmlinux. 81 | */ 82 | if (strstr(name, "__orig_")) 83 | name += sizeof("__orig_") - 1; 84 | if (kallsyms.find(name) == kallsyms.end()) 85 | continue; 86 | kallsym = kallsyms[name]; 87 | 88 | /* 89 | * Symbols which don't appear in sched_outsider may be 90 | * 1. Global symbols => sympos should be 0 91 | * 2. Optimized, all prefixed with .isra, .constprop. => should fail 92 | */ 93 | if (symposes.find(name) != symposes.end()) 94 | sympos = symposes[name]; 95 | else 96 | sympos = 0; 97 | if (sympos == 0 && kallsym.size() > 1) 98 | ERROR("global symbol ambigouos is unresolvable.", false, name); 99 | if (sympos > 0 && kallsym.size() < sympos) 100 | ERROR("local symbol doens't have as many alternatives.", false, name); 101 | if (sympos > 0) 102 | sympos --; 103 | /* Resolve UND symbols */ 104 | sym.st_shndx = SHN_ABS; 105 | sym.st_value = kallsym[sympos]; 106 | modified = 1; 107 | if (gelf_update_sym(data, i, &sym) == -1) 108 | ERROR("gelf_update_sym", true); 109 | } 110 | 111 | /* Write back elf file */ 112 | if (modified) { 113 | if (!elf_flagdata(data, ELF_C_SET, ELF_F_DIRTY)) 114 | ERROR("elf_flagdata", true); 115 | if (elf_update(elf, ELF_C_WRITE) == -1) 116 | ERROR("elf_update", true); 117 | } 118 | 119 | elf_end(elf); 120 | close(fd); 121 | } 122 | 123 | static void load_kallsyms(const char *fname, kallsym_collection &kallsyms) 124 | { 125 | unsigned long long addr; 126 | char type; 127 | std::string name, line; 128 | std::ifstream f(fname); 129 | std::stringstream buffer; 130 | 131 | if (!f.is_open()) 132 | ERROR("fopen kallsyms", false); 133 | 134 | while (getline(f, line)) { 135 | std::istringstream line_stream(line); 136 | line_stream >> std::hex >> addr >> type >> name; 137 | if (name == "kern_path" && type != 'T') 138 | continue; 139 | if (name.find('.') != name.npos) 
140 | continue; 141 | /* Reached modules */ 142 | if (!line_stream.eof()) break; 143 | kallsyms[name].push_back(addr); 144 | } 145 | 146 | f.close(); 147 | } 148 | 149 | int main(int argc, const char **argv) 150 | { 151 | kallsym_collection kallsyms; 152 | sympos_collection sched_outsider = { 153 | #include "undefined_functions.h" 154 | }; 155 | 156 | load_kallsyms(argv[2], kallsyms); 157 | resolve_ref(argv[1], kallsyms, sched_outsider); 158 | 159 | return 0; 160 | } 161 | -------------------------------------------------------------------------------- /tools/symbol_resolve/undefined_functions.h: -------------------------------------------------------------------------------- 1 | // Copyright 2019-2022 Alibaba Group Holding Limited. 2 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 3 | 4 | /* file contents will be generated automatically */ 5 | -------------------------------------------------------------------------------- /tools/yaml-diff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019-2022 Alibaba Group Holding Limited. 
def YamlDiff(old_file, new_file):
    """Log the outsider functions that differ between two boundary_extract.yaml files.

    Every entry of ``function.outsider`` that is present in exactly one of
    the two files is reported with a warning.  Nothing is returned.

    :param old_file: path of the 1st yaml file
    :param new_file: path of the 2nd yaml file
    """

    def read_outsiders(path):
        with open(path) as f:
            doc = load(f, Loader)
        # An empty "outsider:" key loads as None; treat it as no entries.
        return set(doc['function']['outsider'] or ())

    # Symmetric difference: entries that are in one file but not the other.
    changed_set = read_outsiders(old_file) ^ read_outsiders(new_file)

    # Sorted (by text form) so that repeated runs print in the same order.
    for changed in sorted(changed_set, key=str):
        # logging.warn is a deprecated alias that Python 3.13 removed.
        logging.warning('DIFF: check the outsider \"%s\"', changed)

    logging.info("Bye: analyze the DIFF and remember to update boundary.yaml")