├── .gitignore ├── 0001-add-umonitor-umwait-C0.x-C-states.patch ├── 0001-mm-memcontrol-add-some-branch-hints-based-on-gcov-an.patch ├── 0001-powerbump-functionality.patch ├── 0001-sched-cpuset-Fix-dl_cpu_busy-panic-due-to-empty-cs-c.patch ├── 0001-sched-migrate.patch ├── 0001-sched-numa-Initialise-numa_migrate_retry.patch ├── 0002-add-networking-support-for-powerbump.patch ├── 0002-exit-Fix-typo-in-comment-s-sub-theads-sub-threads.patch ├── 0002-sched-core-add-some-branch-hints-based-on-gcov-analy.patch ├── 0002-sched-migrate.patch ├── 0002-sched-numa-Do-not-swap-tasks-between-nodes-when-spar.patch ├── 0003-futex-bump.patch ├── 0003-sched-numa-Apply-imbalance-limitations-consistently.patch ├── 0003-sched-rt-Fix-Sparse-warnings-due-to-undefined-rt.c-d.patch ├── 0004-sched-core-Do-not-requeue-task-on-CPU-excluded-from-.patch ├── 0004-sched-numa-Adjust-imb_numa_nr-to-a-better-approximat.patch ├── 0005-sched-fair-Consider-CPU-affinity-when-allowing-NUMA-.patch ├── 0006-sched-fair-Optimize-and-simplify-rq-leaf_cfs_rq_list.patch ├── 0007-sched-deadline-Use-proc_douintvec_minmax-limit-minim.patch ├── 0008-sched-Allow-newidle-balancing-to-bail-out-of-load_ba.patch ├── 0009-sched-Fix-the-check-of-nr_running-at-queue-wakelist.patch ├── 0010-sched-Remove-the-limitation-of-WF_ON_CPU-on-wakelist.patch ├── 0011-selftests-rseq-riscv-use-rseq_get_abi-helper.patch ├── 0012-selftests-rseq-riscv-fix-literal-suffix-warning.patch ├── 0013-selftests-rseq-check-if-libc-rseq-support-is-registe.patch ├── 0014-sched-fair-Remove-redundant-word.patch ├── 0015-sched-Remove-unused-function-group_first_cpu.patch ├── 0016-sched-only-perform-capability-check-on-privileged-op.patch ├── 0017-sched-fair-Introduce-SIS_UTIL-to-search-idle-CPU-bas.patch ├── 0018-sched-fair-Provide-u64-read-for-32-bits-arch-helper.patch ├── 0019-sched-fair-Decay-task-PELT-values-during-wakeup-migr.patch ├── 0020-sched-drivers-Remove-max-param-from-effective_cpu_ut.patch ├── 
0021-sched-fair-Rename-select_idle_mask-to-select_rq_mask.patch ├── 0022-sched-fair-Use-the-same-cpumask-per-PD-throughout-fi.patch ├── 0023-sched-fair-Remove-task_util-from-effective-utilizati.patch ├── 0024-sched-fair-Remove-the-energy-margin-in-feec.patch ├── 0025-sched-core-add-forced-idle-accounting-for-cgroups.patch ├── 0026-sched-core-Use-try_cmpxchg-in-set_nr_-and_not-if-_po.patch ├── 0027-sched-fair-fix-case-with-reduced-capacity-CPU.patch ├── 0028-sched-core-Always-flush-pending-blk_plug.patch ├── 0029-nohz-full-sched-rt-Fix-missed-tick-reenabling-bug-in.patch ├── 0030-sched-core-Fix-the-bug-that-task-won-t-enqueue-into-.patch ├── 0031-rseq-Deprecate-RSEQ_CS_FLAG_NO_RESTART_ON_-flags.patch ├── 0032-rseq-Kill-process-when-unknown-flags-are-encountered.patch ├── 0050-Revert-ext4-do-not-create-EA-inode-under-buffer-lock.patch ├── 0051-block-bfq-Fix-division-by-zero-error-on-zero-wsum.patch ├── 0101-i8042-decrease-debug-message-level-to-info.patch ├── 0102-increase-the-ext4-default-commit-age.patch ├── 0103-silence-rapl.patch ├── 0104-pci-pme-wakeups.patch ├── 0106-intel_idle-tweak-cpuidle-cstates.patch ├── 0107-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch ├── 0108-smpboot-reuse-timer-calibration.patch ├── 0109-initialize-ata-before-graphics.patch ├── 0110-give-rdrand-some-credit.patch ├── 0111-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch ├── 0112-init-wait-for-partition-and-retry-scan.patch ├── 0113-print-fsync-count-for-bootchart.patch ├── 0114-add-boot-option-to-allow-unsigned-modules.patch ├── 0115-enable-stateless-firmware-loading.patch ├── 0116-migrate-some-systemd-defaults-to-the-kernel-defaults.patch ├── 0117-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch ├── 0118-add-scheduler-turbo3-patch.patch ├── 0120-do-accept-in-LIFO-order-for-cache-efficiency.patch ├── 0121-locking-rwsem-spin-faster.patch ├── 0122-ata-libahci-ignore-staggered-spin-up.patch ├── 0123-print-CPU-that-faults.patch ├── 
0124-x86-microcode-Add-an-option-to-reload-microcode-even.patch ├── 0125-nvme-workaround.patch ├── 0126-don-t-report-an-error-if-PowerClamp-run-on-other-CPU.patch ├── 0127-lib-raid6-add-patch.patch ├── 0128-itmt_epb-use-epb-to-scale-itmt.patch ├── 0129-mm-wakeups-remove-a-wakeup.patch ├── 0130-itmt2-ADL-fixes.patch ├── 0131-add-a-per-cpu-minimum-high-watermark-an-tune-batch-s.patch ├── 0132-prezero-20220308.patch ├── 0133-novector.patch ├── 0134-md-raid6-algorithms-scale-test-duration-for-speedier.patch ├── 0135-initcall-only-print-non-zero-initcall-debug-to-speed.patch ├── 0136-crypto-kdf-make-the-module-init-call-a-late-init-cal.patch ├── 0149-select-do_pollfd-add-unlikely-branch-hint-return-pat.patch ├── 0150-select-core_sys_select-add-unlikely-branch-hint-on-r.patch ├── 0158-clocksource-only-perform-extended-clocksource-checks.patch ├── 0161-ACPI-align-slab-buffers-for-improved-memory-performa.patch ├── 0162-extra-optmization-flags.patch ├── 0163-thermal-intel-powerclamp-check-MWAIT-first-use-pr_wa.patch ├── 0164-KVM-VMX-make-vmx-init-a-late-init-call-to-get-to-ini.patch ├── 0166-sched-fair-remove-upper-limit-on-cpu-number.patch ├── 0167-net-sock-increase-default-number-of-_SK_MEM_PACKETS-.patch ├── 0169-mm-mincore-improve-performance-by-adding-an-unlikely.patch ├── 0170-sched-Add-unlikey-branch-hints-to-several-system-cal.patch ├── 0171-kcmp-improve-performance-adding-an-unlikely-hint-to-.patch ├── 0173-cpuidle-psd-add-power-sleep-demotion-prevention-for-.patch ├── 0174-memcg-increase-MEMCG_CHARGE_BATCH-to-128.patch ├── 0175-readdir-add-unlikely-hint-on-len-check.patch ├── Makefile ├── Makefile.custom ├── adlrdt.patch ├── archive ├── 0114-tweak-perfbias.patch ├── 0123-zero-extra-registers.patch ├── 0131-overload-on-wakeup.patch ├── 0151-mm-Export-do_madvise.patch- ├── 0152-x86-kvm-Notify-host-to-release-pages.patch- ├── 0153-x86-Return-memory-from-guest-to-host-kernel.patch- ├── 0154-sysctl-vm-Fine-grained-cache-shrinking.patch- ├── 
1011-virtualbox-add-module-sources.patch ├── 1012-virtualbox-setup-Kconfig-and-Makefiles.patch ├── 2001-opae-add-intel-fpga-drivers.patch- ├── 2002-opae-add-Kconfig-and-Makefile.patch- ├── 3001-Add-sysdig-0.20-driver.patch- └── 3002Add-sysdig-to-kernel-build-system.patch- ├── backport-ioboost.patch ├── better_idle_balance.patch ├── cmdline ├── config ├── cstatedemotion.patch ├── epp-retune.patch ├── filter-stable.py ├── iommu.patch ├── kdf-boottime.patch ├── kvm-printk.patch ├── libsgrowdown.patch ├── linux.spec ├── mm-lru_cache_disable-use-synchronize_rcu_expedited.patch ├── mmput_async.patch ├── netscale.patch ├── nonapi-realtek.patch ├── options.conf ├── posted_msi.patch ├── ratelimit-sched-yield.patch ├── rcuref-1.patch ├── rcuref-2.patch ├── rcuref-3.patch ├── release ├── revert-regression.patch ├── scale-net-alloc.patch ├── scale.patch ├── sched-hybrid1.patch ├── sched-hybrid2.patch ├── sched-hybrid3.patch ├── sched-hybrid4.patch ├── scripts ├── develop.sh ├── port-to-current.sh └── to-spec.sh ├── slack.patch ├── testresults ├── update.sh ├── upstream └── vmidle.patch /.gitignore: -------------------------------------------------------------------------------- 1 | .*~ 2 | *~ 3 | *.info 4 | *.mod 5 | *.swp 6 | .repo-index 7 | *.log 8 | build.log.round* 9 | *.tar.* 10 | *.tgz 11 | !*.tar.*.* 12 | *.zip 13 | *.jar 14 | *.pom 15 | *.xml 16 | commitmsg 17 | results/ 18 | rpms/ 19 | for-review.txt 20 | 21 | linux-5.* 22 | /releases.json 23 | -------------------------------------------------------------------------------- /0001-mm-memcontrol-add-some-branch-hints-based-on-gcov-an.patch: -------------------------------------------------------------------------------- 1 | From c50d383b767ea7337b58fc004dd9e2cffebb8524 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 1 Feb 2023 10:40:24 +0000 4 | Subject: [PATCH] mm/memcontrol: add some branch hints based on gcov analysis 5 | 6 | Signed-off-by: Colin Ian King 7 | --- 8 | mm/memcontrol.c | 6 +++--- 9 | 1 
file changed, 3 insertions(+), 3 deletions(-) 10 | 11 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c 12 | index a1a35c12635e..762d8a819c4a 100644 13 | --- a/mm/memcontrol.c 14 | +++ b/mm/memcontrol.c 15 | @@ -611,7 +611,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) 16 | cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); 17 | 18 | x = __this_cpu_add_return(stats_updates, abs(val)); 19 | - if (x > MEMCG_CHARGE_BATCH * 128) { 20 | + if (unlikely(x > MEMCG_CHARGE_BATCH * 128)) { 21 | /* 22 | * If stats_flush_threshold exceeds the threshold 23 | * (>num_online_cpus()), cgroup stats update will be triggered 24 | @@ -817,7 +817,7 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, 25 | __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); 26 | 27 | /* Update memcg and lruvec */ 28 | - if (!mem_cgroup_disabled()) 29 | + if (likely(!mem_cgroup_disabled())) 30 | __mod_memcg_lruvec_state(lruvec, idx, val); 31 | } 32 | 33 | @@ -2136,7 +2136,7 @@ void lock_page_memcg(struct page *page) 34 | 35 | static void __folio_memcg_unlock(struct mem_cgroup *memcg) 36 | { 37 | - if (memcg && memcg->move_lock_task == current) { 38 | + if (likely(memcg && memcg->move_lock_task == current)) { 39 | unsigned long flags = memcg->move_lock_flags; 40 | 41 | memcg->move_lock_task = NULL; 42 | -- 43 | 2.39.1 44 | 45 | -------------------------------------------------------------------------------- /0001-sched-cpuset-Fix-dl_cpu_busy-panic-due-to-empty-cs-c.patch: -------------------------------------------------------------------------------- 1 | From b6e8d40d43ae4dec00c8fea2593eeea3114b8f44 Mon Sep 17 00:00:00 2001 2 | From: Waiman Long 3 | Date: Tue, 2 Aug 2022 21:54:51 -0400 4 | Subject: [PATCH 1/4] sched, cpuset: Fix dl_cpu_busy() panic due to empty 5 | cs->cpus_allowed 6 | 7 | With cgroup v2, the cpuset's cpus_allowed mask can be empty indicating 8 | that the cpuset will just use the effective CPUs of its parent. 
So 9 | cpuset_can_attach() can call task_can_attach() with an empty mask. 10 | This can lead to cpumask_any_and() returns nr_cpu_ids causing the call 11 | to dl_bw_of() to crash due to percpu value access of an out of bound 12 | CPU value. For example: 13 | 14 | [80468.182258] BUG: unable to handle page fault for address: ffffffff8b6648b0 15 | : 16 | [80468.191019] RIP: 0010:dl_cpu_busy+0x30/0x2b0 17 | : 18 | [80468.207946] Call Trace: 19 | [80468.208947] cpuset_can_attach+0xa0/0x140 20 | [80468.209953] cgroup_migrate_execute+0x8c/0x490 21 | [80468.210931] cgroup_update_dfl_csses+0x254/0x270 22 | [80468.211898] cgroup_subtree_control_write+0x322/0x400 23 | [80468.212854] kernfs_fop_write_iter+0x11c/0x1b0 24 | [80468.213777] new_sync_write+0x11f/0x1b0 25 | [80468.214689] vfs_write+0x1eb/0x280 26 | [80468.215592] ksys_write+0x5f/0xe0 27 | [80468.216463] do_syscall_64+0x5c/0x80 28 | [80468.224287] entry_SYSCALL_64_after_hwframe+0x44/0xae 29 | 30 | Fix that by using effective_cpus instead. For cgroup v1, effective_cpus 31 | is the same as cpus_allowed. For v2, effective_cpus is the real cpumask 32 | to be used by tasks within the cpuset anyway. 33 | 34 | Also update task_can_attach()'s 2nd argument name to cs_effective_cpus to 35 | reflect the change. In addition, a check is added to task_can_attach() 36 | to guard against the possibility that cpumask_any_and() may return a 37 | value >= nr_cpu_ids. 
38 | 39 | Fixes: 7f51412a415d ("sched/deadline: Fix bandwidth check/update when migrating tasks between exclusive cpusets") 40 | Signed-off-by: Waiman Long 41 | Signed-off-by: Ingo Molnar 42 | Acked-by: Juri Lelli 43 | Link: https://lore.kernel.org/r/20220803015451.2219567-1-longman@redhat.com 44 | --- 45 | include/linux/sched.h | 2 +- 46 | kernel/cgroup/cpuset.c | 2 +- 47 | kernel/sched/core.c | 8 +++++--- 48 | 3 files changed, 7 insertions(+), 5 deletions(-) 49 | 50 | diff --git a/include/linux/sched.h b/include/linux/sched.h 51 | index 88b8817b827d..6a060160f0db 100644 52 | --- a/include/linux/sched.h 53 | +++ b/include/linux/sched.h 54 | @@ -1813,7 +1813,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) 55 | } 56 | 57 | extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); 58 | -extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); 59 | +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); 60 | #ifdef CONFIG_SMP 61 | extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); 62 | extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); 63 | diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c 64 | index 71a418858a5e..58aadfda9b8b 100644 65 | --- a/kernel/cgroup/cpuset.c 66 | +++ b/kernel/cgroup/cpuset.c 67 | @@ -2239,7 +2239,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) 68 | goto out_unlock; 69 | 70 | cgroup_taskset_for_each(task, css, tset) { 71 | - ret = task_can_attach(task, cs->cpus_allowed); 72 | + ret = task_can_attach(task, cs->effective_cpus); 73 | if (ret) 74 | goto out_unlock; 75 | ret = security_task_setscheduler(task); 76 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 77 | index 5555e49c4e12..addc3c2d2122 100644 78 | --- a/kernel/sched/core.c 79 | +++ b/kernel/sched/core.c 80 | @@ -8980,7 +8980,7 @@ int 
cpuset_cpumask_can_shrink(const struct cpumask *cur, 81 | } 82 | 83 | int task_can_attach(struct task_struct *p, 84 | - const struct cpumask *cs_cpus_allowed) 85 | + const struct cpumask *cs_effective_cpus) 86 | { 87 | int ret = 0; 88 | 89 | @@ -8999,9 +8999,11 @@ int task_can_attach(struct task_struct *p, 90 | } 91 | 92 | if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, 93 | - cs_cpus_allowed)) { 94 | - int cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed); 95 | + cs_effective_cpus)) { 96 | + int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus); 97 | 98 | + if (unlikely(cpu >= nr_cpu_ids)) 99 | + return -EINVAL; 100 | ret = dl_cpu_busy(cpu, p); 101 | } 102 | 103 | -- 104 | 2.37.1 105 | 106 | -------------------------------------------------------------------------------- /0001-sched-migrate.patch: -------------------------------------------------------------------------------- 1 | Subject: [PATCH v5 1/2] sched/fair: Record the average duration of a task 2 | Date: Fri, 3 Feb 2023 13:17:59 +0800 3 | Message-Id: <155aa36ba14b8a1f8e6c3ccda7999125edfff990.1675361144.git.yu.c.chen@intel.com> 4 | X-Mailer: git-send-email 2.25.1 5 | In-Reply-To: 6 | References: 7 | MIME-Version: 1.0 8 | Content-Transfer-Encoding: 8bit 9 | Precedence: bulk 10 | List-ID: 11 | X-Mailing-List: linux-kernel@vger.kernel.org 12 | 13 | Record the average duration of a task, as there is a requirement 14 | to leverage this information for better task placement. 15 | 16 | At first thought the (p->se.sum_exec_runtime / p->nvcsw) 17 | can be used to measure the task duration. However, the 18 | history long past was factored too heavily in such a formula. 19 | Ideally, the old activity should decay and not affect 20 | the current status too much. 
21 | 22 | Although something based on PELT can be used, se.util_avg might 23 | not be appropriate to describe the task duration: 24 | Task p1 and task p2 are doing frequent ping-pong scheduling on 25 | one CPU, both p1 and p2 have a short duration, but the util_avg 26 | can be up to 50%, which is inconsistent with task duration. 27 | 28 | It was found that there was once a similar feature to track the 29 | duration of a task: 30 | commit ad4b78bbcbab ("sched: Add new wakeup preemption mode: WAKEUP_RUNNING") 31 | Unfortunately, it was reverted because it was an experiment. Pick the 32 | patch up again, by recording the average duration when a task voluntarily 33 | switches out. 34 | 35 | For example, suppose on CPU1, task p1 and p2 run alternatively: 36 | 37 | --------------------> time 38 | 39 | | p1 runs 1ms | p2 preempt p1 | p1 switch in, runs 0.5ms and blocks | 40 | ^ ^ ^ 41 | |_____________| |_____________________________________| 42 | ^ 43 | | 44 | p1 dequeued 45 | 46 | p1's duration in one section is (1 + 0.5)ms. Because if p2 does not 47 | preempt p1, p1 can run 1.5ms. This reflects the nature of a task: 48 | how long it wishes to run at most. 
49 | 50 | Suggested-by: Tim Chen 51 | Suggested-by: Vincent Guittot 52 | Signed-off-by: Chen Yu 53 | --- 54 | include/linux/sched.h | 3 +++ 55 | kernel/sched/core.c | 2 ++ 56 | kernel/sched/debug.c | 1 + 57 | kernel/sched/fair.c | 13 +++++++++++++ 58 | 4 files changed, 19 insertions(+) 59 | 60 | diff --git a/include/linux/sched.h b/include/linux/sched.h 61 | index 4df2b3e76b30..e21709402a31 100644 62 | --- a/include/linux/sched.h 63 | +++ b/include/linux/sched.h 64 | @@ -557,6 +557,9 @@ struct sched_entity { 65 | u64 prev_sum_exec_runtime; 66 | 67 | u64 nr_migrations; 68 | + u64 prev_sleep_sum_runtime; 69 | + /* average duration of a task */ 70 | + u64 dur_avg; 71 | 72 | #ifdef CONFIG_FAIR_GROUP_SCHED 73 | int depth; 74 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 75 | index 03b8529db73f..b805c5bdc7ff 100644 76 | --- a/kernel/sched/core.c 77 | +++ b/kernel/sched/core.c 78 | @@ -4379,6 +4379,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) 79 | p->se.prev_sum_exec_runtime = 0; 80 | p->se.nr_migrations = 0; 81 | p->se.vruntime = 0; 82 | + p->se.dur_avg = 0; 83 | + p->se.prev_sleep_sum_runtime = 0; 84 | INIT_LIST_HEAD(&p->se.group_node); 85 | 86 | #ifdef CONFIG_FAIR_GROUP_SCHED 87 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c 88 | index 1637b65ba07a..8d64fba16cfe 100644 89 | --- a/kernel/sched/debug.c 90 | +++ b/kernel/sched/debug.c 91 | @@ -1024,6 +1024,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, 92 | __PS("nr_involuntary_switches", p->nivcsw); 93 | 94 | P(se.load.weight); 95 | + P(se.dur_avg); 96 | #ifdef CONFIG_SMP 97 | P(se.avg.load_sum); 98 | P(se.avg.runnable_sum); 99 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 100 | index d4db72f8f84e..aa16611c7263 100644 101 | --- a/kernel/sched/fair.c 102 | +++ b/kernel/sched/fair.c 103 | @@ -6271,6 +6271,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) 104 | 105 | static void set_next_buddy(struct 
sched_entity *se); 106 | 107 | +static inline void dur_avg_update(struct task_struct *p, bool task_sleep) 108 | +{ 109 | + u64 dur; 110 | + 111 | + if (!task_sleep) 112 | + return; 113 | + 114 | + dur = p->se.sum_exec_runtime - p->se.prev_sleep_sum_runtime; 115 | + p->se.prev_sleep_sum_runtime = p->se.sum_exec_runtime; 116 | + update_avg(&p->se.dur_avg, dur); 117 | +} 118 | + 119 | /* 120 | * The dequeue_task method is called before nr_running is 121 | * decreased. We remove the task from the rbtree and 122 | @@ -6343,6 +6355,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) 123 | 124 | dequeue_throttle: 125 | util_est_update(&rq->cfs, p, task_sleep); 126 | + dur_avg_update(p, task_sleep); 127 | hrtick_update(rq); 128 | } 129 | 130 | -- 131 | 2.25.1 132 | 133 | 134 | -------------------------------------------------------------------------------- /0001-sched-numa-Initialise-numa_migrate_retry.patch: -------------------------------------------------------------------------------- 1 | From 70ce3ea9aa4ed901c8a90de667df5ef307766e71 Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Fri, 20 May 2022 11:35:16 +0100 4 | Subject: [PATCH 01/32] sched/numa: Initialise numa_migrate_retry 5 | 6 | On clone, numa_migrate_retry is inherited from the parent which means 7 | that the first NUMA placement of a task is non-deterministic. This 8 | affects when load balancing recognises numa tasks and whether to 9 | migrate "regular", "remote" or "all" tasks between NUMA scheduler 10 | domains. 
11 | 12 | Signed-off-by: Mel Gorman 13 | Signed-off-by: Peter Zijlstra (Intel) 14 | Tested-by: K Prateek Nayak 15 | Link: https://lore.kernel.org/r/20220520103519.1863-2-mgorman@techsingularity.net 16 | --- 17 | kernel/sched/fair.c | 1 + 18 | 1 file changed, 1 insertion(+) 19 | 20 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 21 | index 77b2048a9326..51836efe5931 100644 22 | --- a/kernel/sched/fair.c 23 | +++ b/kernel/sched/fair.c 24 | @@ -2885,6 +2885,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) 25 | p->node_stamp = 0; 26 | p->numa_scan_seq = mm ? mm->numa_scan_seq : 0; 27 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; 28 | + p->numa_migrate_retry = 0; 29 | /* Protect against double add, see task_tick_numa and task_numa_work */ 30 | p->numa_work.next = &p->numa_work; 31 | p->numa_faults = NULL; 32 | -- 33 | 2.37.1 34 | 35 | -------------------------------------------------------------------------------- /0002-add-networking-support-for-powerbump.patch: -------------------------------------------------------------------------------- 1 | From 3265f948dab9253e087030794b3f02c86c07dc92 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 5 Jan 2023 16:52:33 +0000 4 | Subject: [PATCH 2/2] add networking support for powerbump 5 | 6 | --- 7 | include/linux/powerbump.h | 3 ++- 8 | net/core/dev.c | 3 +++ 9 | 2 files changed, 5 insertions(+), 1 deletion(-) 10 | 11 | diff --git a/include/linux/powerbump.h b/include/linux/powerbump.h 12 | index 8fc81d958484..79dd40620ba0 100644 13 | --- a/include/linux/powerbump.h 14 | +++ b/include/linux/powerbump.h 15 | @@ -5,7 +5,8 @@ 16 | 17 | 18 | /* bump time constants, in msec */ 19 | -#define BUMP_FOR_DISK 3 20 | +#define BUMP_FOR_DISK 3 21 | +#define BUMP_FOR_NETWORK 3 22 | 23 | 24 | 25 | diff --git a/net/core/dev.c b/net/core/dev.c 26 | index 70e06853ba25..054fe9024982 100644 27 | --- a/net/core/dev.c 28 | +++ b/net/core/dev.c 29 | @@ -150,6 +150,7 @@ 30 | 
#include 31 | #include 32 | #include 33 | +#include 34 | 35 | #include "dev.h" 36 | #include "net-sysfs.h" 37 | @@ -5744,6 +5745,7 @@ int netif_receive_skb(struct sk_buff *skb) 38 | int ret; 39 | 40 | trace_netif_receive_skb_entry(skb); 41 | + give_power_bump(BUMP_FOR_NETWORK); 42 | 43 | ret = netif_receive_skb_internal(skb); 44 | trace_netif_receive_skb_exit(ret); 45 | @@ -5768,6 +5770,7 @@ void netif_receive_skb_list(struct list_head *head) 46 | 47 | if (list_empty(head)) 48 | return; 49 | + give_power_bump(BUMP_FOR_NETWORK); 50 | if (trace_netif_receive_skb_list_entry_enabled()) { 51 | list_for_each_entry(skb, head, list) 52 | trace_netif_receive_skb_list_entry(skb); 53 | -- 54 | 2.39.0 55 | 56 | -------------------------------------------------------------------------------- /0002-exit-Fix-typo-in-comment-s-sub-theads-sub-threads.patch: -------------------------------------------------------------------------------- 1 | From dcca34754a3f5290406403b8066e3b15dda9f4bf Mon Sep 17 00:00:00 2001 2 | From: Ingo Molnar 3 | Date: Wed, 3 Aug 2022 10:43:42 +0200 4 | Subject: [PATCH 2/4] exit: Fix typo in comment: s/sub-theads/sub-threads 5 | 6 | Cc: linux-kernel@vger.kernel.org 7 | Signed-off-by: Ingo Molnar 8 | --- 9 | kernel/exit.c | 2 +- 10 | 1 file changed, 1 insertion(+), 1 deletion(-) 11 | 12 | diff --git a/kernel/exit.c b/kernel/exit.c 13 | index 64c938ce36fe..84021b24f79e 100644 14 | --- a/kernel/exit.c 15 | +++ b/kernel/exit.c 16 | @@ -1051,7 +1051,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) 17 | * p->signal fields because the whole thread group is dead 18 | * and nobody can change them. 19 | * 20 | - * psig->stats_lock also protects us from our sub-theads 21 | + * psig->stats_lock also protects us from our sub-threads 22 | * which can reap other children at the same time. Until 23 | * we change k_getrusage()-like users to rely on this lock 24 | * we have to take ->siglock as well. 
25 | -- 26 | 2.37.1 27 | 28 | -------------------------------------------------------------------------------- /0002-sched-core-add-some-branch-hints-based-on-gcov-analy.patch: -------------------------------------------------------------------------------- 1 | From eae943f2b22979ae1b378d72f9b94085577f5800 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Mon, 17 Mar 2025 12:03:19 +0000 4 | Subject: [PATCH] sched/core: add some branch hints based on gcov analysis 5 | 6 | Patch refreshed for v6.13.7 7 | 8 | Signed-off-by: Colin Ian King 9 | --- 10 | kernel/sched/core.c | 8 ++++---- 11 | 1 file changed, 4 insertions(+), 4 deletions(-) 12 | 13 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 14 | index 86cb6db08168..d2ccde102870 100644 15 | --- a/kernel/sched/core.c 16 | +++ b/kernel/sched/core.c 17 | @@ -594,7 +594,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) 18 | 19 | /* Matches synchronize_rcu() in __sched_core_enable() */ 20 | preempt_disable(); 21 | - if (sched_core_disabled()) { 22 | + if (likely(sched_core_disabled())) { 23 | raw_spin_lock_nested(&rq->__lock, subclass); 24 | /* preempt_count *MUST* be > 1 */ 25 | preempt_enable_no_resched(); 26 | @@ -804,7 +804,7 @@ void update_rq_clock(struct rq *rq) 27 | #endif 28 | 29 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; 30 | - if (delta < 0) 31 | + if (unlikely(delta < 0)) 32 | return; 33 | rq->clock += delta; 34 | update_rq_clock_task(rq, delta); 35 | @@ -6106,7 +6106,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) 36 | struct rq *rq_i; 37 | bool need_sync; 38 | 39 | - if (!sched_core_enabled(rq)) 40 | + if (likely(!sched_core_enabled(rq))) 41 | return __pick_next_task(rq, prev, rf); 42 | 43 | cpu = cpu_of(rq); 44 | @@ -7278,7 +7278,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) 45 | #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) 46 | int __sched __cond_resched(void) 47 | { 48 | - if 
(should_resched(0) && !irqs_disabled()) { 49 | + if (unlikely(should_resched(0) && !irqs_disabled())) { 50 | preempt_schedule_common(); 51 | return 1; 52 | } 53 | -- 54 | 2.48.1 55 | 56 | -------------------------------------------------------------------------------- /0002-sched-numa-Do-not-swap-tasks-between-nodes-when-spar.patch: -------------------------------------------------------------------------------- 1 | From 13ede33150877d44756171e33570076882b17b0b Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Fri, 20 May 2022 11:35:17 +0100 4 | Subject: [PATCH 02/32] sched/numa: Do not swap tasks between nodes when spare 5 | capacity is available 6 | 7 | If a destination node has spare capacity but there is an imbalance then 8 | two tasks are selected for swapping. If the tasks have no numa group 9 | or are within the same NUMA group, it's simply shuffling tasks around 10 | without having any impact on the compute imbalance. Instead, it's just 11 | punishing one task to help another. 12 | 13 | Signed-off-by: Mel Gorman 14 | Signed-off-by: Peter Zijlstra (Intel) 15 | Tested-by: K Prateek Nayak 16 | Link: https://lore.kernel.org/r/20220520103519.1863-3-mgorman@techsingularity.net 17 | --- 18 | kernel/sched/fair.c | 9 +++++++++ 19 | 1 file changed, 9 insertions(+) 20 | 21 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 22 | index 51836efe5931..23da36c9cacb 100644 23 | --- a/kernel/sched/fair.c 24 | +++ b/kernel/sched/fair.c 25 | @@ -1790,6 +1790,15 @@ static bool task_numa_compare(struct task_numa_env *env, 26 | */ 27 | cur_ng = rcu_dereference(cur->numa_group); 28 | if (cur_ng == p_ng) { 29 | + /* 30 | + * Do not swap within a group or between tasks that have 31 | + * no group if there is spare capacity. Swapping does 32 | + * not address the load imbalance and helps one task at 33 | + * the cost of punishing another. 
34 | + */ 35 | + if (env->dst_stats.node_type == node_has_spare) 36 | + goto unlock; 37 | + 38 | imp = taskimp + task_weight(cur, env->src_nid, dist) - 39 | task_weight(cur, env->dst_nid, dist); 40 | /* 41 | -- 42 | 2.37.1 43 | 44 | -------------------------------------------------------------------------------- /0003-futex-bump.patch: -------------------------------------------------------------------------------- 1 | From c47e3b2e38ac2ac4c401f02048a2745f75e27f88 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 12 Jan 2023 19:19:04 +0000 4 | Subject: [PATCH 3/3] futex bump 5 | 6 | --- 7 | include/linux/powerbump.h | 1 + 8 | kernel/futex/waitwake.c | 2 ++ 9 | 2 files changed, 3 insertions(+) 10 | 11 | diff --git a/include/linux/powerbump.h b/include/linux/powerbump.h 12 | index 79dd40620ba0..1de5bb88725a 100644 13 | --- a/include/linux/powerbump.h 14 | +++ b/include/linux/powerbump.h 15 | @@ -7,6 +7,7 @@ 16 | /* bump time constants, in msec */ 17 | #define BUMP_FOR_DISK 3 18 | #define BUMP_FOR_NETWORK 3 19 | +#define BUMP_FOR_FUTEX 3 20 | 21 | 22 | 23 | diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c 24 | index ba01b9408203..e4fc09a98cbc 100644 25 | --- a/kernel/futex/waitwake.c 26 | +++ b/kernel/futex/waitwake.c 27 | @@ -3,6 +3,7 @@ 28 | #include 29 | #include 30 | #include 31 | +#include 32 | 33 | #include "futex.h" 34 | 35 | @@ -336,6 +337,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, 36 | */ 37 | set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); 38 | futex_queue(q, hb); 39 | + give_power_bump(BUMP_FOR_FUTEX); 40 | 41 | /* Arm the timer */ 42 | if (timeout) 43 | -- 44 | 2.39.0 45 | 46 | -------------------------------------------------------------------------------- /0003-sched-rt-Fix-Sparse-warnings-due-to-undefined-rt.c-d.patch: -------------------------------------------------------------------------------- 1 | From 87514b2c24f294c32e9e743b095541dcf43928f7 Mon Sep 17 00:00:00 2001 2 | From: 
Ben Dooks 3 | Date: Thu, 21 Jul 2022 15:51:55 +0100 4 | Subject: [PATCH 3/4] sched/rt: Fix Sparse warnings due to undefined rt.c 5 | declarations 6 | 7 | There are several symbols defined in kernel/sched/sched.h but get wrapped 8 | in CONFIG_CGROUP_SCHED, even though dummy versions get built in rt.c and 9 | therefore trigger Sparse warnings: 10 | 11 | kernel/sched/rt.c:309:6: warning: symbol 'unregister_rt_sched_group' was not declared. Should it be static? 12 | kernel/sched/rt.c:311:6: warning: symbol 'free_rt_sched_group' was not declared. Should it be static? 13 | kernel/sched/rt.c:313:5: warning: symbol 'alloc_rt_sched_group' was not declared. Should it be static? 14 | 15 | Fix this by moving them outside the CONFIG_CGROUP_SCHED block. 16 | 17 | [ mingo: Refreshed to the latest scheduler tree, tweaked changelog. ] 18 | 19 | Signed-off-by: Ben Dooks 20 | Signed-off-by: Ingo Molnar 21 | Link: https://lore.kernel.org/r/20220721145155.358366-1-ben-linux@fluff.org 22 | --- 23 | kernel/sched/sched.h | 7 ++++--- 24 | 1 file changed, 4 insertions(+), 3 deletions(-) 25 | 26 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h 27 | index aad7f5ee9666..1429315610d9 100644 28 | --- a/kernel/sched/sched.h 29 | +++ b/kernel/sched/sched.h 30 | @@ -480,9 +480,6 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); 31 | extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b); 32 | extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); 33 | 34 | -extern void unregister_rt_sched_group(struct task_group *tg); 35 | -extern void free_rt_sched_group(struct task_group *tg); 36 | -extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent); 37 | extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, 38 | struct sched_rt_entity *rt_se, int cpu, 39 | struct sched_rt_entity *parent); 40 | @@ -520,6 +517,10 @@ struct cfs_bandwidth { }; 41 | 42 | #endif /* CONFIG_CGROUP_SCHED */ 43 | 44 | +extern void 
unregister_rt_sched_group(struct task_group *tg); 45 | +extern void free_rt_sched_group(struct task_group *tg); 46 | +extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent); 47 | + 48 | /* 49 | * u64_u32_load/u64_u32_store 50 | * 51 | -- 52 | 2.37.1 53 | 54 | -------------------------------------------------------------------------------- /0004-sched-core-Do-not-requeue-task-on-CPU-excluded-from-.patch: -------------------------------------------------------------------------------- 1 | From 751d4cbc43879229dbc124afefe240b70fd29a85 Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Thu, 4 Aug 2022 10:21:19 +0100 4 | Subject: [PATCH 4/4] sched/core: Do not requeue task on CPU excluded from 5 | cpus_mask 6 | 7 | The following warning was triggered on a large machine early in boot on 8 | a distribution kernel but the same problem should also affect mainline. 9 | 10 | WARNING: CPU: 439 PID: 10 at ../kernel/workqueue.c:2231 process_one_work+0x4d/0x440 11 | Call Trace: 12 | 13 | rescuer_thread+0x1f6/0x360 14 | kthread+0x156/0x180 15 | ret_from_fork+0x22/0x30 16 | 17 | 18 | Commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") 19 | optimises ttwu by queueing a task that is descheduling on the wakelist, 20 | but does not check if the task descheduling is still allowed to run on that CPU. 21 | 22 | In this warning, the problematic task is a workqueue rescue thread which 23 | checks if the rescue is for a per-cpu workqueue and running on the wrong CPU. 24 | While this is early in boot and it should be possible to create workers, 25 | the rescue thread may still used if the MAYDAY_INITIAL_TIMEOUT is reached 26 | or MAYDAY_INTERVAL and on a sufficiently large machine, the rescue 27 | thread is being used frequently. 28 | 29 | Tracing confirmed that the task should have migrated properly using the 30 | stopper thread to handle the migration. 
However, a parallel wakeup from udev 31 | running on another CPU that does not share CPU cache observes p->on_cpu and 32 | uses task_cpu(p), queues the task on the old CPU and triggers the warning. 33 | 34 | Check that the wakee task that is descheduling is still allowed to run 35 | on its current CPU and if not, wait for the descheduling to complete 36 | and select an allowed CPU. 37 | 38 | Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") 39 | Signed-off-by: Mel Gorman 40 | Signed-off-by: Ingo Molnar 41 | Link: https://lore.kernel.org/r/20220804092119.20137-1-mgorman@techsingularity.net 42 | --- 43 | kernel/sched/core.c | 8 ++++++-- 44 | 1 file changed, 6 insertions(+), 2 deletions(-) 45 | 46 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 47 | index addc3c2d2122..02afa1cc3c8c 100644 48 | --- a/kernel/sched/core.c 49 | +++ b/kernel/sched/core.c 50 | @@ -3802,7 +3802,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu) 51 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); 52 | } 53 | 54 | -static inline bool ttwu_queue_cond(int cpu) 55 | +static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) 56 | { 57 | /* 58 | * Do not complicate things with the async wake_list while the CPU is 59 | @@ -3811,6 +3811,10 @@ static inline bool ttwu_queue_cond(int cpu) 60 | if (!cpu_active(cpu)) 61 | return false; 62 | 63 | + /* Ensure the task will still be allowed to run on the CPU. */ 64 | + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) 65 | + return false; 66 | + 67 | /* 68 | * If the CPU does not share cache, then queue the task on the 69 | * remote rqs wakelist to avoid accessing remote data. 
70 | @@ -3840,7 +3844,7 @@ static inline bool ttwu_queue_cond(int cpu) 71 | 72 | static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) 73 | { 74 | - if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu)) { 75 | + if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) { 76 | sched_clock_cpu(cpu); /* Sync clocks across CPUs */ 77 | __ttwu_queue_wakelist(p, cpu, wake_flags); 78 | return true; 79 | -- 80 | 2.37.1 81 | 82 | -------------------------------------------------------------------------------- /0004-sched-numa-Adjust-imb_numa_nr-to-a-better-approximat.patch: -------------------------------------------------------------------------------- 1 | From 026b98a93bbdbefb37ab8008df84e38e2fedaf92 Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Fri, 20 May 2022 11:35:19 +0100 4 | Subject: [PATCH 04/32] sched/numa: Adjust imb_numa_nr to a better 5 | approximation of memory channels 6 | 7 | For a single LLC per node, a NUMA imbalance is allowed up until 25% 8 | of CPUs sharing a node could be active. One intent of the cut-off is 9 | to avoid an imbalance of memory channels but there is no topological 10 | information based on active memory channels. Furthermore, there can 11 | be differences between nodes depending on the number of populated 12 | DIMMs. 13 | 14 | A cut-off of 25% was arbitrary but generally worked. It does have a severe 15 | corner cases though when an parallel workload is using 25% of all available 16 | CPUs over-saturates memory channels. This can happen due to the initial 17 | forking of tasks that get pulled more to one node after early wakeups 18 | (e.g. a barrier synchronisation) that is not quickly corrected by the 19 | load balancer. The LB may fail to act quickly as the parallel tasks are 20 | considered to be poor migrate candidates due to locality or cache hotness. 
21 | 22 | On a range of modern Intel CPUs, 12.5% appears to be a better cut-off 23 | assuming all memory channels are populated and is used as the new cut-off 24 | point. A minimum of 1 is specified to allow a communicating pair to 25 | remain local even for CPUs with low numbers of cores. For modern AMDs, 26 | there are multiple LLCs and are not affected. 27 | 28 | Signed-off-by: Mel Gorman 29 | Signed-off-by: Peter Zijlstra (Intel) 30 | Tested-by: K Prateek Nayak 31 | Link: https://lore.kernel.org/r/20220520103519.1863-5-mgorman@techsingularity.net 32 | --- 33 | kernel/sched/topology.c | 23 +++++++++++++++-------- 34 | 1 file changed, 15 insertions(+), 8 deletions(-) 35 | 36 | diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c 37 | index 05b6c2ad90b9..8739c2a5a54e 100644 38 | --- a/kernel/sched/topology.c 39 | +++ b/kernel/sched/topology.c 40 | @@ -2316,23 +2316,30 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att 41 | 42 | /* 43 | * For a single LLC per node, allow an 44 | - * imbalance up to 25% of the node. This is an 45 | - * arbitrary cutoff based on SMT-2 to balance 46 | - * between memory bandwidth and avoiding 47 | - * premature sharing of HT resources and SMT-4 48 | - * or SMT-8 *may* benefit from a different 49 | - * cutoff. 50 | + * imbalance up to 12.5% of the node. This is 51 | + * arbitrary cutoff based two factors -- SMT and 52 | + * memory channels. For SMT-2, the intent is to 53 | + * avoid premature sharing of HT resources but 54 | + * SMT-4 or SMT-8 *may* benefit from a different 55 | + * cutoff. For memory channels, this is a very 56 | + * rough estimate of how many channels may be 57 | + * active and is based on recent CPUs with 58 | + * many cores. 59 | * 60 | * For multiple LLCs, allow an imbalance 61 | * until multiple tasks would share an LLC 62 | * on one node while LLCs on another node 63 | - * remain idle. 64 | + * remain idle. 
This assumes that there are 65 | + * enough logical CPUs per LLC to avoid SMT 66 | + * factors and that there is a correlation 67 | + * between LLCs and memory channels. 68 | */ 69 | nr_llcs = sd->span_weight / child->span_weight; 70 | if (nr_llcs == 1) 71 | - imb = sd->span_weight >> 2; 72 | + imb = sd->span_weight >> 3; 73 | else 74 | imb = nr_llcs; 75 | + imb = max(1U, imb); 76 | sd->imb_numa_nr = imb; 77 | 78 | /* Set span based on the first NUMA domain. */ 79 | -- 80 | 2.37.1 81 | 82 | -------------------------------------------------------------------------------- /0007-sched-deadline-Use-proc_douintvec_minmax-limit-minim.patch: -------------------------------------------------------------------------------- 1 | From 2ed81e765417ec2526f901366167a13294ef09ce Mon Sep 17 00:00:00 2001 2 | From: Yajun Deng 3 | Date: Tue, 7 Jun 2022 18:18:07 +0800 4 | Subject: [PATCH 07/32] sched/deadline: Use proc_douintvec_minmax() limit 5 | minimum value 6 | 7 | sysctl_sched_dl_period_max and sysctl_sched_dl_period_min are unsigned 8 | integer, but proc_dointvec() wouldn't return error even if we set a 9 | negative number. 10 | 11 | Use proc_douintvec_minmax() instead of proc_dointvec(). Add extra1 for 12 | sysctl_sched_dl_period_max and extra2 for sysctl_sched_dl_period_min. 13 | 14 | It's just an optimization for match data and proc_handler in struct 15 | ctl_table. The 'if (period < min || period > max)' in __checkparam_dl() 16 | will work fine even if there hasn't this patch. 
17 | 18 | Signed-off-by: Yajun Deng 19 | Signed-off-by: Peter Zijlstra (Intel) 20 | Reviewed-by: Daniel Bristot de Oliveira 21 | Link: https://lore.kernel.org/r/20220607101807.249965-1-yajun.deng@linux.dev 22 | --- 23 | kernel/sched/deadline.c | 6 ++++-- 24 | 1 file changed, 4 insertions(+), 2 deletions(-) 25 | 26 | diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c 27 | index b5152961b743..5867e186c39a 100644 28 | --- a/kernel/sched/deadline.c 29 | +++ b/kernel/sched/deadline.c 30 | @@ -30,14 +30,16 @@ static struct ctl_table sched_dl_sysctls[] = { 31 | .data = &sysctl_sched_dl_period_max, 32 | .maxlen = sizeof(unsigned int), 33 | .mode = 0644, 34 | - .proc_handler = proc_dointvec, 35 | + .proc_handler = proc_douintvec_minmax, 36 | + .extra1 = (void *)&sysctl_sched_dl_period_min, 37 | }, 38 | { 39 | .procname = "sched_deadline_period_min_us", 40 | .data = &sysctl_sched_dl_period_min, 41 | .maxlen = sizeof(unsigned int), 42 | .mode = 0644, 43 | - .proc_handler = proc_dointvec, 44 | + .proc_handler = proc_douintvec_minmax, 45 | + .extra2 = (void *)&sysctl_sched_dl_period_max, 46 | }, 47 | {} 48 | }; 49 | -- 50 | 2.37.1 51 | 52 | -------------------------------------------------------------------------------- /0008-sched-Allow-newidle-balancing-to-bail-out-of-load_ba.patch: -------------------------------------------------------------------------------- 1 | From 792b9f65a568f48c50b3175536db9cde5a1edcc0 Mon Sep 17 00:00:00 2001 2 | From: Josh Don 3 | Date: Wed, 8 Jun 2022 19:55:15 -0700 4 | Subject: [PATCH 08/32] sched: Allow newidle balancing to bail out of 5 | load_balance 6 | 7 | While doing newidle load balancing, it is possible for new tasks to 8 | arrive, such as with pending wakeups. newidle_balance() already accounts 9 | for this by exiting the sched_domain load_balance() iteration if it 10 | detects these cases. This is very important for minimizing wakeup 11 | latency. 
12 | 13 | However, if we are already in load_balance(), we may stay there for a 14 | while before returning back to newidle_balance(). This is most 15 | exacerbated if we enter a 'goto redo' loop in the LBF_ALL_PINNED case. A 16 | very straightforward workaround to this is to adjust should_we_balance() 17 | to bail out if we're doing a CPU_NEWLY_IDLE balance and new tasks are 18 | detected. 19 | 20 | This was tested with the following reproduction: 21 | - two threads that take turns sleeping and waking each other up are 22 | affined to two cores 23 | - a large number of threads with 100% utilization are pinned to all 24 | other cores 25 | 26 | Without this patch, wakeup latency was ~120us for the pair of threads, 27 | almost entirely spent in load_balance(). With this patch, wakeup latency 28 | is ~6us. 29 | 30 | Signed-off-by: Josh Don 31 | Signed-off-by: Peter Zijlstra (Intel) 32 | Link: https://lkml.kernel.org/r/20220609025515.2086253-1-joshdon@google.com 33 | --- 34 | kernel/sched/fair.c | 8 +++++++- 35 | 1 file changed, 7 insertions(+), 1 deletion(-) 36 | 37 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 38 | index 7d8ef01669a5..8bed75757e65 100644 39 | --- a/kernel/sched/fair.c 40 | +++ b/kernel/sched/fair.c 41 | @@ -9824,9 +9824,15 @@ static int should_we_balance(struct lb_env *env) 42 | /* 43 | * In the newly idle case, we will allow all the CPUs 44 | * to do the newly idle load balance. 45 | + * 46 | + * However, we bail out if we already have tasks or a wakeup pending, 47 | + * to optimize wakeup latency. 
48 | */ 49 | - if (env->idle == CPU_NEWLY_IDLE) 50 | + if (env->idle == CPU_NEWLY_IDLE) { 51 | + if (env->dst_rq->nr_running > 0 || env->dst_rq->ttwu_pending) 52 | + return 0; 53 | return 1; 54 | + } 55 | 56 | /* Try to find first idle CPU */ 57 | for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) { 58 | -- 59 | 2.37.1 60 | 61 | -------------------------------------------------------------------------------- /0009-sched-Fix-the-check-of-nr_running-at-queue-wakelist.patch: -------------------------------------------------------------------------------- 1 | From 28156108fecb1f808b21d216e8ea8f0d205a530c Mon Sep 17 00:00:00 2001 2 | From: Tianchen Ding 3 | Date: Thu, 9 Jun 2022 07:34:11 +0800 4 | Subject: [PATCH 09/32] sched: Fix the check of nr_running at queue wakelist 5 | 6 | The commit 2ebb17717550 ("sched/core: Offload wakee task activation if it 7 | the wakee is descheduling") checked rq->nr_running <= 1 to avoid task 8 | stacking when WF_ON_CPU. 9 | 10 | Per the ordering of writes to p->on_rq and p->on_cpu, observing p->on_cpu 11 | (WF_ON_CPU) in ttwu_queue_cond() implies !p->on_rq, IOW p has gone through 12 | the deactivate_task() in __schedule(), thus p has been accounted out of 13 | rq->nr_running. As such, the task being the only runnable task on the rq 14 | implies reading rq->nr_running == 0 at that point. 15 | 16 | The benchmark result is in [1]. 
17 | 18 | [1] https://lore.kernel.org/all/e34de686-4e85-bde1-9f3c-9bbc86b38627@linux.alibaba.com/ 19 | 20 | Suggested-by: Valentin Schneider 21 | Signed-off-by: Tianchen Ding 22 | Signed-off-by: Peter Zijlstra (Intel) 23 | Reviewed-by: Valentin Schneider 24 | Link: https://lore.kernel.org/r/20220608233412.327341-2-dtcccc@linux.alibaba.com 25 | --- 26 | kernel/sched/core.c | 6 +++++- 27 | 1 file changed, 5 insertions(+), 1 deletion(-) 28 | 29 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 30 | index bfa7452ca92e..294b9184dfe1 100644 31 | --- a/kernel/sched/core.c 32 | +++ b/kernel/sched/core.c 33 | @@ -3829,8 +3829,12 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) 34 | * CPU then use the wakelist to offload the task activation to 35 | * the soon-to-be-idle CPU as the current CPU is likely busy. 36 | * nr_running is checked to avoid unnecessary task stacking. 37 | + * 38 | + * Note that we can only get here with (wakee) p->on_rq=0, 39 | + * p->on_cpu can be whatever, we've done the dequeue, so 40 | + * the wakee has been accounted out of ->nr_running. 41 | */ 42 | - if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) 43 | + if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) 44 | return true; 45 | 46 | return false; 47 | -- 48 | 2.37.1 49 | 50 | -------------------------------------------------------------------------------- /0012-selftests-rseq-riscv-fix-literal-suffix-warning.patch: -------------------------------------------------------------------------------- 1 | From d47c0cc94a86b9098930523a9e68180bef6b26cf Mon Sep 17 00:00:00 2001 2 | From: Michael Jeanson 3 | Date: Tue, 14 Jun 2022 11:48:29 -0400 4 | Subject: [PATCH 12/32] selftests/rseq: riscv: fix 'literal-suffix' warning 5 | 6 | This header is also used in librseq where it can be included in C++ 7 | code, add a space between literals and string macros. 
8 | 9 | Signed-off-by: Michael Jeanson 10 | Signed-off-by: Peter Zijlstra (Intel) 11 | Reviewed-by: Mathieu Desnoyers 12 | Link: https://lore.kernel.org/r/20220614154830.1367382-3-mjeanson@efficios.com 13 | --- 14 | tools/testing/selftests/rseq/rseq-riscv.h | 14 +++++++------- 15 | 1 file changed, 7 insertions(+), 7 deletions(-) 16 | 17 | diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h 18 | index 6f8a605b75c0..3a391c9bf468 100644 19 | --- a/tools/testing/selftests/rseq/rseq-riscv.h 20 | +++ b/tools/testing/selftests/rseq/rseq-riscv.h 21 | @@ -86,7 +86,7 @@ do { \ 22 | 23 | #define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ 24 | RSEQ_INJECT_ASM(1) \ 25 | - "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ 26 | + "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ 27 | REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \ 28 | __rseq_str(label) ":\n" 29 | 30 | @@ -103,17 +103,17 @@ do { \ 31 | 32 | #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ 33 | REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 34 | - "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 35 | + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 36 | __rseq_str(label) "\n" 37 | 38 | #define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ 39 | - "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 40 | - "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 41 | + "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 42 | + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 43 | __rseq_str(label) "\n" 44 | 45 | #define RSEQ_ASM_OP_CMPNE(var, expect, label) \ 46 | REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 47 | - "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 48 | + "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 49 | __rseq_str(label) "\n" 50 | 51 | #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ 52 | @@ -127,12 +127,12 @@ do { \ 53 | REG_S 
RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" 54 | 55 | #define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ 56 | - "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ 57 | + "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ 58 | RSEQ_ASM_TMP_REG_1 "\n" \ 59 | REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n" 60 | 61 | #define RSEQ_ASM_OP_R_ADD(count) \ 62 | - "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ 63 | + "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ 64 | ", %[" __rseq_str(count) "]\n" 65 | 66 | #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ 67 | -- 68 | 2.37.1 69 | 70 | -------------------------------------------------------------------------------- /0013-selftests-rseq-check-if-libc-rseq-support-is-registe.patch: -------------------------------------------------------------------------------- 1 | From d1a997ba4c1bf65497d956aea90de42a6398f73a Mon Sep 17 00:00:00 2001 2 | From: Michael Jeanson 3 | Date: Tue, 14 Jun 2022 11:48:30 -0400 4 | Subject: [PATCH 13/32] selftests/rseq: check if libc rseq support is 5 | registered 6 | 7 | When checking for libc rseq support in the library constructor, don't 8 | only depend on the symbols presence, check that the registration was 9 | completed. 10 | 11 | This targets a scenario where the libc has rseq support but it is not 12 | wired for the current architecture in 'bits/rseq.h', we want to fallback 13 | to our internal registration mechanism. 
14 | 15 | Signed-off-by: Michael Jeanson 16 | Signed-off-by: Peter Zijlstra (Intel) 17 | Reviewed-by: Mathieu Desnoyers 18 | Link: https://lore.kernel.org/r/20220614154830.1367382-4-mjeanson@efficios.com 19 | --- 20 | tools/testing/selftests/rseq/rseq.c | 3 ++- 21 | 1 file changed, 2 insertions(+), 1 deletion(-) 22 | 23 | diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c 24 | index 986b9458efb2..4177f9507bbe 100644 25 | --- a/tools/testing/selftests/rseq/rseq.c 26 | +++ b/tools/testing/selftests/rseq/rseq.c 27 | @@ -111,7 +111,8 @@ void rseq_init(void) 28 | libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); 29 | libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); 30 | libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); 31 | - if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { 32 | + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && 33 | + *libc_rseq_size_p != 0) { 34 | /* rseq registration owned by glibc */ 35 | rseq_offset = *libc_rseq_offset_p; 36 | rseq_size = *libc_rseq_size_p; 37 | -- 38 | 2.37.1 39 | 40 | -------------------------------------------------------------------------------- /0014-sched-fair-Remove-redundant-word.patch: -------------------------------------------------------------------------------- 1 | From fb95a5a04d72aecdd5e151a4c2f7e4cde368bc10 Mon Sep 17 00:00:00 2001 2 | From: Zhang Qiao 3 | Date: Sat, 18 Jun 2022 02:11:50 +0800 4 | Subject: [PATCH 14/32] sched/fair: Remove redundant word " *" 5 | 6 | " *" is redundant. so remove it. 
7 | 8 | Signed-off-by: Zhang Qiao 9 | Signed-off-by: Peter Zijlstra (Intel) 10 | Link: https://lore.kernel.org/r/20220617181151.29980-2-zhangqiao22@huawei.com 11 | --- 12 | kernel/sched/fair.c | 2 +- 13 | 1 file changed, 1 insertion(+), 1 deletion(-) 14 | 15 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 16 | index 8bed75757e65..7400600b4db6 100644 17 | --- a/kernel/sched/fair.c 18 | +++ b/kernel/sched/fair.c 19 | @@ -8496,7 +8496,7 @@ static inline int sg_imbalanced(struct sched_group *group) 20 | /* 21 | * group_has_capacity returns true if the group has spare capacity that could 22 | * be used by some tasks. 23 | - * We consider that a group has spare capacity if the * number of task is 24 | + * We consider that a group has spare capacity if the number of task is 25 | * smaller than the number of CPUs or if the utilization is lower than the 26 | * available capacity for CFS tasks. 27 | * For the latter, we use a threshold to stabilize the state, to take into 28 | -- 29 | 2.37.1 30 | 31 | -------------------------------------------------------------------------------- /0015-sched-Remove-unused-function-group_first_cpu.patch: -------------------------------------------------------------------------------- 1 | From c64b551f6a338eb9724a2f9ef3dddf80ccef2894 Mon Sep 17 00:00:00 2001 2 | From: Zhang Qiao 3 | Date: Sat, 18 Jun 2022 02:11:51 +0800 4 | Subject: [PATCH 15/32] sched: Remove unused function group_first_cpu() 5 | 6 | As of commit afe06efdf07c ("sched: Extend scheduler's asym packing") 7 | group_first_cpu() became an unused function, remove it. 
8 | 9 | Signed-off-by: Zhang Qiao 10 | Signed-off-by: Peter Zijlstra (Intel) 11 | Reviewed-by: Valentin Schneider 12 | Link: https://lore.kernel.org/r/20220617181151.29980-3-zhangqiao22@huawei.com 13 | --- 14 | kernel/sched/sched.h | 9 --------- 15 | 1 file changed, 9 deletions(-) 16 | 17 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h 18 | index 1e34bb4527fd..02c970501295 100644 19 | --- a/kernel/sched/sched.h 20 | +++ b/kernel/sched/sched.h 21 | @@ -1810,15 +1810,6 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg) 22 | return to_cpumask(sg->sgc->cpumask); 23 | } 24 | 25 | -/** 26 | - * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. 27 | - * @group: The group whose first CPU is to be returned. 28 | - */ 29 | -static inline unsigned int group_first_cpu(struct sched_group *group) 30 | -{ 31 | - return cpumask_first(sched_group_span(group)); 32 | -} 33 | - 34 | extern int group_balance_cpu(struct sched_group *sg); 35 | 36 | #ifdef CONFIG_SCHED_DEBUG 37 | -- 38 | 2.37.1 39 | 40 | -------------------------------------------------------------------------------- /0021-sched-fair-Rename-select_idle_mask-to-select_rq_mask.patch: -------------------------------------------------------------------------------- 1 | From ec4fc801a02d96180c597238fe87141471b70971 Mon Sep 17 00:00:00 2001 2 | From: Dietmar Eggemann 3 | Date: Thu, 23 Jun 2022 11:11:02 +0200 4 | Subject: [PATCH 21/32] sched/fair: Rename select_idle_mask to select_rq_mask 5 | 6 | On 21/06/2022 11:04, Vincent Donnefort wrote: 7 | > From: Dietmar Eggemann 8 | 9 | https://lkml.kernel.org/r/202206221253.ZVyGQvPX-lkp@intel.com discovered 10 | that this patch doesn't build anymore (on tip sched/core or linux-next) 11 | because of commit f5b2eeb499910 ("sched/fair: Consider CPU affinity when 12 | allowing NUMA imbalance in find_idlest_group()"). 13 | 14 | New version of [PATCH v11 4/7] sched/fair: Rename select_idle_mask to 15 | select_rq_mask below. 
16 | 17 | -- >8 -- 18 | 19 | Decouple the name of the per-cpu cpumask select_idle_mask from its usage 20 | in select_idle_[cpu/capacity]() of the CFS run-queue selection 21 | (select_task_rq_fair()). 22 | 23 | This is to support the reuse of this cpumask in the Energy Aware 24 | Scheduling (EAS) path (find_energy_efficient_cpu()) of the CFS run-queue 25 | selection. 26 | 27 | Signed-off-by: Dietmar Eggemann 28 | Signed-off-by: Peter Zijlstra (Intel) 29 | Reviewed-by: Vincent Guittot 30 | Tested-by: Lukasz Luba 31 | Link: https://lkml.kernel.org/r/250691c7-0e2b-05ab-bedf-b245c11d9400@arm.com 32 | --- 33 | kernel/sched/core.c | 4 ++-- 34 | kernel/sched/fair.c | 10 +++++----- 35 | 2 files changed, 7 insertions(+), 7 deletions(-) 36 | 37 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 38 | index c538a0ac4617..dd69e85b7879 100644 39 | --- a/kernel/sched/core.c 40 | +++ b/kernel/sched/core.c 41 | @@ -9536,7 +9536,7 @@ static struct kmem_cache *task_group_cache __read_mostly; 42 | #endif 43 | 44 | DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); 45 | -DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); 46 | +DECLARE_PER_CPU(cpumask_var_t, select_rq_mask); 47 | 48 | void __init sched_init(void) 49 | { 50 | @@ -9585,7 +9585,7 @@ void __init sched_init(void) 51 | for_each_possible_cpu(i) { 52 | per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( 53 | cpumask_size(), GFP_KERNEL, cpu_to_node(i)); 54 | - per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( 55 | + per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node( 56 | cpumask_size(), GFP_KERNEL, cpu_to_node(i)); 57 | } 58 | #endif /* CONFIG_CPUMASK_OFFSTACK */ 59 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 60 | index 6de09b26b455..e3f750135f78 100644 61 | --- a/kernel/sched/fair.c 62 | +++ b/kernel/sched/fair.c 63 | @@ -5894,7 +5894,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) 64 | 65 | /* Working cpumask for: load_balance, load_balance_newidle. 
*/ 66 | DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); 67 | -DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); 68 | +DEFINE_PER_CPU(cpumask_var_t, select_rq_mask); 69 | 70 | #ifdef CONFIG_NO_HZ_COMMON 71 | 72 | @@ -6384,7 +6384,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd 73 | */ 74 | static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target) 75 | { 76 | - struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); 77 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 78 | int i, cpu, idle_cpu = -1, nr = INT_MAX; 79 | struct sched_domain_shared *sd_share; 80 | struct rq *this_rq = this_rq(); 81 | @@ -6482,7 +6482,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) 82 | int cpu, best_cpu = -1; 83 | struct cpumask *cpus; 84 | 85 | - cpus = this_cpu_cpumask_var_ptr(select_idle_mask); 86 | + cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 87 | cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); 88 | 89 | task_util = uclamp_task_util(p); 90 | @@ -6532,7 +6532,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) 91 | } 92 | 93 | /* 94 | - * per-cpu select_idle_mask usage 95 | + * per-cpu select_rq_mask usage 96 | */ 97 | lockdep_assert_irqs_disabled(); 98 | 99 | @@ -9255,7 +9255,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) 100 | * take care of it. 
101 | */ 102 | if (p->nr_cpus_allowed != NR_CPUS) { 103 | - struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); 104 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 105 | 106 | cpumask_and(cpus, sched_group_span(local), p->cpus_ptr); 107 | imb_numa_nr = min(cpumask_weight(cpus), sd->imb_numa_nr); 108 | -- 109 | 2.37.1 110 | 111 | -------------------------------------------------------------------------------- /0022-sched-fair-Use-the-same-cpumask-per-PD-throughout-fi.patch: -------------------------------------------------------------------------------- 1 | From 9b340131a4bcf6d0a282a2bdcd8ca268a74da709 Mon Sep 17 00:00:00 2001 2 | From: Dietmar Eggemann 3 | Date: Tue, 21 Jun 2022 10:04:12 +0100 4 | Subject: [PATCH 22/32] sched/fair: Use the same cpumask per-PD throughout 5 | find_energy_efficient_cpu() 6 | 7 | The Perf Domain (PD) cpumask (struct em_perf_domain.cpus) stays 8 | invariant after Energy Model creation, i.e. it is not updated after 9 | CPU hotplug operations. 10 | 11 | That's why the PD mask is used in conjunction with the cpu_online_mask 12 | (or Sched Domain cpumask). Thereby the cpu_online_mask is fetched 13 | multiple times (in compute_energy()) during a run-queue selection 14 | for a task. 15 | 16 | cpu_online_mask may change during this time which can lead to wrong 17 | energy calculations. 18 | 19 | To be able to avoid this, use the select_rq_mask per-cpu cpumask to 20 | create a cpumask out of PD cpumask and cpu_online_mask and pass it 21 | through the function calls of the EAS run-queue selection path. 22 | 23 | The PD cpumask for max_spare_cap_cpu/compute_prev_delta selection 24 | (find_energy_efficient_cpu()) is now ANDed not only with the SD mask 25 | but also with the cpu_online_mask. This is fine since this cpumask 26 | has to be in syc with the one used for energy computation 27 | (compute_energy()). 
28 | An exclusive cpuset setup with at least one asymmetric CPU capacity 29 | island (hence the additional AND with the SD cpumask) is the obvious 30 | exception here. 31 | 32 | Signed-off-by: Dietmar Eggemann 33 | Signed-off-by: Peter Zijlstra (Intel) 34 | Reviewed-by: Vincent Guittot 35 | Tested-by: Lukasz Luba 36 | Link: https://lkml.kernel.org/r/20220621090414.433602-6-vdonnefort@google.com 37 | --- 38 | kernel/sched/fair.c | 22 +++++++++++++--------- 39 | 1 file changed, 13 insertions(+), 9 deletions(-) 40 | 41 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 42 | index e3f750135f78..46d669297b1f 100644 43 | --- a/kernel/sched/fair.c 44 | +++ b/kernel/sched/fair.c 45 | @@ -6709,14 +6709,14 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) 46 | * task. 47 | */ 48 | static long 49 | -compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) 50 | +compute_energy(struct task_struct *p, int dst_cpu, struct cpumask *cpus, 51 | + struct perf_domain *pd) 52 | { 53 | - struct cpumask *pd_mask = perf_domain_span(pd); 54 | unsigned long max_util = 0, sum_util = 0, cpu_cap; 55 | int cpu; 56 | 57 | - cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask)); 58 | - cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask)); 59 | + cpu_cap = arch_scale_cpu_capacity(cpumask_first(cpus)); 60 | + cpu_cap -= arch_scale_thermal_pressure(cpumask_first(cpus)); 61 | 62 | /* 63 | * The capacity state of CPUs of the current rd can be driven by CPUs 64 | @@ -6727,7 +6727,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) 65 | * If an entire pd is outside of the current rd, it will not appear in 66 | * its pd list and will not be accounted by compute_energy(). 
67 | */ 68 | - for_each_cpu_and(cpu, pd_mask, cpu_online_mask) { 69 | + for_each_cpu(cpu, cpus) { 70 | unsigned long util_freq = cpu_util_next(cpu, p, dst_cpu); 71 | unsigned long cpu_util, util_running = util_freq; 72 | struct task_struct *tsk = NULL; 73 | @@ -6814,6 +6814,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) 74 | */ 75 | static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 76 | { 77 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 78 | unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; 79 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; 80 | int cpu, best_energy_cpu = prev_cpu, target = -1; 81 | @@ -6848,7 +6849,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 82 | unsigned long base_energy_pd; 83 | int max_spare_cap_cpu = -1; 84 | 85 | - for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { 86 | + cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask); 87 | + 88 | + for_each_cpu_and(cpu, cpus, sched_domain_span(sd)) { 89 | if (!cpumask_test_cpu(cpu, p->cpus_ptr)) 90 | continue; 91 | 92 | @@ -6885,12 +6888,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 93 | continue; 94 | 95 | /* Compute the 'base' energy of the pd, without @p */ 96 | - base_energy_pd = compute_energy(p, -1, pd); 97 | + base_energy_pd = compute_energy(p, -1, cpus, pd); 98 | base_energy += base_energy_pd; 99 | 100 | /* Evaluate the energy impact of using prev_cpu. */ 101 | if (compute_prev_delta) { 102 | - prev_delta = compute_energy(p, prev_cpu, pd); 103 | + prev_delta = compute_energy(p, prev_cpu, cpus, pd); 104 | if (prev_delta < base_energy_pd) 105 | goto unlock; 106 | prev_delta -= base_energy_pd; 107 | @@ -6899,7 +6902,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 108 | 109 | /* Evaluate the energy impact of using max_spare_cap_cpu. 
*/ 110 | if (max_spare_cap_cpu >= 0) { 111 | - cur_delta = compute_energy(p, max_spare_cap_cpu, pd); 112 | + cur_delta = compute_energy(p, max_spare_cap_cpu, cpus, 113 | + pd); 114 | if (cur_delta < base_energy_pd) 115 | goto unlock; 116 | cur_delta -= base_energy_pd; 117 | -- 118 | 2.37.1 119 | 120 | -------------------------------------------------------------------------------- /0026-sched-core-Use-try_cmpxchg-in-set_nr_-and_not-if-_po.patch: -------------------------------------------------------------------------------- 1 | From c02d5546ea34d589c83eda5055dbd727a396642b Mon Sep 17 00:00:00 2001 2 | From: Uros Bizjak 3 | Date: Wed, 29 Jun 2022 17:15:52 +0200 4 | Subject: [PATCH 26/32] sched/core: Use try_cmpxchg in 5 | set_nr_{and_not,if}_polling 6 | 7 | Use try_cmpxchg instead of cmpxchg (*ptr, old, new) != old in 8 | set_nr_{and_not,if}_polling. x86 cmpxchg returns success in ZF flag, 9 | so this change saves a compare after cmpxchg. 10 | 11 | The definition of cmpxchg based fetch_or was changed in the 12 | same way as atomic_fetch_##op definitions were changed 13 | in e6790e4b5d5e97dc287f3496dd2cf2dbabdfdb35. 14 | 15 | Also declare these two functions as inline to ensure inlining. In the 16 | case of set_nr_and_not_polling, the compiler (gcc) tries to outsmart 17 | itself by constructing the boolean return value with logic operations 18 | on the fetched value, and these extra operations enlarge the function 19 | over the inlining threshold value. 
20 | 21 | Signed-off-by: Uros Bizjak 22 | Signed-off-by: Peter Zijlstra (Intel) 23 | Link: https://lkml.kernel.org/r/20220629151552.6015-1-ubizjak@gmail.com 24 | --- 25 | kernel/sched/core.c | 24 +++++++++--------------- 26 | 1 file changed, 9 insertions(+), 15 deletions(-) 27 | 28 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 29 | index dd69e85b7879..c703d177f62d 100644 30 | --- a/kernel/sched/core.c 31 | +++ b/kernel/sched/core.c 32 | @@ -873,15 +873,11 @@ static inline void hrtick_rq_init(struct rq *rq) 33 | ({ \ 34 | typeof(ptr) _ptr = (ptr); \ 35 | typeof(mask) _mask = (mask); \ 36 | - typeof(*_ptr) _old, _val = *_ptr; \ 37 | + typeof(*_ptr) _val = *_ptr; \ 38 | \ 39 | - for (;;) { \ 40 | - _old = cmpxchg(_ptr, _val, _val | _mask); \ 41 | - if (_old == _val) \ 42 | - break; \ 43 | - _val = _old; \ 44 | - } \ 45 | - _old; \ 46 | + do { \ 47 | + } while (!try_cmpxchg(_ptr, &_val, _val | _mask)); \ 48 | + _val; \ 49 | }) 50 | 51 | #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) 52 | @@ -890,7 +886,7 @@ static inline void hrtick_rq_init(struct rq *rq) 53 | * this avoids any races wrt polling state changes and thereby avoids 54 | * spurious IPIs. 
55 | */ 56 | -static bool set_nr_and_not_polling(struct task_struct *p) 57 | +static inline bool set_nr_and_not_polling(struct task_struct *p) 58 | { 59 | struct thread_info *ti = task_thread_info(p); 60 | return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); 61 | @@ -905,30 +901,28 @@ static bool set_nr_and_not_polling(struct task_struct *p) 62 | static bool set_nr_if_polling(struct task_struct *p) 63 | { 64 | struct thread_info *ti = task_thread_info(p); 65 | - typeof(ti->flags) old, val = READ_ONCE(ti->flags); 66 | + typeof(ti->flags) val = READ_ONCE(ti->flags); 67 | 68 | for (;;) { 69 | if (!(val & _TIF_POLLING_NRFLAG)) 70 | return false; 71 | if (val & _TIF_NEED_RESCHED) 72 | return true; 73 | - old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); 74 | - if (old == val) 75 | + if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED)) 76 | break; 77 | - val = old; 78 | } 79 | return true; 80 | } 81 | 82 | #else 83 | -static bool set_nr_and_not_polling(struct task_struct *p) 84 | +static inline bool set_nr_and_not_polling(struct task_struct *p) 85 | { 86 | set_tsk_need_resched(p); 87 | return true; 88 | } 89 | 90 | #ifdef CONFIG_SMP 91 | -static bool set_nr_if_polling(struct task_struct *p) 92 | +static inline bool set_nr_if_polling(struct task_struct *p) 93 | { 94 | return false; 95 | } 96 | -- 97 | 2.37.1 98 | 99 | -------------------------------------------------------------------------------- /0027-sched-fair-fix-case-with-reduced-capacity-CPU.patch: -------------------------------------------------------------------------------- 1 | From c82a69629c53eda5233f13fc11c3c01585ef48a2 Mon Sep 17 00:00:00 2001 2 | From: Vincent Guittot 3 | Date: Fri, 8 Jul 2022 17:44:01 +0200 4 | Subject: [PATCH 27/32] sched/fair: fix case with reduced capacity CPU 5 | 6 | The capacity of the CPU available for CFS tasks can be reduced because of 7 | other activities running on the latter. 
In such case, it's worth trying to 8 | move CFS tasks on a CPU with more available capacity. 9 | 10 | The rework of the load balance has filtered the case when the CPU is 11 | classified to be fully busy but its capacity is reduced. 12 | 13 | Check if CPU's capacity is reduced while gathering load balance statistic 14 | and classify it group_misfit_task instead of group_fully_busy so we can 15 | try to move the load on another CPU. 16 | 17 | Reported-by: David Chen 18 | Reported-by: Zhang Qiao 19 | Signed-off-by: Vincent Guittot 20 | Signed-off-by: Peter Zijlstra (Intel) 21 | Tested-by: David Chen 22 | Tested-by: Zhang Qiao 23 | Link: https://lkml.kernel.org/r/20220708154401.21411-1-vincent.guittot@linaro.org 24 | --- 25 | kernel/sched/fair.c | 54 +++++++++++++++++++++++++++++++++++---------- 26 | 1 file changed, 42 insertions(+), 12 deletions(-) 27 | 28 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 29 | index a78d2e3b9d49..914096c5b1ae 100644 30 | --- a/kernel/sched/fair.c 31 | +++ b/kernel/sched/fair.c 32 | @@ -7711,8 +7711,8 @@ enum group_type { 33 | */ 34 | group_fully_busy, 35 | /* 36 | - * SD_ASYM_CPUCAPACITY only: One task doesn't fit with CPU's capacity 37 | - * and must be migrated to a more powerful CPU. 38 | + * One task doesn't fit with CPU's capacity and must be migrated to a 39 | + * more powerful CPU. 
40 | */ 41 | group_misfit_task, 42 | /* 43 | @@ -8798,6 +8798,19 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs 44 | return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu); 45 | } 46 | 47 | +static inline bool 48 | +sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) 49 | +{ 50 | + /* 51 | + * When there is more than 1 task, the group_overloaded case already 52 | + * takes care of cpu with reduced capacity 53 | + */ 54 | + if (rq->cfs.h_nr_running != 1) 55 | + return false; 56 | + 57 | + return check_cpu_capacity(rq, sd); 58 | +} 59 | + 60 | /** 61 | * update_sg_lb_stats - Update sched_group's statistics for load balancing. 62 | * @env: The load balancing environment. 63 | @@ -8820,8 +8833,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, 64 | 65 | for_each_cpu_and(i, sched_group_span(group), env->cpus) { 66 | struct rq *rq = cpu_rq(i); 67 | + unsigned long load = cpu_load(rq); 68 | 69 | - sgs->group_load += cpu_load(rq); 70 | + sgs->group_load += load; 71 | sgs->group_util += cpu_util_cfs(i); 72 | sgs->group_runnable += cpu_runnable(rq); 73 | sgs->sum_h_nr_running += rq->cfs.h_nr_running; 74 | @@ -8851,11 +8865,17 @@ static inline void update_sg_lb_stats(struct lb_env *env, 75 | if (local_group) 76 | continue; 77 | 78 | - /* Check for a misfit task on the cpu */ 79 | - if (env->sd->flags & SD_ASYM_CPUCAPACITY && 80 | - sgs->group_misfit_task_load < rq->misfit_task_load) { 81 | - sgs->group_misfit_task_load = rq->misfit_task_load; 82 | - *sg_status |= SG_OVERLOAD; 83 | + if (env->sd->flags & SD_ASYM_CPUCAPACITY) { 84 | + /* Check for a misfit task on the cpu */ 85 | + if (sgs->group_misfit_task_load < rq->misfit_task_load) { 86 | + sgs->group_misfit_task_load = rq->misfit_task_load; 87 | + *sg_status |= SG_OVERLOAD; 88 | + } 89 | + } else if ((env->idle != CPU_NOT_IDLE) && 90 | + sched_reduced_capacity(rq, env->sd)) { 91 | + /* Check for a task running on a CPU with reduced capacity */ 92 | + if 
(sgs->group_misfit_task_load < load) 93 | + sgs->group_misfit_task_load = load; 94 | } 95 | } 96 | 97 | @@ -8908,7 +8928,8 @@ static bool update_sd_pick_busiest(struct lb_env *env, 98 | * CPUs in the group should either be possible to resolve 99 | * internally or be covered by avg_load imbalance (eventually). 100 | */ 101 | - if (sgs->group_type == group_misfit_task && 102 | + if ((env->sd->flags & SD_ASYM_CPUCAPACITY) && 103 | + (sgs->group_type == group_misfit_task) && 104 | (!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) || 105 | sds->local_stat.group_type != group_has_spare)) 106 | return false; 107 | @@ -9517,9 +9538,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s 108 | busiest = &sds->busiest_stat; 109 | 110 | if (busiest->group_type == group_misfit_task) { 111 | - /* Set imbalance to allow misfit tasks to be balanced. */ 112 | - env->migration_type = migrate_misfit; 113 | - env->imbalance = 1; 114 | + if (env->sd->flags & SD_ASYM_CPUCAPACITY) { 115 | + /* Set imbalance to allow misfit tasks to be balanced. */ 116 | + env->migration_type = migrate_misfit; 117 | + env->imbalance = 1; 118 | + } else { 119 | + /* 120 | + * Set load imbalance to allow moving task from cpu 121 | + * with reduced capacity. 
122 | + */ 123 | + env->migration_type = migrate_load; 124 | + env->imbalance = busiest->group_misfit_task_load; 125 | + } 126 | return; 127 | } 128 | 129 | -- 130 | 2.37.1 131 | 132 | -------------------------------------------------------------------------------- /0029-nohz-full-sched-rt-Fix-missed-tick-reenabling-bug-in.patch: -------------------------------------------------------------------------------- 1 | From 5c66d1b9b30f737fcef85a0b75bfe0590e16b62a Mon Sep 17 00:00:00 2001 2 | From: Nicolas Saenz Julienne 3 | Date: Tue, 28 Jun 2022 11:22:59 +0200 4 | Subject: [PATCH 29/32] nohz/full, sched/rt: Fix missed tick-reenabling bug in 5 | dequeue_task_rt() 6 | 7 | dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having 8 | called sched_update_tick_dependency() preventing it from re-enabling the 9 | tick on systems that no longer have pending SCHED_RT tasks but have 10 | multiple runnable SCHED_OTHER tasks: 11 | 12 | dequeue_task_rt() 13 | dequeue_rt_entity() 14 | dequeue_rt_stack() 15 | dequeue_top_rt_rq() 16 | sub_nr_running() // decrements rq->nr_running 17 | sched_update_tick_dependency() 18 | sched_can_stop_tick() // checks rq->rt.rt_nr_running, 19 | ... 20 | __dequeue_rt_entity() 21 | dec_rt_tasks() // decrements rq->rt.rt_nr_running 22 | ... 23 | 24 | Every other scheduler class performs the operation in the opposite 25 | order, and sched_update_tick_dependency() expects the values to be 26 | updated as such. So avoid the misbehaviour by inverting the order in 27 | which the above operations are performed in the RT scheduler. 
28 | 29 | Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model") 30 | Signed-off-by: Nicolas Saenz Julienne 31 | Signed-off-by: Peter Zijlstra (Intel) 32 | Reviewed-by: Valentin Schneider 33 | Reviewed-by: Phil Auld 34 | Link: https://lore.kernel.org/r/20220628092259.330171-1-nsaenzju@redhat.com 35 | --- 36 | kernel/sched/rt.c | 15 +++++++++------ 37 | 1 file changed, 9 insertions(+), 6 deletions(-) 38 | 39 | diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c 40 | index 8c9ed9664840..55f39c8f4203 100644 41 | --- a/kernel/sched/rt.c 42 | +++ b/kernel/sched/rt.c 43 | @@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq) 44 | #endif /* CONFIG_SMP */ 45 | 46 | static void enqueue_top_rt_rq(struct rt_rq *rt_rq); 47 | -static void dequeue_top_rt_rq(struct rt_rq *rt_rq); 48 | +static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count); 49 | 50 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) 51 | { 52 | @@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 53 | rt_se = rt_rq->tg->rt_se[cpu]; 54 | 55 | if (!rt_se) { 56 | - dequeue_top_rt_rq(rt_rq); 57 | + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); 58 | /* Kick cpufreq (see the comment in kernel/sched/sched.h). 
*/ 59 | cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); 60 | } 61 | @@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 62 | 63 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 64 | { 65 | - dequeue_top_rt_rq(rt_rq); 66 | + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); 67 | } 68 | 69 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) 70 | @@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq) 71 | } 72 | 73 | static void 74 | -dequeue_top_rt_rq(struct rt_rq *rt_rq) 75 | +dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count) 76 | { 77 | struct rq *rq = rq_of_rt_rq(rt_rq); 78 | 79 | @@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) 80 | 81 | BUG_ON(!rq->nr_running); 82 | 83 | - sub_nr_running(rq, rt_rq->rt_nr_running); 84 | + sub_nr_running(rq, count); 85 | rt_rq->rt_queued = 0; 86 | 87 | } 88 | @@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag 89 | static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags) 90 | { 91 | struct sched_rt_entity *back = NULL; 92 | + unsigned int rt_nr_running; 93 | 94 | for_each_sched_rt_entity(rt_se) { 95 | rt_se->back = back; 96 | back = rt_se; 97 | } 98 | 99 | - dequeue_top_rt_rq(rt_rq_of_se(back)); 100 | + rt_nr_running = rt_rq_of_se(back)->rt_nr_running; 101 | 102 | for (rt_se = back; rt_se; rt_se = rt_se->back) { 103 | if (on_rt_rq(rt_se)) 104 | __dequeue_rt_entity(rt_se, flags); 105 | } 106 | + 107 | + dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running); 108 | } 109 | 110 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) 111 | -- 112 | 2.37.1 113 | 114 | -------------------------------------------------------------------------------- /0030-sched-core-Fix-the-bug-that-task-won-t-enqueue-into-.patch: -------------------------------------------------------------------------------- 1 | From 91caa5ae242465c3ab9fd473e50170faa7e944f4 Mon Sep 17 00:00:00 2001 2 | 
From: Cruz Zhao 3 | Date: Tue, 28 Jun 2022 15:57:23 +0800 4 | Subject: [PATCH 30/32] sched/core: Fix the bug that task won't enqueue into 5 | core tree when update cookie 6 | 7 | In function sched_core_update_cookie(), a task will enqueue into the 8 | core tree only when it enqueued before, that is, if an uncookied task 9 | is cookied, it will not enqueue into the core tree until it enqueue 10 | again, which will result in unnecessary force idle. 11 | 12 | Here follows the scenario: 13 | CPU x and CPU y are a pair of SMT siblings. 14 | 1. Start task a running on CPU x without sleeping, and task b and 15 | task c running on CPU y without sleeping. 16 | 2. We create a cookie and share it to task a and task b, and then 17 | we create another cookie and share it to task c. 18 | 3. Simpling core_forceidle_sum of task a and b from /proc/PID/sched 19 | 20 | And we will find out that core_forceidle_sum of task a takes 30% 21 | time of the sampling period, which shouldn't happen as task a and b 22 | have the same cookie. 23 | 24 | Then we migrate task a to CPU x', migrate task b and c to CPU y', where 25 | CPU x' and CPU y' are a pair of SMT siblings, and sampling again, we 26 | will found out that core_forceidle_sum of task a and b are almost zero. 27 | 28 | To solve this problem, we enqueue the task into the core tree if it's 29 | on rq. 
30 | 31 | Fixes: 6e33cad0af49("sched: Trivial core scheduling cookie management") 32 | Signed-off-by: Cruz Zhao 33 | Signed-off-by: Peter Zijlstra (Intel) 34 | Link: https://lkml.kernel.org/r/1656403045-100840-2-git-send-email-CruzZhao@linux.alibaba.com 35 | --- 36 | kernel/sched/core_sched.c | 9 +++++---- 37 | 1 file changed, 5 insertions(+), 4 deletions(-) 38 | 39 | diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c 40 | index 5103502da7ba..93878cb2a46d 100644 41 | --- a/kernel/sched/core_sched.c 42 | +++ b/kernel/sched/core_sched.c 43 | @@ -56,7 +56,6 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, 44 | unsigned long old_cookie; 45 | struct rq_flags rf; 46 | struct rq *rq; 47 | - bool enqueued; 48 | 49 | rq = task_rq_lock(p, &rf); 50 | 51 | @@ -68,14 +67,16 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, 52 | */ 53 | SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq)); 54 | 55 | - enqueued = sched_core_enqueued(p); 56 | - if (enqueued) 57 | + if (sched_core_enqueued(p)) 58 | sched_core_dequeue(rq, p, DEQUEUE_SAVE); 59 | 60 | old_cookie = p->core_cookie; 61 | p->core_cookie = cookie; 62 | 63 | - if (enqueued) 64 | + /* 65 | + * Consider the cases: !prev_cookie and !cookie. 66 | + */ 67 | + if (cookie && task_on_rq_queued(p)) 68 | sched_core_enqueue(rq, p); 69 | 70 | /* 71 | -- 72 | 2.37.1 73 | 74 | -------------------------------------------------------------------------------- /0031-rseq-Deprecate-RSEQ_CS_FLAG_NO_RESTART_ON_-flags.patch: -------------------------------------------------------------------------------- 1 | From 0190e4198e47fe99d002d72588f34fd62c9ab570 Mon Sep 17 00:00:00 2001 2 | From: Mathieu Desnoyers 3 | Date: Wed, 22 Jun 2022 15:46:16 -0400 4 | Subject: [PATCH 31/32] rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_* flags 5 | 6 | The pretty much unused RSEQ_CS_FLAG_NO_RESTART_ON_* flags introduce 7 | complexity in rseq, and are subtly buggy [1]. 
Solving those issues 8 | requires introducing additional complexity in the rseq implementation 9 | for each supported architecture. 10 | 11 | Considering that it complexifies the rseq ABI, I am proposing that we 12 | deprecate those flags. [2] 13 | 14 | So far there appears to be consensus from maintainers of user-space 15 | projects impacted by this feature that its removal would be a welcome 16 | simplification. [3] 17 | 18 | The deprecation approach proposed here is to issue WARN_ON_ONCE() when 19 | encountering those flags and kill the offending process with sigsegv. 20 | This should allow us to quickly identify whether anyone yells at us for 21 | removing this. 22 | 23 | Link: https://lore.kernel.org/lkml/20220618182515.95831-1-minhquangbui99@gmail.com/ [1] 24 | Link: https://lore.kernel.org/lkml/258546133.12151.1655739550814.JavaMail.zimbra@efficios.com/ [2] 25 | Link: https://lore.kernel.org/lkml/87pmj1enjh.fsf@email.froward.int.ebiederm.org/ [3] 26 | Signed-off-by: Mathieu Desnoyers 27 | Signed-off-by: Peter Zijlstra (Intel) 28 | Signed-off-by: Ingo Molnar 29 | Link: https://lore.kernel.org/lkml/20220622194617.1155957-1-mathieu.desnoyers@efficios.com 30 | --- 31 | kernel/rseq.c | 23 ++++++++--------------- 32 | 1 file changed, 8 insertions(+), 15 deletions(-) 33 | 34 | diff --git a/kernel/rseq.c b/kernel/rseq.c 35 | index 97ac20b4f738..81d7dc80787b 100644 36 | --- a/kernel/rseq.c 37 | +++ b/kernel/rseq.c 38 | @@ -18,8 +18,9 @@ 39 | #define CREATE_TRACE_POINTS 40 | #include 41 | 42 | -#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \ 43 | - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) 44 | +#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \ 45 | + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \ 46 | + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) 47 | 48 | /* 49 | * 50 | @@ -175,23 +176,15 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 51 | u32 flags, event_mask; 52 | int ret; 53 | 54 | + if (WARN_ON_ONCE(cs_flags & 
RSEQ_CS_NO_RESTART_FLAGS)) 55 | + return -EINVAL; 56 | + 57 | /* Get thread flags. */ 58 | ret = get_user(flags, &t->rseq->flags); 59 | if (ret) 60 | return ret; 61 | 62 | - /* Take critical section flags into account. */ 63 | - flags |= cs_flags; 64 | - 65 | - /* 66 | - * Restart on signal can only be inhibited when restart on 67 | - * preempt and restart on migrate are inhibited too. Otherwise, 68 | - * a preempted signal handler could fail to restart the prior 69 | - * execution context on sigreturn. 70 | - */ 71 | - if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) && 72 | - (flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) != 73 | - RSEQ_CS_PREEMPT_MIGRATE_FLAGS)) 74 | + if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS)) 75 | return -EINVAL; 76 | 77 | /* 78 | @@ -203,7 +196,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 79 | t->rseq_event_mask = 0; 80 | preempt_enable(); 81 | 82 | - return !!(event_mask & ~flags); 83 | + return !!event_mask; 84 | } 85 | 86 | static int clear_rseq_cs(struct task_struct *t) 87 | -- 88 | 2.37.1 89 | 90 | -------------------------------------------------------------------------------- /0032-rseq-Kill-process-when-unknown-flags-are-encountered.patch: -------------------------------------------------------------------------------- 1 | From c17a6ff9321355487d7d5ccaa7d406a0ea06b6c4 Mon Sep 17 00:00:00 2001 2 | From: Mathieu Desnoyers 3 | Date: Wed, 22 Jun 2022 15:46:17 -0400 4 | Subject: [PATCH 32/32] rseq: Kill process when unknown flags are encountered 5 | in ABI structures 6 | 7 | rseq_abi()->flags and rseq_abi()->rseq_cs->flags 29 upper bits are 8 | currently unused. 9 | 10 | The current behavior when those bits are set is to ignore them. This is 11 | not an ideal behavior, because when future features will start using 12 | those flags, if user-space fails to correctly validate that the kernel 13 | indeed supports those flags (e.g. 
with a new sys_rseq flags bit) before 14 | using them, it may incorrectly assume that the kernel will handle those 15 | flags way when in fact those will be silently ignored on older kernels. 16 | 17 | Validating that unused flags bits are cleared will allow a smoother 18 | transition when those flags will start to be used by allowing 19 | applications to fail early, and obviously, when they attempt to use the 20 | new flags on an older kernel that does not support them. 21 | 22 | Signed-off-by: Mathieu Desnoyers 23 | Signed-off-by: Peter Zijlstra (Intel) 24 | Signed-off-by: Ingo Molnar 25 | Link: https://lkml.kernel.org/r/20220622194617.1155957-2-mathieu.desnoyers@efficios.com 26 | --- 27 | kernel/rseq.c | 4 ++-- 28 | 1 file changed, 2 insertions(+), 2 deletions(-) 29 | 30 | diff --git a/kernel/rseq.c b/kernel/rseq.c 31 | index 81d7dc80787b..bda8175f8f99 100644 32 | --- a/kernel/rseq.c 33 | +++ b/kernel/rseq.c 34 | @@ -176,7 +176,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 35 | u32 flags, event_mask; 36 | int ret; 37 | 38 | - if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS)) 39 | + if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS) || cs_flags) 40 | return -EINVAL; 41 | 42 | /* Get thread flags. 
*/ 43 | @@ -184,7 +184,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 44 | if (ret) 45 | return ret; 46 | 47 | - if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS)) 48 | + if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS) || flags) 49 | return -EINVAL; 50 | 51 | /* 52 | -- 53 | 2.37.1 54 | 55 | -------------------------------------------------------------------------------- /0051-block-bfq-Fix-division-by-zero-error-on-zero-wsum.patch: -------------------------------------------------------------------------------- 1 | From e53413f8deedf738a6782cc14cc00bd5852ccf18 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 13 Apr 2023 14:30:09 +0100 4 | Subject: [PATCH] block, bfq: Fix division by zero error on zero wsum 5 | Content-Type: text/plain; charset="utf-8" 6 | Content-Transfer-Encoding: 8bit 7 | 8 | When the weighted sum is zero the calculation of limit causes 9 | a division by zero error. Fix this by continuing to the next level. 10 | 11 | This was discovered by running as root: 12 | 13 | stress-ng --ioprio 0 14 | 15 | Fixes divison by error oops: 16 | 17 | [ 521.450556] divide error: 0000 [#1] SMP NOPTI 18 | [ 521.450766] CPU: 2 PID: 2684464 Comm: stress-ng-iopri Not tainted 6.2.1-1280.native #1 19 | [ 521.451117] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014 20 | [ 521.451627] RIP: 0010:bfqq_request_over_limit+0x207/0x400 21 | [ 521.451875] Code: 01 48 8d 0c c8 74 0b 48 8b 82 98 00 00 00 48 8d 0c c8 8b 85 34 ff ff ff 48 89 ca 41 0f af 41 50 48 d1 ea 48 98 48 01 d0 31 d2 <48> f7 f1 41 39 41 48 89 85 34 ff ff ff 0f 8c 7b 01 00 00 49 8b 44 22 | [ 521.452699] RSP: 0018:ffffb1af84eb3948 EFLAGS: 00010046 23 | [ 521.452938] RAX: 000000000000003c RBX: 0000000000000000 RCX: 0000000000000000 24 | [ 521.453262] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb1af84eb3978 25 | [ 521.453584] RBP: ffffb1af84eb3a30 R08: 0000000000000001 R09: ffff8f88ab8a4ba0 26 | 
[ 521.453905] R10: 0000000000000000 R11: 0000000000000001 R12: ffff8f88ab8a4b18 27 | [ 521.454224] R13: ffff8f8699093000 R14: 0000000000000001 R15: ffffb1af84eb3970 28 | [ 521.454549] FS: 00005640b6b0b580(0000) GS:ffff8f88b3880000(0000) knlGS:0000000000000000 29 | [ 521.454912] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 30 | [ 521.455170] CR2: 00007ffcbcae4e38 CR3: 00000002e46de001 CR4: 0000000000770ee0 31 | [ 521.455491] PKRU: 55555554 32 | [ 521.455619] Call Trace: 33 | [ 521.455736] 34 | [ 521.455837] ? bfq_request_merge+0x3a/0xc0 35 | [ 521.456027] ? elv_merge+0x115/0x140 36 | [ 521.456191] bfq_limit_depth+0xc8/0x240 37 | [ 521.456366] __blk_mq_alloc_requests+0x21a/0x2c0 38 | [ 521.456577] blk_mq_submit_bio+0x23c/0x6c0 39 | [ 521.456766] __submit_bio+0xb8/0x140 40 | [ 521.457236] submit_bio_noacct_nocheck+0x212/0x300 41 | [ 521.457748] submit_bio_noacct+0x1a6/0x580 42 | [ 521.458220] submit_bio+0x43/0x80 43 | [ 521.458660] ext4_io_submit+0x23/0x80 44 | [ 521.459116] ext4_do_writepages+0x40a/0xd00 45 | [ 521.459596] ext4_writepages+0x65/0x100 46 | [ 521.460050] do_writepages+0xb7/0x1c0 47 | [ 521.460492] __filemap_fdatawrite_range+0xa6/0x100 48 | [ 521.460979] file_write_and_wait_range+0xbf/0x140 49 | [ 521.461452] ext4_sync_file+0x105/0x340 50 | [ 521.461882] __x64_sys_fsync+0x67/0x100 51 | [ 521.462305] ? 
syscall_exit_to_user_mode+0x2c/0x1c0 52 | [ 521.462768] do_syscall_64+0x3b/0xc0 53 | [ 521.463165] entry_SYSCALL_64_after_hwframe+0x5a/0xc4 54 | [ 521.463621] RIP: 0033:0x5640b6c56590 55 | [ 521.464006] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 80 3d 71 70 0e 00 00 74 17 b8 4a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 7c 56 | 57 | Signed-off-by: Colin Ian King 58 | Link: https://lore.kernel.org/r/20230413133009.1605335-1-colin.i.king@gmail.com 59 | Signed-off-by: Jens Axboe 60 | --- 61 | block/bfq-iosched.c | 2 ++ 62 | 1 file changed, 2 insertions(+) 63 | 64 | diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c 65 | index b4c4b4808c6c..3164e3177965 100644 66 | --- a/block/bfq-iosched.c 67 | +++ b/block/bfq-iosched.c 68 | @@ -648,6 +648,8 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) 69 | sched_data->service_tree[i].wsum; 70 | } 71 | } 72 | + if (!wsum) 73 | + continue; 74 | limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum); 75 | if (entity->allocated >= limit) { 76 | bfq_log_bfqq(bfqq->bfqd, bfqq, 77 | -- 78 | 2.30.2 79 | 80 | -------------------------------------------------------------------------------- /0101-i8042-decrease-debug-message-level-to-info.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 23 Jun 2015 01:26:52 -0500 4 | Subject: [PATCH] i8042: decrease debug message level to info 5 | 6 | Author: Arjan van de Ven 7 | 8 | Signed-off-by: Miguel Bernal Marin 9 | Signed-off-by: Jose Carlos Venegas Munoz 10 | --- 11 | drivers/input/serio/i8042.c | 10 +++++----- 12 | 1 file changed, 5 insertions(+), 5 deletions(-) 13 | 14 | diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c 15 | index 3fc0a89cc785..a7c103f9dfd3 100644 16 | --- a/drivers/input/serio/i8042.c 17 | +++ 
b/drivers/input/serio/i8042.c 18 | @@ -621,7 +621,7 @@ static int i8042_enable_kbd_port(void) 19 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { 20 | i8042_ctr &= ~I8042_CTR_KBDINT; 21 | i8042_ctr |= I8042_CTR_KBDDIS; 22 | - pr_err("Failed to enable KBD port\n"); 23 | + pr_info("Failed to enable KBD port\n"); 24 | return -EIO; 25 | } 26 | 27 | @@ -640,7 +640,7 @@ static int i8042_enable_aux_port(void) 28 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { 29 | i8042_ctr &= ~I8042_CTR_AUXINT; 30 | i8042_ctr |= I8042_CTR_AUXDIS; 31 | - pr_err("Failed to enable AUX port\n"); 32 | + pr_info("Failed to enable AUX port\n"); 33 | return -EIO; 34 | } 35 | 36 | @@ -732,7 +732,7 @@ static int i8042_check_mux(void) 37 | i8042_ctr &= ~I8042_CTR_AUXINT; 38 | 39 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { 40 | - pr_err("Failed to disable AUX port, can't use MUX\n"); 41 | + pr_info("Failed to disable AUX port, can't use MUX\n"); 42 | return -EIO; 43 | } 44 | 45 | @@ -955,7 +955,7 @@ static int i8042_controller_selftest(void) 46 | do { 47 | 48 | if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { 49 | - pr_err("i8042 controller selftest timeout\n"); 50 | + pr_info("i8042 controller selftest timeout\n"); 51 | return -ENODEV; 52 | } 53 | 54 | @@ -977,7 +977,7 @@ static int i8042_controller_selftest(void) 55 | pr_info("giving up on controller selftest, continuing anyway...\n"); 56 | return 0; 57 | #else 58 | - pr_err("i8042 controller selftest failed\n"); 59 | + pr_info("i8042 controller selftest failed\n"); 60 | return -EIO; 61 | #endif 62 | } 63 | -- 64 | https://clearlinux.org 65 | 66 | -------------------------------------------------------------------------------- /0102-increase-the-ext4-default-commit-age.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 11 Jan 2016 10:01:44 -0600 4 | Subject: [PATCH] increase 
the ext4 default commit age 5 | 6 | Both the VM and EXT4 have a "commit to disk after X seconds" time. 7 | Currently the EXT4 time is shorter than our VM time, which is a bit 8 | suboptional, 9 | it's better for performance to let the VM do the writeouts in bulk 10 | rather than something deep in the journalling layer. 11 | 12 | (DISTRO TWEAK -- NOT FOR UPSTREAM) 13 | 14 | Signed-off-by: Arjan van de Ven 15 | Signed-off-by: Jose Carlos Venegas Munoz 16 | --- 17 | include/linux/jbd2.h | 2 +- 18 | 1 file changed, 1 insertion(+), 1 deletion(-) 19 | 20 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h 21 | index 9c3ada74ffb1..c4aef0bb2661 100644 22 | --- a/include/linux/jbd2.h 23 | +++ b/include/linux/jbd2.h 24 | @@ -45,7 +45,7 @@ 25 | /* 26 | * The default maximum commit age, in seconds. 27 | */ 28 | -#define JBD2_DEFAULT_MAX_COMMIT_AGE 5 29 | +#define JBD2_DEFAULT_MAX_COMMIT_AGE 30 30 | 31 | #ifdef CONFIG_JBD2_DEBUG 32 | /* 33 | -- 34 | https://clearlinux.org 35 | 36 | -------------------------------------------------------------------------------- /0103-silence-rapl.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 14 Mar 2016 11:22:09 -0600 4 | Subject: [PATCH] silence rapl 5 | 6 | --- 7 | drivers/powercap/intel_rapl_common.c | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c 11 | index 07611a00b78f..4031d810def5 100644 12 | --- a/drivers/powercap/intel_rapl_common.c 13 | +++ b/drivers/powercap/intel_rapl_common.c 14 | @@ -1512,7 +1512,7 @@ static int __init rapl_init(void) 15 | 16 | id = x86_match_cpu(rapl_ids); 17 | if (!id) { 18 | - pr_err("driver does not support CPU family %d model %d\n", 19 | + pr_info("driver does not support CPU family %d model %d\n", 20 | boot_cpu_data.x86, 
boot_cpu_data.x86_model); 21 | 22 | return -ENODEV; 23 | -- 24 | https://clearlinux.org 25 | 26 | -------------------------------------------------------------------------------- /0104-pci-pme-wakeups.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 14 Mar 2016 11:10:58 -0600 4 | Subject: [PATCH] pci pme wakeups 5 | 6 | Reduce wakeups for PME checks, which are a workaround for miswired 7 | boards (sadly, too many of them) in laptops. 8 | --- 9 | drivers/pci/pci.c | 2 +- 10 | 1 file changed, 1 insertion(+), 1 deletion(-) 11 | 12 | diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c 13 | index d25122fbe98a..dbfb6aaa4a07 100644 14 | --- a/drivers/pci/pci.c 15 | +++ b/drivers/pci/pci.c 16 | @@ -60,7 +60,7 @@ struct pci_pme_device { 17 | struct pci_dev *dev; 18 | }; 19 | 20 | -#define PME_TIMEOUT 1000 /* How long between PME checks */ 21 | +#define PME_TIMEOUT 4000 /* How long between PME checks */ 22 | 23 | static void pci_dev_d3_sleep(struct pci_dev *dev) 24 | { 25 | -- 26 | https://clearlinux.org 27 | 28 | -------------------------------------------------------------------------------- /0107-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 11 Feb 2015 16:05:23 -0600 4 | Subject: [PATCH] bootstats: add printk's to measure boot time in more detail 5 | 6 | Few distro-tweaks to add printk's to visualize boot time better 7 | 8 | Author: Arjan van de Ven 9 | 10 | Signed-off-by: Miguel Bernal Marin 11 | --- 12 | arch/x86/kernel/alternative.c | 2 ++ 13 | 1 file changed, 2 insertions(+) 14 | 15 | diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c 16 | index b4470eabf151..f9de9eb7b8e1 100644 17 | 
--- a/arch/x86/kernel/alternative.c 18 | +++ b/arch/x86/kernel/alternative.c 19 | @@ -829,7 +829,9 @@ void __init alternative_instructions(void) 20 | * Then patch alternatives, such that those paravirt calls that are in 21 | * alternatives can be overwritten by their immediate fragments. 22 | */ 23 | + printk("clr: Applying alternatives\n"); 24 | apply_alternatives(__alt_instructions, __alt_instructions_end); 25 | + printk("clr: Applying alternatives done\n"); 26 | 27 | #ifdef CONFIG_SMP 28 | /* Patch to UP if other cpus not imminent. */ 29 | -- 30 | https://clearlinux.org 31 | 32 | -------------------------------------------------------------------------------- /0108-smpboot-reuse-timer-calibration.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 11 Feb 2015 17:28:14 -0600 4 | Subject: [PATCH] smpboot: reuse timer calibration 5 | 6 | NO point recalibrating for known-constant tsc ... 7 | saves 200ms+ of boot time. 
 8 | --- 9 | arch/x86/kernel/tsc.c | 3 +++ 10 | 1 file changed, 3 insertions(+) 11 | 12 | diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c 13 | index a698196377be..5f3ee7c31c8a 100644 14 | --- a/arch/x86/kernel/tsc.c 15 | +++ b/arch/x86/kernel/tsc.c 16 | @@ -1569,6 +1569,9 @@ unsigned long calibrate_delay_is_known(void) 17 | if (!constant_tsc || !mask) 18 | return 0; 19 | 20 | + if (cpu != 0) 21 | + return cpu_data(0).loops_per_jiffy; 22 | + 23 | sibling = cpumask_any_but(mask, cpu); 24 | if (sibling < nr_cpu_ids) 25 | return cpu_data(sibling).loops_per_jiffy; 26 | -- 27 | https://clearlinux.org 28 | 29 | -------------------------------------------------------------------------------- /0109-initialize-ata-before-graphics.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 2 Jun 2016 23:36:32 -0500 4 | Subject: [PATCH] initialize ata before graphics 5 | 6 | ATA init is the long pole in the boot process, and it's asynchronous. 
7 | move the graphics init after it so that ata and graphics initialize 8 | in parallel 9 | --- 10 | drivers/Makefile | 15 ++++++++------- 11 | 1 file changed, 8 insertions(+), 7 deletions(-) 12 | 13 | diff --git a/drivers/Makefile b/drivers/Makefile 14 | index a110338c860c..f91099276a78 100644 15 | --- a/drivers/Makefile 16 | +++ b/drivers/Makefile 17 | @@ -59,15 +59,8 @@ obj-y += char/ 18 | # iommu/ comes before gpu as gpu are using iommu controllers 19 | obj-y += iommu/ 20 | 21 | -# gpu/ comes after char for AGP vs DRM startup and after iommu 22 | -obj-y += gpu/ 23 | - 24 | obj-$(CONFIG_CONNECTOR) += connector/ 25 | 26 | -# i810fb and intelfb depend on char/agp/ 27 | -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ 28 | -obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ 29 | - 30 | obj-$(CONFIG_PARPORT) += parport/ 31 | obj-y += base/ block/ misc/ mfd/ nfc/ 32 | obj-$(CONFIG_LIBNVDIMM) += nvdimm/ 33 | @@ -79,6 +72,14 @@ obj-y += macintosh/ 34 | obj-y += scsi/ 35 | obj-y += nvme/ 36 | obj-$(CONFIG_ATA) += ata/ 37 | + 38 | +# gpu/ comes after char for AGP vs DRM startup and after iommu 39 | +obj-y += gpu/ 40 | + 41 | +# i810fb and intelfb depend on char/agp/ 42 | +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ 43 | +obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ 44 | + 45 | obj-$(CONFIG_TARGET_CORE) += target/ 46 | obj-$(CONFIG_MTD) += mtd/ 47 | obj-$(CONFIG_SPI) += spi/ 48 | -- 49 | https://clearlinux.org 50 | 51 | -------------------------------------------------------------------------------- /0110-give-rdrand-some-credit.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Fri, 29 Jul 2016 19:10:52 +0000 4 | Subject: [PATCH] give rdrand some credit 5 | 6 | try to credit rdrand/rdseed with some entropy 7 | 8 | In VMs but even modern hardware, we're super starved for entropy, and while we can 9 | and do wear a tin foil hat, it's 
very hard to argue that 10 | rdrand and rdtsc add zero entropy. 11 | --- 12 | drivers/char/random.c | 2 ++ 13 | 1 file changed, 2 insertions(+) 14 | 15 | diff --git a/drivers/char/random.c b/drivers/char/random.c 16 | index 3404a91edf29..479994faedba 100644 17 | --- a/drivers/char/random.c 18 | +++ b/drivers/char/random.c 19 | @@ -1678,6 +1678,8 @@ static void __init init_std_data(void) 20 | if (!arch_get_random_seed_long(&rv) && 21 | !arch_get_random_long(&rv)) 22 | rv = random_get_entropy(); 23 | + else 24 | + credit_entropy_bits(1); 25 | mix_pool_bytes(&rv, sizeof(rv)); 26 | } 27 | mix_pool_bytes(utsname(), sizeof(*(utsname()))); 28 | -- 29 | https://clearlinux.org 30 | 31 | -------------------------------------------------------------------------------- /0111-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Fri, 6 Jan 2017 15:34:09 +0000 4 | Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little 5 | bigger than default 6 | 7 | --- 8 | net/ipv4/tcp.c | 4 ++-- 9 | 1 file changed, 2 insertions(+), 2 deletions(-) 10 | 11 | diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c 12 | index 28ff2a820f7c..c4f240da8d70 100644 13 | --- a/net/ipv4/tcp.c 14 | +++ b/net/ipv4/tcp.c 15 | @@ -4604,8 +4604,8 @@ void __init tcp_init(void) 16 | tcp_init_mem(); 17 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ 18 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); 19 | - max_wshare = min(4UL*1024*1024, limit); 20 | - max_rshare = min(6UL*1024*1024, limit); 21 | + max_wshare = min(16UL*1024*1024, limit); 22 | + max_rshare = min(16UL*1024*1024, limit); 23 | 24 | init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; 25 | init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; 26 | -- 27 | https://clearlinux.org 28 | 29 | 
-------------------------------------------------------------------------------- /0112-init-wait-for-partition-and-retry-scan.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 17 May 2017 01:52:11 +0000 4 | Subject: [PATCH] init: wait for partition and retry scan 5 | 6 | As Clear Linux boots fast the device is not ready when 7 | the mounting code is reached, so a retry device scan will 8 | be performed every 0.5 sec for at least 40 sec 9 | and synchronize the async task. 10 | 11 | Signed-off-by: Miguel Bernal Marin 12 | --- 13 | init/do_mounts.c | 16 ++++++++++++++-- 14 | 1 file changed, 14 insertions(+), 2 deletions(-) 15 | 16 | diff --git a/init/do_mounts.c b/init/do_mounts.c 17 | index 762b534978d9..107b96927049 100644 18 | --- a/init/do_mounts.c 19 | +++ b/init/do_mounts.c 20 | @@ -613,7 +623,9 @@ void __init prepare_namespace(void) 21 | * For example, it is not atypical to wait 5 seconds here 22 | * for the touchpad of a laptop to initialize. 
23 | */ 24 | + async_synchronize_full(); 25 | wait_for_device_probe(); 26 | + async_synchronize_full(); 27 | 28 | md_run_setup(); 29 | 30 | -- 31 | https://clearlinux.org 32 | 33 | --- linux-6.5.1/block/early-lookup.c~ 2023-09-02 07:13:30.000000000 +0000 34 | +++ linux-6.5.1/block/early-lookup.c 2023-09-18 14:16:34.721720093 +0000 35 | @@ -243,8 +243,18 @@ 36 | */ 37 | int __init early_lookup_bdev(const char *name, dev_t *devt) 38 | { 39 | - if (strncmp(name, "PARTUUID=", 9) == 0) 40 | - return devt_from_partuuid(name + 9, devt); 41 | + if (strncmp(name, "PARTUUID=", 9) == 0) { 42 | + int res; 43 | + int needtowait = 40<<1; 44 | + res = devt_from_partuuid(name + 9, devt); 45 | + if (!res) return res; 46 | + while (res && needtowait) { 47 | + msleep(500); 48 | + res = devt_from_partuuid(name + 9, devt); 49 | + needtowait--; 50 | + } 51 | + return res; 52 | + } 53 | if (strncmp(name, "PARTLABEL=", 10) == 0) 54 | return devt_from_partlabel(name + 10, devt); 55 | if (strncmp(name, "/dev/", 5) == 0) 56 | --- linux-6.5.1/block/early-lookup.c~ 2023-09-18 14:16:34.000000000 +0000 57 | +++ linux-6.5.1/block/early-lookup.c 2023-09-18 14:27:32.042046852 +0000 58 | @@ -5,6 +5,7 @@ 59 | */ 60 | #include 61 | #include 62 | +#include 63 | 64 | struct uuidcmp { 65 | const char *uuid; 66 | -------------------------------------------------------------------------------- /0113-print-fsync-count-for-bootchart.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 20 Jun 2017 20:19:08 +0000 4 | Subject: [PATCH] print fsync count for bootchart 5 | 6 | --- 7 | block/blk-core.c | 3 +++ 8 | include/linux/sched.h | 1 + 9 | kernel/sched/debug.c | 1 + 10 | 3 files changed, 5 insertions(+) 11 | 12 | diff --git a/block/blk-core.c b/block/blk-core.c 13 | index 779b4a1f66ac..f1a4da52b511 100644 14 | --- a/block/blk-core.c 15 | +++ b/block/blk-core.c 
16 | @@ -945,6 +945,9 @@ void submit_bio(struct bio *bio) 17 | task_io_account_read(bio->bi_iter.bi_size); 18 | count_vm_events(PGPGIN, count); 19 | } 20 | + 21 | + if (bio->bi_opf & REQ_PREFLUSH) 22 | + current->fsync_count++; 23 | } 24 | 25 | /* 26 | diff --git a/include/linux/sched.h b/include/linux/sched.h 27 | index 4b4cc633b266..094875ea5388 100644 28 | --- a/include/linux/sched.h 29 | +++ b/include/linux/sched.h 30 | @@ -1046,6 +1046,7 @@ struct task_struct { 31 | /* Cached requested key. */ 32 | struct key *cached_requested_key; 33 | #endif 34 | + int fsync_count; 35 | 36 | /* 37 | * executable name, excluding path. 38 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c 39 | index 102d6f70e84d..cc06e81a9e61 100644 40 | --- a/kernel/sched/debug.c 41 | +++ b/kernel/sched/debug.c 42 | @@ -960,6 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, 43 | PN(se.exec_start); 44 | PN(se.vruntime); 45 | PN(se.sum_exec_runtime); 46 | + P(fsync_count); 47 | 48 | nr_switches = p->nvcsw + p->nivcsw; 49 | 50 | -- 51 | https://clearlinux.org 52 | 53 | -------------------------------------------------------------------------------- /0114-add-boot-option-to-allow-unsigned-modules.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: "Brett T. Warden" 3 | Date: Mon, 13 Aug 2018 04:01:21 -0500 4 | Subject: [PATCH] add boot option to allow unsigned modules 5 | 6 | Add module.sig_unenforce boot parameter to allow loading unsigned kernel 7 | modules. Parameter is only effective if CONFIG_MODULE_SIG_FORCE is 8 | enabled and system is *not* SecureBooted. 9 | 10 | Signed-off-by: Brett T. 
Warden 11 | Signed-off-by: Miguel Bernal Marin 12 | --- 13 | kernel/module.c | 20 ++++++++++++++++++++ 14 | 1 file changed, 20 insertions(+) 15 | 16 | --- linux-5.19.1/kernel/module/signing.c~ 2022-08-11 11:22:05.000000000 +0000 17 | +++ linux-5.19.1/kernel/module/signing.c 2022-08-11 15:20:18.199749857 +0000 18 | @@ -14,6 +14,8 @@ 19 | #include 20 | #include 21 | #include 22 | +#include 23 | + 24 | #include "internal.h" 25 | 26 | #undef MODULE_PARAM_PREFIX 27 | @@ -21,6 +23,11 @@ 28 | 29 | static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); 30 | module_param(sig_enforce, bool_enable_only, 0644); 31 | +/* Allow disabling module signature requirement by adding boot param */ 32 | +static bool sig_unenforce = false; 33 | +module_param(sig_unenforce, bool_enable_only, 0644); 34 | + 35 | +extern struct boot_params boot_params; 36 | 37 | /* 38 | * Export sig_enforce kernel cmdline parameter to allow other subsystems rely 39 | @@ -28,6 +35,8 @@ 40 | */ 41 | bool is_module_sig_enforced(void) 42 | { 43 | + if (sig_unenforce) 44 | + return false; 45 | return sig_enforce; 46 | } 47 | EXPORT_SYMBOL(is_module_sig_enforced); 48 | -------------------------------------------------------------------------------- /0115-enable-stateless-firmware-loading.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: William Douglas 3 | Date: Wed, 20 Jun 2018 17:23:21 +0000 4 | Subject: [PATCH] enable stateless firmware loading 5 | 6 | Prefer the order of specific version before generic and /etc before 7 | /lib to enable the user to give specific overrides for generic 8 | firmware and distribution firmware. 
9 | --- 10 | drivers/base/firmware_loader/main.c | 2 ++ 11 | 1 file changed, 2 insertions(+) 12 | 13 | diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c 14 | index 406a907a4cae..89890e085171 100644 15 | --- a/drivers/base/firmware_loader/main.c 16 | +++ b/drivers/base/firmware_loader/main.c 17 | @@ -407,6 +407,8 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, 18 | static char fw_path_para[256]; 19 | static const char * const fw_path[] = { 20 | fw_path_para, 21 | + "/etc/firmware/" UTS_RELEASE, 22 | + "/etc/firmware", 23 | "/lib/firmware/updates/" UTS_RELEASE, 24 | "/lib/firmware/updates", 25 | "/lib/firmware/" UTS_RELEASE, 26 | -- 27 | https://clearlinux.org 28 | 29 | -------------------------------------------------------------------------------- /0116-migrate-some-systemd-defaults-to-the-kernel-defaults.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Auke Kok 3 | Date: Thu, 2 Aug 2018 12:03:22 -0700 4 | Subject: [PATCH] migrate some systemd defaults to the kernel defaults. 5 | 6 | These settings are needed to prevent networking issues when 7 | the networking modules come up by default without explicit 8 | settings, which breaks some cases. 9 | 10 | We don't want the modprobe settings to be read at boot time 11 | if we're not going to do anything else ever. 
12 | --- 13 | drivers/net/dummy.c | 2 +- 14 | include/uapi/linux/if_bonding.h | 2 +- 15 | 2 files changed, 2 insertions(+), 2 deletions(-) 16 | 17 | diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c 18 | index f82ad7419508..5e8faa70aad6 100644 19 | --- a/drivers/net/dummy.c 20 | +++ b/drivers/net/dummy.c 21 | @@ -43,7 +43,7 @@ 22 | 23 | #define DRV_NAME "dummy" 24 | 25 | -static int numdummies = 1; 26 | +static int numdummies = 0; 27 | 28 | /* fake multicast ability */ 29 | static void set_multicast_list(struct net_device *dev) 30 | diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h 31 | index d174914a837d..bf8e2af101a3 100644 32 | --- a/include/uapi/linux/if_bonding.h 33 | +++ b/include/uapi/linux/if_bonding.h 34 | @@ -82,7 +82,7 @@ 35 | #define BOND_STATE_ACTIVE 0 /* link is active */ 36 | #define BOND_STATE_BACKUP 1 /* link is backup */ 37 | 38 | -#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ 39 | +#define BOND_DEFAULT_MAX_BONDS 0 /* Default maximum number of devices to support */ 40 | 41 | #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ 42 | 43 | -- 44 | https://clearlinux.org 45 | 46 | -------------------------------------------------------------------------------- /0117-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Alan Cox 3 | Date: Thu, 10 Mar 2016 15:11:28 +0000 4 | Subject: [PATCH] xattr: allow setting user.* attributes on symlinks by owner 5 | 6 | Kvmtool and clear containers supports using user attributes to label host 7 | files with the virtual uid/guid of the file in the container. This allows an 8 | end user to manage their files and a complete uid space without all the ugly 9 | namespace stuff. 
10 | 11 | The one gap in the support is symlinks because an end user can change the 12 | ownership of a symbolic link. We support attributes on these files as you 13 | can already (as root) set security attributes on them. 14 | 15 | The current rules seem slightly over-paranoid and as we have a use case this 16 | patch enables updating the attributes on a symbolic link IFF you are the 17 | owner of the symlink (as permissions are not usually meaningful on the link 18 | itself). 19 | 20 | Signed-off-by: Alan Cox 21 | --- 22 | fs/xattr.c | 15 ++++++++------- 23 | 1 file changed, 8 insertions(+), 7 deletions(-) 24 | 25 | diff --git a/fs/xattr.c b/fs/xattr.c 26 | index 998045165916..62b6fb4dedee 100644 27 | --- a/fs/xattr.c 28 | +++ b/fs/xattr.c 29 | @@ -120,16 +120,17 @@ xattr_permission(struct user_namespace *mnt_userns, struct inode *inode, 30 | } 31 | 32 | /* 33 | - * In the user.* namespace, only regular files and directories can have 34 | - * extended attributes. For sticky directories, only the owner and 35 | - * privileged users can write attributes. 36 | + * In the user.* namespace, only regular files, symbolic links, and 37 | + * directories can have extended attributes. For symbolic links and 38 | + * sticky directories, only the owner and privileged users can write 39 | + * attributes. 40 | */ 41 | if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { 42 | - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 43 | + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) 44 | return (mask & MAY_WRITE) ? 
-EPERM : -ENODATA; 45 | - if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && 46 | - (mask & MAY_WRITE) && 47 | - !inode_owner_or_capable(idmap, inode)) 48 | + if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX)) 49 | + || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE) 50 | + && !inode_owner_or_capable(idmap, inode)) 51 | return -EPERM; 52 | } 53 | 54 | -- 55 | https://clearlinux.org 56 | 57 | -------------------------------------------------------------------------------- /0118-add-scheduler-turbo3-patch.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 21 Nov 2018 21:21:44 +0000 4 | Subject: [PATCH] add scheduler turbo3 patch 5 | 6 | Small scheduler tweak to make the scheduler more turbo3 aware 7 | --- 8 | arch/x86/kernel/itmt.c | 14 ++++++++++++++ 9 | kernel/sched/fair.c | 19 +++++++++++++++++++ 10 | 2 files changed, 33 insertions(+) 11 | 12 | diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c 13 | index 9ff480e94511..57027bfed25f 100644 14 | --- a/arch/x86/kernel/itmt.c 15 | +++ b/arch/x86/kernel/itmt.c 16 | @@ -172,6 +172,11 @@ int arch_asym_cpu_priority(int cpu) 17 | return per_cpu(sched_core_priority, cpu); 18 | } 19 | 20 | +extern int best_core; 21 | +extern int second_best_core; 22 | +static int best_core_score; 23 | +static int second_best_core_score; 24 | + 25 | /** 26 | * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 27 | * @prio: Priority of cpu core 28 | @@ -201,5 +206,14 @@ void sched_set_itmt_core_prio(int prio, int core_cpu) 29 | smt_prio = prio * smp_num_siblings / (i * i); 30 | per_cpu(sched_core_priority, cpu) = smt_prio; 31 | i++; 32 | + 33 | + if (smt_prio > best_core_score) { 34 | + best_core = cpu; 35 | + best_core_score = smt_prio; 36 | + } else 37 | + if (smt_prio > second_best_core_score) { 38 | + second_best_core = cpu; 39 | + second_best_core_score 
= smt_prio; 40 | + } 41 | } 42 | } 43 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 44 | index 2f461f059278..632fcb22f4e2 100644 45 | --- a/kernel/sched/fair.c 46 | +++ b/kernel/sched/fair.c 47 | @@ -6854,6 +6854,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 48 | * 49 | * Returns the target CPU number. 50 | */ 51 | + 52 | +int best_core = -1; 53 | +int second_best_core = -1; 54 | + 55 | static int 56 | select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) 57 | { 58 | @@ -6882,6 +6886,21 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) 59 | want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); 60 | } 61 | 62 | + if (prev_cpu != best_core && prev_cpu != second_best_core && 63 | + cpu_rq(prev_cpu)->nr_running != 0) { 64 | + if (second_best_core != -1 && cpu_rq(second_best_core)->nr_running == 0 && 65 | + nr_iowait_cpu(second_best_core) < 2 && cpu_to_node(prev_cpu) == cpu_to_node(second_best_core)) 66 | + prev_cpu = second_best_core; 67 | + if (best_core != -1 && cpu_rq(best_core)->nr_running == 0 && 68 | + nr_iowait_cpu(best_core) < 2 && cpu_to_node(prev_cpu) == cpu_to_node(best_core)) 69 | + prev_cpu = best_core; 70 | + } 71 | +/* 72 | + if (prev_cpu > 0 && cpu_rq(prev_cpu)->nr_running != 0 && cpu_rq(prev_cpu - 1)->nr_running == 0) 73 | + prev_cpu = prev_cpu - 1; 74 | +*/ 75 | + 76 | + 77 | rcu_read_lock(); 78 | for_each_domain(cpu, tmp) { 79 | /* 80 | -- 81 | https://clearlinux.org 82 | 83 | -------------------------------------------------------------------------------- /0120-do-accept-in-LIFO-order-for-cache-efficiency.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 13 Dec 2018 01:00:49 +0000 4 | Subject: [PATCH] do accept() in LIFO order for cache efficiency 5 | 6 | --- 7 | include/linux/wait.h | 2 ++ 8 | 
kernel/sched/wait.c | 24 ++++++++++++++++++++++++ 9 | net/ipv4/inet_connection_sock.c | 2 +- 10 | 3 files changed, 27 insertions(+), 1 deletion(-) 11 | 12 | diff --git a/include/linux/wait.h b/include/linux/wait.h 13 | index 851e07da2583..85653fc33274 100644 14 | --- a/include/linux/wait.h 15 | +++ b/include/linux/wait.h 16 | @@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) 17 | 18 | extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 19 | extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 20 | +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 21 | extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 22 | extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 23 | 24 | @@ -1163,6 +1164,7 @@ do { \ 25 | */ 26 | void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 27 | bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 28 | +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 29 | long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 30 | void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 31 | long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); 32 | diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c 33 | index eca38107b32f..0306fa23b4f8 100644 34 | --- a/kernel/sched/wait.c 35 | +++ b/kernel/sched/wait.c 36 | @@ -48,6 +48,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ 37 | } 38 | EXPORT_SYMBOL_GPL(add_wait_queue_priority); 39 | 40 | +void 
add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) 41 | +{ 42 | + unsigned long flags; 43 | + 44 | + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; 45 | + spin_lock_irqsave(&wq_head->lock, flags); 46 | + __add_wait_queue(wq_head, wq_entry); 47 | + spin_unlock_irqrestore(&wq_head->lock, flags); 48 | +} 49 | +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); 50 | + 51 | void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) 52 | { 53 | unsigned long flags; 54 | @@ -290,6 +301,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent 55 | } 56 | EXPORT_SYMBOL(prepare_to_wait_exclusive); 57 | 58 | +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) 59 | +{ 60 | + unsigned long flags; 61 | + 62 | + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; 63 | + spin_lock_irqsave(&wq_head->lock, flags); 64 | + if (list_empty(&wq_entry->entry)) 65 | + __add_wait_queue(wq_head, wq_entry); 66 | + set_current_state(state); 67 | + spin_unlock_irqrestore(&wq_head->lock, flags); 68 | +} 69 | +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); 70 | + 71 | void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) 72 | { 73 | wq_entry->flags = flags; 74 | diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c 75 | index fc2a985f6064..02dc861de3d5 100644 76 | --- a/net/ipv4/inet_connection_sock.c 77 | +++ b/net/ipv4/inet_connection_sock.c 78 | @@ -441,7 +441,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 79 | * having to remove and re-insert us on the wait queue. 
80 | */ 81 | for (;;) { 82 | - prepare_to_wait_exclusive(sk_sleep(sk), &wait, 83 | + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, 84 | TASK_INTERRUPTIBLE); 85 | release_sock(sk); 86 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 87 | -- 88 | https://clearlinux.org 89 | 90 | -------------------------------------------------------------------------------- /0121-locking-rwsem-spin-faster.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Sun, 18 Feb 2018 23:35:41 +0000 4 | Subject: [PATCH] locking: rwsem: spin faster 5 | 6 | tweak rwsem owner spinning a bit 7 | --- 8 | kernel/locking/rwsem.c | 4 +++- 9 | 1 file changed, 3 insertions(+), 1 deletion(-) 10 | 11 | diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c 12 | index 69aba4abe104..b4818ccad5de 100644 13 | --- a/kernel/locking/rwsem.c 14 | +++ b/kernel/locking/rwsem.c 15 | @@ -707,6 +707,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) 16 | struct task_struct *new, *owner; 17 | unsigned long flags, new_flags; 18 | enum owner_state state; 19 | + int i = 0; 20 | 21 | lockdep_assert_preemption_disabled(); 22 | 23 | @@ -743,7 +744,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) 24 | break; 25 | } 26 | 27 | - cpu_relax(); 28 | + if (i++ > 1000) 29 | + cpu_relax(); 30 | } 31 | 32 | return state; 33 | -- 34 | https://clearlinux.org 35 | 36 | -------------------------------------------------------------------------------- /0122-ata-libahci-ignore-staggered-spin-up.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Joe Konno 3 | Date: Tue, 25 Jun 2019 10:35:54 -0700 4 | Subject: [PATCH] ata: libahci: ignore staggered spin-up 5 | 6 | Change libahci to ignore firmware's staggered spin-up flag. 
End-users 7 | who wish to honor firmware's SSS flag can add the following kernel 8 | parameter to a new file at /etc/kernel/cmdline.d/ignore_sss.conf: 9 | libahci.ignore_sss=0 10 | 11 | And then run 12 | sudo clr-boot-manager update 13 | 14 | Signed-off-by: Joe Konno 15 | --- 16 | drivers/ata/libahci.c | 4 ++-- 17 | 1 file changed, 2 insertions(+), 2 deletions(-) 18 | 19 | diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c 20 | index 0ed484e04fd6..bbcb54da7a31 100644 21 | --- a/drivers/ata/libahci.c 22 | +++ b/drivers/ata/libahci.c 23 | @@ -33,14 +33,14 @@ 24 | #include "libata.h" 25 | 26 | static int ahci_skip_host_reset; 27 | -int ahci_ignore_sss; 28 | +int ahci_ignore_sss=1; 29 | EXPORT_SYMBOL_GPL(ahci_ignore_sss); 30 | 31 | module_param_named(skip_host_reset, ahci_skip_host_reset, int, 0444); 32 | MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)"); 33 | 34 | module_param_named(ignore_sss, ahci_ignore_sss, int, 0444); 35 | -MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore)"); 36 | +MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore [default])"); 37 | 38 | static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 39 | unsigned hints); 40 | -- 41 | https://clearlinux.org 42 | 43 | -------------------------------------------------------------------------------- /0123-print-CPU-that-faults.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Sat, 10 Aug 2019 03:19:04 +0000 4 | Subject: [PATCH] print CPU that faults 5 | 6 | print cpu number when we print a crash 7 | --- 8 | arch/x86/mm/fault.c | 4 ++-- 9 | 1 file changed, 2 insertions(+), 2 deletions(-) 10 | 11 | diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c 12 | index d0074c6ed31a..aeeae77fe5dd 100644 13 | --- a/arch/x86/mm/fault.c 14 | 
+++ b/arch/x86/mm/fault.c 15 | @@ -776,9 +776,9 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, 16 | if (!printk_ratelimit()) 17 | return; 18 | 19 | - printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", 20 | + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx cpu %i", 21 | loglvl, tsk->comm, task_pid_nr(tsk), address, 22 | - (void *)regs->ip, (void *)regs->sp, error_code); 23 | + (void *)regs->ip, (void *)regs->sp, error_code, raw_smp_processor_id()); 24 | 25 | print_vma_addr(KERN_CONT " in ", regs->ip); 26 | 27 | -- 28 | https://clearlinux.org 29 | 30 | -------------------------------------------------------------------------------- /0124-x86-microcode-Add-an-option-to-reload-microcode-even.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Ashok Raj 3 | Date: Thu, 19 Aug 2021 14:49:47 -0700 4 | Subject: [PATCH] x86/microcode: Add an option to reload microcode even if 5 | revision is the same 6 | 7 | This is POC to support rollback. This is a simple version, admin uses 8 | echo 2 instead of echo 1 to reload. We don't do the version checks. 9 | 10 | #echo 1 > /sys/devices/system/cpu/microcode/reload 11 | 12 | The following usage, writing 2 to reload file is helpful to reload 13 | the microcode again even if the revision is less than what is loaded. 
14 | 15 | #echo 2 > /sys/devices/system/cpu/microcode/reload 16 | 17 | Signed-off-by: Ashok Raj 18 | --- 19 | arch/x86/kernel/cpu/microcode/core.c | 40 ++++++++++++++++++++++++++- 20 | arch/x86/kernel/cpu/microcode/intel.c | 14 ++++++---- 21 | 2 files changed, 47 insertions(+), 7 deletions(-) 22 | 23 | diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c 24 | index 239ff5fcec6a..b096a43b2b9d 100644 25 | --- a/arch/x86/kernel/cpu/microcode/core.c 26 | +++ b/arch/x86/kernel/cpu/microcode/core.c 27 | @@ -44,6 +44,8 @@ 28 | 29 | static struct microcode_ops *microcode_ops; 30 | static bool dis_ucode_ldr = true; 31 | +bool ucode_rollback = false; 32 | +int enable_rollback = 0; 33 | 34 | bool initrd_gone; 35 | 36 | @@ -80,6 +82,26 @@ static u32 final_levels[] = { 37 | 0, /* T-101 terminator */ 38 | }; 39 | 40 | +static int __init ucode_setup(char *str) 41 | +{ 42 | + if (!str) 43 | + return -EINVAL; 44 | + 45 | + while (*str) { 46 | + if (!strncmp(str, "rollback", 8)) { 47 | + enable_rollback = 1; 48 | + pr_info("Microcode Rollback Enabled\n"); 49 | + } 50 | + str += strcspn(str, ","); 51 | + while (*str == ',') 52 | + str++; 53 | + } 54 | + return 0; 55 | +} 56 | + 57 | +__setup("ucode=", ucode_setup); 58 | + 59 | + 60 | /* 61 | * Check the current patch level on this CPU. 
62 | * 63 | @@ -600,6 +622,7 @@ static ssize_t reload_store(struct device *dev, 64 | struct device_attribute *attr, 65 | const char *buf, size_t size) 66 | { 67 | + struct cpuinfo_x86 *c = &boot_cpu_data; 68 | enum ucode_state tmp_ret = UCODE_OK; 69 | int bsp = boot_cpu_data.cpu_index; 70 | unsigned long val; 71 | @@ -609,7 +632,7 @@ static ssize_t reload_store(struct device *dev, 72 | if (ret) 73 | return ret; 74 | 75 | - if (val != 1) 76 | + if (!val || val > 2) 77 | return size; 78 | 79 | cpus_read_lock(); 80 | @@ -617,6 +640,20 @@ static ssize_t reload_store(struct device *dev, 81 | ret = check_online_cpus(); 82 | if (ret) 83 | goto put; 84 | + /* 85 | + * Check if the vendor is Intel to permit reloading 86 | + * microcode even if the revision is unchanged. 87 | + * This is typically used during development of microcode 88 | + * and changing rev is a pain. 89 | + */ 90 | + if ((val == 2) && ((c->x86_vendor != X86_VENDOR_INTEL) || 91 | + !enable_rollback)) 92 | + return size; 93 | + else if (val == 2) { 94 | + mutex_lock(µcode_mutex); 95 | + ucode_rollback = true; 96 | + mutex_unlock(µcode_mutex); 97 | + } 98 | 99 | tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); 100 | if (tmp_ret != UCODE_NEW) 101 | @@ -627,6 +664,7 @@ static ssize_t reload_store(struct device *dev, 102 | mutex_unlock(µcode_mutex); 103 | 104 | put: 105 | + ucode_rollback = false; 106 | cpus_read_unlock(); 107 | 108 | if (ret == 0) 109 | diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c 110 | index d28a9f8f3fec..02b506f52a13 100644 111 | --- a/arch/x86/kernel/cpu/microcode/intel.c 112 | +++ b/arch/x86/kernel/cpu/microcode/intel.c 113 | @@ -44,6 +44,7 @@ static struct microcode_intel *intel_ucode_patch; 114 | 115 | /* last level cache size per core */ 116 | static int llc_size_per_core; 117 | +extern bool ucode_rollback; 118 | 119 | static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, 120 | unsigned int s2, 
unsigned int p2) 121 | @@ -94,7 +95,7 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev 122 | { 123 | struct microcode_header_intel *mc_hdr = mc; 124 | 125 | - if (mc_hdr->rev <= new_rev) 126 | + if (!ucode_rollback && mc_hdr->rev <= new_rev) 127 | return 0; 128 | 129 | return find_matching_signature(mc, csig, cpf); 130 | @@ -134,7 +135,7 @@ static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigne 131 | if (find_matching_signature(data, sig, pf)) { 132 | prev_found = true; 133 | 134 | - if (mc_hdr->rev <= mc_saved_hdr->rev) 135 | + if (!ucode_rollback && mc_hdr->rev <= mc_saved_hdr->rev) 136 | continue; 137 | 138 | p = memdup_patch(data, size); 139 | @@ -694,7 +695,7 @@ static struct microcode_intel *find_patch(struct ucode_cpu_info *uci) 140 | 141 | phdr = (struct microcode_header_intel *)iter->data; 142 | 143 | - if (phdr->rev <= uci->cpu_sig.rev) 144 | + if (!ucode_rollback && phdr->rev <= uci->cpu_sig.rev) 145 | continue; 146 | 147 | if (!find_matching_signature(phdr, 148 | @@ -779,10 +780,11 @@ static enum ucode_state apply_microcode_intel(int cpu) 149 | * already. 
150 | */ 151 | rev = intel_get_microcode_revision(); 152 | - if (rev >= mc->hdr.rev) { 153 | + if (!ucode_rollback && rev >= mc->hdr.rev) { 154 | ret = UCODE_OK; 155 | goto out; 156 | - } 157 | + } else if (ucode_rollback) 158 | + ret = UCODE_OK; 159 | 160 | /* 161 | * Writeback and invalidate caches before updating microcode to avoid 162 | @@ -801,7 +803,7 @@ static enum ucode_state apply_microcode_intel(int cpu) 163 | return UCODE_ERROR; 164 | } 165 | 166 | - if (bsp && rev != prev_rev) { 167 | + if (bsp && ((rev != prev_rev) || ucode_rollback)) { 168 | pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", 169 | rev, 170 | mc->hdr.date & 0xffff, 171 | -- 172 | https://clearlinux.org 173 | 174 | -------------------------------------------------------------------------------- /0125-nvme-workaround.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 11 Nov 2019 23:12:11 +0000 4 | Subject: [PATCH] nvme workaround 5 | 6 | --- 7 | drivers/nvme/host/core.c | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c 11 | index 0abd772c57f0..9129a2179f25 100644 12 | --- a/drivers/nvme/host/core.c 13 | +++ b/drivers/nvme/host/core.c 14 | @@ -48,7 +48,7 @@ static u8 nvme_max_retries = 5; 15 | module_param_named(max_retries, nvme_max_retries, byte, 0644); 16 | MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); 17 | 18 | -static unsigned long default_ps_max_latency_us = 100000; 19 | +static unsigned long default_ps_max_latency_us = 200; 20 | module_param(default_ps_max_latency_us, ulong, 0644); 21 | MODULE_PARM_DESC(default_ps_max_latency_us, 22 | "max power saving latency for new devices; use PM QOS to change per device"); 23 | -- 24 | https://clearlinux.org 25 | 26 | 
-------------------------------------------------------------------------------- /0126-don-t-report-an-error-if-PowerClamp-run-on-other-CPU.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Alexander Koskovich 3 | Date: Wed, 12 Feb 2020 22:47:12 +0000 4 | Subject: [PATCH] don't report an error if PowerClamp run on other CPU 5 | 6 | --- 7 | drivers/thermal/intel/intel_powerclamp.c | 10 ++++++++++ 8 | 1 file changed, 10 insertions(+) 9 | 10 | diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c 11 | index 14256421d98c..8df2e604ceef 100644 12 | --- a/drivers/thermal/intel/intel_powerclamp.c 13 | +++ b/drivers/thermal/intel/intel_powerclamp.c 14 | @@ -647,6 +647,11 @@ static const struct thermal_cooling_device_ops powerclamp_cooling_ops = { 15 | .set_cur_state = powerclamp_set_cur_state, 16 | }; 17 | 18 | +static const struct x86_cpu_id amd_cpu[] = { 19 | + { X86_VENDOR_AMD }, 20 | + {}, 21 | +}; 22 | + 23 | static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = { 24 | X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL), 25 | {} 26 | @@ -656,6 +661,11 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); 27 | static int __init powerclamp_probe(void) 28 | { 29 | 30 | + if (x86_match_cpu(amd_cpu)){ 31 | + pr_info("Intel PowerClamp does not support AMD CPUs\n"); 32 | + return -ENODEV; 33 | + } 34 | + 35 | if (!x86_match_cpu(intel_powerclamp_ids)) { 36 | pr_err("CPU does not support MWAIT\n"); 37 | return -ENODEV; 38 | -- 39 | https://clearlinux.org 40 | 41 | -------------------------------------------------------------------------------- /0127-lib-raid6-add-patch.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 27 Sep 2021 17:43:01 +0000 
4 | Subject: [PATCH] lib/raid6: add patch 5 | 6 | --- 7 | lib/raid6/algos.c | 4 +++- 8 | 1 file changed, 3 insertions(+), 1 deletion(-) 9 | 10 | diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c 11 | index 39b74221f4a7..ec3eab8cd6b1 100644 12 | --- a/lib/raid6/algos.c 13 | +++ b/lib/raid6/algos.c 14 | @@ -128,8 +128,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) 15 | 16 | for (best = NULL, algo = raid6_recov_algos; *algo; algo++) 17 | if (!best || (*algo)->priority > best->priority) 18 | - if (!(*algo)->valid || (*algo)->valid()) 19 | + if (!(*algo)->valid || (*algo)->valid()) { 20 | best = *algo; 21 | + break; 22 | + } 23 | 24 | if (best) { 25 | raid6_2data_recov = best->data2; 26 | -- 27 | https://clearlinux.org 28 | 29 | -------------------------------------------------------------------------------- /0128-itmt_epb-use-epb-to-scale-itmt.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 16 Nov 2021 17:39:25 +0000 4 | Subject: [PATCH] itmt_epb: use epb to scale itmt 5 | 6 | --- 7 | arch/x86/include/asm/topology.h | 1 + 8 | arch/x86/kernel/cpu/intel_epb.c | 4 ++++ 9 | arch/x86/kernel/itmt.c | 29 ++++++++++++++++++++++++++++- 10 | 3 files changed, 33 insertions(+), 1 deletion(-) 11 | 12 | diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h 13 | index 2f0b6be8eaab..c31f81e2ea05 100644 14 | --- a/arch/x86/include/asm/topology.h 15 | +++ b/arch/x86/include/asm/topology.h 16 | @@ -174,6 +174,7 @@ extern unsigned int __read_mostly sysctl_sched_itmt_enabled; 17 | 18 | /* Interface to set priority of a cpu */ 19 | void sched_set_itmt_core_prio(int prio, int core_cpu); 20 | +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu); 21 | 22 | /* Interface to notify scheduler that system supports ITMT */ 23 | int sched_set_itmt_support(void); 24 | diff --git 
a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c 25 | index fbaf12e43f41..c8c2d6f1a8ac 100644 26 | --- a/arch/x86/kernel/cpu/intel_epb.c 27 | +++ b/arch/x86/kernel/cpu/intel_epb.c 28 | @@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev, 29 | if (ret < 0) 30 | return ret; 31 | 32 | + /* update the ITMT scheduler logic to use the power policy data */ 33 | + /* scale the val up by 2 so the range is 224 - 256 */ 34 | + sched_set_itmt_power_ratio(256 - val * 2, cpu); 35 | + 36 | return count; 37 | } 38 | 39 | diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c 40 | index 57027bfed25f..596fd7fb7847 100644 41 | --- a/arch/x86/kernel/itmt.c 42 | +++ b/arch/x86/kernel/itmt.c 43 | @@ -25,6 +25,7 @@ 44 | 45 | static DEFINE_MUTEX(itmt_update_mutex); 46 | DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 47 | +DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio); 48 | 49 | /* Boolean to track if system has ITMT capabilities */ 50 | static bool __read_mostly sched_itmt_capable; 51 | @@ -169,7 +170,12 @@ void sched_clear_itmt_support(void) 52 | 53 | int arch_asym_cpu_priority(int cpu) 54 | { 55 | - return per_cpu(sched_core_priority, cpu); 56 | + int power_ratio = per_cpu(sched_power_ratio, cpu); 57 | + 58 | + /* a power ratio of 0 (uninitialized) is assumed to be maximum */ 59 | + if (power_ratio == 0) 60 | + power_ratio = 256 - 2 * 6; 61 | + return per_cpu(sched_core_priority, cpu) * power_ratio / 256; 62 | } 63 | 64 | extern int best_core; 65 | @@ -217,3 +223,24 @@ void sched_set_itmt_core_prio(int prio, int core_cpu) 66 | } 67 | } 68 | } 69 | + 70 | +/** 71 | + * sched_set_itmt_power_ratio() - Set CPU priority based on ITMT 72 | + * @power_ratio: The power scaling ratio [1..256] for the core 73 | + * @core_cpu: The cpu number associated with the core 74 | + * 75 | + * Set a scaling to the cpu performance based on long term power 76 | + * settings (like EPB). 
77 | + * 78 | + * Note this is for the policy not for the actual dynamic frequency; 79 | + * the frequency will increase itself as workloads run on a core. 80 | + */ 81 | + 82 | +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu) 83 | +{ 84 | + int cpu; 85 | + 86 | + for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 87 | + per_cpu(sched_power_ratio, cpu) = power_ratio; 88 | + } 89 | +} 90 | -- 91 | https://clearlinux.org 92 | 93 | -------------------------------------------------------------------------------- /0129-mm-wakeups-remove-a-wakeup.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 16 Nov 2021 22:20:49 +0000 4 | Subject: [PATCH] mm: wakeups: remove a wakeup 5 | 6 | --- 7 | mm/compaction.c | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/mm/compaction.c b/mm/compaction.c 11 | index b4e94cda3019..e9a36942c1fa 100644 12 | --- a/mm/compaction.c 13 | +++ b/mm/compaction.c 14 | @@ -53,7 +53,7 @@ static inline void count_compact_events(enum vm_event_item item, long delta) 15 | /* 16 | * Fragmentation score check interval for proactive compaction purposes. 
17 | */ 18 | -static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; 19 | +static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 5000; 20 | 21 | /* 22 | * Page order with-respect-to which proactive compaction 23 | -- 24 | https://clearlinux.org 25 | 26 | -------------------------------------------------------------------------------- /0130-itmt2-ADL-fixes.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Srinivas Pandruvada 3 | Date: Thu, 18 Nov 2021 16:09:47 +0000 4 | Subject: [PATCH] itmt2 ADL fixes 5 | 6 | On systems with overclocking enabled, CPPC Highest Performance can be 7 | hard coded to 0xff. In this case even if we have cores with different 8 | highest performance, ITMT can't be enabled as the current implementation 9 | depends on CPPC Highest Performance. 10 | 11 | On such systems we can use MSR_HWP_CAPABILITIES maximum performance field 12 | when CPPC.Highest Performance is 0xff. 13 | 14 | Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES as 15 | in some older systems CPPC Highest Performance is the only way to identify 16 | different performing cores. 17 | 18 | Signed-off-by: Srinivas Pandruvada 19 | --- 20 | drivers/cpufreq/intel_pstate.c | 7 +++++++ 21 | 1 file changed, 7 insertions(+) 22 | 23 | diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c 24 | index bc7f7e6759bd..ee33ad7f6f28 100644 25 | --- a/drivers/cpufreq/intel_pstate.c 26 | +++ b/drivers/cpufreq/intel_pstate.c 27 | @@ -364,6 +364,13 @@ static void intel_pstate_set_itmt_prio(int cpu) 28 | * update them at any time after it has been called. 29 | */ 30 | sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu); 31 | + /* 32 | + * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. 33 | + * In this case we can't use CPPC.highest_perf to enable ITMT. 
34 | + * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. 35 | + */ 36 | + if (cppc_perf.highest_perf == 0xff) 37 | + cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); 38 | 39 | if (max_highest_perf <= min_highest_perf) { 40 | if (cppc_perf.highest_perf > max_highest_perf) 41 | -- 42 | https://clearlinux.org 43 | 44 | -------------------------------------------------------------------------------- /0131-add-a-per-cpu-minimum-high-watermark-an-tune-batch-s.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 23 Nov 2021 17:38:50 +0000 4 | Subject: [PATCH] add a per cpu minimum high watermark an tune batch size 5 | 6 | make sure there's at least 1024 per cpu pages... a reasonably small 7 | amount for todays system 8 | --- 9 | mm/page_alloc.c | 5 +++-- 10 | 1 file changed, 3 insertions(+), 2 deletions(-) 11 | 12 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c 13 | index e6f211dcf82e..0ea48434ac7d 100644 14 | --- a/mm/page_alloc.c 15 | +++ b/mm/page_alloc.c 16 | @@ -6836,11 +6836,11 @@ static int zone_batchsize(struct zone *zone) 17 | 18 | /* 19 | * The number of pages to batch allocate is either ~0.1% 20 | - * of the zone or 1MB, whichever is smaller. The batch 21 | + * of the zone or 4MB, whichever is smaller. The batch 22 | * size is striking a balance between allocation latency 23 | * and zone lock contention. 
24 | */ 25 | - batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE); 26 | + batch = min(zone_managed_pages(zone) >> 10, 4 * SZ_1M / PAGE_SIZE); 27 | batch /= 4; /* We effectively *= 4 below */ 28 | if (batch < 1) 29 | batch = 1; 30 | -- 31 | https://clearlinux.org 32 | 33 | -------------------------------------------------------------------------------- /0133-novector.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Fri, 15 Apr 2022 00:07:38 +0000 4 | Subject: [PATCH] novector 5 | 6 | gcc12/build workarounds 7 | 8 | --- 9 | arch/x86/Makefile | 2 +- 10 | 1 file changed, 1 insertion(+), 1 deletion(-) 11 | 12 | --- linux-6.9.1/arch/x86/Makefile~ 2024-05-17 10:18:09.000000000 +0000 13 | +++ linux-6.9.1/arch/x86/Makefile 2024-05-17 18:54:31.463961414 +0000 14 | @@ -70,7 +70,7 @@ 15 | # 16 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 17 | # 18 | -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx 19 | +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -O3 -fno-tree-vectorize -march=westmere -mpopcnt -fivopts -fmodulo-sched 20 | KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json 21 | KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 22 | 23 | -------------------------------------------------------------------------------- /0134-md-raid6-algorithms-scale-test-duration-for-speedier.patch: -------------------------------------------------------------------------------- 1 | From 1848e77c8d0356181344a7481f31eea42bf97f9e Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Fri, 28 Apr 2023 17:01:35 +0100 4 | Subject: [PATCH] md/raid6 algorithms: scale test duration for speedier boots 5 | 6 | Instead of using jiffies and waiting for jiffies to wrap before 7 | measuring use the higher precision local_time for 
benchmarking. 8 | Measure 2500 loops, which works out to be accurate enough for 9 | benchmarking the raid algo data rates. Also add division by zero 10 | checking in case timing measurements are bogus. 11 | 12 | Speeds up raid benchmarking from 48,000 usecs to 4000 usecs, saving 13 | 0.044 seconds on boot. 14 | 15 | Signed-off-by: Colin Ian King 16 | --- 17 | lib/raid6/algos.c | 53 ++++++++++++++++++++--------------------------- 18 | 1 file changed, 22 insertions(+), 31 deletions(-) 19 | 20 | diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c 21 | index a22a05c9af8a..e2ba261394f6 100644 22 | --- a/lib/raid6/algos.c 23 | +++ b/lib/raid6/algos.c 24 | @@ -18,6 +18,8 @@ 25 | #else 26 | #include 27 | #include 28 | +#include 29 | + 30 | /* In .bss so it's zeroed */ 31 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 32 | EXPORT_SYMBOL(raid6_empty_zero_page); 33 | @@ -143,12 +145,15 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) 34 | static inline const struct raid6_calls *raid6_choose_gen( 35 | void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) 36 | { 37 | - unsigned long perf, bestgenperf, j0, j1; 38 | + unsigned long perf; 39 | + const unsigned long max_perf = 2500; 40 | int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ 41 | const struct raid6_calls *const *algo; 42 | const struct raid6_calls *best; 43 | + const u64 ns_per_mb = 1000000000 >> 20; 44 | + u64 n, ns, t, ns_best = ~0ULL; 45 | 46 | - for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { 47 | + for (best = NULL, algo = raid6_algos; *algo; algo++) { 48 | if (!best || (*algo)->priority >= best->priority) { 49 | if ((*algo)->valid && !(*algo)->valid()) 50 | continue; 51 | @@ -158,26 +163,20 @@ static inline const struct raid6_calls *raid6_choose_gen( 52 | break; 53 | } 54 | 55 | - perf = 0; 56 | - 57 | preempt_disable(); 58 | - j0 = jiffies; 59 | - while ((j1 = jiffies) == j0) 60 | - cpu_relax(); 61 
| - while (time_before(jiffies, 62 | - j1 + (1<gen_syndrome(disks, PAGE_SIZE, *dptrs); 66 | - perf++; 67 | } 68 | + ns = local_clock() - t; 69 | preempt_enable(); 70 | 71 | - if (perf > bestgenperf) { 72 | - bestgenperf = perf; 73 | + if (ns < ns_best) { 74 | + ns_best = ns; 75 | best = *algo; 76 | } 77 | - pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, 78 | - (perf * HZ * (disks-2)) >> 79 | - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); 80 | + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); 81 | + pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns); 82 | } 83 | } 84 | 85 | @@ -194,31 +193,23 @@ static inline const struct raid6_calls *raid6_choose_gen( 86 | goto out; 87 | } 88 | 89 | - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", 90 | - best->name, 91 | - (bestgenperf * HZ * (disks - 2)) >> 92 | - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); 93 | + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); 94 | + pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n", 95 | + best->name, (ns_best > 0) ? n / ns_best : 0, ns_best); 96 | 97 | if (best->xor_syndrome) { 98 | - perf = 0; 99 | - 100 | preempt_disable(); 101 | - j0 = jiffies; 102 | - while ((j1 = jiffies) == j0) 103 | - cpu_relax(); 104 | - while (time_before(jiffies, 105 | - j1 + (1 << RAID6_TIME_JIFFIES_LG2))) { 106 | + t = local_clock(); 107 | + for (perf = 0; perf < max_perf; perf++) { 108 | best->xor_syndrome(disks, start, stop, 109 | PAGE_SIZE, *dptrs); 110 | - perf++; 111 | } 112 | + ns = local_clock() - t; 113 | preempt_enable(); 114 | 115 | - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", 116 | - (perf * HZ * (disks - 2)) >> 117 | - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); 118 | + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); 119 | + pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? 
n / ns : 0, ns); 120 | } 121 | - 122 | out: 123 | return best; 124 | } 125 | -- 126 | 2.42.0 127 | 128 | -------------------------------------------------------------------------------- /0135-initcall-only-print-non-zero-initcall-debug-to-speed.patch: -------------------------------------------------------------------------------- 1 | From 496f1bc8c53f359a2fe07204d3c5ffdba963994e Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Fri, 20 Jan 2023 11:16:42 +0000 4 | Subject: [PATCH] initcall: only print non-zero initcall debug to speed up boot 5 | 6 | Printing initcall timings that successfully return after 0 usecs 7 | provides not much useful information and takes a small amount of time 8 | to do so. Disable the initcall timings for these specific cases. On 9 | an Alderlake i9-12900 this reduces kernel boot time by 0.67% (timed 10 | up to the invocation of systemd starting) based on 10 boot measurements. 11 | 12 | Signed-off-by: Colin Ian King 13 | --- 14 | init/main.c | 7 +++++-- 15 | 1 file changed, 5 insertions(+), 2 deletions(-) 16 | 17 | diff --git a/init/main.c b/init/main.c 18 | index aa21add5f7c5..715d57f58895 100644 19 | --- a/init/main.c 20 | +++ b/init/main.c 21 | @@ -1254,10 +1254,13 @@ static __init_or_module void 22 | trace_initcall_finish_cb(void *data, initcall_t fn, int ret) 23 | { 24 | ktime_t rettime, *calltime = data; 25 | + long long delta; 26 | 27 | rettime = ktime_get(); 28 | - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", 29 | - fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); 30 | + delta = ktime_us_delta(rettime, *calltime); 31 | + if (ret || delta) 32 | + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", 33 | + fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); 34 | } 35 | 36 | static ktime_t initcall_calltime; 37 | -- 38 | 2.39.1 39 | 40 | -------------------------------------------------------------------------------- 
/0136-crypto-kdf-make-the-module-init-call-a-late-init-cal.patch: -------------------------------------------------------------------------------- 1 | From 0362ef59fd2c23816de8330e5ebeb2f66a4808c9 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Mon, 6 Mar 2023 12:25:29 +0000 4 | Subject: [PATCH] crypto: kdf: make the module init call a late init call 5 | 6 | Signed-off-by: Colin Ian King 7 | --- 8 | crypto/kdf_sp800108.c | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c 12 | index c3f9938e1ad2..e77478e064d8 100644 13 | --- a/crypto/kdf_sp800108.c 14 | +++ b/crypto/kdf_sp800108.c 15 | @@ -149,7 +149,7 @@ static int __init crypto_kdf108_init(void) 16 | 17 | static void __exit crypto_kdf108_exit(void) { } 18 | 19 | -module_init(crypto_kdf108_init); 20 | +late_initcall(crypto_kdf108_init); 21 | module_exit(crypto_kdf108_exit); 22 | 23 | MODULE_LICENSE("GPL v2"); 24 | -- 25 | 2.39.2 26 | 27 | -------------------------------------------------------------------------------- /0149-select-do_pollfd-add-unlikely-branch-hint-return-pat.patch: -------------------------------------------------------------------------------- 1 | From 5730609ffd7e558e1e3305d0c6839044e8f6591b Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 9 Apr 2025 16:55:10 +0100 4 | Subject: [PATCH] select: do_pollfd: add unlikely branch hint return path 5 | Content-Type: text/plain; charset="utf-8" 6 | Content-Transfer-Encoding: 8bit 7 | 8 | Adding an unlikely() hint on the fd < 0 comparison return path improves 9 | run-time performance of the poll() system call. gcov based coverage 10 | analysis based on running stress-ng and a kernel build shows that this 11 | path return path is highly unlikely. 
12 | 13 | Benchmarking on an Debian based Intel(R) Core(TM) Ultra 9 285K with 14 | a 6.15-rc1 kernel and a poll of 1024 file descriptors with zero timeout 15 | shows an call reduction from 32818 ns down to 32635 ns, which is a ~0.5% 16 | performance improvement. 17 | 18 | Results based on running 25 tests with turbo disabled (to reduce clock 19 | freq turbo changes), with 30 second run per test and comparing the number 20 | of poll() calls per second. The % standard deviation of the 25 tests 21 | was 0.08%, so results are reliable. 22 | 23 | Signed-off-by: Colin Ian King 24 | Link: https://lore.kernel.org/20250409155510.577490-1-colin.i.king@gmail.com 25 | Signed-off-by: Christian Brauner 26 | --- 27 | fs/select.c | 2 +- 28 | 1 file changed, 1 insertion(+), 1 deletion(-) 29 | 30 | diff --git a/fs/select.c b/fs/select.c 31 | index 7da531b1cf6b..0eaf3522abe9 100644 32 | --- a/fs/select.c 33 | +++ b/fs/select.c 34 | @@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, 35 | int fd = pollfd->fd; 36 | __poll_t mask, filter; 37 | 38 | - if (fd < 0) 39 | + if (unlikely(fd < 0)) 40 | return 0; 41 | 42 | CLASS(fd, f)(fd); 43 | -- 44 | 2.49.0 45 | 46 | -------------------------------------------------------------------------------- /0150-select-core_sys_select-add-unlikely-branch-hint-on-r.patch: -------------------------------------------------------------------------------- 1 | From 20a4684d124787c865c06c2bd36d6f938fa5e563 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Mon, 14 Apr 2025 09:45:43 +0100 4 | Subject: [PATCH] select: core_sys_select add unlikely branch hint on return 5 | path 6 | Content-Type: text/plain; charset="utf-8" 7 | Content-Transfer-Encoding: 8bit 8 | 9 | Adding an unlikely() hint on the n < 0 comparison return path improves 10 | run-time performance of the select() system call, the negative 11 | value of n is very uncommon in normal select usage. 
12 | 13 | Benchmarking on an Debian based Intel(R) Core(TM) Ultra 9 285K with 14 | a 6.15-rc1 kernel built with 14.2.0 using a select of 1000 file 15 | descriptors with zero timeout shows a consistent call reduction from 16 | 258 ns down to 254 ns, which is a ~1.5% performance improvement. 17 | 18 | Results based on running 25 tests with turbo disabled (to reduce clock 19 | freq turbo changes), with 30 second run per test and comparing the number 20 | of select() calls per second. The % standard deviation of the 25 tests 21 | was 0.24%, so results are reliable. 22 | 23 | Signed-off-by: Colin Ian King 24 | --- 25 | fs/select.c | 2 +- 26 | 1 file changed, 1 insertion(+), 1 deletion(-) 27 | 28 | diff --git a/fs/select.c b/fs/select.c 29 | index 0eaf3522abe9..9fb650d03d52 100644 30 | --- a/fs/select.c 31 | +++ b/fs/select.c 32 | @@ -630,7 +630,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 33 | long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; 34 | 35 | ret = -EINVAL; 36 | - if (n < 0) 37 | + if (unlikely(n < 0)) 38 | goto out_nofds; 39 | 40 | /* max_fds can increase, so grab it once to avoid race */ 41 | -- 42 | 2.49.0 43 | 44 | -------------------------------------------------------------------------------- /0158-clocksource-only-perform-extended-clocksource-checks.patch: -------------------------------------------------------------------------------- 1 | From a9b2afb45dbf18398c22d9504402dc1258859bec Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 3 May 2023 17:31:05 +0100 4 | Subject: [PATCH] clocksource: only perform extended clocksource checks for AMD 5 | systems 6 | 7 | Signed-off-by: Colin Ian King 8 | --- 9 | drivers/clocksource/acpi_pm.c | 7 +++++-- 10 | 1 file changed, 5 insertions(+), 2 deletions(-) 11 | 12 | diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c 13 | index 82338773602c..d84f0e29452e 100644 14 | --- a/drivers/clocksource/acpi_pm.c 15 | +++ b/drivers/clocksource/acpi_pm.c 16 | @@ 
-176,13 +176,16 @@ static int verify_pmtmr_rate(void) 17 | static int __init init_acpi_pm_clocksource(void) 18 | { 19 | u64 value1, value2; 20 | - unsigned int i, j = 0; 21 | + unsigned int i, j = 0, checks = 1; 22 | 23 | if (!pmtmr_ioport) 24 | return -ENODEV; 25 | 26 | + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 27 | + checks = ACPI_PM_MONOTONICITY_CHECKS; 28 | + 29 | /* "verify" this timing source: */ 30 | - for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) { 31 | + for (j = 0; j < checks; j++) { 32 | udelay(100 * j); 33 | value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm); 34 | for (i = 0; i < ACPI_PM_READ_CHECKS; i++) { 35 | -- 36 | 2.40.1 37 | 38 | -------------------------------------------------------------------------------- /0161-ACPI-align-slab-buffers-for-improved-memory-performa.patch: -------------------------------------------------------------------------------- 1 | From 9ed82ddb051444a60afcd85fde2c22c8e72ba943 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 27 Jun 2023 14:12:27 +0100 4 | Subject: [PATCH] ACPI: align slab for improved memory performance 5 | 6 | Enabling SLAB_HWCACHE_ALIGN for the ACPI object caches improves 7 | boot speed in the ACPICA core for object allocation and free'ing 8 | especially in the AML parsing and execution phases in boot. Testing 9 | with 100 boots shows an average boot saving in acpi_init of ~35000 10 | usecs compared to the unaligned version. Most of the ACPI objects 11 | being allocated and free'd are of very short life times in the 12 | critical paths for parsing and execution, so the extra memory used 13 | for alignment isn't too onerous. 
14 | 15 | Signed-off-by: Colin Ian King 16 | --- 17 | drivers/acpi/osl.c | 2 +- 18 | 1 file changed, 1 insertion(+), 1 deletion(-) 19 | 20 | diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c 21 | index 3269a888fb7a..72b2a750c258 100644 22 | --- a/drivers/acpi/osl.c 23 | +++ b/drivers/acpi/osl.c 24 | @@ -1556,7 +1556,7 @@ void acpi_os_release_lock(acpi_spinlock lockp, acpi_cpu_flags flags) 25 | acpi_status 26 | acpi_os_create_cache(char *name, u16 size, u16 depth, acpi_cache_t ** cache) 27 | { 28 | - *cache = kmem_cache_create(name, size, 0, 0, NULL); 29 | + *cache = kmem_cache_create(name, size, 0, SLAB_HWCACHE_ALIGN, NULL); 30 | if (*cache == NULL) 31 | return AE_ERROR; 32 | else 33 | -- 34 | 2.41.0 35 | 36 | -------------------------------------------------------------------------------- /0162-extra-optmization-flags.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 3 Aug 2023 16:52:11 +0100 4 | Subject: [PATCH] extra optmization flags 5 | 6 | Enable 2 extra optimimization flags: 7 | -fivopts 8 | Perform induction variable optimizations (strength reduction, 9 | induction variable merging and induction variable elimination) 10 | on trees. 11 | -fmodulo-sched 12 | Perform swing modulo scheduling immediately before the first 13 | scheduling pass. This pass looks at innermost loops and reorders 14 | their instructions by overlapping different iterations. 
15 | 16 | stress-ng microbenchmark improvements (average from 10 x 1min runs) on 17 | Alderlake with gcc 13.2.0: 18 | af-alg: 1.0% (kernel AF_ALG crypto) 19 | pipe: 1.5% (pipe + context switch) 20 | fork: 4.4% (process fork/exit) 21 | mmap: 3.0% (memory mapping) 22 | switch: 4.9% (context switching) 23 | 24 | KVM QEMU bootspeed improvements (based on average of 100 boots): 0.5% 25 | 26 | --- 27 | arch/x86/Makefile | 2 +- 28 | 1 file changed, 1 insertion(+), 1 deletion(-) 29 | 30 | diff --git a/arch/x86/Makefile b/arch/x86/Makefile 31 | index 415a5d138de4..6b91d1306a7c 100644 32 | --- a/arch/x86/Makefile 33 | +++ b/arch/x86/Makefile 34 | @@ -67,7 +67,7 @@ export BITS 35 | # 36 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 37 | # 38 | -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -O3 -fno-tree-vectorize -march=westmere -mpopcnt 39 | +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -O3 -fno-tree-vectorize -march=westmere -mpopcnt -fivopts -fmodulo-sched 40 | KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 41 | 42 | ifeq ($(CONFIG_X86_KERNEL_IBT),y) 43 | -- 44 | https://clearlinux.org 45 | 46 | -------------------------------------------------------------------------------- /0163-thermal-intel-powerclamp-check-MWAIT-first-use-pr_wa.patch: -------------------------------------------------------------------------------- 1 | From b323e51b3c5b536c6947541b02a9b5cdc7422343 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 19 Sep 2023 14:16:21 +0100 4 | Subject: [PATCH] thermal: intel: powerclamp: check MWAIT first, use pr_warn 5 | insteal of pr_err 6 | 7 | For x86 targets it's more pertinant to check for lack of MWAIT than AMD 8 | specific cpus, so swap the order of tests. Also make the pr_err a 9 | pr_warn to align with other ENODEV warning messages. 
10 | 11 | Signed-off-by: Colin Ian King 12 | --- 13 | drivers/thermal/intel/intel_powerclamp.c | 9 ++++----- 14 | 1 file changed, 4 insertions(+), 5 deletions(-) 15 | 16 | diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c 17 | index 4419ad0a3d4a..c787c1d2390f 100644 18 | --- a/drivers/thermal/intel/intel_powerclamp.c 19 | +++ b/drivers/thermal/intel/intel_powerclamp.c 20 | @@ -752,14 +752,13 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); 21 | 22 | static int __init powerclamp_probe(void) 23 | { 24 | - 25 | - if (x86_match_cpu(amd_cpu)){ 26 | - pr_info("Intel PowerClamp does not support AMD CPUs\n"); 27 | + if (!x86_match_cpu(intel_powerclamp_ids)) { 28 | + pr_info("CPU does not support MWAIT\n"); 29 | return -ENODEV; 30 | } 31 | 32 | - if (!x86_match_cpu(intel_powerclamp_ids)) { 33 | - pr_err("CPU does not support MWAIT\n"); 34 | + if (x86_match_cpu(amd_cpu)){ 35 | + pr_info("Intel PowerClamp does not support AMD CPUs\n"); 36 | return -ENODEV; 37 | } 38 | 39 | -- 40 | 2.42.0 41 | 42 | -------------------------------------------------------------------------------- /0164-KVM-VMX-make-vmx-init-a-late-init-call-to-get-to-ini.patch: -------------------------------------------------------------------------------- 1 | From 4e6585f34be8b87fe5258233aaa8c002ab561897 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 10 Oct 2023 12:41:00 +0100 4 | Subject: [PATCH] KVM: VMX: make vmx_init a late init call to get to init process faster 5 | 6 | Making vmx_init a late initcall improves QEMU kernel boot times to 7 | get to the init process. Average of 100 boots, QEMU boot average 8 | reduced from 0.776 seconds to 0.622 seconds (~19.8% faster) on 9 | Alderlake i9-12900 and ~0.5% faster for non-QEMU UEFI boots. 
10 | 11 | Signed-off-by: Colin Ian King 12 | --- 13 | arch/x86/kvm/vmx/vmx.c | 2 +- 14 | 1 file changed, 1 insertion(+), 1 deletion(-) 15 | 16 | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c 17 | index bc6f0fea48b4..e671fbe70d5a 100644 18 | --- a/arch/x86/kvm/vmx/vmx.c 19 | +++ b/arch/x86/kvm/vmx/vmx.c 20 | @@ -8702,4 +8702,4 @@ static int __init vmx_init(void) 21 | kvm_x86_vendor_exit(); 22 | return r; 23 | } 24 | -module_init(vmx_init); 25 | +late_initcall(vmx_init); 26 | -- 27 | 2.42.0 28 | 29 | -------------------------------------------------------------------------------- /0166-sched-fair-remove-upper-limit-on-cpu-number.patch: -------------------------------------------------------------------------------- 1 | From 362f86369d1930ad177acaa47225d24d26b02c8d Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 14 Nov 2023 13:29:45 +0000 4 | Subject: [PATCH] sched/fair: remove upper limit on cpu number 5 | 6 | Signed-off-by: Colin Ian King 7 | --- 8 | kernel/sched/fair.c | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 12 | index 2048138ce54b..903ead0afacb 100644 13 | --- a/kernel/sched/fair.c 14 | +++ b/kernel/sched/fair.c 15 | @@ -197,7 +197,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) 16 | */ 17 | static unsigned int get_update_sysctl_factor(void) 18 | { 19 | - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); 20 | + unsigned int cpus = num_online_cpus(); 21 | unsigned int factor; 22 | 23 | switch (sysctl_sched_tunable_scaling) { 24 | -- 25 | 2.42.1 26 | 27 | -------------------------------------------------------------------------------- /0167-net-sock-increase-default-number-of-_SK_MEM_PACKETS-.patch: -------------------------------------------------------------------------------- 1 | From 4ba5a01513a6b3487613e7186cac4f3f2f4c5091 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 24 Apr 2024 
16:45:47 +0100 4 | Subject: [PATCH] net: sock: increase default number of _SK_MEM_PACKETS to 1024 5 | 6 | scale these by a factor of 4 to improve socket performance 7 | 8 | Signed-off-by: Colin Ian King 9 | --- 10 | include/net/sock.h | 2 +- 11 | 1 file changed, 1 insertion(+), 1 deletion(-) 12 | 13 | diff --git a/include/net/sock.h b/include/net/sock.h 14 | index 54ca8dcbfb43..9adc51e8085b 100644 15 | --- a/include/net/sock.h 16 | +++ b/include/net/sock.h 17 | @@ -2903,7 +2903,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo); 18 | * platforms. This makes socket queueing behavior and performance 19 | * not depend upon such differences. 20 | */ 21 | -#define _SK_MEM_PACKETS 256 22 | +#define _SK_MEM_PACKETS 1024 23 | #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) 24 | #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 25 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 26 | -- 27 | 2.44.0 28 | 29 | -------------------------------------------------------------------------------- /0169-mm-mincore-improve-performance-by-adding-an-unlikely.patch: -------------------------------------------------------------------------------- 1 | From d2d33210a595ce1b1ec9ca94edc8bd40af0f66c5 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 19 Feb 2025 08:36:07 +0000 4 | Subject: [PATCH] mm/mincore: improve performance by adding an unlikely hint 5 | 6 | Adding an unlikely() hint on the masked start comparison error return path 7 | improves run-time performance of the mincore system call. 8 | 9 | Benchmarking on an i9-12900 shows an improvement of 7ns on mincore calls 10 | on a 256KB mmap'd region where 50% of the pages we resident. Improvement 11 | was from ~970 ns down to 963 ns, so a small ~0.7% improvement. 12 | 13 | Results based on running 20 tests with turbo disabled (to reduce clock 14 | freq turbo changes), with 10 second run per test and comparing the number 15 | of mincores calls per second. 
The % standard deviation of the 20 tests 16 | was ~0.10%, so results are reliable. 17 | 18 | Link: https://lkml.kernel.org/r/20250219083607.5183-1-colin.i.king@gmail.com 19 | Signed-off-by: Colin Ian King 20 | Cc: Matthew Wilcow 21 | Signed-off-by: Andrew Morton 22 | --- 23 | mm/mincore.c | 2 +- 24 | 1 file changed, 1 insertion(+), 1 deletion(-) 25 | 26 | diff --git a/mm/mincore.c b/mm/mincore.c 27 | index d6bd19e520fc..832f29f46767 100644 28 | --- a/mm/mincore.c 29 | +++ b/mm/mincore.c 30 | @@ -239,7 +239,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, 31 | start = untagged_addr(start); 32 | 33 | /* Check the start address: needs to be page-aligned.. */ 34 | - if (start & ~PAGE_MASK) 35 | + if (unlikely(start & ~PAGE_MASK)) 36 | return -EINVAL; 37 | 38 | /* ..and we need to be passed a valid user-space range */ 39 | -- 40 | 2.48.1 41 | 42 | -------------------------------------------------------------------------------- /0170-sched-Add-unlikey-branch-hints-to-several-system-cal.patch: -------------------------------------------------------------------------------- 1 | From 1a5d3492f8e14719184945893c610e0802c05533 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 19 Feb 2025 14:24:23 +0000 4 | Subject: [PATCH] sched: Add unlikey branch hints to several system calls 5 | 6 | Adding an unlikely() hint on early error return paths improves the 7 | run-time performance of several sched related system calls. 
8 | 9 | Benchmarking on an i9-12900 shows the following per system call 10 | performance improvements: 11 | 12 | before after improvement 13 | sched_getattr 182.4ns 170.6ns ~6.5% 14 | sched_setattr 284.3ns 267.6ns ~5.9% 15 | sched_getparam 161.6ns 148.1ns ~8.4% 16 | sched_setparam 1265.4ns 1227.6ns ~3.0% 17 | sched_getscheduler 129.4ns 118.2ns ~8.7% 18 | sched_setscheduler 1237.3ns 1216.7ns ~1.7% 19 | 20 | Results are based on running 20 tests with turbo disabled (to reduce 21 | clock freq turbo changes), with 10 second run per test based on the 22 | number of system calls per second. The % standard deviation of the 23 | measurements for the 20 tests was 0.05% to 0.40%, so the results are 24 | reliable. 25 | 26 | Tested on kernel build with gcc 14.2.1 27 | 28 | Signed-off-by: Colin Ian King 29 | Signed-off-by: Peter Zijlstra (Intel) 30 | Link: https://lkml.kernel.org/r/20250219142423.45516-1-colin.i.king@gmail.com 31 | --- 32 | kernel/sched/syscalls.c | 10 +++++----- 33 | 1 file changed, 5 insertions(+), 5 deletions(-) 34 | 35 | diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c 36 | index 456d339be98f..9f40348f1dc7 100644 37 | --- a/kernel/sched/syscalls.c 38 | +++ b/kernel/sched/syscalls.c 39 | @@ -875,7 +875,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 40 | { 41 | struct sched_param lparam; 42 | 43 | - if (!param || pid < 0) 44 | + if (unlikely(!param || pid < 0)) 45 | return -EINVAL; 46 | if (copy_from_user(&lparam, param, sizeof(struct sched_param))) 47 | return -EFAULT; 48 | @@ -984,7 +984,7 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, 49 | struct sched_attr attr; 50 | int retval; 51 | 52 | - if (!uattr || pid < 0 || flags) 53 | + if (unlikely(!uattr || pid < 0 || flags)) 54 | return -EINVAL; 55 | 56 | retval = sched_copy_attr(uattr, &attr); 57 | @@ -1049,7 +1049,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) 58 | struct task_struct *p; 
59 | int retval; 60 | 61 | - if (!param || pid < 0) 62 | + if (unlikely(!param || pid < 0)) 63 | return -EINVAL; 64 | 65 | scoped_guard (rcu) { 66 | @@ -1085,8 +1085,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, 67 | struct task_struct *p; 68 | int retval; 69 | 70 | - if (!uattr || pid < 0 || usize > PAGE_SIZE || 71 | - usize < SCHED_ATTR_SIZE_VER0 || flags) 72 | + if (unlikely(!uattr || pid < 0 || usize > PAGE_SIZE || 73 | + usize < SCHED_ATTR_SIZE_VER0 || flags)) 74 | return -EINVAL; 75 | 76 | scoped_guard (rcu) { 77 | -- 78 | 2.48.1 79 | 80 | -------------------------------------------------------------------------------- /0171-kcmp-improve-performance-adding-an-unlikely-hint-to-.patch: -------------------------------------------------------------------------------- 1 | From d1c735d44c12544cea9b04ca88d65c12892c0539 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 13 Feb 2025 16:39:16 +0000 4 | Subject: [PATCH] kcmp: improve performance adding an unlikely hint to task 5 | comparisons 6 | 7 | Adding an unlikely() hint on task comparisons on an unlikely error 8 | return path improves run-time performance of the kcmp system call. 9 | 10 | Benchmarking on an i9-12900 shows an improvement of ~5.5% on kcmp(). 11 | Results based on running 20 tests with turbo disabled (to reduce 12 | clock freq turbo changes), with 10 second run per test and comparing 13 | the number of kcmp calls per second. The % Standard deviation of 20 14 | tests was ~0.25%, results are reliable. 
15 | 16 | Signed-off-by: Colin Ian King 17 | Link: https://lore.kernel.org/r/20250213163916.709392-1-colin.i.king@gmail.com 18 | Signed-off-by: Christian Brauner 19 | --- 20 | kernel/kcmp.c | 2 +- 21 | 1 file changed, 1 insertion(+), 1 deletion(-) 22 | 23 | diff --git a/kernel/kcmp.c b/kernel/kcmp.c 24 | index 2c596851f8a9..7c1a65bd5f8d 100644 25 | --- a/kernel/kcmp.c 26 | +++ b/kernel/kcmp.c 27 | @@ -145,7 +145,7 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, 28 | */ 29 | task1 = find_task_by_vpid(pid1); 30 | task2 = find_task_by_vpid(pid2); 31 | - if (!task1 || !task2) 32 | + if (unlikely(!task1 || !task2)) 33 | goto err_no_task; 34 | 35 | get_task_struct(task1); 36 | -- 37 | 2.48.1 38 | 39 | -------------------------------------------------------------------------------- /0174-memcg-increase-MEMCG_CHARGE_BATCH-to-128.patch: -------------------------------------------------------------------------------- 1 | From 0841fd2c59e7d4c4dc55bbdc4e69d08db775df68 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 22 May 2025 15:23:07 +0100 4 | Subject: [PATCH] memcg: increase MEMCG_CHARGE_BATCH to 128 5 | 6 | MEMCG_CHARGE_BATCH was last changed to 64 back in 2022, systems have 7 | grown in memory and speed and it's useful to increase this to 128. 8 | 9 | Benchmarking the stress-ng mmap stressor shows a performance improvement 10 | of ~7.4% and malloc stressor by 2.8%, tested on an Ultra 9 285K with 11 | turbo disabled to avoid test result jitter. 12 | 13 | Signed-off-by: Colin Ian King 14 | --- 15 | include/linux/memcontrol.h | 2 +- 16 | 1 file changed, 1 insertion(+), 1 deletion(-) 17 | 18 | diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h 19 | index 6e74b8254d9b..a47c977bea18 100644 20 | --- a/include/linux/memcontrol.h 21 | +++ b/include/linux/memcontrol.h 22 | @@ -316,7 +316,7 @@ struct mem_cgroup { 23 | * TODO: maybe necessary to use big numbers in big irons or dynamic based of the 24 | * workload. 
25 | */ 26 | -#define MEMCG_CHARGE_BATCH 64U 27 | +#define MEMCG_CHARGE_BATCH 128U 28 | 29 | extern struct mem_cgroup *root_mem_cgroup; 30 | 31 | -- 32 | 2.49.0 33 | 34 | -------------------------------------------------------------------------------- /0175-readdir-add-unlikely-hint-on-len-check.patch: -------------------------------------------------------------------------------- 1 | From 09c259399447ca0d828c65946b7d938e4692d593 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 27 May 2025 15:12:58 +0100 4 | Subject: [PATCH] readdir: add unlikely hint on len check 5 | Content-Type: text/plain; charset="utf-8" 6 | Content-Transfer-Encoding: 8bit 7 | 8 | Currently the out of bounds check for the length is very unlikely 9 | to be false for valid name strings. Analysis with gcov coverage show 10 | this to be so. 11 | 12 | Add an unlikely hint on the error return path check. This improves 13 | performance when testing with single instance stress-ng dentry and 14 | dirent stressors. Tested with a 6.15 kernel, built with gcc 14.2.0 15 | on a Debian Ultra 9 285K system with turbo disabled to reduce test 16 | jitter on tmpfs. Each test case was run 25 times and the % standard 17 | deviation was less than 0.4%. 
Geometric mean of 25 results show the 18 | following stress-ng bogo-ops performance improvments: 19 | 20 | getdent: 1.1% 21 | dentry: 0.9% 22 | 23 | Signed-off-by: Colin Ian King 24 | --- 25 | fs/readdir.c | 2 +- 26 | 1 file changed, 1 insertion(+), 1 deletion(-) 27 | 28 | diff --git a/fs/readdir.c b/fs/readdir.c 29 | index 7764b8638978..c501155ed99a 100644 30 | --- a/fs/readdir.c 31 | +++ b/fs/readdir.c 32 | @@ -147,7 +147,7 @@ EXPORT_SYMBOL(iterate_dir); 33 | */ 34 | static int verify_dirent_name(const char *name, int len) 35 | { 36 | - if (len <= 0 || len >= PATH_MAX) 37 | + if (unlikely(len <= 0 || len >= PATH_MAX)) 38 | return -EIO; 39 | if (memchr(name, '/', len)) 40 | return -EIO; 41 | -- 42 | 2.49.0 43 | 44 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PKG_NAME := linux 2 | 3 | include ../common/Makefile.common 4 | -------------------------------------------------------------------------------- /Makefile.custom: -------------------------------------------------------------------------------- 1 | MOCK_COMMON_ARGS = -n --result=results/ --no-cleanup-after --uniqueext=$(PKG_NAME) 2 | 3 | #help oldconfig: Uses the current config file as input to `make oldconfig` and 4 | #help applies the results to the local copy. 
5 | oldconfig: $(SRPMFILE) config 6 | $(MOCK) $(MOCK_COMMON_ARGS) --installdeps $(SRPMFILE) 7 | $(MOCK) $(MOCK_COMMON_ARGS) --chroot 'rpmbuild -bp /builddir/build/SPECS/$(SPECFILE)' 8 | $(MOCK) $(MOCK_COMMON_ARGS) --copyin $(filter-out %.rpm,$^) /builddir/build/BUILD/.config 9 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --chroot 'mv .config linux-*/' 10 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --shell 'make -C linux-* oldconfig' 11 | $(MOCK) $(MOCK_COMMON_ARGS) --copyout /builddir/build/BUILD/linux-*/.config config 12 | 13 | #help menuconfig: Uses the current config file as input to `make menuconfig` and 14 | #help applies the results to the local copy. 15 | menuconfig: $(SRPMFILE) config 16 | $(MOCK) $(MOCK_COMMON_ARGS) --installdeps $(SRPMFILE) 17 | $(MOCK) $(MOCK_COMMON_ARGS) --install ncurses-dev 18 | $(MOCK) $(MOCK_COMMON_ARGS) --chroot 'rpmbuild -bp /builddir/build/SPECS/$(SPECFILE)' 19 | $(MOCK) $(MOCK_COMMON_ARGS) --copyin $(filter-out %.rpm,$^) /builddir/build/BUILD/.config 20 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --chroot 'mv .config linux-*/' 21 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --shell 'make -C linux-* menuconfig' 22 | $(MOCK) $(MOCK_COMMON_ARGS) --copyout /builddir/build/BUILD/linux-*/.config config 23 | 24 | #help kdevelop: Download and extract the package sources and apply the clear Linux 25 | #help patches on top of it using the git tool. You can use DESTDIR=target to 26 | #help extract to a specific target directory. i.e. 
"make kdevelop DESTDIR=/tmp" 27 | kdevelop: 28 | @scripts/develop.sh $(SPECFILE) $(DESTDIR) 29 | -------------------------------------------------------------------------------- /adlrdt.patch: -------------------------------------------------------------------------------- 1 | work around https://bugzilla.kernel.org/show_bug.cgi?id=215141 2 | 3 | --- linux-5.19.1/arch/x86/kernel/cpu/resctrl/core.c~ 2022-08-11 11:22:05.000000000 +0000 4 | +++ linux-5.19.1/arch/x86/kernel/cpu/resctrl/core.c 2022-08-18 21:06:56.235417914 +0000 5 | @@ -955,6 +955,22 @@ 6 | */ 7 | rdt_init_res_defs(); 8 | 9 | + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { 10 | + if (boot_cpu_data.x86_model == INTEL_FAM6_ALDERLAKE) 11 | + return -ENODEV; 12 | + if (boot_cpu_data.x86_model == INTEL_FAM6_ALDERLAKE_L) 13 | + return -ENODEV; 14 | + if (boot_cpu_data.x86_model == INTEL_FAM6_ALDERLAKE_N) 15 | + return -ENODEV; 16 | + if (boot_cpu_data.x86_model == INTEL_FAM6_RAPTORLAKE) 17 | + return -ENODEV; 18 | + if (boot_cpu_data.x86_model == INTEL_FAM6_RAPTORLAKE_P) 19 | + return -ENODEV; 20 | + if (boot_cpu_data.x86_model == INTEL_FAM6_RAPTORLAKE_S) 21 | + return -ENODEV; 22 | + } 23 | + 24 | + 25 | check_quirks(); 26 | 27 | if (!get_rdt_resources()) 28 | -------------------------------------------------------------------------------- /archive/0114-tweak-perfbias.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Sun, 22 Jan 2017 18:51:13 +0000 4 | Subject: [PATCH] tweak perfbias 5 | 6 | --- 7 | arch/x86/kernel/cpu/intel.c | 8 +++++--- 8 | 1 file changed, 5 insertions(+), 3 deletions(-) 9 | 10 | diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c 11 | index 3142fd7a9b32..41d5ccae7890 100644 12 | --- a/arch/x86/kernel/cpu/intel.c 13 | +++ b/arch/x86/kernel/cpu/intel.c 14 | @@ -608,13 +608,15 @@ static void 
init_intel_energy_perf(struct cpuinfo_x86 *c) 15 | return; 16 | 17 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); 18 | - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) 19 | + if ((epb & 0xF) >= ENERGY_PERF_BIAS_NORMAL) 20 | return; 21 | 22 | - pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); 23 | + pr_info_once("ENERGY_PERF_BIAS: Set to 'performance', was 'normal'\n"); 24 | pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); 25 | - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; 26 | + epb = (epb & ~0xF) | ENERGY_PERF_BIAS_PERFORMANCE; 27 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); 28 | + 29 | + pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); 30 | } 31 | 32 | static void intel_bsp_resume(struct cpuinfo_x86 *c) 33 | -- 34 | https://clearlinux.org 35 | 36 | -------------------------------------------------------------------------------- /archive/0123-zero-extra-registers.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 8 Feb 2018 16:49:38 +0000 4 | Subject: [PATCH] zero extra registers 5 | 6 | This for Zero used caller-saved general registers upon function return. 7 | --- 8 | arch/x86/Makefile | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/arch/x86/Makefile b/arch/x86/Makefile 12 | index 513a555..984121f 100644 13 | --- a/arch/x86/Makefile 14 | +++ b/arch/x86/Makefile 15 | @@ -217,7 +217,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables 16 | 17 | # Avoid indirect branches in kernel to deal with Spectre 18 | ifdef CONFIG_RETPOLINE 19 | - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) 20 | + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -mzero-caller-saved-regs=used 21 | # Additionally, avoid generating expensive indirect jumps which 22 | # are subject to retpolines for small number of switch cases. 
23 | # clang turns off jump table generation by default when under 24 | -- 25 | https://clearlinux.org 26 | 27 | -------------------------------------------------------------------------------- /archive/0131-overload-on-wakeup.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: jplozi 3 | Date: Thu, 16 Apr 2020 14:23:27 -0500 4 | Subject: [PATCH] overload on wakeup 5 | 6 | As an experiment, apply the learnings from the wasted-cores paper 7 | and see how the performance works out. With the data from this we should 8 | be able to work with Peter and the rest of the scheduler folks on 9 | a more permanent/elegant solution. 10 | 11 | Source: https://github.com/jplozi/wastedcores 12 | --- 13 | kernel/sched/fair.c | 28 ++++++++++++++++++++++++++++ 14 | 1 file changed, 28 insertions(+) 15 | 16 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 17 | index e8815af..34a42a1 100644 18 | --- a/kernel/sched/fair.c 19 | +++ b/kernel/sched/fair.c 20 | @@ -6378,6 +6378,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 21 | return -1; 22 | } 23 | 24 | + 25 | +static unsigned int once_in_a_while; 26 | /* 27 | * select_task_rq_fair: Select target runqueue for the waking task in domains 28 | * that have the 'sd_flag' flag set. 
In practice, this is SD_BALANCE_WAKE, 29 | @@ -6433,6 +6435,32 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f 30 | 31 | 32 | rcu_read_lock(); 33 | + 34 | + once_in_a_while++; 35 | + 36 | + if (cpu_rq(prev_cpu)->nr_running || (once_in_a_while & 15) == 0) { 37 | + int _cpu; 38 | + int bestprio = -5000; 39 | + int bestcpu = -1; 40 | + 41 | + for_each_online_cpu(_cpu) { 42 | + if (!cpumask_test_cpu(_cpu, p->cpus_ptr) 43 | + || cpu_rq(_cpu)->nr_running) 44 | + continue; 45 | + if (arch_asym_cpu_priority(_cpu) > bestprio 46 | + || (prev_cpu == _cpu 47 | + && bestprio == arch_asym_cpu_priority(_cpu))) { 48 | + bestcpu = _cpu; 49 | + bestprio = arch_asym_cpu_priority(_cpu); 50 | + } 51 | + } 52 | + 53 | + if (bestcpu >= 0) { 54 | + rcu_read_unlock(); 55 | + return bestcpu; 56 | + } 57 | + } 58 | + 59 | for_each_domain(cpu, tmp) { 60 | if (!(tmp->flags & SD_LOAD_BALANCE)) 61 | break; 62 | -- 63 | https://clearlinux.org 64 | 65 | -------------------------------------------------------------------------------- /archive/0151-mm-Export-do_madvise.patch-: -------------------------------------------------------------------------------- 1 | From fc4ee73f68d0e9da4ba61112416849c18d933882 Mon Sep 17 00:00:00 2001 2 | From: Sebastien Boeuf 3 | Date: Mon, 23 Jan 2017 15:03:52 -0800 4 | Subject: [PATCH 151/154] mm: Export do_madvise() 5 | 6 | Combined with some interesting flags madvise() system call 7 | allows to free memory more smartly and more efficiently than 8 | we could do with a simple free(). The issue is that is not 9 | available for kernel modules that could need it. 10 | 11 | In order to solve this lack of support, this patch exports 12 | do_madvise() so as to make it available to the entire kernel. 13 | The already existing madvise() system call is unchanged and 14 | now relies on this new do_madvise() function. 
15 | 16 | Suggested-by: Arjan van de Ven 17 | Signed-off-by: Sebastien Boeuf 18 | --- 19 | include/linux/mm.h | 2 ++ 20 | mm/madvise.c | 25 +++++++++++++++++++++---- 21 | 2 files changed, 23 insertions(+), 4 deletions(-) 22 | 23 | diff --git a/include/linux/mm.h b/include/linux/mm.h 24 | index 43edf659453b..c3153e9ee7ea 100644 25 | --- a/include/linux/mm.h 26 | +++ b/include/linux/mm.h 27 | @@ -2603,5 +2603,7 @@ void __init setup_nr_node_ids(void); 28 | static inline void setup_nr_node_ids(void) {} 29 | #endif 30 | 31 | +extern int do_madvise(unsigned long start, size_t len_in, int behavior); 32 | + 33 | #endif /* __KERNEL__ */ 34 | #endif /* _LINUX_MM_H */ 35 | diff --git a/mm/madvise.c b/mm/madvise.c 36 | index 375cf32087e4..3798dd68692e 100644 37 | --- a/mm/madvise.c 38 | +++ b/mm/madvise.c 39 | @@ -730,9 +730,7 @@ madvise_behavior_valid(int behavior) 40 | } 41 | 42 | /* 43 | - * The madvise(2) system call. 44 | - * 45 | - * Applications can use madvise() to advise the kernel how it should 46 | + * Kernel modules can use do_madvise() to advise the kernel how it should 47 | * handle paging I/O in this VM area. The idea is to help the kernel 48 | * use appropriate read-ahead and caching techniques. The information 49 | * provided is advisory only, and can be safely disregarded by the 50 | @@ -790,7 +788,7 @@ madvise_behavior_valid(int behavior) 51 | * -EBADF - map exists, but area maps something that isn't a file. 52 | * -EAGAIN - a kernel resource was temporarily unavailable. 53 | */ 54 | -SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) 55 | +int do_madvise(unsigned long start, size_t len_in, int behavior) 56 | { 57 | unsigned long end, tmp; 58 | struct vm_area_struct *vma, *prev; 59 | @@ -885,3 +883,22 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) 60 | 61 | return error; 62 | } 63 | +EXPORT_SYMBOL_GPL(do_madvise); 64 | + 65 | +/* 66 | + * The madvise(2) system call. 
67 | + * 68 | + * Applications can use madvise() system call to advise the kernel how 69 | + * it should handle paging I/O in this VM area. The idea is to help 70 | + * the kernel use appropriate read-ahead and caching techniques. The 71 | + * information provided is advisory only, and can be safely disregarded 72 | + * by the kernel without affecting the correct operation of the application. 73 | + * 74 | + * behavior values are the same than the ones defined in madvise() 75 | + * 76 | + * return values are the same than the ones defined in madvise() 77 | + */ 78 | +SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) 79 | +{ 80 | + return do_madvise(start, len_in, behavior); 81 | +} 82 | -- 83 | 2.15.0 84 | 85 | -------------------------------------------------------------------------------- /archive/0154-sysctl-vm-Fine-grained-cache-shrinking.patch-: -------------------------------------------------------------------------------- 1 | From 130d5d976b920aec243e0fa63273f3143660054b Mon Sep 17 00:00:00 2001 2 | From: Sebastien Boeuf 3 | Date: Mon, 23 Jan 2017 15:32:39 -0800 4 | Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking 5 | 6 | Lots of virtual machines are let in idle state for days until they 7 | are terminated, and they can keep a large amount of memory in their 8 | cache, meaning this memory cannot be used by other processes. 9 | 10 | We tried to release this memory using existing drop_caches sysctl, 11 | but it led to the complete cache loss while it could have been used 12 | whether the idle process wakes up. Indeed, the process can't find any 13 | available cached data and it directly affects performances to rebuild 14 | it from scratch. 15 | 16 | Instead, the solution we want is based on shrinking gradually system 17 | cache over time. 
This patch adds a new sysctl shrink_caches_mb so as 18 | to allow userspace applications indicating the kernel it should shrink 19 | system cache up to the amount (in MiB) specified. 20 | 21 | There is an application called "memshrinker" which uses this new 22 | mechanism. It runs in the background and periodically releases a 23 | specified amount of cache. This amount is based on the remaining 24 | cache on the system, and period is computed to follow a shrinking 25 | model. It results in saving a lot of memory for other processes 26 | running on the system. 27 | 28 | Suggested-by: Arjan van de Ven 29 | Signed-off-by: Sebastien Boeuf 30 | --- 31 | fs/drop_caches.c | 25 +++++++++++++++++++++++++ 32 | include/linux/mm.h | 4 ++++ 33 | kernel/sysctl.c | 8 ++++++++ 34 | mm/vmscan.c | 2 -- 35 | 4 files changed, 37 insertions(+), 2 deletions(-) 36 | 37 | diff --git a/fs/drop_caches.c b/fs/drop_caches.c 38 | index 82377017130f..f8de1383498b 100644 39 | --- a/fs/drop_caches.c 40 | +++ b/fs/drop_caches.c 41 | @@ -9,10 +9,12 @@ 42 | #include 43 | #include 44 | #include 45 | +#include 46 | #include "internal.h" 47 | 48 | /* A global variable is a bit ugly, but it keeps the code simple */ 49 | int sysctl_drop_caches; 50 | +int sysctl_shrink_caches_mb; 51 | 52 | static void drop_pagecache_sb(struct super_block *sb, void *unused) 53 | { 54 | @@ -68,3 +70,26 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write, 55 | } 56 | return 0; 57 | } 58 | + 59 | +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, 60 | + void __user *buffer, size_t *length, loff_t *ppos) 61 | +{ 62 | + int ret; 63 | + unsigned long nr_to_reclaim, page_reclaimed; 64 | + 65 | + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); 66 | + if (ret) 67 | + return ret; 68 | + 69 | + nr_to_reclaim = sysctl_shrink_caches_mb * (1 << 20) / PAGE_SIZE; 70 | + if (write) { 71 | + page_reclaimed = shrink_all_memory(nr_to_reclaim); 72 | + if (page_reclaimed > 0) 73 | + 
lru_add_drain_all(); 74 | + 75 | + if (page_reclaimed != nr_to_reclaim) 76 | + return page_reclaimed; 77 | + } 78 | + 79 | + return 0; 80 | +} 81 | diff --git a/include/linux/mm.h b/include/linux/mm.h 82 | index 15e02bf3a6b3..9f9b967ad2c9 100644 83 | --- a/include/linux/mm.h 84 | +++ b/include/linux/mm.h 85 | @@ -2457,6 +2457,10 @@ extern int kvm_ret_mem_advice; 86 | int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, 87 | void __user *buffer, size_t *length, 88 | loff_t *ppos); 89 | +extern int sysctl_shrink_caches_mb; 90 | +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, 91 | + void __user *buffer, size_t *length, 92 | + loff_t *ppos); 93 | #endif 94 | 95 | void drop_slab(void); 96 | diff --git a/kernel/sysctl.c b/kernel/sysctl.c 97 | index 9a1611f92a2a..9b74b4f0251d 100644 98 | --- a/kernel/sysctl.c 99 | +++ b/kernel/sysctl.c 100 | @@ -1417,6 +1417,14 @@ static struct ctl_table vm_table[] = { 101 | .mode = 0644, 102 | .proc_handler = kvm_madv_instant_free_sysctl_handler, 103 | }, 104 | + { 105 | + .procname = "shrink_caches_mb", 106 | + .data = &sysctl_shrink_caches_mb, 107 | + .maxlen = sizeof(int), 108 | + .mode = 0644, 109 | + .proc_handler = shrink_caches_sysctl_handler, 110 | + .extra1 = &one, 111 | + }, 112 | #ifdef CONFIG_COMPACTION 113 | { 114 | .procname = "compact_memory", 115 | diff --git a/mm/vmscan.c b/mm/vmscan.c 116 | index eb2f0315b8c0..b16f327b0211 100644 117 | --- a/mm/vmscan.c 118 | +++ b/mm/vmscan.c 119 | @@ -3646,7 +3646,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) 120 | wake_up_interruptible(&pgdat->kswapd_wait); 121 | } 122 | 123 | -#ifdef CONFIG_HIBERNATION 124 | /* 125 | * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of 126 | * freed pages. 
127 | @@ -3686,7 +3685,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) 128 | 129 | return nr_reclaimed; 130 | } 131 | -#endif /* CONFIG_HIBERNATION */ 132 | 133 | /* It's optimal to keep kswapds on the same CPUs as their memory, but 134 | not required for correctness. So if the last cpu in a node goes 135 | -- 136 | 2.15.0 137 | 138 | -------------------------------------------------------------------------------- /archive/2002-opae-add-Kconfig-and-Makefile.patch-: -------------------------------------------------------------------------------- 1 | From 1cbc611a7b7fa5a61524179cd39792441f78a62b Mon Sep 17 00:00:00 2001 2 | From: Miguel Bernal Marin 3 | Date: Thu, 1 Feb 2018 11:08:18 -0600 4 | Subject: [PATCH 2002/2002] opae: add Kconfig and Makefile 5 | 6 | Signed-off-by: Miguel Bernal Marin 7 | --- 8 | drivers/Makefile | 1 + 9 | drivers/fpga/Kconfig | 1 + 10 | 2 files changed, 2 insertions(+) 11 | 12 | diff --git a/drivers/Makefile b/drivers/Makefile 13 | index 46053df6f069..48b6d693e752 100644 14 | --- a/drivers/Makefile 15 | +++ b/drivers/Makefile 16 | @@ -183,6 +183,7 @@ obj-$(CONFIG_ANDROID) += android/ 17 | obj-$(CONFIG_ANDROID) += android/ 18 | obj-$(CONFIG_NVMEM) += nvmem/ 19 | obj-$(CONFIG_FPGA) += fpga/ 20 | +obj-$(CONFIG_FPGA_INTEL_OPAE) += fpga/intel/ 21 | obj-$(CONFIG_FSI) += fsi/ 22 | obj-$(CONFIG_TEE) += tee/ 23 | obj-$(CONFIG_MULTIPLEXER) += mux/ 24 | --- linux-4.16.1/drivers/fpga/Kconfig~ 2018-04-08 12:29:52.000000000 +0000 25 | +++ linux-4.16.1/drivers/fpga/Kconfig 2018-04-08 16:32:22.109015136 +0000 26 | @@ -109,6 +109,8 @@ 27 | region of the FPGA from the busses while that region is 28 | being reprogrammed during partial reconfig. 
29 | 30 | +source "drivers/fpga/intel/Kconfig" 31 | + 32 | config FPGA_REGION 33 | tristate "FPGA Region" 34 | depends on FPGA_BRIDGE 35 | -------------------------------------------------------------------------------- /archive/3002Add-sysdig-to-kernel-build-system.patch-: -------------------------------------------------------------------------------- 1 | From 6ea3b762497fc02f6b79378cc950caf041a1b647 Mon Sep 17 00:00:00 2001 2 | From: Icarus Sparry 3 | Date: Sun, 25 Feb 2018 14:11:04 -0800 4 | Subject: [PATCH 2/2] patch in driver 5 | 6 | --- 7 | kernel/trace/Kconfig | 10 ++++++++++ 8 | kernel/trace/Makefile | 1 + 9 | 2 files changed, 11 insertions(+) 10 | 11 | diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig 12 | index f54dc62..b8bd7be 100644 13 | --- a/kernel/trace/Kconfig 14 | +++ b/kernel/trace/Kconfig 15 | @@ -714,6 +714,16 @@ config TRACING_EVENTS_GPIO 16 | help 17 | Enable tracing events for gpio subsystem 18 | 19 | +config SYSDIG 20 | + tristate "sysdig-probe kernel module" 21 | + depends on TRACEPOINTS 22 | + depends on HAVE_SYSCALL_TRACEPOINTS 23 | + default m 24 | + help 25 | + Build the kernel module to support sysdig. 
26 | + 27 | + If unsure, say M 28 | + 29 | endif # FTRACE 30 | 31 | endif # TRACING_SUPPORT 32 | diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile 33 | index e2538c7..70a4eb1 100644 34 | --- a/kernel/trace/Makefile 35 | +++ b/kernel/trace/Makefile 36 | @@ -73,3 +73,4 @@ obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o 37 | obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o 38 | 39 | libftrace-y := ftrace.o 40 | +obj-$(CONFIG_SYSDIG) += sysdig/ 41 | -- 42 | 2.16.2 43 | 44 | -------------------------------------------------------------------------------- /backport-ioboost.patch: -------------------------------------------------------------------------------- 1 | From mboxrd@z Thu Jan 1 00:00:00 1970 2 | Return-Path: 3 | X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on 4 | aws-us-west-2-korg-lkml-1.web.codeaurora.org 5 | Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) 6 | by smtp.lore.kernel.org (Postfix) with ESMTP id C293AC64EC4 7 | for ; Fri, 3 Mar 2023 04:14:19 +0000 (UTC) 8 | Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand 9 | id S229452AbjCCEOS (ORCPT ); 10 | Thu, 2 Mar 2023 23:14:18 -0500 11 | Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42550 "EHLO 12 | lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org 13 | with ESMTP id S229451AbjCCEOR (ORCPT 14 | ); Thu, 2 Mar 2023 23:14:17 -0500 15 | Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) 16 | by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 36FEA12BE6; 17 | Thu, 2 Mar 2023 20:14:17 -0800 (PST) 18 | DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; 19 | d=intel.com; i=@intel.com; q=dns/txt; s=Intel; 20 | t=1677816857; x=1709352857; 21 | h=from:to:cc:subject:date:message-id:mime-version: 22 | content-transfer-encoding; 23 | bh=vF/1poQJ5i2/fALYadh5wXENB15OfhGT7T0JvZIYi0I=; 24 | b=Bl0Lg5MRLFrwskFU/gAuNLdwv4PpTyiVrjfu3N5Zdxid4A2tB92dYd29 25 | RYXmVbhLiibvpaMQ8ha33UxEliZlZge5KZJS1W8cR0n4DsuUOpuZhEaMG 26 
| 3y4hMbWU2nC0xjisZdfqlK74peb/RVI7xKAQlDFrzCdAcdppr7G9cP9GZ 27 | DLpsIoNwFkTCeoe29VOWAIvf9Lv6nm/W6KYSvdNSImFLAj69VzRAkYfJy 28 | 5Kk73rkEoXJkShsKGkBWDxJsKIhD+XCTi7XgDMkX+FX9lVP0rRiVurabW 29 | D9MjJp3QJfPDwzwz159d6KSDp0C0kP/bRPhyBD7vNZSM//LkOzorTMNgT 30 | A==; 31 | X-IronPort-AV: E=McAfee;i="6500,9779,10637"; a="333671402" 32 | X-IronPort-AV: E=Sophos;i="5.98,229,1673942400"; 33 | d="scan'208";a="333671402" 34 | Received: from fmsmga004.fm.intel.com ([10.253.24.48]) 35 | by fmsmga104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 02 Mar 2023 20:14:16 -0800 36 | X-ExtLoop1: 1 37 | X-IronPort-AV: E=McAfee;i="6500,9779,10637"; a="744119122" 38 | X-IronPort-AV: E=Sophos;i="5.98,229,1673942400"; 39 | d="scan'208";a="744119122" 40 | Received: from spandruv-desk.jf.intel.com ([10.54.75.8]) 41 | by fmsmga004.fm.intel.com with ESMTP; 02 Mar 2023 20:14:16 -0800 42 | From: Srinivas Pandruvada 43 | To: rafael@kernel.org, lenb@kernel.org, viresh.kumar@linaro.org 44 | Cc: linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org, 45 | Srinivas Pandruvada 46 | Subject: [PATCH] cpufreq: intel_pstate: Enable HWP IO boost for all servers 47 | Date: Thu, 2 Mar 2023 20:14:11 -0800 48 | Message-Id: <20230303041411.3161780-1-srinivas.pandruvada@linux.intel.com> 49 | X-Mailer: git-send-email 2.39.1 50 | MIME-Version: 1.0 51 | Content-Transfer-Encoding: 8bit 52 | Precedence: bulk 53 | List-ID: 54 | X-Mailing-List: linux-pm@vger.kernel.org 55 | 56 | The HWP IO boost results in slight improvements for IO performance on 57 | both Ice Lake and Sapphire Rapid servers. 58 | 59 | Currently there is a CPU model check for Skylake desktop and server along 60 | with the ACPI PM profile for performance and enterprise servers to enable 61 | IO boost. 62 | 63 | Remove the CPU model check, so that all current server models enable HWP 64 | IO boost by default. 
65 | 66 | Signed-off-by: Srinivas Pandruvada 67 | --- 68 | drivers/cpufreq/intel_pstate.c | 11 +---------- 69 | 1 file changed, 1 insertion(+), 10 deletions(-) 70 | 71 | diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c 72 | index cb4beec27555..8edbc0856892 100644 73 | --- a/drivers/cpufreq/intel_pstate.c 74 | +++ b/drivers/cpufreq/intel_pstate.c 75 | @@ -2384,12 +2384,6 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { 76 | {} 77 | }; 78 | 79 | -static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { 80 | - X86_MATCH(SKYLAKE_X, core_funcs), 81 | - X86_MATCH(SKYLAKE, core_funcs), 82 | - {} 83 | -}; 84 | - 85 | static int intel_pstate_init_cpu(unsigned int cpunum) 86 | { 87 | struct cpudata *cpu; 88 | @@ -2408,12 +2402,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum) 89 | cpu->epp_default = -EINVAL; 90 | 91 | if (hwp_active) { 92 | - const struct x86_cpu_id *id; 93 | - 94 | intel_pstate_hwp_enable(cpu); 95 | 96 | - id = x86_match_cpu(intel_pstate_hwp_boost_ids); 97 | - if (id && intel_pstate_acpi_pm_profile_server()) 98 | + if (intel_pstate_acpi_pm_profile_server()) 99 | hwp_boost = true; 100 | } 101 | } else if (hwp_active) { 102 | -- 103 | 2.34.1 104 | 105 | 106 | -------------------------------------------------------------------------------- /better_idle_balance.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.3.1/kernel/sched/fair.c~ 2023-04-30 23:32:26.000000000 +0000 2 | +++ linux-6.3.1/kernel/sched/fair.c 2023-06-27 15:01:52.301844933 +0000 3 | @@ -11745,7 +11745,7 @@ 4 | 5 | update_next_balance(sd, &next_balance); 6 | 7 | - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) 8 | + if (this_rq->avg_idle/2 < curr_cost + sd->max_newidle_lb_cost) 9 | break; 10 | 11 | if (sd->flags & SD_BALANCE_NEWIDLE) { 12 | -------------------------------------------------------------------------------- /cmdline: 
-------------------------------------------------------------------------------- 1 | quiet 2 | console=tty0 3 | console=ttyS0,115200n8 4 | cryptomgr.notests 5 | init=/usr/bin/initra-desktop 6 | initcall_debug 7 | intel_iommu=igfx_off 8 | kvm-intel.nested=1 9 | no_timer_check 10 | noreplace-smp 11 | page_alloc.shuffle=1 12 | rcupdate.rcu_expedited=1 13 | rootfstype=ext4,btrfs,xfs,f2fs 14 | tsc=reliable 15 | rw 16 | -------------------------------------------------------------------------------- /epp-retune.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.1/arch/x86/include/asm/msr-index.h~ 2022-12-11 22:15:18.000000000 +0000 2 | +++ linux-6.1/arch/x86/include/asm/msr-index.h 2022-12-16 01:31:32.266119875 +0000 3 | @@ -472,7 +472,7 @@ 4 | #define HWP_MAX_PERF(x) ((x & 0xff) << 8) 5 | #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) 6 | #define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) 7 | -#define HWP_EPP_PERFORMANCE 0x00 8 | +#define HWP_EPP_PERFORMANCE 0x01 9 | #define HWP_EPP_BALANCE_PERFORMANCE 0x80 10 | #define HWP_EPP_BALANCE_POWERSAVE 0xC0 11 | #define HWP_EPP_POWERSAVE 0xFF 12 | -------------------------------------------------------------------------------- /filter-stable.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import json 4 | import sys 5 | 6 | if len(sys.argv) != 3: 7 | print("Usage: filter-stable STABLE-VER JSON-FILE") 8 | sys.exit(0) 9 | 10 | STABLE_VER = sys.argv[1] 11 | JSON_FILE = sys.argv[2] 12 | 13 | with open(JSON_FILE, "r", encoding="latin-1") as myfile: 14 | data = json.load(myfile) 15 | 16 | if "releases" not in data: 17 | print("Unexpected releases.json format", file=sys.stderr) 18 | sys.exit(1) 19 | 20 | stable_ver = "" 21 | for release in data["releases"]: 22 | if release["iseol"]: 23 | continue 24 | if release["moniker"] != "stable": 25 | continue 26 | ver = release["version"] 27 | if 
ver.startswith(f"{STABLE_VER}."): 28 | stable_ver = ver 29 | 30 | if not stable_ver: 31 | print(f"No version found for {STABLE_VER} series", file=sys.stderr) 32 | sys.exit(1) 33 | 34 | print(stable_ver) 35 | -------------------------------------------------------------------------------- /iommu.patch: -------------------------------------------------------------------------------- 1 | Subject: [PATCH v2] iommu/vt-d: Avoid superfluous IOTLB tracking in lazy mode 2 | Date: Wed, 8 Feb 2023 10:18:34 -0800 3 | Message-Id: <20230208181834.1601211-1-jacob.jun.pan@linux.intel.com> 4 | X-Mailer: git-send-email 2.25.1 5 | MIME-Version: 1.0 6 | Content-Transfer-Encoding: 8bit 7 | Precedence: bulk 8 | List-ID: 9 | X-Mailing-List: linux-kernel@vger.kernel.org 10 | 11 | Intel IOMMU driver implements IOTLB flush queue with domain selective 12 | or PASID selective invalidations. In this case there's no need to track 13 | IOVA page range and sync IOTLBs, which may cause significant performance 14 | hit. 15 | 16 | This patch adds a check to avoid IOVA gather page and IOTLB sync for 17 | the lazy path. 18 | 19 | The performance difference on Sapphire Rapids 100Gb NIC is improved by 20 | the following (as measured by iperf send): 21 | 22 | w/o this fix~48 Gbits/s. 
with this fix ~54 Gbits/s 23 | 24 | Cc: 25 | Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers") 26 | Reviewed-by: Robin Murphy 27 | Tested-by: Sanjay Kumar 28 | Signed-off-by: Sanjay Kumar 29 | Signed-off-by: Jacob Pan 30 | --- 31 | v2: use helper function iommu_iotlb_gather_queued() instead of open 32 | coding 33 | --- 34 | drivers/iommu/intel/iommu.c | 8 +++++++- 35 | 1 file changed, 7 insertions(+), 1 deletion(-) 36 | 37 | diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c 38 | index 161342e7149d..18265fa07828 100644 39 | --- a/drivers/iommu/intel/iommu.c 40 | +++ b/drivers/iommu/intel/iommu.c 41 | @@ -4348,7 +4348,13 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, 42 | if (dmar_domain->max_addr == iova + size) 43 | dmar_domain->max_addr = iova; 44 | 45 | - iommu_iotlb_gather_add_page(domain, gather, iova, size); 46 | + /* 47 | + * We do not use page-selective IOTLB invalidation in flush queue, 48 | + * There is no need to track page and sync iotlb. Domain-selective or 49 | + * PASID-selective validation are used in the flush queue. 
50 | + */ 51 | + if (!iommu_iotlb_gather_queued(gather)) 52 | + iommu_iotlb_gather_add_page(domain, gather, iova, size); 53 | 54 | return size; 55 | } 56 | -- 57 | 2.25.1 58 | 59 | 60 | -------------------------------------------------------------------------------- /kdf-boottime.patch: -------------------------------------------------------------------------------- 1 | --- linux-5.19.1/crypto/kdf_sp800108.c~ 2022-08-11 11:22:05.000000000 +0000 2 | +++ linux-5.19.1/crypto/kdf_sp800108.c 2022-08-11 16:16:31.178018142 +0000 3 | @@ -125,6 +125,7 @@ 4 | 5 | static int __init crypto_kdf108_init(void) 6 | { 7 | + return 0; 8 | int ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)", 9 | crypto_kdf108_setkey, crypto_kdf108_ctr_generate); 10 | 11 | -------------------------------------------------------------------------------- /kvm-printk.patch: -------------------------------------------------------------------------------- 1 | these error messages are causing support isseus 2 | 3 | --- linux-6.1/arch/x86/kvm/x86.c~ 2022-12-11 22:15:18.000000000 +0000 4 | +++ linux-6.1/arch/x86/kvm/x86.c 2022-12-15 22:15:07.085648692 +0000 5 | @@ -9340,13 +9340,9 @@ 6 | } 7 | 8 | if (!ops->cpu_has_kvm_support()) { 9 | - pr_err_ratelimited("kvm: no hardware support for '%s'\n", 10 | - ops->runtime_ops->name); 11 | return -EOPNOTSUPP; 12 | } 13 | if (ops->disabled_by_bios()) { 14 | - pr_err_ratelimited("kvm: support for '%s' disabled by bios\n", 15 | - ops->runtime_ops->name); 16 | return -EOPNOTSUPP; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /libsgrowdown.patch: -------------------------------------------------------------------------------- 1 | Place libraries right below the binary for PIE binaries, this helps code locality 2 | (and thus performance). 
3 | 4 | --- linux-5.18.2/fs/binfmt_elf.c~ 2022-06-06 06:49:00.000000000 +0000 5 | +++ linux-5.18.2/fs/binfmt_elf.c 2022-08-10 13:53:04.878633166 +0000 6 | @@ -1297,6 +1297,8 @@ 7 | mm = current->mm; 8 | mm->end_code = end_code; 9 | mm->start_code = start_code; 10 | + if (start_code >= ELF_ET_DYN_BASE) 11 | + mm->mmap_base = start_code; 12 | mm->start_data = start_data; 13 | mm->end_data = end_data; 14 | mm->start_stack = bprm->p; 15 | -------------------------------------------------------------------------------- /mm-lru_cache_disable-use-synchronize_rcu_expedited.patch: -------------------------------------------------------------------------------- 1 | From: Marcelo Tosatti 2 | Subject: mm: lru_cache_disable: use synchronize_rcu_expedited 3 | Date: Mon, 30 May 2022 12:51:56 -0300 4 | 5 | commit ff042f4a9b050 ("mm: lru_cache_disable: replace work queue 6 | synchronization with synchronize_rcu") replaced lru_cache_disable's usage 7 | of work queues with synchronize_rcu. 8 | 9 | Some users reported large performance regressions due to this commit, for 10 | example: 11 | https://lore.kernel.org/all/20220521234616.GO1790663@paulmck-ThinkPad-P17-Gen-1/T/ 12 | 13 | Switching to synchronize_rcu_expedited fixes the problem. 14 | 15 | Link: https://lkml.kernel.org/r/YpToHCmnx/HEcVyR@fuller.cnet 16 | Fixes: ff042f4a9b050 ("mm: lru_cache_disable: replace work queue synchronization with synchronize_rcu") 17 | Signed-off-by: Marcelo Tosatti 18 | Tested-by: Stefan Wahren 19 | Tested-by: Michael Larabel 20 | Cc: Sebastian Andrzej Siewior 21 | Cc: Nicolas Saenz Julienne 22 | Cc: Borislav Petkov 23 | Cc: Minchan Kim 24 | Cc: Matthew Wilcox 25 | Cc: Mel Gorman 26 | Cc: Juri Lelli 27 | Cc: Thomas Gleixner 28 | Cc: Paul E. 
McKenney 29 | Cc: Phil Elwell 30 | Cc: 31 | Signed-off-by: Andrew Morton 32 | --- 33 | 34 | mm/swap.c | 2 +- 35 | 1 file changed, 1 insertion(+), 1 deletion(-) 36 | 37 | --- a/mm/swap.c~mm-lru_cache_disable-use-synchronize_rcu_expedited 38 | +++ a/mm/swap.c 39 | @@ -881,7 +881,7 @@ void lru_cache_disable(void) 40 | * lru_disable_count = 0 will have exited the critical 41 | * section when synchronize_rcu() returns. 42 | */ 43 | - synchronize_rcu(); 44 | + synchronize_rcu_expedited(); 45 | #ifdef CONFIG_SMP 46 | __lru_add_drain_all(true); 47 | #else 48 | _ 49 | -------------------------------------------------------------------------------- /mmput_async.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.0/kernel/exit.c~ 2022-10-02 21:09:07.000000000 +0000 2 | +++ linux-6.0/kernel/exit.c 2022-11-18 17:25:22.445717556 +0000 3 | @@ -507,7 +507,7 @@ 4 | task_unlock(current); 5 | mmap_read_unlock(mm); 6 | mm_update_next_owner(mm); 7 | - mmput(mm); 8 | + mmput_async(mm); 9 | if (test_thread_flag(TIF_MEMDIE)) 10 | exit_oom_victim(); 11 | } 12 | --- linux-6.0/kernel/fork.c~ 2022-11-18 18:53:36.000000000 +0000 13 | +++ linux-6.0/kernel/fork.c 2022-11-18 19:01:44.058654439 +0000 14 | @@ -1222,7 +1222,7 @@ 15 | { 16 | if (atomic_dec_and_test(&mm->mm_users)) { 17 | INIT_WORK(&mm->async_put_work, mmput_async_fn); 18 | - schedule_work(&mm->async_put_work); 19 | + schedule_work_on(0, &mm->async_put_work); 20 | } 21 | } 22 | EXPORT_SYMBOL_GPL(mmput_async); 23 | -------------------------------------------------------------------------------- /netscale.patch: -------------------------------------------------------------------------------- 1 | Author: Wangyang Guo Guo, Wangyang 2 | 3 | 4 | The dst_entry struct has a reference count that gets touched 5 | quite a bit in scenarios where many connections happen 6 | from/to the same IP. 
This dirty cache line is shared 7 | with other members that are read (mostly) that are 8 | used quite a bit (for example, lwtstate) 9 | 10 | In addition, this struct is embedded in other structs 11 | and the __refcnt shows up as false sharing even in that 12 | context. 13 | 14 | An example workload is the phoronix pts/memcached benchmark 15 | (in 1:100 mode) that exaggerates the problem in its setup. 16 | 17 | Without the patch below the workload gets a score of 18 | 771377, while the patch below improves this to 19 | 1027113.. a 30%+ gain. 20 | 21 | It's likely possible to reorder some fields in the struct 22 | to reduce the size of the needed padding, but this is 23 | the simplest solution. 24 | 25 | Signed-off-by: Arjan van de Ven 26 | 27 | 28 | --- a/include/net/dst.h 2022-12-11 22:15:18.000000000 +0000 29 | +++ b/include/net/dst.h 2023-02-13 14:48:45.498505188 +0000 30 | @@ -66,6 +66,7 @@ 31 | */ 32 | #ifdef CONFIG_64BIT 33 | atomic_t __refcnt; /* 64-bit offset 64 */ 34 | + int __pad2[15]; 35 | #endif 36 | int __use; 37 | unsigned long lastuse; 38 | -------------------------------------------------------------------------------- /nonapi-realtek.patch: -------------------------------------------------------------------------------- 1 | see issue #3018 2 | 3 | --- linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c~ 2023-11-08 10:56:25.000000000 +0000 4 | +++ linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c 2024-01-04 00:15:54.348399491 +0000 5 | @@ -4648,7 +4648,7 @@ 6 | phy_init_hw(tp->phydev); 7 | phy_resume(tp->phydev); 8 | rtl8169_init_phy(tp); 9 | - napi_enable(&tp->napi); 10 | +// napi_enable(&tp->napi); 11 | set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); 12 | rtl_reset_work(tp); 13 | 14 | --- linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c~ 2024-01-04 00:15:54.000000000 +0000 15 | +++ linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c 2024-01-04 17:25:36.357314237 +0000 16 | @@ -3962,7 +3962,7 @@ 17 | for (i = 0; i < NUM_RX_DESC; i++) 18 | 
rtl8169_mark_to_asic(tp->RxDescArray + i); 19 | 20 | - napi_enable(&tp->napi); 21 | +// napi_enable(&tp->napi); 22 | rtl_hw_start(tp); 23 | } 24 | 25 | -------------------------------------------------------------------------------- /options.conf: -------------------------------------------------------------------------------- 1 | autoupdate = true 2 | -------------------------------------------------------------------------------- /posted_msi.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.12.1/drivers/iommu/irq_remapping.c~ 2024-11-22 14:30:26.000000000 +0000 2 | +++ linux-6.12.1/drivers/iommu/irq_remapping.c 2025-03-12 17:17:13.684425681 +0000 3 | @@ -24,7 +24,7 @@ 4 | 5 | int disable_irq_post = 0; 6 | 7 | -bool enable_posted_msi __ro_after_init; 8 | +bool enable_posted_msi __ro_after_init = true; 9 | 10 | static int disable_irq_remap; 11 | static struct irq_remap_ops *remap_ops; 12 | -------------------------------------------------------------------------------- /ratelimit-sched-yield.patch: -------------------------------------------------------------------------------- 1 | From 467904416b3786c9f2b29ca683d36cb2523ae7ce Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 17 Oct 2024 16:29:50 +0100 4 | Subject: [PATCH] handle sched_yield gracefully when being hammered 5 | 6 | Some misguided apps hammer sched_yield() in a tight loop (they should be using futexes instead) 7 | which causes massive lock contention even if there is little work to do or to yield to. 
8 | rate limit yielding since the base scheduler does a pretty good job already about just 9 | running the right things 10 | 11 | Signed-off-by: Colin Ian King 12 | --- 13 | kernel/sched/syscalls.c | 12 ++++++++++++ 14 | 1 file changed, 12 insertions(+) 15 | 16 | diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c 17 | index ae1b42775ef9..441ac65f4f15 100644 18 | --- a/kernel/sched/syscalls.c 19 | +++ b/kernel/sched/syscalls.c 20 | @@ -1456,10 +1456,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, 21 | return ret; 22 | } 23 | 24 | +static DEFINE_PER_CPU(unsigned long, last_yield); 25 | + 26 | static void do_sched_yield(void) 27 | { 28 | struct rq_flags rf; 29 | struct rq *rq; 30 | + int cpu = raw_smp_processor_id(); 31 | + 32 | + cond_resched(); 33 | + 34 | + /* rate limit yielding to something sensible */ 35 | + 36 | + if (!time_after(jiffies, per_cpu(last_yield, cpu))) 37 | + return; 38 | + 39 | + per_cpu(last_yield, cpu) = jiffies; 40 | 41 | rq = this_rq_lock_irq(&rf); 42 | 43 | -- 44 | 2.46.2 45 | 46 | -------------------------------------------------------------------------------- /release: -------------------------------------------------------------------------------- 1 | 1574 2 | -------------------------------------------------------------------------------- /revert-regression.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.10.11/mm/mmap.c~ 2024-09-18 17:25:18.000000000 +0000 2 | +++ linux-6.10.11/mm/mmap.c 2024-09-26 23:45:20.399101364 +0000 3 | @@ -1881,10 +1881,11 @@ 4 | 5 | if (get_area) { 6 | addr = get_area(file, addr, len, pgoff, flags); 7 | - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 8 | +// } 9 | +//else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 10 | /* Ensures that larger anonymous mappings are THP aligned.
*/ 11 | - addr = thp_get_unmapped_area_vmflags(file, addr, len, 12 | - pgoff, flags, vm_flags); 13 | + //addr = thp_get_unmapped_area_vmflags(file, addr, len, 14 | +// pgoff, flags, vm_flags); 15 | } else { 16 | addr = mm_get_unmapped_area_vmflags(current->mm, file, addr, len, 17 | pgoff, flags, vm_flags); 18 | -------------------------------------------------------------------------------- /scale-net-alloc.patch: -------------------------------------------------------------------------------- 1 | diff --git a/include/net/sock.h b/include/net/sock.h 2 | index 4e787285fc66..3e045f6eb6ee 100644 3 | --- a/include/net/sock.h 4 | +++ b/include/net/sock.h 5 | @@ -1684,10 +1684,17 @@ static inline void sk_mem_charge(struct sock *sk, int size) 6 | 7 | static inline void sk_mem_uncharge(struct sock *sk, int size) 8 | { 9 | + int reclaimable, reclaim_threshold; 10 | + 11 | + reclaim_threshold = 64 * 1024; 12 | if (!sk_has_account(sk)) 13 | return; 14 | sk_forward_alloc_add(sk, size); 15 | - sk_mem_reclaim(sk); 16 | + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); 17 | + if (reclaimable > reclaim_threshold) { 18 | + reclaimable -= reclaim_threshold; 19 | + __sk_mem_reclaim(sk, reclaimable); 20 | + } 21 | } 22 | 23 | /* 24 | -------------------------------------------------------------------------------- /scale.patch: -------------------------------------------------------------------------------- 1 | From 0986b8be8b9fe5baad3d34fc9f687dfedf28e100 Mon Sep 17 00:00:00 2001 2 | From: "Brett T. 
Warden" 3 | Date: Mon, 19 Sep 2022 08:52:45 -0700 4 | Subject: [PATCH] scale 5 | 6 | --- 7 | include/linux/page_counter.h | 1 + 8 | mm/memcontrol.c | 2 +- 9 | 2 files changed, 2 insertions(+), 1 deletion(-) 10 | 11 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c 12 | index b69979c9ced5..7eadbafc006b 100644 13 | --- a/mm/memcontrol.c 14 | +++ b/mm/memcontrol.c 15 | @@ -625,7 +625,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) 16 | cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); 17 | 18 | x = __this_cpu_add_return(stats_updates, abs(val)); 19 | - if (x > MEMCG_CHARGE_BATCH) { 20 | + if (x > MEMCG_CHARGE_BATCH * 128) { 21 | /* 22 | * If stats_flush_threshold exceeds the threshold 23 | * (>num_online_cpus()), cgroup stats update will be triggered 24 | -- 25 | -------------------------------------------------------------------------------- /sched-hybrid3.patch: -------------------------------------------------------------------------------- 1 | From mboxrd@z Thu Jan 1 00:00:00 1970 2 | Return-Path: 3 | X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on 4 | aws-us-west-2-korg-lkml-1.web.codeaurora.org 5 | Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) 6 | by smtp.lore.kernel.org (Postfix) with ESMTP id DC516ECAAA3 7 | for ; Thu, 25 Aug 2022 22:49:59 +0000 (UTC) 8 | Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand 9 | id S244115AbiHYWt6 (ORCPT ); 10 | Thu, 25 Aug 2022 18:49:58 -0400 11 | Received: from lindbergh.monkeyblade.net ([23.128.96.19]:38550 "EHLO 12 | lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org 13 | with ESMTP id S243799AbiHYWtn (ORCPT 14 | ); 15 | Thu, 25 Aug 2022 18:49:43 -0400 16 | DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; 17 | d=intel.com; i=@intel.com; q=dns/txt; s=Intel; 18 | t=1661467782; x=1693003782; 19 | h=from:to:cc:subject:date:message-id:in-reply-to: 20 | references; 21 | bh=GkDXgCnuNRjnWESTDjDb3InPMKM7a1XvTOW385F9W9Q=; 
22 | b=RlqfFRV48+YgCLi3VvMumNkY8iTCQ+ZgMOi9qBiRWJQPbHoq8dp3elKs 23 | cO4ZrsipTyb7Aze7C01EWhkyyKGKN3ymsclD4XTGj3yl+szdhV9MXOviP 24 | JErGdVvnIz3bR1LEt2mWZ0ct+MJwYPz5b1uaENwlXPfBURNyErllrjI5A 25 | ULF/1vD/z+RiReQBOQ4OcZzBVDblX5sCtn7pREX91EWlqsXeNJwomHqp8 26 | zG0QiSdgk7wt8XFqLuvo2x8w77etQsXepcyFu5c/JN2uzY5iOm5v6rDuk 27 | lSpik8kjAHerCkLSJzyoxuzW0N+yLTYLbU93JcT4AHh37xp0gTZIVnEUC 28 | Q==; 29 | X-IronPort-AV: E=McAfee;i="6500,9779,10450"; a="295153922" 30 | X-IronPort-AV: E=Sophos;i="5.93,264,1654585200"; 31 | d="scan'208";a="295153922" 32 | X-ExtLoop1: 1 33 | X-IronPort-AV: E=Sophos;i="5.93,264,1654585200"; 34 | d="scan'208";a="678642681" 35 | From: Ricardo Neri 36 | To: "Peter Zijlstra (Intel)" , 37 | Juri Lelli , 38 | Vincent Guittot 39 | Cc: Ricardo Neri , 40 | "Ravi V. Shankar" , 41 | Ben Segall , 42 | Daniel Bristot de Oliveira , 43 | Dietmar Eggemann , 44 | Len Brown , Mel Gorman , 45 | "Rafael J. Wysocki" , 46 | Srinivas Pandruvada , 47 | Steven Rostedt , 48 | Tim Chen , 49 | Valentin Schneider , x86@kernel.org, 50 | linux-kernel@vger.kernel.org, 51 | Ricardo Neri , 52 | "Tim C . Chen" 53 | Subject: [PATCH 3/4] sched/fair: Let lower-priority CPUs do active balancing 54 | Date: Thu, 25 Aug 2022 15:55:28 -0700 55 | Message-Id: <20220825225529.26465-4-ricardo.neri-calderon@linux.intel.com> 56 | X-Mailer: git-send-email 2.17.1 57 | In-Reply-To: <20220825225529.26465-1-ricardo.neri-calderon@linux.intel.com> 58 | References: <20220825225529.26465-1-ricardo.neri-calderon@linux.intel.com> 59 | Precedence: bulk 60 | List-ID: 61 | X-Mailing-List: linux-kernel@vger.kernel.org 62 | 63 | When more than one SMT siblings of a physical core are busy, an idle CPU 64 | of lower priority can help. 65 | 66 | Indicate that the low priority CPU can do active balancing from the high- 67 | priority CPU only if they belong to separate cores. 68 | 69 | Cc: Ben Segall 70 | Cc: Daniel Bristot de Oliveira 71 | Cc: Dietmar Eggemann 72 | Cc: Len Brown 73 | Cc: Mel Gorman 74 | Cc: Rafael J. 
Wysocki 75 | Cc: Srinivas Pandruvada 76 | Cc: Steven Rostedt 77 | Cc: Tim C. Chen 78 | Cc: Valentin Schneider 79 | Cc: x86@kernel.org 80 | Cc: linux-kernel@vger.kernel.org 81 | Reviewed-by: Len Brown 82 | Signed-off-by: Ricardo Neri 83 | --- 84 | kernel/sched/fair.c | 7 ++++++- 85 | 1 file changed, 6 insertions(+), 1 deletion(-) 86 | 87 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 88 | index 810645eb58ed..9b608b31080f 100644 89 | --- a/kernel/sched/fair.c 90 | +++ b/kernel/sched/fair.c 91 | @@ -9759,9 +9759,14 @@ asym_active_balance(struct lb_env *env) 92 | * ASYM_PACKING needs to force migrate tasks from busy but 93 | * lower priority CPUs in order to pack all tasks in the 94 | * highest priority CPUs. 95 | + * 96 | + * If the busy CPU has higher priority but is an SMT sibling 97 | + * in which other SMT siblings are also busy, a lower-priority 98 | + * CPU in a separate core can help. 99 | */ 100 | return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) && 101 | - sched_asym_prefer(env->dst_cpu, env->src_cpu); 102 | + (sched_asym_prefer(env->dst_cpu, env->src_cpu) || 103 | + !(env->sd->flags & SD_SHARE_CPUCAPACITY)); 104 | } 105 | 106 | static inline bool 107 | -- 108 | 2.25.1 109 | 110 | 111 | -------------------------------------------------------------------------------- /scripts/develop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- 3 | # ex: ts=8 sw=4 sts=4 et filetype=sh 4 | 5 | SPECFILE=$1 6 | DESTDIR=$2 7 | 8 | for c in grep cmp sha1sum make tar git 9 | do 10 | if ! command -v $c > /dev/null 11 | then 12 | echo >&2 "The script needs the \"$c\" command, and it was not found." 13 | exit 1 14 | fi 15 | done 16 | 17 | if [ ! ${SPECFILE} ] 18 | then 19 | echo >&2 "${SPECFILE} not found" 20 | exit 1 21 | fi 22 | 23 | if [ -z "${DESTDIR}" ] 24 | then 25 | DESTDIR=. 
26 | else 27 | mkdir -p ${DESTDIR} 28 | fi 29 | 30 | SRC_URL=$(grep "^Source0:" "${SPECFILE}" | cut -f 2- -d ':' | tr -d " ") 31 | SRC_FILE=${SRC_URL##*/} 32 | SRC_DIR=${SRC_FILE%*.tar.xz} 33 | SRC_VER=${SRC_DIR#*-} 34 | 35 | if [ ! -f ${SRC_FILE} ] 36 | then 37 | # Get upstream sources 38 | if ! curl --fail -LO ${SRC_URL} 39 | then 40 | echo >&2 "Cannot download ${SRC_FILE}" 41 | exit 3 42 | fi 43 | fi 44 | 45 | echo $(sha1sum ${SRC_FILE} | cut -d\ -f1)/${SRC_FILE} > upstream.check 46 | 47 | if ! cmp --quiet upstream upstream.check 48 | then 49 | echo >&2 "${SRC_FILE} checksum fails" 50 | rm upstream.check 51 | exit 2 52 | fi 53 | 54 | rm upstream.check 55 | 56 | rm -rf ${DESTDIR}/${SRC_DIR} 57 | tar xf ${SRC_FILE} -C ${DESTDIR} 58 | 59 | git -C ${DESTDIR}/${SRC_DIR} init --quiet 60 | git -C ${DESTDIR}/${SRC_DIR} config gc.auto 0 61 | git -C ${DESTDIR}/${SRC_DIR} add --all 62 | git -C ${DESTDIR}/${SRC_DIR} commit -m "${PKG_NAME} ${SRC_VER}" --quiet 63 | git -C ${DESTDIR}/${SRC_DIR} tag -a -m "v${SRC_VER}" "v${SRC_VER}" 64 | 65 | for p in CVE* [0-9]*.patch 66 | do 67 | if [ -f $p ] 68 | then 69 | if ! git -C ${DESTDIR}/${SRC_DIR} am --quiet $(realpath $p) 70 | then 71 | echo >&2 "Error at: ${p}" 72 | exit 4 73 | fi 74 | fi 75 | done 76 | 77 | cp config ${DESTDIR}/${SRC_DIR}/.config 78 | 79 | echo 80 | echo "The linux source plus Clear Linux patches is" 81 | echo "placed at: \"$(realpath ${DESTDIR}/${SRC_DIR})\"" 82 | -------------------------------------------------------------------------------- /scripts/port-to-current.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- 3 | # ex: ts=8 sw=4 sts=4 et filetype=sh 4 | # 5 | # SPDX-License-Identifier: GPL-2.0 6 | # 7 | # Port old patches to new base 8 | # 9 | # Usage: 10 | # 1. Goto current kernel source tree 11 | # 2. 
/path/to/this/script /path/to/old/patch/set/*.patch 12 | 13 | tmpdir=$(mktemp -d) 14 | rejf=${tmpdir}/rej 15 | 16 | for p in $* 17 | do 18 | if ! git am --quiet $p 2> /dev/null 19 | then 20 | rm -f ${rejf} 21 | if ! patch --quiet --reject-file=${rejf} --forward -p1 < $p 22 | then 23 | if [ -f ${rejf} ] 24 | then 25 | if [ -n "${DISPLAY}" ] 26 | then 27 | gvim -f ${rejf} 28 | else 29 | vim ${rejf} 30 | fi 31 | fi 32 | fi 33 | git status 34 | echo $p 35 | read dopause 36 | if git diff --no-ext-diff --quiet 37 | then 38 | git am --quiet --skip 39 | git -C ${p%/*} rm --quiet ${p#*/} 40 | else 41 | git add --all 42 | git am --quiet --continue 43 | fi 44 | fi 45 | done 46 | rm -rf ${tmpdir} 47 | -------------------------------------------------------------------------------- /scripts/to-spec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- 3 | # ex: ts=8 sw=4 sts=4 et filetype=sh 4 | # 5 | # SPDX-License-Identifier: GPL-3.0-or-later 6 | 7 | KLR_SPEC_FILE=$1 8 | repo_path=. 
9 | tmpd=$(mktemp -d /tmp/spec.XXX) 10 | 11 | sed -i '/PK XXXX/,/#END/{//!d}' ${KLR_SPEC_FILE} 12 | sed -i '/patchXXXX/,/End XXXX/{//!d}' ${KLR_SPEC_FILE} 13 | 14 | for patch in ${repo_path}/[01234]*.patch 15 | do 16 | P=${patch##*/} 17 | N=$(echo ${P} | cut -c 1-4) 18 | echo "Patch${N}: ${P}" >> ${tmpd}/PatchXXXX 19 | echo "%patch${N} -p1" >> ${tmpd}/patchYYYY 20 | done 21 | 22 | sed -i "/PK XXXX/r ${tmpd}/PatchXXXX" ${KLR_SPEC_FILE} 23 | sed -i "/patchXXXX/r ${tmpd}/patchYYYY" ${KLR_SPEC_FILE} 24 | 25 | rm -rf ${tmpd} 26 | -------------------------------------------------------------------------------- /slack.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.5.1/init/init_task.c~ 2023-09-02 07:13:30.000000000 +0000 2 | +++ linux-6.5.1/init/init_task.c 2023-10-30 15:12:13.920976572 +0000 3 | @@ -130,7 +130,7 @@ 4 | .journal_info = NULL, 5 | INIT_CPU_TIMERS(init_task) 6 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), 7 | - .timer_slack_ns = 50000, /* 50 usec default slack */ 8 | + .timer_slack_ns = 50, /* 50 nsec default slack */ 9 | .thread_pid = &init_struct_pid, 10 | .thread_group = LIST_HEAD_INIT(init_task.thread_group), 11 | .thread_node = LIST_HEAD_INIT(init_signals.thread_head), 12 | -------------------------------------------------------------------------------- /testresults: -------------------------------------------------------------------------------- 1 | Total : 0 2 | Pass : 0 3 | Fail : 0 4 | Skip : 0 5 | XFail : 0 6 | -------------------------------------------------------------------------------- /update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | PKG=linux 6 | STABLE_VER=6.14 7 | SPEC=./$PKG.spec 8 | 9 | CUR_VER=$(rpmspec --srpm -q --qf="%{VERSION}" $SPEC) 10 | CUR_VER=${CUR_VER//./\\.} 11 | 12 | rm -f releases.json 13 | curl -sSf -O -L https://www.kernel.org/releases.json 14 | NEW_VER=$(python3 
./filter-stable.py $STABLE_VER releases.json) 15 | 16 | sed -i -e "s/$CUR_VER/$NEW_VER/g" $SPEC 17 | 18 | if ! git diff --quiet $SPEC; then 19 | make generateupstream 20 | make bumpnogit 21 | git add $SPEC upstream release 22 | git commit -m "Stable update to $NEW_VER" $SPEC upstream release 23 | make koji-nowait 24 | fi 25 | -------------------------------------------------------------------------------- /upstream: -------------------------------------------------------------------------------- 1 | f0d072bf33666a4fc96bc1a9246d0747371e4c88/linux-6.14.8.tar.xz 2 | --------------------------------------------------------------------------------