├── .gitignore ├── 0001-add-umonitor-umwait-C0.x-C-states.patch ├── 0001-mm-memcontrol-add-some-branch-hints-based-on-gcov-an.patch ├── 0001-powerbump-functionality.patch ├── 0001-sched-cpuset-Fix-dl_cpu_busy-panic-due-to-empty-cs-c.patch ├── 0001-sched-migrate.patch ├── 0001-sched-numa-Initialise-numa_migrate_retry.patch ├── 0002-add-networking-support-for-powerbump.patch ├── 0002-exit-Fix-typo-in-comment-s-sub-theads-sub-threads.patch ├── 0002-sched-core-add-some-branch-hints-based-on-gcov-analy.patch ├── 0002-sched-migrate.patch ├── 0002-sched-numa-Do-not-swap-tasks-between-nodes-when-spar.patch ├── 0003-futex-bump.patch ├── 0003-sched-numa-Apply-imbalance-limitations-consistently.patch ├── 0003-sched-rt-Fix-Sparse-warnings-due-to-undefined-rt.c-d.patch ├── 0004-sched-core-Do-not-requeue-task-on-CPU-excluded-from-.patch ├── 0004-sched-numa-Adjust-imb_numa_nr-to-a-better-approximat.patch ├── 0005-sched-fair-Consider-CPU-affinity-when-allowing-NUMA-.patch ├── 0006-sched-fair-Optimize-and-simplify-rq-leaf_cfs_rq_list.patch ├── 0007-sched-deadline-Use-proc_douintvec_minmax-limit-minim.patch ├── 0008-sched-Allow-newidle-balancing-to-bail-out-of-load_ba.patch ├── 0009-sched-Fix-the-check-of-nr_running-at-queue-wakelist.patch ├── 0010-sched-Remove-the-limitation-of-WF_ON_CPU-on-wakelist.patch ├── 0011-selftests-rseq-riscv-use-rseq_get_abi-helper.patch ├── 0012-selftests-rseq-riscv-fix-literal-suffix-warning.patch ├── 0013-selftests-rseq-check-if-libc-rseq-support-is-registe.patch ├── 0014-sched-fair-Remove-redundant-word.patch ├── 0015-sched-Remove-unused-function-group_first_cpu.patch ├── 0016-sched-only-perform-capability-check-on-privileged-op.patch ├── 0017-sched-fair-Introduce-SIS_UTIL-to-search-idle-CPU-bas.patch ├── 0018-sched-fair-Provide-u64-read-for-32-bits-arch-helper.patch ├── 0019-sched-fair-Decay-task-PELT-values-during-wakeup-migr.patch ├── 0020-sched-drivers-Remove-max-param-from-effective_cpu_ut.patch ├── 
0021-sched-fair-Rename-select_idle_mask-to-select_rq_mask.patch ├── 0022-sched-fair-Use-the-same-cpumask-per-PD-throughout-fi.patch ├── 0023-sched-fair-Remove-task_util-from-effective-utilizati.patch ├── 0024-sched-fair-Remove-the-energy-margin-in-feec.patch ├── 0025-sched-core-add-forced-idle-accounting-for-cgroups.patch ├── 0026-sched-core-Use-try_cmpxchg-in-set_nr_-and_not-if-_po.patch ├── 0027-sched-fair-fix-case-with-reduced-capacity-CPU.patch ├── 0028-sched-core-Always-flush-pending-blk_plug.patch ├── 0029-nohz-full-sched-rt-Fix-missed-tick-reenabling-bug-in.patch ├── 0030-sched-core-Fix-the-bug-that-task-won-t-enqueue-into-.patch ├── 0031-rseq-Deprecate-RSEQ_CS_FLAG_NO_RESTART_ON_-flags.patch ├── 0032-rseq-Kill-process-when-unknown-flags-are-encountered.patch ├── 0050-Revert-ext4-do-not-create-EA-inode-under-buffer-lock.patch ├── 0051-block-bfq-Fix-division-by-zero-error-on-zero-wsum.patch ├── 0101-i8042-decrease-debug-message-level-to-info.patch ├── 0102-increase-the-ext4-default-commit-age.patch ├── 0103-silence-rapl.patch ├── 0104-pci-pme-wakeups.patch ├── 0106-intel_idle-tweak-cpuidle-cstates.patch ├── 0107-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch ├── 0108-smpboot-reuse-timer-calibration.patch ├── 0109-initialize-ata-before-graphics.patch ├── 0110-give-rdrand-some-credit.patch ├── 0111-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch ├── 0112-init-wait-for-partition-and-retry-scan.patch ├── 0113-print-fsync-count-for-bootchart.patch ├── 0114-add-boot-option-to-allow-unsigned-modules.patch ├── 0115-enable-stateless-firmware-loading.patch ├── 0116-migrate-some-systemd-defaults-to-the-kernel-defaults.patch ├── 0117-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch ├── 0118-add-scheduler-turbo3-patch.patch ├── 0120-do-accept-in-LIFO-order-for-cache-efficiency.patch ├── 0121-locking-rwsem-spin-faster.patch ├── 0122-ata-libahci-ignore-staggered-spin-up.patch ├── 0123-print-CPU-that-faults.patch ├── 
0124-x86-microcode-Add-an-option-to-reload-microcode-even.patch ├── 0125-nvme-workaround.patch ├── 0126-don-t-report-an-error-if-PowerClamp-run-on-other-CPU.patch ├── 0127-lib-raid6-add-patch.patch ├── 0128-itmt_epb-use-epb-to-scale-itmt.patch ├── 0129-mm-wakeups-remove-a-wakeup.patch ├── 0130-itmt2-ADL-fixes.patch ├── 0131-add-a-per-cpu-minimum-high-watermark-an-tune-batch-s.patch ├── 0132-prezero-20220308.patch ├── 0133-novector.patch ├── 0134-md-raid6-algorithms-scale-test-duration-for-speedier.patch ├── 0135-initcall-only-print-non-zero-initcall-debug-to-speed.patch ├── 0136-crypto-kdf-make-the-module-init-call-a-late-init-cal.patch ├── 0149-select-do_pollfd-add-unlikely-branch-hint-return-pat.patch ├── 0150-select-core_sys_select-add-unlikely-branch-hint-on-r.patch ├── 0158-clocksource-only-perform-extended-clocksource-checks.patch ├── 0161-ACPI-align-slab-buffers-for-improved-memory-performa.patch ├── 0162-extra-optmization-flags.patch ├── 0163-thermal-intel-powerclamp-check-MWAIT-first-use-pr_wa.patch ├── 0164-KVM-VMX-make-vmx-init-a-late-init-call-to-get-to-ini.patch ├── 0166-sched-fair-remove-upper-limit-on-cpu-number.patch ├── 0167-net-sock-increase-default-number-of-_SK_MEM_PACKETS-.patch ├── 0169-mm-mincore-improve-performance-by-adding-an-unlikely.patch ├── 0170-sched-Add-unlikey-branch-hints-to-several-system-cal.patch ├── 0171-kcmp-improve-performance-adding-an-unlikely-hint-to-.patch ├── 0173-cpuidle-psd-add-power-sleep-demotion-prevention-for-.patch ├── 0174-memcg-increase-MEMCG_CHARGE_BATCH-to-128.patch ├── 0175-readdir-add-unlikely-hint-on-len-check.patch ├── Makefile ├── Makefile.custom ├── adlrdt.patch ├── archive ├── 0114-tweak-perfbias.patch ├── 0123-zero-extra-registers.patch ├── 0131-overload-on-wakeup.patch ├── 0151-mm-Export-do_madvise.patch- ├── 0152-x86-kvm-Notify-host-to-release-pages.patch- ├── 0153-x86-Return-memory-from-guest-to-host-kernel.patch- ├── 0154-sysctl-vm-Fine-grained-cache-shrinking.patch- ├── 
1011-virtualbox-add-module-sources.patch ├── 1012-virtualbox-setup-Kconfig-and-Makefiles.patch ├── 2001-opae-add-intel-fpga-drivers.patch- ├── 2002-opae-add-Kconfig-and-Makefile.patch- ├── 3001-Add-sysdig-0.20-driver.patch- └── 3002Add-sysdig-to-kernel-build-system.patch- ├── backport-ioboost.patch ├── better_idle_balance.patch ├── cmdline ├── config ├── cstatedemotion.patch ├── epp-retune.patch ├── filter-stable.py ├── iommu.patch ├── kdf-boottime.patch ├── kvm-printk.patch ├── libsgrowdown.patch ├── linux.spec ├── mm-lru_cache_disable-use-synchronize_rcu_expedited.patch ├── mmput_async.patch ├── netscale.patch ├── nonapi-realtek.patch ├── options.conf ├── posted_msi.patch ├── ratelimit-sched-yield.patch ├── rcuref-1.patch ├── rcuref-2.patch ├── rcuref-3.patch ├── release ├── revert-regression.patch ├── scale-net-alloc.patch ├── scale.patch ├── sched-hybrid1.patch ├── sched-hybrid2.patch ├── sched-hybrid3.patch ├── sched-hybrid4.patch ├── scripts ├── develop.sh ├── port-to-current.sh └── to-spec.sh ├── slack.patch ├── testresults ├── update.sh ├── upstream └── vmidle.patch /.gitignore: -------------------------------------------------------------------------------- 1 | .*~ 2 | *~ 3 | *.info 4 | *.mod 5 | *.swp 6 | .repo-index 7 | *.log 8 | build.log.round* 9 | *.tar.* 10 | *.tgz 11 | !*.tar.*.* 12 | *.zip 13 | *.jar 14 | *.pom 15 | *.xml 16 | commitmsg 17 | results/ 18 | rpms/ 19 | for-review.txt 20 | 21 | linux-5.* 22 | /releases.json 23 | -------------------------------------------------------------------------------- /0001-mm-memcontrol-add-some-branch-hints-based-on-gcov-an.patch: -------------------------------------------------------------------------------- 1 | From c50d383b767ea7337b58fc004dd9e2cffebb8524 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 1 Feb 2023 10:40:24 +0000 4 | Subject: [PATCH] mm/memcontrol: add some branch hints based on gcov analysis 5 | 6 | Signed-off-by: Colin Ian King 7 | --- 8 | mm/memcontrol.c | 6 +++--- 9 | 1 
file changed, 3 insertions(+), 3 deletions(-) 10 | 11 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c 12 | index a1a35c12635e..762d8a819c4a 100644 13 | --- a/mm/memcontrol.c 14 | +++ b/mm/memcontrol.c 15 | @@ -611,7 +611,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) 16 | cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); 17 | 18 | x = __this_cpu_add_return(stats_updates, abs(val)); 19 | - if (x > MEMCG_CHARGE_BATCH * 128) { 20 | + if (unlikely(x > MEMCG_CHARGE_BATCH * 128)) { 21 | /* 22 | * If stats_flush_threshold exceeds the threshold 23 | * (>num_online_cpus()), cgroup stats update will be triggered 24 | @@ -817,7 +817,7 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, 25 | __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); 26 | 27 | /* Update memcg and lruvec */ 28 | - if (!mem_cgroup_disabled()) 29 | + if (likely(!mem_cgroup_disabled())) 30 | __mod_memcg_lruvec_state(lruvec, idx, val); 31 | } 32 | 33 | @@ -2136,7 +2136,7 @@ void lock_page_memcg(struct page *page) 34 | 35 | static void __folio_memcg_unlock(struct mem_cgroup *memcg) 36 | { 37 | - if (memcg && memcg->move_lock_task == current) { 38 | + if (likely(memcg && memcg->move_lock_task == current)) { 39 | unsigned long flags = memcg->move_lock_flags; 40 | 41 | memcg->move_lock_task = NULL; 42 | -- 43 | 2.39.1 44 | 45 | -------------------------------------------------------------------------------- /0001-sched-cpuset-Fix-dl_cpu_busy-panic-due-to-empty-cs-c.patch: -------------------------------------------------------------------------------- 1 | From b6e8d40d43ae4dec00c8fea2593eeea3114b8f44 Mon Sep 17 00:00:00 2001 2 | From: Waiman Long 3 | Date: Tue, 2 Aug 2022 21:54:51 -0400 4 | Subject: [PATCH 1/4] sched, cpuset: Fix dl_cpu_busy() panic due to empty 5 | cs->cpus_allowed 6 | 7 | With cgroup v2, the cpuset's cpus_allowed mask can be empty indicating 8 | that the cpuset will just use the effective CPUs of its parent. 
So 9 | cpuset_can_attach() can call task_can_attach() with an empty mask. 10 | This can lead to cpumask_any_and() returns nr_cpu_ids causing the call 11 | to dl_bw_of() to crash due to percpu value access of an out of bound 12 | CPU value. For example: 13 | 14 | [80468.182258] BUG: unable to handle page fault for address: ffffffff8b6648b0 15 | : 16 | [80468.191019] RIP: 0010:dl_cpu_busy+0x30/0x2b0 17 | : 18 | [80468.207946] Call Trace: 19 | [80468.208947] cpuset_can_attach+0xa0/0x140 20 | [80468.209953] cgroup_migrate_execute+0x8c/0x490 21 | [80468.210931] cgroup_update_dfl_csses+0x254/0x270 22 | [80468.211898] cgroup_subtree_control_write+0x322/0x400 23 | [80468.212854] kernfs_fop_write_iter+0x11c/0x1b0 24 | [80468.213777] new_sync_write+0x11f/0x1b0 25 | [80468.214689] vfs_write+0x1eb/0x280 26 | [80468.215592] ksys_write+0x5f/0xe0 27 | [80468.216463] do_syscall_64+0x5c/0x80 28 | [80468.224287] entry_SYSCALL_64_after_hwframe+0x44/0xae 29 | 30 | Fix that by using effective_cpus instead. For cgroup v1, effective_cpus 31 | is the same as cpus_allowed. For v2, effective_cpus is the real cpumask 32 | to be used by tasks within the cpuset anyway. 33 | 34 | Also update task_can_attach()'s 2nd argument name to cs_effective_cpus to 35 | reflect the change. In addition, a check is added to task_can_attach() 36 | to guard against the possibility that cpumask_any_and() may return a 37 | value >= nr_cpu_ids. 
38 | 39 | Fixes: 7f51412a415d ("sched/deadline: Fix bandwidth check/update when migrating tasks between exclusive cpusets") 40 | Signed-off-by: Waiman Long 41 | Signed-off-by: Ingo Molnar 42 | Acked-by: Juri Lelli 43 | Link: https://lore.kernel.org/r/20220803015451.2219567-1-longman@redhat.com 44 | --- 45 | include/linux/sched.h | 2 +- 46 | kernel/cgroup/cpuset.c | 2 +- 47 | kernel/sched/core.c | 8 +++++--- 48 | 3 files changed, 7 insertions(+), 5 deletions(-) 49 | 50 | diff --git a/include/linux/sched.h b/include/linux/sched.h 51 | index 88b8817b827d..6a060160f0db 100644 52 | --- a/include/linux/sched.h 53 | +++ b/include/linux/sched.h 54 | @@ -1813,7 +1813,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) 55 | } 56 | 57 | extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); 58 | -extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); 59 | +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); 60 | #ifdef CONFIG_SMP 61 | extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); 62 | extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); 63 | diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c 64 | index 71a418858a5e..58aadfda9b8b 100644 65 | --- a/kernel/cgroup/cpuset.c 66 | +++ b/kernel/cgroup/cpuset.c 67 | @@ -2239,7 +2239,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) 68 | goto out_unlock; 69 | 70 | cgroup_taskset_for_each(task, css, tset) { 71 | - ret = task_can_attach(task, cs->cpus_allowed); 72 | + ret = task_can_attach(task, cs->effective_cpus); 73 | if (ret) 74 | goto out_unlock; 75 | ret = security_task_setscheduler(task); 76 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 77 | index 5555e49c4e12..addc3c2d2122 100644 78 | --- a/kernel/sched/core.c 79 | +++ b/kernel/sched/core.c 80 | @@ -8980,7 +8980,7 @@ int 
cpuset_cpumask_can_shrink(const struct cpumask *cur, 81 | } 82 | 83 | int task_can_attach(struct task_struct *p, 84 | - const struct cpumask *cs_cpus_allowed) 85 | + const struct cpumask *cs_effective_cpus) 86 | { 87 | int ret = 0; 88 | 89 | @@ -8999,9 +8999,11 @@ int task_can_attach(struct task_struct *p, 90 | } 91 | 92 | if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, 93 | - cs_cpus_allowed)) { 94 | - int cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed); 95 | + cs_effective_cpus)) { 96 | + int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus); 97 | 98 | + if (unlikely(cpu >= nr_cpu_ids)) 99 | + return -EINVAL; 100 | ret = dl_cpu_busy(cpu, p); 101 | } 102 | 103 | -- 104 | 2.37.1 105 | 106 | -------------------------------------------------------------------------------- /0001-sched-migrate.patch: -------------------------------------------------------------------------------- 1 | Subject: [PATCH v5 1/2] sched/fair: Record the average duration of a task 2 | Date: Fri, 3 Feb 2023 13:17:59 +0800 3 | Message-Id: <155aa36ba14b8a1f8e6c3ccda7999125edfff990.1675361144.git.yu.c.chen@intel.com> 4 | X-Mailer: git-send-email 2.25.1 5 | In-Reply-To: 6 | References: 7 | MIME-Version: 1.0 8 | Content-Transfer-Encoding: 8bit 9 | Precedence: bulk 10 | List-ID: 11 | X-Mailing-List: linux-kernel@vger.kernel.org 12 | 13 | Record the average duration of a task, as there is a requirement 14 | to leverage this information for better task placement. 15 | 16 | At first thought the (p->se.sum_exec_runtime / p->nvcsw) 17 | can be used to measure the task duration. However, the 18 | history long past was factored too heavily in such a formula. 19 | Ideally, the old activity should decay and not affect 20 | the current status too much. 
21 | 22 | Although something based on PELT can be used, se.util_avg might 23 | not be appropriate to describe the task duration: 24 | Task p1 and task p2 are doing frequent ping-pong scheduling on 25 | one CPU, both p1 and p2 have a short duration, but the util_avg 26 | can be up to 50%, which is inconsistent with task duration. 27 | 28 | It was found that there was once a similar feature to track the 29 | duration of a task: 30 | commit ad4b78bbcbab ("sched: Add new wakeup preemption mode: WAKEUP_RUNNING") 31 | Unfortunately, it was reverted because it was an experiment. Pick the 32 | patch up again, by recording the average duration when a task voluntarily 33 | switches out. 34 | 35 | For example, suppose on CPU1, task p1 and p2 run alternatively: 36 | 37 | --------------------> time 38 | 39 | | p1 runs 1ms | p2 preempt p1 | p1 switch in, runs 0.5ms and blocks | 40 | ^ ^ ^ 41 | |_____________| |_____________________________________| 42 | ^ 43 | | 44 | p1 dequeued 45 | 46 | p1's duration in one section is (1 + 0.5)ms. Because if p2 does not 47 | preempt p1, p1 can run 1.5ms. This reflects the nature of a task: 48 | how long it wishes to run at most. 
49 | 50 | Suggested-by: Tim Chen 51 | Suggested-by: Vincent Guittot 52 | Signed-off-by: Chen Yu 53 | --- 54 | include/linux/sched.h | 3 +++ 55 | kernel/sched/core.c | 2 ++ 56 | kernel/sched/debug.c | 1 + 57 | kernel/sched/fair.c | 13 +++++++++++++ 58 | 4 files changed, 19 insertions(+) 59 | 60 | diff --git a/include/linux/sched.h b/include/linux/sched.h 61 | index 4df2b3e76b30..e21709402a31 100644 62 | --- a/include/linux/sched.h 63 | +++ b/include/linux/sched.h 64 | @@ -557,6 +557,9 @@ struct sched_entity { 65 | u64 prev_sum_exec_runtime; 66 | 67 | u64 nr_migrations; 68 | + u64 prev_sleep_sum_runtime; 69 | + /* average duration of a task */ 70 | + u64 dur_avg; 71 | 72 | #ifdef CONFIG_FAIR_GROUP_SCHED 73 | int depth; 74 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 75 | index 03b8529db73f..b805c5bdc7ff 100644 76 | --- a/kernel/sched/core.c 77 | +++ b/kernel/sched/core.c 78 | @@ -4379,6 +4379,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) 79 | p->se.prev_sum_exec_runtime = 0; 80 | p->se.nr_migrations = 0; 81 | p->se.vruntime = 0; 82 | + p->se.dur_avg = 0; 83 | + p->se.prev_sleep_sum_runtime = 0; 84 | INIT_LIST_HEAD(&p->se.group_node); 85 | 86 | #ifdef CONFIG_FAIR_GROUP_SCHED 87 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c 88 | index 1637b65ba07a..8d64fba16cfe 100644 89 | --- a/kernel/sched/debug.c 90 | +++ b/kernel/sched/debug.c 91 | @@ -1024,6 +1024,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, 92 | __PS("nr_involuntary_switches", p->nivcsw); 93 | 94 | P(se.load.weight); 95 | + P(se.dur_avg); 96 | #ifdef CONFIG_SMP 97 | P(se.avg.load_sum); 98 | P(se.avg.runnable_sum); 99 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 100 | index d4db72f8f84e..aa16611c7263 100644 101 | --- a/kernel/sched/fair.c 102 | +++ b/kernel/sched/fair.c 103 | @@ -6271,6 +6271,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) 104 | 105 | static void set_next_buddy(struct 
sched_entity *se); 106 | 107 | +static inline void dur_avg_update(struct task_struct *p, bool task_sleep) 108 | +{ 109 | + u64 dur; 110 | + 111 | + if (!task_sleep) 112 | + return; 113 | + 114 | + dur = p->se.sum_exec_runtime - p->se.prev_sleep_sum_runtime; 115 | + p->se.prev_sleep_sum_runtime = p->se.sum_exec_runtime; 116 | + update_avg(&p->se.dur_avg, dur); 117 | +} 118 | + 119 | /* 120 | * The dequeue_task method is called before nr_running is 121 | * decreased. We remove the task from the rbtree and 122 | @@ -6343,6 +6355,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) 123 | 124 | dequeue_throttle: 125 | util_est_update(&rq->cfs, p, task_sleep); 126 | + dur_avg_update(p, task_sleep); 127 | hrtick_update(rq); 128 | } 129 | 130 | -- 131 | 2.25.1 132 | 133 | 134 | -------------------------------------------------------------------------------- /0001-sched-numa-Initialise-numa_migrate_retry.patch: -------------------------------------------------------------------------------- 1 | From 70ce3ea9aa4ed901c8a90de667df5ef307766e71 Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Fri, 20 May 2022 11:35:16 +0100 4 | Subject: [PATCH 01/32] sched/numa: Initialise numa_migrate_retry 5 | 6 | On clone, numa_migrate_retry is inherited from the parent which means 7 | that the first NUMA placement of a task is non-deterministic. This 8 | affects when load balancing recognises numa tasks and whether to 9 | migrate "regular", "remote" or "all" tasks between NUMA scheduler 10 | domains. 
11 | 12 | Signed-off-by: Mel Gorman 13 | Signed-off-by: Peter Zijlstra (Intel) 14 | Tested-by: K Prateek Nayak 15 | Link: https://lore.kernel.org/r/20220520103519.1863-2-mgorman@techsingularity.net 16 | --- 17 | kernel/sched/fair.c | 1 + 18 | 1 file changed, 1 insertion(+) 19 | 20 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 21 | index 77b2048a9326..51836efe5931 100644 22 | --- a/kernel/sched/fair.c 23 | +++ b/kernel/sched/fair.c 24 | @@ -2885,6 +2885,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) 25 | p->node_stamp = 0; 26 | p->numa_scan_seq = mm ? mm->numa_scan_seq : 0; 27 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; 28 | + p->numa_migrate_retry = 0; 29 | /* Protect against double add, see task_tick_numa and task_numa_work */ 30 | p->numa_work.next = &p->numa_work; 31 | p->numa_faults = NULL; 32 | -- 33 | 2.37.1 34 | 35 | -------------------------------------------------------------------------------- /0002-add-networking-support-for-powerbump.patch: -------------------------------------------------------------------------------- 1 | From 3265f948dab9253e087030794b3f02c86c07dc92 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 5 Jan 2023 16:52:33 +0000 4 | Subject: [PATCH 2/2] add networking support for powerbump 5 | 6 | --- 7 | include/linux/powerbump.h | 3 ++- 8 | net/core/dev.c | 3 +++ 9 | 2 files changed, 5 insertions(+), 1 deletion(-) 10 | 11 | diff --git a/include/linux/powerbump.h b/include/linux/powerbump.h 12 | index 8fc81d958484..79dd40620ba0 100644 13 | --- a/include/linux/powerbump.h 14 | +++ b/include/linux/powerbump.h 15 | @@ -5,7 +5,8 @@ 16 | 17 | 18 | /* bump time constants, in msec */ 19 | -#define BUMP_FOR_DISK 3 20 | +#define BUMP_FOR_DISK 3 21 | +#define BUMP_FOR_NETWORK 3 22 | 23 | 24 | 25 | diff --git a/net/core/dev.c b/net/core/dev.c 26 | index 70e06853ba25..054fe9024982 100644 27 | --- a/net/core/dev.c 28 | +++ b/net/core/dev.c 29 | @@ -150,6 +150,7 @@ 30 | 
#include 31 | #include 32 | #include 33 | +#include 34 | 35 | #include "dev.h" 36 | #include "net-sysfs.h" 37 | @@ -5744,6 +5745,7 @@ int netif_receive_skb(struct sk_buff *skb) 38 | int ret; 39 | 40 | trace_netif_receive_skb_entry(skb); 41 | + give_power_bump(BUMP_FOR_NETWORK); 42 | 43 | ret = netif_receive_skb_internal(skb); 44 | trace_netif_receive_skb_exit(ret); 45 | @@ -5768,6 +5770,7 @@ void netif_receive_skb_list(struct list_head *head) 46 | 47 | if (list_empty(head)) 48 | return; 49 | + give_power_bump(BUMP_FOR_NETWORK); 50 | if (trace_netif_receive_skb_list_entry_enabled()) { 51 | list_for_each_entry(skb, head, list) 52 | trace_netif_receive_skb_list_entry(skb); 53 | -- 54 | 2.39.0 55 | 56 | -------------------------------------------------------------------------------- /0002-exit-Fix-typo-in-comment-s-sub-theads-sub-threads.patch: -------------------------------------------------------------------------------- 1 | From dcca34754a3f5290406403b8066e3b15dda9f4bf Mon Sep 17 00:00:00 2001 2 | From: Ingo Molnar 3 | Date: Wed, 3 Aug 2022 10:43:42 +0200 4 | Subject: [PATCH 2/4] exit: Fix typo in comment: s/sub-theads/sub-threads 5 | 6 | Cc: linux-kernel@vger.kernel.org 7 | Signed-off-by: Ingo Molnar 8 | --- 9 | kernel/exit.c | 2 +- 10 | 1 file changed, 1 insertion(+), 1 deletion(-) 11 | 12 | diff --git a/kernel/exit.c b/kernel/exit.c 13 | index 64c938ce36fe..84021b24f79e 100644 14 | --- a/kernel/exit.c 15 | +++ b/kernel/exit.c 16 | @@ -1051,7 +1051,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) 17 | * p->signal fields because the whole thread group is dead 18 | * and nobody can change them. 19 | * 20 | - * psig->stats_lock also protects us from our sub-theads 21 | + * psig->stats_lock also protects us from our sub-threads 22 | * which can reap other children at the same time. Until 23 | * we change k_getrusage()-like users to rely on this lock 24 | * we have to take ->siglock as well. 
25 | -- 26 | 2.37.1 27 | 28 | -------------------------------------------------------------------------------- /0002-sched-core-add-some-branch-hints-based-on-gcov-analy.patch: -------------------------------------------------------------------------------- 1 | From eae943f2b22979ae1b378d72f9b94085577f5800 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Mon, 17 Mar 2025 12:03:19 +0000 4 | Subject: [PATCH] sched/core: add some branch hints based on gcov analysis 5 | 6 | Patch refreshed for v6.13.7 7 | 8 | Signed-off-by: Colin Ian King 9 | --- 10 | kernel/sched/core.c | 8 ++++---- 11 | 1 file changed, 4 insertions(+), 4 deletions(-) 12 | 13 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 14 | index 86cb6db08168..d2ccde102870 100644 15 | --- a/kernel/sched/core.c 16 | +++ b/kernel/sched/core.c 17 | @@ -594,7 +594,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) 18 | 19 | /* Matches synchronize_rcu() in __sched_core_enable() */ 20 | preempt_disable(); 21 | - if (sched_core_disabled()) { 22 | + if (likely(sched_core_disabled())) { 23 | raw_spin_lock_nested(&rq->__lock, subclass); 24 | /* preempt_count *MUST* be > 1 */ 25 | preempt_enable_no_resched(); 26 | @@ -804,7 +804,7 @@ void update_rq_clock(struct rq *rq) 27 | #endif 28 | 29 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; 30 | - if (delta < 0) 31 | + if (unlikely(delta < 0)) 32 | return; 33 | rq->clock += delta; 34 | update_rq_clock_task(rq, delta); 35 | @@ -6106,7 +6106,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) 36 | struct rq *rq_i; 37 | bool need_sync; 38 | 39 | - if (!sched_core_enabled(rq)) 40 | + if (likely(!sched_core_enabled(rq))) 41 | return __pick_next_task(rq, prev, rf); 42 | 43 | cpu = cpu_of(rq); 44 | @@ -7278,7 +7278,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) 45 | #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) 46 | int __sched __cond_resched(void) 47 | { 48 | - if 
(should_resched(0) && !irqs_disabled()) { 49 | + if (unlikely(should_resched(0) && !irqs_disabled())) { 50 | preempt_schedule_common(); 51 | return 1; 52 | } 53 | -- 54 | 2.48.1 55 | 56 | -------------------------------------------------------------------------------- /0002-sched-numa-Do-not-swap-tasks-between-nodes-when-spar.patch: -------------------------------------------------------------------------------- 1 | From 13ede33150877d44756171e33570076882b17b0b Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Fri, 20 May 2022 11:35:17 +0100 4 | Subject: [PATCH 02/32] sched/numa: Do not swap tasks between nodes when spare 5 | capacity is available 6 | 7 | If a destination node has spare capacity but there is an imbalance then 8 | two tasks are selected for swapping. If the tasks have no numa group 9 | or are within the same NUMA group, it's simply shuffling tasks around 10 | without having any impact on the compute imbalance. Instead, it's just 11 | punishing one task to help another. 12 | 13 | Signed-off-by: Mel Gorman 14 | Signed-off-by: Peter Zijlstra (Intel) 15 | Tested-by: K Prateek Nayak 16 | Link: https://lore.kernel.org/r/20220520103519.1863-3-mgorman@techsingularity.net 17 | --- 18 | kernel/sched/fair.c | 9 +++++++++ 19 | 1 file changed, 9 insertions(+) 20 | 21 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 22 | index 51836efe5931..23da36c9cacb 100644 23 | --- a/kernel/sched/fair.c 24 | +++ b/kernel/sched/fair.c 25 | @@ -1790,6 +1790,15 @@ static bool task_numa_compare(struct task_numa_env *env, 26 | */ 27 | cur_ng = rcu_dereference(cur->numa_group); 28 | if (cur_ng == p_ng) { 29 | + /* 30 | + * Do not swap within a group or between tasks that have 31 | + * no group if there is spare capacity. Swapping does 32 | + * not address the load imbalance and helps one task at 33 | + * the cost of punishing another. 
34 | + */ 35 | + if (env->dst_stats.node_type == node_has_spare) 36 | + goto unlock; 37 | + 38 | imp = taskimp + task_weight(cur, env->src_nid, dist) - 39 | task_weight(cur, env->dst_nid, dist); 40 | /* 41 | -- 42 | 2.37.1 43 | 44 | -------------------------------------------------------------------------------- /0003-futex-bump.patch: -------------------------------------------------------------------------------- 1 | From c47e3b2e38ac2ac4c401f02048a2745f75e27f88 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 12 Jan 2023 19:19:04 +0000 4 | Subject: [PATCH 3/3] futex bump 5 | 6 | --- 7 | include/linux/powerbump.h | 1 + 8 | kernel/futex/waitwake.c | 2 ++ 9 | 2 files changed, 3 insertions(+) 10 | 11 | diff --git a/include/linux/powerbump.h b/include/linux/powerbump.h 12 | index 79dd40620ba0..1de5bb88725a 100644 13 | --- a/include/linux/powerbump.h 14 | +++ b/include/linux/powerbump.h 15 | @@ -7,6 +7,7 @@ 16 | /* bump time constants, in msec */ 17 | #define BUMP_FOR_DISK 3 18 | #define BUMP_FOR_NETWORK 3 19 | +#define BUMP_FOR_FUTEX 3 20 | 21 | 22 | 23 | diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c 24 | index ba01b9408203..e4fc09a98cbc 100644 25 | --- a/kernel/futex/waitwake.c 26 | +++ b/kernel/futex/waitwake.c 27 | @@ -3,6 +3,7 @@ 28 | #include 29 | #include 30 | #include 31 | +#include 32 | 33 | #include "futex.h" 34 | 35 | @@ -336,6 +337,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, 36 | */ 37 | set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); 38 | futex_queue(q, hb); 39 | + give_power_bump(BUMP_FOR_FUTEX); 40 | 41 | /* Arm the timer */ 42 | if (timeout) 43 | -- 44 | 2.39.0 45 | 46 | -------------------------------------------------------------------------------- /0003-sched-rt-Fix-Sparse-warnings-due-to-undefined-rt.c-d.patch: -------------------------------------------------------------------------------- 1 | From 87514b2c24f294c32e9e743b095541dcf43928f7 Mon Sep 17 00:00:00 2001 2 | From: 
Ben Dooks 3 | Date: Thu, 21 Jul 2022 15:51:55 +0100 4 | Subject: [PATCH 3/4] sched/rt: Fix Sparse warnings due to undefined rt.c 5 | declarations 6 | 7 | There are several symbols defined in kernel/sched/sched.h but get wrapped 8 | in CONFIG_CGROUP_SCHED, even though dummy versions get built in rt.c and 9 | therefore trigger Sparse warnings: 10 | 11 | kernel/sched/rt.c:309:6: warning: symbol 'unregister_rt_sched_group' was not declared. Should it be static? 12 | kernel/sched/rt.c:311:6: warning: symbol 'free_rt_sched_group' was not declared. Should it be static? 13 | kernel/sched/rt.c:313:5: warning: symbol 'alloc_rt_sched_group' was not declared. Should it be static? 14 | 15 | Fix this by moving them outside the CONFIG_CGROUP_SCHED block. 16 | 17 | [ mingo: Refreshed to the latest scheduler tree, tweaked changelog. ] 18 | 19 | Signed-off-by: Ben Dooks 20 | Signed-off-by: Ingo Molnar 21 | Link: https://lore.kernel.org/r/20220721145155.358366-1-ben-linux@fluff.org 22 | --- 23 | kernel/sched/sched.h | 7 ++++--- 24 | 1 file changed, 4 insertions(+), 3 deletions(-) 25 | 26 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h 27 | index aad7f5ee9666..1429315610d9 100644 28 | --- a/kernel/sched/sched.h 29 | +++ b/kernel/sched/sched.h 30 | @@ -480,9 +480,6 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); 31 | extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b); 32 | extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); 33 | 34 | -extern void unregister_rt_sched_group(struct task_group *tg); 35 | -extern void free_rt_sched_group(struct task_group *tg); 36 | -extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent); 37 | extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, 38 | struct sched_rt_entity *rt_se, int cpu, 39 | struct sched_rt_entity *parent); 40 | @@ -520,6 +517,10 @@ struct cfs_bandwidth { }; 41 | 42 | #endif /* CONFIG_CGROUP_SCHED */ 43 | 44 | +extern void 
unregister_rt_sched_group(struct task_group *tg); 45 | +extern void free_rt_sched_group(struct task_group *tg); 46 | +extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent); 47 | + 48 | /* 49 | * u64_u32_load/u64_u32_store 50 | * 51 | -- 52 | 2.37.1 53 | 54 | -------------------------------------------------------------------------------- /0004-sched-core-Do-not-requeue-task-on-CPU-excluded-from-.patch: -------------------------------------------------------------------------------- 1 | From 751d4cbc43879229dbc124afefe240b70fd29a85 Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Thu, 4 Aug 2022 10:21:19 +0100 4 | Subject: [PATCH 4/4] sched/core: Do not requeue task on CPU excluded from 5 | cpus_mask 6 | 7 | The following warning was triggered on a large machine early in boot on 8 | a distribution kernel but the same problem should also affect mainline. 9 | 10 | WARNING: CPU: 439 PID: 10 at ../kernel/workqueue.c:2231 process_one_work+0x4d/0x440 11 | Call Trace: 12 | 13 | rescuer_thread+0x1f6/0x360 14 | kthread+0x156/0x180 15 | ret_from_fork+0x22/0x30 16 | 17 | 18 | Commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") 19 | optimises ttwu by queueing a task that is descheduling on the wakelist, 20 | but does not check if the task descheduling is still allowed to run on that CPU. 21 | 22 | In this warning, the problematic task is a workqueue rescue thread which 23 | checks if the rescue is for a per-cpu workqueue and running on the wrong CPU. 24 | While this is early in boot and it should be possible to create workers, 25 | the rescue thread may still used if the MAYDAY_INITIAL_TIMEOUT is reached 26 | or MAYDAY_INTERVAL and on a sufficiently large machine, the rescue 27 | thread is being used frequently. 28 | 29 | Tracing confirmed that the task should have migrated properly using the 30 | stopper thread to handle the migration. 
However, a parallel wakeup from udev 31 | running on another CPU that does not share CPU cache observes p->on_cpu and 32 | uses task_cpu(p), queues the task on the old CPU and triggers the warning. 33 | 34 | Check that the wakee task that is descheduling is still allowed to run 35 | on its current CPU and if not, wait for the descheduling to complete 36 | and select an allowed CPU. 37 | 38 | Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") 39 | Signed-off-by: Mel Gorman 40 | Signed-off-by: Ingo Molnar 41 | Link: https://lore.kernel.org/r/20220804092119.20137-1-mgorman@techsingularity.net 42 | --- 43 | kernel/sched/core.c | 8 ++++++-- 44 | 1 file changed, 6 insertions(+), 2 deletions(-) 45 | 46 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 47 | index addc3c2d2122..02afa1cc3c8c 100644 48 | --- a/kernel/sched/core.c 49 | +++ b/kernel/sched/core.c 50 | @@ -3802,7 +3802,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu) 51 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); 52 | } 53 | 54 | -static inline bool ttwu_queue_cond(int cpu) 55 | +static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) 56 | { 57 | /* 58 | * Do not complicate things with the async wake_list while the CPU is 59 | @@ -3811,6 +3811,10 @@ static inline bool ttwu_queue_cond(int cpu) 60 | if (!cpu_active(cpu)) 61 | return false; 62 | 63 | + /* Ensure the task will still be allowed to run on the CPU. */ 64 | + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) 65 | + return false; 66 | + 67 | /* 68 | * If the CPU does not share cache, then queue the task on the 69 | * remote rqs wakelist to avoid accessing remote data. 
70 | @@ -3840,7 +3844,7 @@ static inline bool ttwu_queue_cond(int cpu) 71 | 72 | static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) 73 | { 74 | - if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu)) { 75 | + if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) { 76 | sched_clock_cpu(cpu); /* Sync clocks across CPUs */ 77 | __ttwu_queue_wakelist(p, cpu, wake_flags); 78 | return true; 79 | -- 80 | 2.37.1 81 | 82 | -------------------------------------------------------------------------------- /0004-sched-numa-Adjust-imb_numa_nr-to-a-better-approximat.patch: -------------------------------------------------------------------------------- 1 | From 026b98a93bbdbefb37ab8008df84e38e2fedaf92 Mon Sep 17 00:00:00 2001 2 | From: Mel Gorman 3 | Date: Fri, 20 May 2022 11:35:19 +0100 4 | Subject: [PATCH 04/32] sched/numa: Adjust imb_numa_nr to a better 5 | approximation of memory channels 6 | 7 | For a single LLC per node, a NUMA imbalance is allowed up until 25% 8 | of CPUs sharing a node could be active. One intent of the cut-off is 9 | to avoid an imbalance of memory channels but there is no topological 10 | information based on active memory channels. Furthermore, there can 11 | be differences between nodes depending on the number of populated 12 | DIMMs. 13 | 14 | A cut-off of 25% was arbitrary but generally worked. It does have a severe 15 | corner cases though when an parallel workload is using 25% of all available 16 | CPUs over-saturates memory channels. This can happen due to the initial 17 | forking of tasks that get pulled more to one node after early wakeups 18 | (e.g. a barrier synchronisation) that is not quickly corrected by the 19 | load balancer. The LB may fail to act quickly as the parallel tasks are 20 | considered to be poor migrate candidates due to locality or cache hotness. 
21 | 22 | On a range of modern Intel CPUs, 12.5% appears to be a better cut-off 23 | assuming all memory channels are populated and is used as the new cut-off 24 | point. A minimum of 1 is specified to allow a communicating pair to 25 | remain local even for CPUs with low numbers of cores. For modern AMDs, 26 | there are multiple LLCs and are not affected. 27 | 28 | Signed-off-by: Mel Gorman 29 | Signed-off-by: Peter Zijlstra (Intel) 30 | Tested-by: K Prateek Nayak 31 | Link: https://lore.kernel.org/r/20220520103519.1863-5-mgorman@techsingularity.net 32 | --- 33 | kernel/sched/topology.c | 23 +++++++++++++++-------- 34 | 1 file changed, 15 insertions(+), 8 deletions(-) 35 | 36 | diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c 37 | index 05b6c2ad90b9..8739c2a5a54e 100644 38 | --- a/kernel/sched/topology.c 39 | +++ b/kernel/sched/topology.c 40 | @@ -2316,23 +2316,30 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att 41 | 42 | /* 43 | * For a single LLC per node, allow an 44 | - * imbalance up to 25% of the node. This is an 45 | - * arbitrary cutoff based on SMT-2 to balance 46 | - * between memory bandwidth and avoiding 47 | - * premature sharing of HT resources and SMT-4 48 | - * or SMT-8 *may* benefit from a different 49 | - * cutoff. 50 | + * imbalance up to 12.5% of the node. This is 51 | + * arbitrary cutoff based two factors -- SMT and 52 | + * memory channels. For SMT-2, the intent is to 53 | + * avoid premature sharing of HT resources but 54 | + * SMT-4 or SMT-8 *may* benefit from a different 55 | + * cutoff. For memory channels, this is a very 56 | + * rough estimate of how many channels may be 57 | + * active and is based on recent CPUs with 58 | + * many cores. 59 | * 60 | * For multiple LLCs, allow an imbalance 61 | * until multiple tasks would share an LLC 62 | * on one node while LLCs on another node 63 | - * remain idle. 64 | + * remain idle. 
This assumes that there are 65 | + * enough logical CPUs per LLC to avoid SMT 66 | + * factors and that there is a correlation 67 | + * between LLCs and memory channels. 68 | */ 69 | nr_llcs = sd->span_weight / child->span_weight; 70 | if (nr_llcs == 1) 71 | - imb = sd->span_weight >> 2; 72 | + imb = sd->span_weight >> 3; 73 | else 74 | imb = nr_llcs; 75 | + imb = max(1U, imb); 76 | sd->imb_numa_nr = imb; 77 | 78 | /* Set span based on the first NUMA domain. */ 79 | -- 80 | 2.37.1 81 | 82 | -------------------------------------------------------------------------------- /0007-sched-deadline-Use-proc_douintvec_minmax-limit-minim.patch: -------------------------------------------------------------------------------- 1 | From 2ed81e765417ec2526f901366167a13294ef09ce Mon Sep 17 00:00:00 2001 2 | From: Yajun Deng 3 | Date: Tue, 7 Jun 2022 18:18:07 +0800 4 | Subject: [PATCH 07/32] sched/deadline: Use proc_douintvec_minmax() limit 5 | minimum value 6 | 7 | sysctl_sched_dl_period_max and sysctl_sched_dl_period_min are unsigned 8 | integer, but proc_dointvec() wouldn't return error even if we set a 9 | negative number. 10 | 11 | Use proc_douintvec_minmax() instead of proc_dointvec(). Add extra1 for 12 | sysctl_sched_dl_period_max and extra2 for sysctl_sched_dl_period_min. 13 | 14 | It's just an optimization for match data and proc_handler in struct 15 | ctl_table. The 'if (period < min || period > max)' in __checkparam_dl() 16 | will work fine even if there hasn't this patch. 
17 | 18 | Signed-off-by: Yajun Deng 19 | Signed-off-by: Peter Zijlstra (Intel) 20 | Reviewed-by: Daniel Bristot de Oliveira 21 | Link: https://lore.kernel.org/r/20220607101807.249965-1-yajun.deng@linux.dev 22 | --- 23 | kernel/sched/deadline.c | 6 ++++-- 24 | 1 file changed, 4 insertions(+), 2 deletions(-) 25 | 26 | diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c 27 | index b5152961b743..5867e186c39a 100644 28 | --- a/kernel/sched/deadline.c 29 | +++ b/kernel/sched/deadline.c 30 | @@ -30,14 +30,16 @@ static struct ctl_table sched_dl_sysctls[] = { 31 | .data = &sysctl_sched_dl_period_max, 32 | .maxlen = sizeof(unsigned int), 33 | .mode = 0644, 34 | - .proc_handler = proc_dointvec, 35 | + .proc_handler = proc_douintvec_minmax, 36 | + .extra1 = (void *)&sysctl_sched_dl_period_min, 37 | }, 38 | { 39 | .procname = "sched_deadline_period_min_us", 40 | .data = &sysctl_sched_dl_period_min, 41 | .maxlen = sizeof(unsigned int), 42 | .mode = 0644, 43 | - .proc_handler = proc_dointvec, 44 | + .proc_handler = proc_douintvec_minmax, 45 | + .extra2 = (void *)&sysctl_sched_dl_period_max, 46 | }, 47 | {} 48 | }; 49 | -- 50 | 2.37.1 51 | 52 | -------------------------------------------------------------------------------- /0008-sched-Allow-newidle-balancing-to-bail-out-of-load_ba.patch: -------------------------------------------------------------------------------- 1 | From 792b9f65a568f48c50b3175536db9cde5a1edcc0 Mon Sep 17 00:00:00 2001 2 | From: Josh Don 3 | Date: Wed, 8 Jun 2022 19:55:15 -0700 4 | Subject: [PATCH 08/32] sched: Allow newidle balancing to bail out of 5 | load_balance 6 | 7 | While doing newidle load balancing, it is possible for new tasks to 8 | arrive, such as with pending wakeups. newidle_balance() already accounts 9 | for this by exiting the sched_domain load_balance() iteration if it 10 | detects these cases. This is very important for minimizing wakeup 11 | latency. 
12 | 13 | However, if we are already in load_balance(), we may stay there for a 14 | while before returning back to newidle_balance(). This is most 15 | exacerbated if we enter a 'goto redo' loop in the LBF_ALL_PINNED case. A 16 | very straightforward workaround to this is to adjust should_we_balance() 17 | to bail out if we're doing a CPU_NEWLY_IDLE balance and new tasks are 18 | detected. 19 | 20 | This was tested with the following reproduction: 21 | - two threads that take turns sleeping and waking each other up are 22 | affined to two cores 23 | - a large number of threads with 100% utilization are pinned to all 24 | other cores 25 | 26 | Without this patch, wakeup latency was ~120us for the pair of threads, 27 | almost entirely spent in load_balance(). With this patch, wakeup latency 28 | is ~6us. 29 | 30 | Signed-off-by: Josh Don 31 | Signed-off-by: Peter Zijlstra (Intel) 32 | Link: https://lkml.kernel.org/r/20220609025515.2086253-1-joshdon@google.com 33 | --- 34 | kernel/sched/fair.c | 8 +++++++- 35 | 1 file changed, 7 insertions(+), 1 deletion(-) 36 | 37 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 38 | index 7d8ef01669a5..8bed75757e65 100644 39 | --- a/kernel/sched/fair.c 40 | +++ b/kernel/sched/fair.c 41 | @@ -9824,9 +9824,15 @@ static int should_we_balance(struct lb_env *env) 42 | /* 43 | * In the newly idle case, we will allow all the CPUs 44 | * to do the newly idle load balance. 45 | + * 46 | + * However, we bail out if we already have tasks or a wakeup pending, 47 | + * to optimize wakeup latency. 
48 | */ 49 | - if (env->idle == CPU_NEWLY_IDLE) 50 | + if (env->idle == CPU_NEWLY_IDLE) { 51 | + if (env->dst_rq->nr_running > 0 || env->dst_rq->ttwu_pending) 52 | + return 0; 53 | return 1; 54 | + } 55 | 56 | /* Try to find first idle CPU */ 57 | for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) { 58 | -- 59 | 2.37.1 60 | 61 | -------------------------------------------------------------------------------- /0009-sched-Fix-the-check-of-nr_running-at-queue-wakelist.patch: -------------------------------------------------------------------------------- 1 | From 28156108fecb1f808b21d216e8ea8f0d205a530c Mon Sep 17 00:00:00 2001 2 | From: Tianchen Ding 3 | Date: Thu, 9 Jun 2022 07:34:11 +0800 4 | Subject: [PATCH 09/32] sched: Fix the check of nr_running at queue wakelist 5 | 6 | The commit 2ebb17717550 ("sched/core: Offload wakee task activation if it 7 | the wakee is descheduling") checked rq->nr_running <= 1 to avoid task 8 | stacking when WF_ON_CPU. 9 | 10 | Per the ordering of writes to p->on_rq and p->on_cpu, observing p->on_cpu 11 | (WF_ON_CPU) in ttwu_queue_cond() implies !p->on_rq, IOW p has gone through 12 | the deactivate_task() in __schedule(), thus p has been accounted out of 13 | rq->nr_running. As such, the task being the only runnable task on the rq 14 | implies reading rq->nr_running == 0 at that point. 15 | 16 | The benchmark result is in [1]. 
17 | 18 | [1] https://lore.kernel.org/all/e34de686-4e85-bde1-9f3c-9bbc86b38627@linux.alibaba.com/ 19 | 20 | Suggested-by: Valentin Schneider 21 | Signed-off-by: Tianchen Ding 22 | Signed-off-by: Peter Zijlstra (Intel) 23 | Reviewed-by: Valentin Schneider 24 | Link: https://lore.kernel.org/r/20220608233412.327341-2-dtcccc@linux.alibaba.com 25 | --- 26 | kernel/sched/core.c | 6 +++++- 27 | 1 file changed, 5 insertions(+), 1 deletion(-) 28 | 29 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 30 | index bfa7452ca92e..294b9184dfe1 100644 31 | --- a/kernel/sched/core.c 32 | +++ b/kernel/sched/core.c 33 | @@ -3829,8 +3829,12 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) 34 | * CPU then use the wakelist to offload the task activation to 35 | * the soon-to-be-idle CPU as the current CPU is likely busy. 36 | * nr_running is checked to avoid unnecessary task stacking. 37 | + * 38 | + * Note that we can only get here with (wakee) p->on_rq=0, 39 | + * p->on_cpu can be whatever, we've done the dequeue, so 40 | + * the wakee has been accounted out of ->nr_running. 41 | */ 42 | - if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) 43 | + if ((wake_flags & WF_ON_CPU) && !cpu_rq(cpu)->nr_running) 44 | return true; 45 | 46 | return false; 47 | -- 48 | 2.37.1 49 | 50 | -------------------------------------------------------------------------------- /0012-selftests-rseq-riscv-fix-literal-suffix-warning.patch: -------------------------------------------------------------------------------- 1 | From d47c0cc94a86b9098930523a9e68180bef6b26cf Mon Sep 17 00:00:00 2001 2 | From: Michael Jeanson 3 | Date: Tue, 14 Jun 2022 11:48:29 -0400 4 | Subject: [PATCH 12/32] selftests/rseq: riscv: fix 'literal-suffix' warning 5 | 6 | This header is also used in librseq where it can be included in C++ 7 | code, add a space between literals and string macros. 
8 | 9 | Signed-off-by: Michael Jeanson 10 | Signed-off-by: Peter Zijlstra (Intel) 11 | Reviewed-by: Mathieu Desnoyers 12 | Link: https://lore.kernel.org/r/20220614154830.1367382-3-mjeanson@efficios.com 13 | --- 14 | tools/testing/selftests/rseq/rseq-riscv.h | 14 +++++++------- 15 | 1 file changed, 7 insertions(+), 7 deletions(-) 16 | 17 | diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h 18 | index 6f8a605b75c0..3a391c9bf468 100644 19 | --- a/tools/testing/selftests/rseq/rseq-riscv.h 20 | +++ b/tools/testing/selftests/rseq/rseq-riscv.h 21 | @@ -86,7 +86,7 @@ do { \ 22 | 23 | #define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ 24 | RSEQ_INJECT_ASM(1) \ 25 | - "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ 26 | + "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ 27 | REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \ 28 | __rseq_str(label) ":\n" 29 | 30 | @@ -103,17 +103,17 @@ do { \ 31 | 32 | #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ 33 | REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 34 | - "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 35 | + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 36 | __rseq_str(label) "\n" 37 | 38 | #define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ 39 | - "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 40 | - "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 41 | + "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 42 | + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 43 | __rseq_str(label) "\n" 44 | 45 | #define RSEQ_ASM_OP_CMPNE(var, expect, label) \ 46 | REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ 47 | - "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 48 | + "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ 49 | __rseq_str(label) "\n" 50 | 51 | #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ 52 | @@ -127,12 +127,12 @@ do { \ 53 | REG_S 
RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" 54 | 55 | #define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ 56 | - "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ 57 | + "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ 58 | RSEQ_ASM_TMP_REG_1 "\n" \ 59 | REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n" 60 | 61 | #define RSEQ_ASM_OP_R_ADD(count) \ 62 | - "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ 63 | + "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ 64 | ", %[" __rseq_str(count) "]\n" 65 | 66 | #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ 67 | -- 68 | 2.37.1 69 | 70 | -------------------------------------------------------------------------------- /0013-selftests-rseq-check-if-libc-rseq-support-is-registe.patch: -------------------------------------------------------------------------------- 1 | From d1a997ba4c1bf65497d956aea90de42a6398f73a Mon Sep 17 00:00:00 2001 2 | From: Michael Jeanson 3 | Date: Tue, 14 Jun 2022 11:48:30 -0400 4 | Subject: [PATCH 13/32] selftests/rseq: check if libc rseq support is 5 | registered 6 | 7 | When checking for libc rseq support in the library constructor, don't 8 | only depend on the symbols presence, check that the registration was 9 | completed. 10 | 11 | This targets a scenario where the libc has rseq support but it is not 12 | wired for the current architecture in 'bits/rseq.h', we want to fallback 13 | to our internal registration mechanism. 
14 | 15 | Signed-off-by: Michael Jeanson 16 | Signed-off-by: Peter Zijlstra (Intel) 17 | Reviewed-by: Mathieu Desnoyers 18 | Link: https://lore.kernel.org/r/20220614154830.1367382-4-mjeanson@efficios.com 19 | --- 20 | tools/testing/selftests/rseq/rseq.c | 3 ++- 21 | 1 file changed, 2 insertions(+), 1 deletion(-) 22 | 23 | diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c 24 | index 986b9458efb2..4177f9507bbe 100644 25 | --- a/tools/testing/selftests/rseq/rseq.c 26 | +++ b/tools/testing/selftests/rseq/rseq.c 27 | @@ -111,7 +111,8 @@ void rseq_init(void) 28 | libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); 29 | libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); 30 | libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); 31 | - if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { 32 | + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && 33 | + *libc_rseq_size_p != 0) { 34 | /* rseq registration owned by glibc */ 35 | rseq_offset = *libc_rseq_offset_p; 36 | rseq_size = *libc_rseq_size_p; 37 | -- 38 | 2.37.1 39 | 40 | -------------------------------------------------------------------------------- /0014-sched-fair-Remove-redundant-word.patch: -------------------------------------------------------------------------------- 1 | From fb95a5a04d72aecdd5e151a4c2f7e4cde368bc10 Mon Sep 17 00:00:00 2001 2 | From: Zhang Qiao 3 | Date: Sat, 18 Jun 2022 02:11:50 +0800 4 | Subject: [PATCH 14/32] sched/fair: Remove redundant word " *" 5 | 6 | " *" is redundant. so remove it. 
7 | 8 | Signed-off-by: Zhang Qiao 9 | Signed-off-by: Peter Zijlstra (Intel) 10 | Link: https://lore.kernel.org/r/20220617181151.29980-2-zhangqiao22@huawei.com 11 | --- 12 | kernel/sched/fair.c | 2 +- 13 | 1 file changed, 1 insertion(+), 1 deletion(-) 14 | 15 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 16 | index 8bed75757e65..7400600b4db6 100644 17 | --- a/kernel/sched/fair.c 18 | +++ b/kernel/sched/fair.c 19 | @@ -8496,7 +8496,7 @@ static inline int sg_imbalanced(struct sched_group *group) 20 | /* 21 | * group_has_capacity returns true if the group has spare capacity that could 22 | * be used by some tasks. 23 | - * We consider that a group has spare capacity if the * number of task is 24 | + * We consider that a group has spare capacity if the number of task is 25 | * smaller than the number of CPUs or if the utilization is lower than the 26 | * available capacity for CFS tasks. 27 | * For the latter, we use a threshold to stabilize the state, to take into 28 | -- 29 | 2.37.1 30 | 31 | -------------------------------------------------------------------------------- /0015-sched-Remove-unused-function-group_first_cpu.patch: -------------------------------------------------------------------------------- 1 | From c64b551f6a338eb9724a2f9ef3dddf80ccef2894 Mon Sep 17 00:00:00 2001 2 | From: Zhang Qiao 3 | Date: Sat, 18 Jun 2022 02:11:51 +0800 4 | Subject: [PATCH 15/32] sched: Remove unused function group_first_cpu() 5 | 6 | As of commit afe06efdf07c ("sched: Extend scheduler's asym packing") 7 | group_first_cpu() became an unused function, remove it. 
8 | 9 | Signed-off-by: Zhang Qiao 10 | Signed-off-by: Peter Zijlstra (Intel) 11 | Reviewed-by: Valentin Schneider 12 | Link: https://lore.kernel.org/r/20220617181151.29980-3-zhangqiao22@huawei.com 13 | --- 14 | kernel/sched/sched.h | 9 --------- 15 | 1 file changed, 9 deletions(-) 16 | 17 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h 18 | index 1e34bb4527fd..02c970501295 100644 19 | --- a/kernel/sched/sched.h 20 | +++ b/kernel/sched/sched.h 21 | @@ -1810,15 +1810,6 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg) 22 | return to_cpumask(sg->sgc->cpumask); 23 | } 24 | 25 | -/** 26 | - * group_first_cpu - Returns the first CPU in the cpumask of a sched_group. 27 | - * @group: The group whose first CPU is to be returned. 28 | - */ 29 | -static inline unsigned int group_first_cpu(struct sched_group *group) 30 | -{ 31 | - return cpumask_first(sched_group_span(group)); 32 | -} 33 | - 34 | extern int group_balance_cpu(struct sched_group *sg); 35 | 36 | #ifdef CONFIG_SCHED_DEBUG 37 | -- 38 | 2.37.1 39 | 40 | -------------------------------------------------------------------------------- /0021-sched-fair-Rename-select_idle_mask-to-select_rq_mask.patch: -------------------------------------------------------------------------------- 1 | From ec4fc801a02d96180c597238fe87141471b70971 Mon Sep 17 00:00:00 2001 2 | From: Dietmar Eggemann 3 | Date: Thu, 23 Jun 2022 11:11:02 +0200 4 | Subject: [PATCH 21/32] sched/fair: Rename select_idle_mask to select_rq_mask 5 | 6 | On 21/06/2022 11:04, Vincent Donnefort wrote: 7 | > From: Dietmar Eggemann 8 | 9 | https://lkml.kernel.org/r/202206221253.ZVyGQvPX-lkp@intel.com discovered 10 | that this patch doesn't build anymore (on tip sched/core or linux-next) 11 | because of commit f5b2eeb499910 ("sched/fair: Consider CPU affinity when 12 | allowing NUMA imbalance in find_idlest_group()"). 13 | 14 | New version of [PATCH v11 4/7] sched/fair: Rename select_idle_mask to 15 | select_rq_mask below. 
16 | 17 | -- >8 -- 18 | 19 | Decouple the name of the per-cpu cpumask select_idle_mask from its usage 20 | in select_idle_[cpu/capacity]() of the CFS run-queue selection 21 | (select_task_rq_fair()). 22 | 23 | This is to support the reuse of this cpumask in the Energy Aware 24 | Scheduling (EAS) path (find_energy_efficient_cpu()) of the CFS run-queue 25 | selection. 26 | 27 | Signed-off-by: Dietmar Eggemann 28 | Signed-off-by: Peter Zijlstra (Intel) 29 | Reviewed-by: Vincent Guittot 30 | Tested-by: Lukasz Luba 31 | Link: https://lkml.kernel.org/r/250691c7-0e2b-05ab-bedf-b245c11d9400@arm.com 32 | --- 33 | kernel/sched/core.c | 4 ++-- 34 | kernel/sched/fair.c | 10 +++++----- 35 | 2 files changed, 7 insertions(+), 7 deletions(-) 36 | 37 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 38 | index c538a0ac4617..dd69e85b7879 100644 39 | --- a/kernel/sched/core.c 40 | +++ b/kernel/sched/core.c 41 | @@ -9536,7 +9536,7 @@ static struct kmem_cache *task_group_cache __read_mostly; 42 | #endif 43 | 44 | DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); 45 | -DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); 46 | +DECLARE_PER_CPU(cpumask_var_t, select_rq_mask); 47 | 48 | void __init sched_init(void) 49 | { 50 | @@ -9585,7 +9585,7 @@ void __init sched_init(void) 51 | for_each_possible_cpu(i) { 52 | per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( 53 | cpumask_size(), GFP_KERNEL, cpu_to_node(i)); 54 | - per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( 55 | + per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node( 56 | cpumask_size(), GFP_KERNEL, cpu_to_node(i)); 57 | } 58 | #endif /* CONFIG_CPUMASK_OFFSTACK */ 59 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 60 | index 6de09b26b455..e3f750135f78 100644 61 | --- a/kernel/sched/fair.c 62 | +++ b/kernel/sched/fair.c 63 | @@ -5894,7 +5894,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) 64 | 65 | /* Working cpumask for: load_balance, load_balance_newidle. 
*/ 66 | DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); 67 | -DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); 68 | +DEFINE_PER_CPU(cpumask_var_t, select_rq_mask); 69 | 70 | #ifdef CONFIG_NO_HZ_COMMON 71 | 72 | @@ -6384,7 +6384,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd 73 | */ 74 | static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_core, int target) 75 | { 76 | - struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); 77 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 78 | int i, cpu, idle_cpu = -1, nr = INT_MAX; 79 | struct sched_domain_shared *sd_share; 80 | struct rq *this_rq = this_rq(); 81 | @@ -6482,7 +6482,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) 82 | int cpu, best_cpu = -1; 83 | struct cpumask *cpus; 84 | 85 | - cpus = this_cpu_cpumask_var_ptr(select_idle_mask); 86 | + cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 87 | cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); 88 | 89 | task_util = uclamp_task_util(p); 90 | @@ -6532,7 +6532,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) 91 | } 92 | 93 | /* 94 | - * per-cpu select_idle_mask usage 95 | + * per-cpu select_rq_mask usage 96 | */ 97 | lockdep_assert_irqs_disabled(); 98 | 99 | @@ -9255,7 +9255,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) 100 | * take care of it. 
101 | */ 102 | if (p->nr_cpus_allowed != NR_CPUS) { 103 | - struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); 104 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 105 | 106 | cpumask_and(cpus, sched_group_span(local), p->cpus_ptr); 107 | imb_numa_nr = min(cpumask_weight(cpus), sd->imb_numa_nr); 108 | -- 109 | 2.37.1 110 | 111 | -------------------------------------------------------------------------------- /0022-sched-fair-Use-the-same-cpumask-per-PD-throughout-fi.patch: -------------------------------------------------------------------------------- 1 | From 9b340131a4bcf6d0a282a2bdcd8ca268a74da709 Mon Sep 17 00:00:00 2001 2 | From: Dietmar Eggemann 3 | Date: Tue, 21 Jun 2022 10:04:12 +0100 4 | Subject: [PATCH 22/32] sched/fair: Use the same cpumask per-PD throughout 5 | find_energy_efficient_cpu() 6 | 7 | The Perf Domain (PD) cpumask (struct em_perf_domain.cpus) stays 8 | invariant after Energy Model creation, i.e. it is not updated after 9 | CPU hotplug operations. 10 | 11 | That's why the PD mask is used in conjunction with the cpu_online_mask 12 | (or Sched Domain cpumask). Thereby the cpu_online_mask is fetched 13 | multiple times (in compute_energy()) during a run-queue selection 14 | for a task. 15 | 16 | cpu_online_mask may change during this time which can lead to wrong 17 | energy calculations. 18 | 19 | To be able to avoid this, use the select_rq_mask per-cpu cpumask to 20 | create a cpumask out of PD cpumask and cpu_online_mask and pass it 21 | through the function calls of the EAS run-queue selection path. 22 | 23 | The PD cpumask for max_spare_cap_cpu/compute_prev_delta selection 24 | (find_energy_efficient_cpu()) is now ANDed not only with the SD mask 25 | but also with the cpu_online_mask. This is fine since this cpumask 26 | has to be in syc with the one used for energy computation 27 | (compute_energy()). 
28 | An exclusive cpuset setup with at least one asymmetric CPU capacity 29 | island (hence the additional AND with the SD cpumask) is the obvious 30 | exception here. 31 | 32 | Signed-off-by: Dietmar Eggemann 33 | Signed-off-by: Peter Zijlstra (Intel) 34 | Reviewed-by: Vincent Guittot 35 | Tested-by: Lukasz Luba 36 | Link: https://lkml.kernel.org/r/20220621090414.433602-6-vdonnefort@google.com 37 | --- 38 | kernel/sched/fair.c | 22 +++++++++++++--------- 39 | 1 file changed, 13 insertions(+), 9 deletions(-) 40 | 41 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 42 | index e3f750135f78..46d669297b1f 100644 43 | --- a/kernel/sched/fair.c 44 | +++ b/kernel/sched/fair.c 45 | @@ -6709,14 +6709,14 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) 46 | * task. 47 | */ 48 | static long 49 | -compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) 50 | +compute_energy(struct task_struct *p, int dst_cpu, struct cpumask *cpus, 51 | + struct perf_domain *pd) 52 | { 53 | - struct cpumask *pd_mask = perf_domain_span(pd); 54 | unsigned long max_util = 0, sum_util = 0, cpu_cap; 55 | int cpu; 56 | 57 | - cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask)); 58 | - cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask)); 59 | + cpu_cap = arch_scale_cpu_capacity(cpumask_first(cpus)); 60 | + cpu_cap -= arch_scale_thermal_pressure(cpumask_first(cpus)); 61 | 62 | /* 63 | * The capacity state of CPUs of the current rd can be driven by CPUs 64 | @@ -6727,7 +6727,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) 65 | * If an entire pd is outside of the current rd, it will not appear in 66 | * its pd list and will not be accounted by compute_energy(). 
67 | */ 68 | - for_each_cpu_and(cpu, pd_mask, cpu_online_mask) { 69 | + for_each_cpu(cpu, cpus) { 70 | unsigned long util_freq = cpu_util_next(cpu, p, dst_cpu); 71 | unsigned long cpu_util, util_running = util_freq; 72 | struct task_struct *tsk = NULL; 73 | @@ -6814,6 +6814,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) 74 | */ 75 | static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 76 | { 77 | + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask); 78 | unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; 79 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; 80 | int cpu, best_energy_cpu = prev_cpu, target = -1; 81 | @@ -6848,7 +6849,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 82 | unsigned long base_energy_pd; 83 | int max_spare_cap_cpu = -1; 84 | 85 | - for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { 86 | + cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask); 87 | + 88 | + for_each_cpu_and(cpu, cpus, sched_domain_span(sd)) { 89 | if (!cpumask_test_cpu(cpu, p->cpus_ptr)) 90 | continue; 91 | 92 | @@ -6885,12 +6888,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 93 | continue; 94 | 95 | /* Compute the 'base' energy of the pd, without @p */ 96 | - base_energy_pd = compute_energy(p, -1, pd); 97 | + base_energy_pd = compute_energy(p, -1, cpus, pd); 98 | base_energy += base_energy_pd; 99 | 100 | /* Evaluate the energy impact of using prev_cpu. */ 101 | if (compute_prev_delta) { 102 | - prev_delta = compute_energy(p, prev_cpu, pd); 103 | + prev_delta = compute_energy(p, prev_cpu, cpus, pd); 104 | if (prev_delta < base_energy_pd) 105 | goto unlock; 106 | prev_delta -= base_energy_pd; 107 | @@ -6899,7 +6902,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 108 | 109 | /* Evaluate the energy impact of using max_spare_cap_cpu. 
*/ 110 | if (max_spare_cap_cpu >= 0) { 111 | - cur_delta = compute_energy(p, max_spare_cap_cpu, pd); 112 | + cur_delta = compute_energy(p, max_spare_cap_cpu, cpus, 113 | + pd); 114 | if (cur_delta < base_energy_pd) 115 | goto unlock; 116 | cur_delta -= base_energy_pd; 117 | -- 118 | 2.37.1 119 | 120 | -------------------------------------------------------------------------------- /0026-sched-core-Use-try_cmpxchg-in-set_nr_-and_not-if-_po.patch: -------------------------------------------------------------------------------- 1 | From c02d5546ea34d589c83eda5055dbd727a396642b Mon Sep 17 00:00:00 2001 2 | From: Uros Bizjak 3 | Date: Wed, 29 Jun 2022 17:15:52 +0200 4 | Subject: [PATCH 26/32] sched/core: Use try_cmpxchg in 5 | set_nr_{and_not,if}_polling 6 | 7 | Use try_cmpxchg instead of cmpxchg (*ptr, old, new) != old in 8 | set_nr_{and_not,if}_polling. x86 cmpxchg returns success in ZF flag, 9 | so this change saves a compare after cmpxchg. 10 | 11 | The definition of cmpxchg based fetch_or was changed in the 12 | same way as atomic_fetch_##op definitions were changed 13 | in e6790e4b5d5e97dc287f3496dd2cf2dbabdfdb35. 14 | 15 | Also declare these two functions as inline to ensure inlining. In the 16 | case of set_nr_and_not_polling, the compiler (gcc) tries to outsmart 17 | itself by constructing the boolean return value with logic operations 18 | on the fetched value, and these extra operations enlarge the function 19 | over the inlining threshold value. 
20 | 21 | Signed-off-by: Uros Bizjak 22 | Signed-off-by: Peter Zijlstra (Intel) 23 | Link: https://lkml.kernel.org/r/20220629151552.6015-1-ubizjak@gmail.com 24 | --- 25 | kernel/sched/core.c | 24 +++++++++--------------- 26 | 1 file changed, 9 insertions(+), 15 deletions(-) 27 | 28 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c 29 | index dd69e85b7879..c703d177f62d 100644 30 | --- a/kernel/sched/core.c 31 | +++ b/kernel/sched/core.c 32 | @@ -873,15 +873,11 @@ static inline void hrtick_rq_init(struct rq *rq) 33 | ({ \ 34 | typeof(ptr) _ptr = (ptr); \ 35 | typeof(mask) _mask = (mask); \ 36 | - typeof(*_ptr) _old, _val = *_ptr; \ 37 | + typeof(*_ptr) _val = *_ptr; \ 38 | \ 39 | - for (;;) { \ 40 | - _old = cmpxchg(_ptr, _val, _val | _mask); \ 41 | - if (_old == _val) \ 42 | - break; \ 43 | - _val = _old; \ 44 | - } \ 45 | - _old; \ 46 | + do { \ 47 | + } while (!try_cmpxchg(_ptr, &_val, _val | _mask)); \ 48 | + _val; \ 49 | }) 50 | 51 | #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) 52 | @@ -890,7 +886,7 @@ static inline void hrtick_rq_init(struct rq *rq) 53 | * this avoids any races wrt polling state changes and thereby avoids 54 | * spurious IPIs. 
55 | */ 56 | -static bool set_nr_and_not_polling(struct task_struct *p) 57 | +static inline bool set_nr_and_not_polling(struct task_struct *p) 58 | { 59 | struct thread_info *ti = task_thread_info(p); 60 | return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); 61 | @@ -905,30 +901,28 @@ static bool set_nr_and_not_polling(struct task_struct *p) 62 | static bool set_nr_if_polling(struct task_struct *p) 63 | { 64 | struct thread_info *ti = task_thread_info(p); 65 | - typeof(ti->flags) old, val = READ_ONCE(ti->flags); 66 | + typeof(ti->flags) val = READ_ONCE(ti->flags); 67 | 68 | for (;;) { 69 | if (!(val & _TIF_POLLING_NRFLAG)) 70 | return false; 71 | if (val & _TIF_NEED_RESCHED) 72 | return true; 73 | - old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); 74 | - if (old == val) 75 | + if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED)) 76 | break; 77 | - val = old; 78 | } 79 | return true; 80 | } 81 | 82 | #else 83 | -static bool set_nr_and_not_polling(struct task_struct *p) 84 | +static inline bool set_nr_and_not_polling(struct task_struct *p) 85 | { 86 | set_tsk_need_resched(p); 87 | return true; 88 | } 89 | 90 | #ifdef CONFIG_SMP 91 | -static bool set_nr_if_polling(struct task_struct *p) 92 | +static inline bool set_nr_if_polling(struct task_struct *p) 93 | { 94 | return false; 95 | } 96 | -- 97 | 2.37.1 98 | 99 | -------------------------------------------------------------------------------- /0027-sched-fair-fix-case-with-reduced-capacity-CPU.patch: -------------------------------------------------------------------------------- 1 | From c82a69629c53eda5233f13fc11c3c01585ef48a2 Mon Sep 17 00:00:00 2001 2 | From: Vincent Guittot 3 | Date: Fri, 8 Jul 2022 17:44:01 +0200 4 | Subject: [PATCH 27/32] sched/fair: fix case with reduced capacity CPU 5 | 6 | The capacity of the CPU available for CFS tasks can be reduced because of 7 | other activities running on the latter. 
In such case, it's worth trying to 8 | move CFS tasks on a CPU with more available capacity. 9 | 10 | The rework of the load balance has filtered the case when the CPU is 11 | classified to be fully busy but its capacity is reduced. 12 | 13 | Check if CPU's capacity is reduced while gathering load balance statistic 14 | and classify it group_misfit_task instead of group_fully_busy so we can 15 | try to move the load on another CPU. 16 | 17 | Reported-by: David Chen 18 | Reported-by: Zhang Qiao 19 | Signed-off-by: Vincent Guittot 20 | Signed-off-by: Peter Zijlstra (Intel) 21 | Tested-by: David Chen 22 | Tested-by: Zhang Qiao 23 | Link: https://lkml.kernel.org/r/20220708154401.21411-1-vincent.guittot@linaro.org 24 | --- 25 | kernel/sched/fair.c | 54 +++++++++++++++++++++++++++++++++++---------- 26 | 1 file changed, 42 insertions(+), 12 deletions(-) 27 | 28 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 29 | index a78d2e3b9d49..914096c5b1ae 100644 30 | --- a/kernel/sched/fair.c 31 | +++ b/kernel/sched/fair.c 32 | @@ -7711,8 +7711,8 @@ enum group_type { 33 | */ 34 | group_fully_busy, 35 | /* 36 | - * SD_ASYM_CPUCAPACITY only: One task doesn't fit with CPU's capacity 37 | - * and must be migrated to a more powerful CPU. 38 | + * One task doesn't fit with CPU's capacity and must be migrated to a 39 | + * more powerful CPU. 
40 | */ 41 | group_misfit_task, 42 | /* 43 | @@ -8798,6 +8798,19 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs 44 | return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu); 45 | } 46 | 47 | +static inline bool 48 | +sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) 49 | +{ 50 | + /* 51 | + * When there is more than 1 task, the group_overloaded case already 52 | + * takes care of cpu with reduced capacity 53 | + */ 54 | + if (rq->cfs.h_nr_running != 1) 55 | + return false; 56 | + 57 | + return check_cpu_capacity(rq, sd); 58 | +} 59 | + 60 | /** 61 | * update_sg_lb_stats - Update sched_group's statistics for load balancing. 62 | * @env: The load balancing environment. 63 | @@ -8820,8 +8833,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, 64 | 65 | for_each_cpu_and(i, sched_group_span(group), env->cpus) { 66 | struct rq *rq = cpu_rq(i); 67 | + unsigned long load = cpu_load(rq); 68 | 69 | - sgs->group_load += cpu_load(rq); 70 | + sgs->group_load += load; 71 | sgs->group_util += cpu_util_cfs(i); 72 | sgs->group_runnable += cpu_runnable(rq); 73 | sgs->sum_h_nr_running += rq->cfs.h_nr_running; 74 | @@ -8851,11 +8865,17 @@ static inline void update_sg_lb_stats(struct lb_env *env, 75 | if (local_group) 76 | continue; 77 | 78 | - /* Check for a misfit task on the cpu */ 79 | - if (env->sd->flags & SD_ASYM_CPUCAPACITY && 80 | - sgs->group_misfit_task_load < rq->misfit_task_load) { 81 | - sgs->group_misfit_task_load = rq->misfit_task_load; 82 | - *sg_status |= SG_OVERLOAD; 83 | + if (env->sd->flags & SD_ASYM_CPUCAPACITY) { 84 | + /* Check for a misfit task on the cpu */ 85 | + if (sgs->group_misfit_task_load < rq->misfit_task_load) { 86 | + sgs->group_misfit_task_load = rq->misfit_task_load; 87 | + *sg_status |= SG_OVERLOAD; 88 | + } 89 | + } else if ((env->idle != CPU_NOT_IDLE) && 90 | + sched_reduced_capacity(rq, env->sd)) { 91 | + /* Check for a task running on a CPU with reduced capacity */ 92 | + if 
(sgs->group_misfit_task_load < load) 93 | + sgs->group_misfit_task_load = load; 94 | } 95 | } 96 | 97 | @@ -8908,7 +8928,8 @@ static bool update_sd_pick_busiest(struct lb_env *env, 98 | * CPUs in the group should either be possible to resolve 99 | * internally or be covered by avg_load imbalance (eventually). 100 | */ 101 | - if (sgs->group_type == group_misfit_task && 102 | + if ((env->sd->flags & SD_ASYM_CPUCAPACITY) && 103 | + (sgs->group_type == group_misfit_task) && 104 | (!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) || 105 | sds->local_stat.group_type != group_has_spare)) 106 | return false; 107 | @@ -9517,9 +9538,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s 108 | busiest = &sds->busiest_stat; 109 | 110 | if (busiest->group_type == group_misfit_task) { 111 | - /* Set imbalance to allow misfit tasks to be balanced. */ 112 | - env->migration_type = migrate_misfit; 113 | - env->imbalance = 1; 114 | + if (env->sd->flags & SD_ASYM_CPUCAPACITY) { 115 | + /* Set imbalance to allow misfit tasks to be balanced. */ 116 | + env->migration_type = migrate_misfit; 117 | + env->imbalance = 1; 118 | + } else { 119 | + /* 120 | + * Set load imbalance to allow moving task from cpu 121 | + * with reduced capacity. 
122 | + */ 123 | + env->migration_type = migrate_load; 124 | + env->imbalance = busiest->group_misfit_task_load; 125 | + } 126 | return; 127 | } 128 | 129 | -- 130 | 2.37.1 131 | 132 | -------------------------------------------------------------------------------- /0029-nohz-full-sched-rt-Fix-missed-tick-reenabling-bug-in.patch: -------------------------------------------------------------------------------- 1 | From 5c66d1b9b30f737fcef85a0b75bfe0590e16b62a Mon Sep 17 00:00:00 2001 2 | From: Nicolas Saenz Julienne 3 | Date: Tue, 28 Jun 2022 11:22:59 +0200 4 | Subject: [PATCH 29/32] nohz/full, sched/rt: Fix missed tick-reenabling bug in 5 | dequeue_task_rt() 6 | 7 | dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having 8 | called sched_update_tick_dependency() preventing it from re-enabling the 9 | tick on systems that no longer have pending SCHED_RT tasks but have 10 | multiple runnable SCHED_OTHER tasks: 11 | 12 | dequeue_task_rt() 13 | dequeue_rt_entity() 14 | dequeue_rt_stack() 15 | dequeue_top_rt_rq() 16 | sub_nr_running() // decrements rq->nr_running 17 | sched_update_tick_dependency() 18 | sched_can_stop_tick() // checks rq->rt.rt_nr_running, 19 | ... 20 | __dequeue_rt_entity() 21 | dec_rt_tasks() // decrements rq->rt.rt_nr_running 22 | ... 23 | 24 | Every other scheduler class performs the operation in the opposite 25 | order, and sched_update_tick_dependency() expects the values to be 26 | updated as such. So avoid the misbehaviour by inverting the order in 27 | which the above operations are performed in the RT scheduler. 
28 | 29 | Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model") 30 | Signed-off-by: Nicolas Saenz Julienne 31 | Signed-off-by: Peter Zijlstra (Intel) 32 | Reviewed-by: Valentin Schneider 33 | Reviewed-by: Phil Auld 34 | Link: https://lore.kernel.org/r/20220628092259.330171-1-nsaenzju@redhat.com 35 | --- 36 | kernel/sched/rt.c | 15 +++++++++------ 37 | 1 file changed, 9 insertions(+), 6 deletions(-) 38 | 39 | diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c 40 | index 8c9ed9664840..55f39c8f4203 100644 41 | --- a/kernel/sched/rt.c 42 | +++ b/kernel/sched/rt.c 43 | @@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq) 44 | #endif /* CONFIG_SMP */ 45 | 46 | static void enqueue_top_rt_rq(struct rt_rq *rt_rq); 47 | -static void dequeue_top_rt_rq(struct rt_rq *rt_rq); 48 | +static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count); 49 | 50 | static inline int on_rt_rq(struct sched_rt_entity *rt_se) 51 | { 52 | @@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 53 | rt_se = rt_rq->tg->rt_se[cpu]; 54 | 55 | if (!rt_se) { 56 | - dequeue_top_rt_rq(rt_rq); 57 | + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); 58 | /* Kick cpufreq (see the comment in kernel/sched/sched.h). 
*/ 59 | cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); 60 | } 61 | @@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 62 | 63 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 64 | { 65 | - dequeue_top_rt_rq(rt_rq); 66 | + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); 67 | } 68 | 69 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) 70 | @@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq) 71 | } 72 | 73 | static void 74 | -dequeue_top_rt_rq(struct rt_rq *rt_rq) 75 | +dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count) 76 | { 77 | struct rq *rq = rq_of_rt_rq(rt_rq); 78 | 79 | @@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) 80 | 81 | BUG_ON(!rq->nr_running); 82 | 83 | - sub_nr_running(rq, rt_rq->rt_nr_running); 84 | + sub_nr_running(rq, count); 85 | rt_rq->rt_queued = 0; 86 | 87 | } 88 | @@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag 89 | static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags) 90 | { 91 | struct sched_rt_entity *back = NULL; 92 | + unsigned int rt_nr_running; 93 | 94 | for_each_sched_rt_entity(rt_se) { 95 | rt_se->back = back; 96 | back = rt_se; 97 | } 98 | 99 | - dequeue_top_rt_rq(rt_rq_of_se(back)); 100 | + rt_nr_running = rt_rq_of_se(back)->rt_nr_running; 101 | 102 | for (rt_se = back; rt_se; rt_se = rt_se->back) { 103 | if (on_rt_rq(rt_se)) 104 | __dequeue_rt_entity(rt_se, flags); 105 | } 106 | + 107 | + dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running); 108 | } 109 | 110 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) 111 | -- 112 | 2.37.1 113 | 114 | -------------------------------------------------------------------------------- /0030-sched-core-Fix-the-bug-that-task-won-t-enqueue-into-.patch: -------------------------------------------------------------------------------- 1 | From 91caa5ae242465c3ab9fd473e50170faa7e944f4 Mon Sep 17 00:00:00 2001 2 | 
From: Cruz Zhao 3 | Date: Tue, 28 Jun 2022 15:57:23 +0800 4 | Subject: [PATCH 30/32] sched/core: Fix the bug that task won't enqueue into 5 | core tree when update cookie 6 | 7 | In function sched_core_update_cookie(), a task will enqueue into the 8 | core tree only when it enqueued before, that is, if an uncookied task 9 | is cookied, it will not enqueue into the core tree until it enqueue 10 | again, which will result in unnecessary force idle. 11 | 12 | Here follows the scenario: 13 | CPU x and CPU y are a pair of SMT siblings. 14 | 1. Start task a running on CPU x without sleeping, and task b and 15 | task c running on CPU y without sleeping. 16 | 2. We create a cookie and share it to task a and task b, and then 17 | we create another cookie and share it to task c. 18 | 3. Simpling core_forceidle_sum of task a and b from /proc/PID/sched 19 | 20 | And we will find out that core_forceidle_sum of task a takes 30% 21 | time of the sampling period, which shouldn't happen as task a and b 22 | have the same cookie. 23 | 24 | Then we migrate task a to CPU x', migrate task b and c to CPU y', where 25 | CPU x' and CPU y' are a pair of SMT siblings, and sampling again, we 26 | will found out that core_forceidle_sum of task a and b are almost zero. 27 | 28 | To solve this problem, we enqueue the task into the core tree if it's 29 | on rq. 
30 | 31 | Fixes: 6e33cad0af49("sched: Trivial core scheduling cookie management") 32 | Signed-off-by: Cruz Zhao 33 | Signed-off-by: Peter Zijlstra (Intel) 34 | Link: https://lkml.kernel.org/r/1656403045-100840-2-git-send-email-CruzZhao@linux.alibaba.com 35 | --- 36 | kernel/sched/core_sched.c | 9 +++++---- 37 | 1 file changed, 5 insertions(+), 4 deletions(-) 38 | 39 | diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c 40 | index 5103502da7ba..93878cb2a46d 100644 41 | --- a/kernel/sched/core_sched.c 42 | +++ b/kernel/sched/core_sched.c 43 | @@ -56,7 +56,6 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, 44 | unsigned long old_cookie; 45 | struct rq_flags rf; 46 | struct rq *rq; 47 | - bool enqueued; 48 | 49 | rq = task_rq_lock(p, &rf); 50 | 51 | @@ -68,14 +67,16 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, 52 | */ 53 | SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq)); 54 | 55 | - enqueued = sched_core_enqueued(p); 56 | - if (enqueued) 57 | + if (sched_core_enqueued(p)) 58 | sched_core_dequeue(rq, p, DEQUEUE_SAVE); 59 | 60 | old_cookie = p->core_cookie; 61 | p->core_cookie = cookie; 62 | 63 | - if (enqueued) 64 | + /* 65 | + * Consider the cases: !prev_cookie and !cookie. 66 | + */ 67 | + if (cookie && task_on_rq_queued(p)) 68 | sched_core_enqueue(rq, p); 69 | 70 | /* 71 | -- 72 | 2.37.1 73 | 74 | -------------------------------------------------------------------------------- /0031-rseq-Deprecate-RSEQ_CS_FLAG_NO_RESTART_ON_-flags.patch: -------------------------------------------------------------------------------- 1 | From 0190e4198e47fe99d002d72588f34fd62c9ab570 Mon Sep 17 00:00:00 2001 2 | From: Mathieu Desnoyers 3 | Date: Wed, 22 Jun 2022 15:46:16 -0400 4 | Subject: [PATCH 31/32] rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_* flags 5 | 6 | The pretty much unused RSEQ_CS_FLAG_NO_RESTART_ON_* flags introduce 7 | complexity in rseq, and are subtly buggy [1]. 
Solving those issues 8 | requires introducing additional complexity in the rseq implementation 9 | for each supported architecture. 10 | 11 | Considering that it complexifies the rseq ABI, I am proposing that we 12 | deprecate those flags. [2] 13 | 14 | So far there appears to be consensus from maintainers of user-space 15 | projects impacted by this feature that its removal would be a welcome 16 | simplification. [3] 17 | 18 | The deprecation approach proposed here is to issue WARN_ON_ONCE() when 19 | encountering those flags and kill the offending process with sigsegv. 20 | This should allow us to quickly identify whether anyone yells at us for 21 | removing this. 22 | 23 | Link: https://lore.kernel.org/lkml/20220618182515.95831-1-minhquangbui99@gmail.com/ [1] 24 | Link: https://lore.kernel.org/lkml/258546133.12151.1655739550814.JavaMail.zimbra@efficios.com/ [2] 25 | Link: https://lore.kernel.org/lkml/87pmj1enjh.fsf@email.froward.int.ebiederm.org/ [3] 26 | Signed-off-by: Mathieu Desnoyers 27 | Signed-off-by: Peter Zijlstra (Intel) 28 | Signed-off-by: Ingo Molnar 29 | Link: https://lore.kernel.org/lkml/20220622194617.1155957-1-mathieu.desnoyers@efficios.com 30 | --- 31 | kernel/rseq.c | 23 ++++++++--------------- 32 | 1 file changed, 8 insertions(+), 15 deletions(-) 33 | 34 | diff --git a/kernel/rseq.c b/kernel/rseq.c 35 | index 97ac20b4f738..81d7dc80787b 100644 36 | --- a/kernel/rseq.c 37 | +++ b/kernel/rseq.c 38 | @@ -18,8 +18,9 @@ 39 | #define CREATE_TRACE_POINTS 40 | #include 41 | 42 | -#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \ 43 | - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) 44 | +#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \ 45 | + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \ 46 | + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) 47 | 48 | /* 49 | * 50 | @@ -175,23 +176,15 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 51 | u32 flags, event_mask; 52 | int ret; 53 | 54 | + if (WARN_ON_ONCE(cs_flags & 
RSEQ_CS_NO_RESTART_FLAGS)) 55 | + return -EINVAL; 56 | + 57 | /* Get thread flags. */ 58 | ret = get_user(flags, &t->rseq->flags); 59 | if (ret) 60 | return ret; 61 | 62 | - /* Take critical section flags into account. */ 63 | - flags |= cs_flags; 64 | - 65 | - /* 66 | - * Restart on signal can only be inhibited when restart on 67 | - * preempt and restart on migrate are inhibited too. Otherwise, 68 | - * a preempted signal handler could fail to restart the prior 69 | - * execution context on sigreturn. 70 | - */ 71 | - if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) && 72 | - (flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) != 73 | - RSEQ_CS_PREEMPT_MIGRATE_FLAGS)) 74 | + if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS)) 75 | return -EINVAL; 76 | 77 | /* 78 | @@ -203,7 +196,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 79 | t->rseq_event_mask = 0; 80 | preempt_enable(); 81 | 82 | - return !!(event_mask & ~flags); 83 | + return !!event_mask; 84 | } 85 | 86 | static int clear_rseq_cs(struct task_struct *t) 87 | -- 88 | 2.37.1 89 | 90 | -------------------------------------------------------------------------------- /0032-rseq-Kill-process-when-unknown-flags-are-encountered.patch: -------------------------------------------------------------------------------- 1 | From c17a6ff9321355487d7d5ccaa7d406a0ea06b6c4 Mon Sep 17 00:00:00 2001 2 | From: Mathieu Desnoyers 3 | Date: Wed, 22 Jun 2022 15:46:17 -0400 4 | Subject: [PATCH 32/32] rseq: Kill process when unknown flags are encountered 5 | in ABI structures 6 | 7 | rseq_abi()->flags and rseq_abi()->rseq_cs->flags 29 upper bits are 8 | currently unused. 9 | 10 | The current behavior when those bits are set is to ignore them. This is 11 | not an ideal behavior, because when future features will start using 12 | those flags, if user-space fails to correctly validate that the kernel 13 | indeed supports those flags (e.g. 
with a new sys_rseq flags bit) before 14 | using them, it may incorrectly assume that the kernel will handle those 15 | flags way when in fact those will be silently ignored on older kernels. 16 | 17 | Validating that unused flags bits are cleared will allow a smoother 18 | transition when those flags will start to be used by allowing 19 | applications to fail early, and obviously, when they attempt to use the 20 | new flags on an older kernel that does not support them. 21 | 22 | Signed-off-by: Mathieu Desnoyers 23 | Signed-off-by: Peter Zijlstra (Intel) 24 | Signed-off-by: Ingo Molnar 25 | Link: https://lkml.kernel.org/r/20220622194617.1155957-2-mathieu.desnoyers@efficios.com 26 | --- 27 | kernel/rseq.c | 4 ++-- 28 | 1 file changed, 2 insertions(+), 2 deletions(-) 29 | 30 | diff --git a/kernel/rseq.c b/kernel/rseq.c 31 | index 81d7dc80787b..bda8175f8f99 100644 32 | --- a/kernel/rseq.c 33 | +++ b/kernel/rseq.c 34 | @@ -176,7 +176,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 35 | u32 flags, event_mask; 36 | int ret; 37 | 38 | - if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS)) 39 | + if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS) || cs_flags) 40 | return -EINVAL; 41 | 42 | /* Get thread flags. 
*/ 43 | @@ -184,7 +184,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) 44 | if (ret) 45 | return ret; 46 | 47 | - if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS)) 48 | + if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS) || flags) 49 | return -EINVAL; 50 | 51 | /* 52 | -- 53 | 2.37.1 54 | 55 | -------------------------------------------------------------------------------- /0051-block-bfq-Fix-division-by-zero-error-on-zero-wsum.patch: -------------------------------------------------------------------------------- 1 | From e53413f8deedf738a6782cc14cc00bd5852ccf18 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 13 Apr 2023 14:30:09 +0100 4 | Subject: [PATCH] block, bfq: Fix division by zero error on zero wsum 5 | Content-Type: text/plain; charset="utf-8" 6 | Content-Transfer-Encoding: 8bit 7 | 8 | When the weighted sum is zero the calculation of limit causes 9 | a division by zero error. Fix this by continuing to the next level. 10 | 11 | This was discovered by running as root: 12 | 13 | stress-ng --ioprio 0 14 | 15 | Fixes divison by error oops: 16 | 17 | [ 521.450556] divide error: 0000 [#1] SMP NOPTI 18 | [ 521.450766] CPU: 2 PID: 2684464 Comm: stress-ng-iopri Not tainted 6.2.1-1280.native #1 19 | [ 521.451117] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014 20 | [ 521.451627] RIP: 0010:bfqq_request_over_limit+0x207/0x400 21 | [ 521.451875] Code: 01 48 8d 0c c8 74 0b 48 8b 82 98 00 00 00 48 8d 0c c8 8b 85 34 ff ff ff 48 89 ca 41 0f af 41 50 48 d1 ea 48 98 48 01 d0 31 d2 <48> f7 f1 41 39 41 48 89 85 34 ff ff ff 0f 8c 7b 01 00 00 49 8b 44 22 | [ 521.452699] RSP: 0018:ffffb1af84eb3948 EFLAGS: 00010046 23 | [ 521.452938] RAX: 000000000000003c RBX: 0000000000000000 RCX: 0000000000000000 24 | [ 521.453262] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb1af84eb3978 25 | [ 521.453584] RBP: ffffb1af84eb3a30 R08: 0000000000000001 R09: ffff8f88ab8a4ba0 26 | 
[ 521.453905] R10: 0000000000000000 R11: 0000000000000001 R12: ffff8f88ab8a4b18 27 | [ 521.454224] R13: ffff8f8699093000 R14: 0000000000000001 R15: ffffb1af84eb3970 28 | [ 521.454549] FS: 00005640b6b0b580(0000) GS:ffff8f88b3880000(0000) knlGS:0000000000000000 29 | [ 521.454912] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 30 | [ 521.455170] CR2: 00007ffcbcae4e38 CR3: 00000002e46de001 CR4: 0000000000770ee0 31 | [ 521.455491] PKRU: 55555554 32 | [ 521.455619] Call Trace: 33 | [ 521.455736] 34 | [ 521.455837] ? bfq_request_merge+0x3a/0xc0 35 | [ 521.456027] ? elv_merge+0x115/0x140 36 | [ 521.456191] bfq_limit_depth+0xc8/0x240 37 | [ 521.456366] __blk_mq_alloc_requests+0x21a/0x2c0 38 | [ 521.456577] blk_mq_submit_bio+0x23c/0x6c0 39 | [ 521.456766] __submit_bio+0xb8/0x140 40 | [ 521.457236] submit_bio_noacct_nocheck+0x212/0x300 41 | [ 521.457748] submit_bio_noacct+0x1a6/0x580 42 | [ 521.458220] submit_bio+0x43/0x80 43 | [ 521.458660] ext4_io_submit+0x23/0x80 44 | [ 521.459116] ext4_do_writepages+0x40a/0xd00 45 | [ 521.459596] ext4_writepages+0x65/0x100 46 | [ 521.460050] do_writepages+0xb7/0x1c0 47 | [ 521.460492] __filemap_fdatawrite_range+0xa6/0x100 48 | [ 521.460979] file_write_and_wait_range+0xbf/0x140 49 | [ 521.461452] ext4_sync_file+0x105/0x340 50 | [ 521.461882] __x64_sys_fsync+0x67/0x100 51 | [ 521.462305] ? 
syscall_exit_to_user_mode+0x2c/0x1c0 52 | [ 521.462768] do_syscall_64+0x3b/0xc0 53 | [ 521.463165] entry_SYSCALL_64_after_hwframe+0x5a/0xc4 54 | [ 521.463621] RIP: 0033:0x5640b6c56590 55 | [ 521.464006] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 80 3d 71 70 0e 00 00 74 17 b8 4a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 7c 56 | 57 | Signed-off-by: Colin Ian King 58 | Link: https://lore.kernel.org/r/20230413133009.1605335-1-colin.i.king@gmail.com 59 | Signed-off-by: Jens Axboe 60 | --- 61 | block/bfq-iosched.c | 2 ++ 62 | 1 file changed, 2 insertions(+) 63 | 64 | diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c 65 | index b4c4b4808c6c..3164e3177965 100644 66 | --- a/block/bfq-iosched.c 67 | +++ b/block/bfq-iosched.c 68 | @@ -648,6 +648,8 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) 69 | sched_data->service_tree[i].wsum; 70 | } 71 | } 72 | + if (!wsum) 73 | + continue; 74 | limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum); 75 | if (entity->allocated >= limit) { 76 | bfq_log_bfqq(bfqq->bfqd, bfqq, 77 | -- 78 | 2.30.2 79 | 80 | -------------------------------------------------------------------------------- /0101-i8042-decrease-debug-message-level-to-info.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 23 Jun 2015 01:26:52 -0500 4 | Subject: [PATCH] i8042: decrease debug message level to info 5 | 6 | Author: Arjan van de Ven 7 | 8 | Signed-off-by: Miguel Bernal Marin 9 | Signed-off-by: Jose Carlos Venegas Munoz 10 | --- 11 | drivers/input/serio/i8042.c | 10 +++++----- 12 | 1 file changed, 5 insertions(+), 5 deletions(-) 13 | 14 | diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c 15 | index 3fc0a89cc785..a7c103f9dfd3 100644 16 | --- a/drivers/input/serio/i8042.c 17 | +++ 
b/drivers/input/serio/i8042.c 18 | @@ -621,7 +621,7 @@ static int i8042_enable_kbd_port(void) 19 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { 20 | i8042_ctr &= ~I8042_CTR_KBDINT; 21 | i8042_ctr |= I8042_CTR_KBDDIS; 22 | - pr_err("Failed to enable KBD port\n"); 23 | + pr_info("Failed to enable KBD port\n"); 24 | return -EIO; 25 | } 26 | 27 | @@ -640,7 +640,7 @@ static int i8042_enable_aux_port(void) 28 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { 29 | i8042_ctr &= ~I8042_CTR_AUXINT; 30 | i8042_ctr |= I8042_CTR_AUXDIS; 31 | - pr_err("Failed to enable AUX port\n"); 32 | + pr_info("Failed to enable AUX port\n"); 33 | return -EIO; 34 | } 35 | 36 | @@ -732,7 +732,7 @@ static int i8042_check_mux(void) 37 | i8042_ctr &= ~I8042_CTR_AUXINT; 38 | 39 | if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { 40 | - pr_err("Failed to disable AUX port, can't use MUX\n"); 41 | + pr_info("Failed to disable AUX port, can't use MUX\n"); 42 | return -EIO; 43 | } 44 | 45 | @@ -955,7 +955,7 @@ static int i8042_controller_selftest(void) 46 | do { 47 | 48 | if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { 49 | - pr_err("i8042 controller selftest timeout\n"); 50 | + pr_info("i8042 controller selftest timeout\n"); 51 | return -ENODEV; 52 | } 53 | 54 | @@ -977,7 +977,7 @@ static int i8042_controller_selftest(void) 55 | pr_info("giving up on controller selftest, continuing anyway...\n"); 56 | return 0; 57 | #else 58 | - pr_err("i8042 controller selftest failed\n"); 59 | + pr_info("i8042 controller selftest failed\n"); 60 | return -EIO; 61 | #endif 62 | } 63 | -- 64 | https://clearlinux.org 65 | 66 | -------------------------------------------------------------------------------- /0102-increase-the-ext4-default-commit-age.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 11 Jan 2016 10:01:44 -0600 4 | Subject: [PATCH] increase 
the ext4 default commit age 5 | 6 | Both the VM and EXT4 have a "commit to disk after X seconds" time. 7 | Currently the EXT4 time is shorter than our VM time, which is a bit 8 | suboptional, 9 | it's better for performance to let the VM do the writeouts in bulk 10 | rather than something deep in the journalling layer. 11 | 12 | (DISTRO TWEAK -- NOT FOR UPSTREAM) 13 | 14 | Signed-off-by: Arjan van de Ven 15 | Signed-off-by: Jose Carlos Venegas Munoz 16 | --- 17 | include/linux/jbd2.h | 2 +- 18 | 1 file changed, 1 insertion(+), 1 deletion(-) 19 | 20 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h 21 | index 9c3ada74ffb1..c4aef0bb2661 100644 22 | --- a/include/linux/jbd2.h 23 | +++ b/include/linux/jbd2.h 24 | @@ -45,7 +45,7 @@ 25 | /* 26 | * The default maximum commit age, in seconds. 27 | */ 28 | -#define JBD2_DEFAULT_MAX_COMMIT_AGE 5 29 | +#define JBD2_DEFAULT_MAX_COMMIT_AGE 30 30 | 31 | #ifdef CONFIG_JBD2_DEBUG 32 | /* 33 | -- 34 | https://clearlinux.org 35 | 36 | -------------------------------------------------------------------------------- /0103-silence-rapl.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 14 Mar 2016 11:22:09 -0600 4 | Subject: [PATCH] silence rapl 5 | 6 | --- 7 | drivers/powercap/intel_rapl_common.c | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c 11 | index 07611a00b78f..4031d810def5 100644 12 | --- a/drivers/powercap/intel_rapl_common.c 13 | +++ b/drivers/powercap/intel_rapl_common.c 14 | @@ -1512,7 +1512,7 @@ static int __init rapl_init(void) 15 | 16 | id = x86_match_cpu(rapl_ids); 17 | if (!id) { 18 | - pr_err("driver does not support CPU family %d model %d\n", 19 | + pr_info("driver does not support CPU family %d model %d\n", 20 | boot_cpu_data.x86, 
boot_cpu_data.x86_model); 21 | 22 | return -ENODEV; 23 | -- 24 | https://clearlinux.org 25 | 26 | -------------------------------------------------------------------------------- /0104-pci-pme-wakeups.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 14 Mar 2016 11:10:58 -0600 4 | Subject: [PATCH] pci pme wakeups 5 | 6 | Reduce wakeups for PME checks, which are a workaround for miswired 7 | boards (sadly, too many of them) in laptops. 8 | --- 9 | drivers/pci/pci.c | 2 +- 10 | 1 file changed, 1 insertion(+), 1 deletion(-) 11 | 12 | diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c 13 | index d25122fbe98a..dbfb6aaa4a07 100644 14 | --- a/drivers/pci/pci.c 15 | +++ b/drivers/pci/pci.c 16 | @@ -60,7 +60,7 @@ struct pci_pme_device { 17 | struct pci_dev *dev; 18 | }; 19 | 20 | -#define PME_TIMEOUT 1000 /* How long between PME checks */ 21 | +#define PME_TIMEOUT 4000 /* How long between PME checks */ 22 | 23 | static void pci_dev_d3_sleep(struct pci_dev *dev) 24 | { 25 | -- 26 | https://clearlinux.org 27 | 28 | -------------------------------------------------------------------------------- /0107-bootstats-add-printk-s-to-measure-boot-time-in-more-.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 11 Feb 2015 16:05:23 -0600 4 | Subject: [PATCH] bootstats: add printk's to measure boot time in more detail 5 | 6 | Few distro-tweaks to add printk's to visualize boot time better 7 | 8 | Author: Arjan van de Ven 9 | 10 | Signed-off-by: Miguel Bernal Marin 11 | --- 12 | arch/x86/kernel/alternative.c | 2 ++ 13 | 1 file changed, 2 insertions(+) 14 | 15 | diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c 16 | index b4470eabf151..f9de9eb7b8e1 100644 17 | 
--- a/arch/x86/kernel/alternative.c 18 | +++ b/arch/x86/kernel/alternative.c 19 | @@ -829,7 +829,9 @@ void __init alternative_instructions(void) 20 | * Then patch alternatives, such that those paravirt calls that are in 21 | * alternatives can be overwritten by their immediate fragments. 22 | */ 23 | + printk("clr: Applying alternatives\n"); 24 | apply_alternatives(__alt_instructions, __alt_instructions_end); 25 | + printk("clr: Applying alternatives done\n"); 26 | 27 | #ifdef CONFIG_SMP 28 | /* Patch to UP if other cpus not imminent. */ 29 | -- 30 | https://clearlinux.org 31 | 32 | -------------------------------------------------------------------------------- /0108-smpboot-reuse-timer-calibration.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 11 Feb 2015 17:28:14 -0600 4 | Subject: [PATCH] smpboot: reuse timer calibration 5 | 6 | NO point recalibrating for known-constant tsc ... 7 | saves 200ms+ of boot time. 
 8 | --- 9 | arch/x86/kernel/tsc.c | 3 +++ 10 | 1 file changed, 3 insertions(+) 11 | 12 | diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c 13 | index a698196377be..5f3ee7c31c8a 100644 14 | --- a/arch/x86/kernel/tsc.c 15 | +++ b/arch/x86/kernel/tsc.c 16 | @@ -1569,6 +1569,9 @@ unsigned long calibrate_delay_is_known(void) 17 | if (!constant_tsc || !mask) 18 | return 0; 19 | 20 | + if (cpu != 0) 21 | + return cpu_data(0).loops_per_jiffy; 22 | + 23 | sibling = cpumask_any_but(mask, cpu); 24 | if (sibling < nr_cpu_ids) 25 | return cpu_data(sibling).loops_per_jiffy; 26 | -- 27 | https://clearlinux.org 28 | 29 | -------------------------------------------------------------------------------- /0109-initialize-ata-before-graphics.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 2 Jun 2016 23:36:32 -0500 4 | Subject: [PATCH] initialize ata before graphics 5 | 6 | ATA init is the long pole in the boot process, and it's asynchronous. 
7 | move the graphics init after it so that ata and graphics initialize 8 | in parallel 9 | --- 10 | drivers/Makefile | 15 ++++++++------- 11 | 1 file changed, 8 insertions(+), 7 deletions(-) 12 | 13 | diff --git a/drivers/Makefile b/drivers/Makefile 14 | index a110338c860c..f91099276a78 100644 15 | --- a/drivers/Makefile 16 | +++ b/drivers/Makefile 17 | @@ -59,15 +59,8 @@ obj-y += char/ 18 | # iommu/ comes before gpu as gpu are using iommu controllers 19 | obj-y += iommu/ 20 | 21 | -# gpu/ comes after char for AGP vs DRM startup and after iommu 22 | -obj-y += gpu/ 23 | - 24 | obj-$(CONFIG_CONNECTOR) += connector/ 25 | 26 | -# i810fb and intelfb depend on char/agp/ 27 | -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ 28 | -obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ 29 | - 30 | obj-$(CONFIG_PARPORT) += parport/ 31 | obj-y += base/ block/ misc/ mfd/ nfc/ 32 | obj-$(CONFIG_LIBNVDIMM) += nvdimm/ 33 | @@ -79,6 +72,14 @@ obj-y += macintosh/ 34 | obj-y += scsi/ 35 | obj-y += nvme/ 36 | obj-$(CONFIG_ATA) += ata/ 37 | + 38 | +# gpu/ comes after char for AGP vs DRM startup and after iommu 39 | +obj-y += gpu/ 40 | + 41 | +# i810fb and intelfb depend on char/agp/ 42 | +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ 43 | +obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ 44 | + 45 | obj-$(CONFIG_TARGET_CORE) += target/ 46 | obj-$(CONFIG_MTD) += mtd/ 47 | obj-$(CONFIG_SPI) += spi/ 48 | -- 49 | https://clearlinux.org 50 | 51 | -------------------------------------------------------------------------------- /0110-give-rdrand-some-credit.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Fri, 29 Jul 2016 19:10:52 +0000 4 | Subject: [PATCH] give rdrand some credit 5 | 6 | try to credit rdrand/rdseed with some entropy 7 | 8 | In VMs but even modern hardware, we're super starved for entropy, and while we can 9 | and do wear a tin foil hat, it's 
very hard to argue that 10 | rdrand and rdtsc add zero entropy. 11 | --- 12 | drivers/char/random.c | 2 ++ 13 | 1 file changed, 2 insertions(+) 14 | 15 | diff --git a/drivers/char/random.c b/drivers/char/random.c 16 | index 3404a91edf29..479994faedba 100644 17 | --- a/drivers/char/random.c 18 | +++ b/drivers/char/random.c 19 | @@ -1678,6 +1678,8 @@ static void __init init_std_data(void) 20 | if (!arch_get_random_seed_long(&rv) && 21 | !arch_get_random_long(&rv)) 22 | rv = random_get_entropy(); 23 | + else 24 | + credit_entropy_bits(1); 25 | mix_pool_bytes(&rv, sizeof(rv)); 26 | } 27 | mix_pool_bytes(utsname(), sizeof(*(utsname()))); 28 | -- 29 | https://clearlinux.org 30 | 31 | -------------------------------------------------------------------------------- /0111-ipv4-tcp-allow-the-memory-tuning-for-tcp-to-go-a-lit.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Fri, 6 Jan 2017 15:34:09 +0000 4 | Subject: [PATCH] ipv4/tcp: allow the memory tuning for tcp to go a little 5 | bigger than default 6 | 7 | --- 8 | net/ipv4/tcp.c | 4 ++-- 9 | 1 file changed, 2 insertions(+), 2 deletions(-) 10 | 11 | diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c 12 | index 28ff2a820f7c..c4f240da8d70 100644 13 | --- a/net/ipv4/tcp.c 14 | +++ b/net/ipv4/tcp.c 15 | @@ -4604,8 +4604,8 @@ void __init tcp_init(void) 16 | tcp_init_mem(); 17 | /* Set per-socket limits to no more than 1/128 the pressure threshold */ 18 | limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); 19 | - max_wshare = min(4UL*1024*1024, limit); 20 | - max_rshare = min(6UL*1024*1024, limit); 21 | + max_wshare = min(16UL*1024*1024, limit); 22 | + max_rshare = min(16UL*1024*1024, limit); 23 | 24 | init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; 25 | init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; 26 | -- 27 | https://clearlinux.org 28 | 29 | 
-------------------------------------------------------------------------------- /0112-init-wait-for-partition-and-retry-scan.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 17 May 2017 01:52:11 +0000 4 | Subject: [PATCH] init: wait for partition and retry scan 5 | 6 | As Clear Linux boots fast the device is not ready when 7 | the mounting code is reached, so a retry device scan will 8 | be performed every 0.5 sec for at least 40 sec 9 | and synchronize the async task. 10 | 11 | Signed-off-by: Miguel Bernal Marin 12 | --- 13 | init/do_mounts.c | 16 ++++++++++++++-- 14 | 1 file changed, 14 insertions(+), 2 deletions(-) 15 | 16 | diff --git a/init/do_mounts.c b/init/do_mounts.c 17 | index 762b534978d9..107b96927049 100644 18 | --- a/init/do_mounts.c 19 | +++ b/init/do_mounts.c 20 | @@ -613,7 +623,9 @@ void __init prepare_namespace(void) 21 | * For example, it is not atypical to wait 5 seconds here 22 | * for the touchpad of a laptop to initialize. 
23 | */ 24 | + async_synchronize_full(); 25 | wait_for_device_probe(); 26 | + async_synchronize_full(); 27 | 28 | md_run_setup(); 29 | 30 | -- 31 | https://clearlinux.org 32 | 33 | --- linux-6.5.1/block/early-lookup.c~ 2023-09-02 07:13:30.000000000 +0000 34 | +++ linux-6.5.1/block/early-lookup.c 2023-09-18 14:16:34.721720093 +0000 35 | @@ -243,8 +243,18 @@ 36 | */ 37 | int __init early_lookup_bdev(const char *name, dev_t *devt) 38 | { 39 | - if (strncmp(name, "PARTUUID=", 9) == 0) 40 | - return devt_from_partuuid(name + 9, devt); 41 | + if (strncmp(name, "PARTUUID=", 9) == 0) { 42 | + int res; 43 | + int needtowait = 40<<1; 44 | + res = devt_from_partuuid(name + 9, devt); 45 | + if (!res) return res; 46 | + while (res && needtowait) { 47 | + msleep(500); 48 | + res = devt_from_partuuid(name + 9, devt); 49 | + needtowait--; 50 | + } 51 | + return res; 52 | + } 53 | if (strncmp(name, "PARTLABEL=", 10) == 0) 54 | return devt_from_partlabel(name + 10, devt); 55 | if (strncmp(name, "/dev/", 5) == 0) 56 | --- linux-6.5.1/block/early-lookup.c~ 2023-09-18 14:16:34.000000000 +0000 57 | +++ linux-6.5.1/block/early-lookup.c 2023-09-18 14:27:32.042046852 +0000 58 | @@ -5,6 +5,7 @@ 59 | */ 60 | #include 61 | #include 62 | +#include 63 | 64 | struct uuidcmp { 65 | const char *uuid; 66 | -------------------------------------------------------------------------------- /0113-print-fsync-count-for-bootchart.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 20 Jun 2017 20:19:08 +0000 4 | Subject: [PATCH] print fsync count for bootchart 5 | 6 | --- 7 | block/blk-core.c | 3 +++ 8 | include/linux/sched.h | 1 + 9 | kernel/sched/debug.c | 1 + 10 | 3 files changed, 5 insertions(+) 11 | 12 | diff --git a/block/blk-core.c b/block/blk-core.c 13 | index 779b4a1f66ac..f1a4da52b511 100644 14 | --- a/block/blk-core.c 15 | +++ b/block/blk-core.c 
16 | @@ -945,6 +945,9 @@ void submit_bio(struct bio *bio) 17 | task_io_account_read(bio->bi_iter.bi_size); 18 | count_vm_events(PGPGIN, count); 19 | } 20 | + 21 | + if (bio->bi_opf & REQ_PREFLUSH) 22 | + current->fsync_count++; 23 | } 24 | 25 | /* 26 | diff --git a/include/linux/sched.h b/include/linux/sched.h 27 | index 4b4cc633b266..094875ea5388 100644 28 | --- a/include/linux/sched.h 29 | +++ b/include/linux/sched.h 30 | @@ -1046,6 +1046,7 @@ struct task_struct { 31 | /* Cached requested key. */ 32 | struct key *cached_requested_key; 33 | #endif 34 | + int fsync_count; 35 | 36 | /* 37 | * executable name, excluding path. 38 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c 39 | index 102d6f70e84d..cc06e81a9e61 100644 40 | --- a/kernel/sched/debug.c 41 | +++ b/kernel/sched/debug.c 42 | @@ -960,6 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, 43 | PN(se.exec_start); 44 | PN(se.vruntime); 45 | PN(se.sum_exec_runtime); 46 | + P(fsync_count); 47 | 48 | nr_switches = p->nvcsw + p->nivcsw; 49 | 50 | -- 51 | https://clearlinux.org 52 | 53 | -------------------------------------------------------------------------------- /0114-add-boot-option-to-allow-unsigned-modules.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: "Brett T. Warden" 3 | Date: Mon, 13 Aug 2018 04:01:21 -0500 4 | Subject: [PATCH] add boot option to allow unsigned modules 5 | 6 | Add module.sig_unenforce boot parameter to allow loading unsigned kernel 7 | modules. Parameter is only effective if CONFIG_MODULE_SIG_FORCE is 8 | enabled and system is *not* SecureBooted. 9 | 10 | Signed-off-by: Brett T. 
Warden 11 | Signed-off-by: Miguel Bernal Marin 12 | --- 13 | kernel/module.c | 20 ++++++++++++++++++++ 14 | 1 file changed, 20 insertions(+) 15 | 16 | --- linux-5.19.1/kernel/module/signing.c~ 2022-08-11 11:22:05.000000000 +0000 17 | +++ linux-5.19.1/kernel/module/signing.c 2022-08-11 15:20:18.199749857 +0000 18 | @@ -14,6 +14,8 @@ 19 | #include 20 | #include 21 | #include 22 | +#include 23 | + 24 | #include "internal.h" 25 | 26 | #undef MODULE_PARAM_PREFIX 27 | @@ -21,6 +23,11 @@ 28 | 29 | static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); 30 | module_param(sig_enforce, bool_enable_only, 0644); 31 | +/* Allow disabling module signature requirement by adding boot param */ 32 | +static bool sig_unenforce = false; 33 | +module_param(sig_unenforce, bool_enable_only, 0644); 34 | + 35 | +extern struct boot_params boot_params; 36 | 37 | /* 38 | * Export sig_enforce kernel cmdline parameter to allow other subsystems rely 39 | @@ -28,6 +35,8 @@ 40 | */ 41 | bool is_module_sig_enforced(void) 42 | { 43 | + if (sig_unenforce) 44 | + return false; 45 | return sig_enforce; 46 | } 47 | EXPORT_SYMBOL(is_module_sig_enforced); 48 | -------------------------------------------------------------------------------- /0115-enable-stateless-firmware-loading.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: William Douglas 3 | Date: Wed, 20 Jun 2018 17:23:21 +0000 4 | Subject: [PATCH] enable stateless firmware loading 5 | 6 | Prefer the order of specific version before generic and /etc before 7 | /lib to enable the user to give specific overrides for generic 8 | firmware and distribution firmware. 
9 | --- 10 | drivers/base/firmware_loader/main.c | 2 ++ 11 | 1 file changed, 2 insertions(+) 12 | 13 | diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c 14 | index 406a907a4cae..89890e085171 100644 15 | --- a/drivers/base/firmware_loader/main.c 16 | +++ b/drivers/base/firmware_loader/main.c 17 | @@ -407,6 +407,8 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, 18 | static char fw_path_para[256]; 19 | static const char * const fw_path[] = { 20 | fw_path_para, 21 | + "/etc/firmware/" UTS_RELEASE, 22 | + "/etc/firmware", 23 | "/lib/firmware/updates/" UTS_RELEASE, 24 | "/lib/firmware/updates", 25 | "/lib/firmware/" UTS_RELEASE, 26 | -- 27 | https://clearlinux.org 28 | 29 | -------------------------------------------------------------------------------- /0116-migrate-some-systemd-defaults-to-the-kernel-defaults.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Auke Kok 3 | Date: Thu, 2 Aug 2018 12:03:22 -0700 4 | Subject: [PATCH] migrate some systemd defaults to the kernel defaults. 5 | 6 | These settings are needed to prevent networking issues when 7 | the networking modules come up by default without explicit 8 | settings, which breaks some cases. 9 | 10 | We don't want the modprobe settings to be read at boot time 11 | if we're not going to do anything else ever. 
12 | --- 13 | drivers/net/dummy.c | 2 +- 14 | include/uapi/linux/if_bonding.h | 2 +- 15 | 2 files changed, 2 insertions(+), 2 deletions(-) 16 | 17 | diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c 18 | index f82ad7419508..5e8faa70aad6 100644 19 | --- a/drivers/net/dummy.c 20 | +++ b/drivers/net/dummy.c 21 | @@ -43,7 +43,7 @@ 22 | 23 | #define DRV_NAME "dummy" 24 | 25 | -static int numdummies = 1; 26 | +static int numdummies = 0; 27 | 28 | /* fake multicast ability */ 29 | static void set_multicast_list(struct net_device *dev) 30 | diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h 31 | index d174914a837d..bf8e2af101a3 100644 32 | --- a/include/uapi/linux/if_bonding.h 33 | +++ b/include/uapi/linux/if_bonding.h 34 | @@ -82,7 +82,7 @@ 35 | #define BOND_STATE_ACTIVE 0 /* link is active */ 36 | #define BOND_STATE_BACKUP 1 /* link is backup */ 37 | 38 | -#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ 39 | +#define BOND_DEFAULT_MAX_BONDS 0 /* Default maximum number of devices to support */ 40 | 41 | #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ 42 | 43 | -- 44 | https://clearlinux.org 45 | 46 | -------------------------------------------------------------------------------- /0117-xattr-allow-setting-user.-attributes-on-symlinks-by-.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Alan Cox 3 | Date: Thu, 10 Mar 2016 15:11:28 +0000 4 | Subject: [PATCH] xattr: allow setting user.* attributes on symlinks by owner 5 | 6 | Kvmtool and clear containers supports using user attributes to label host 7 | files with the virtual uid/guid of the file in the container. This allows an 8 | end user to manage their files and a complete uid space without all the ugly 9 | namespace stuff. 
10 | 11 | The one gap in the support is symlinks because an end user can change the 12 | ownership of a symbolic link. We support attributes on these files as you 13 | can already (as root) set security attributes on them. 14 | 15 | The current rules seem slightly over-paranoid and as we have a use case this 16 | patch enables updating the attributes on a symbolic link IFF you are the 17 | owner of the symlink (as permissions are not usually meaningful on the link 18 | itself). 19 | 20 | Signed-off-by: Alan Cox 21 | --- 22 | fs/xattr.c | 15 ++++++++------- 23 | 1 file changed, 8 insertions(+), 7 deletions(-) 24 | 25 | diff --git a/fs/xattr.c b/fs/xattr.c 26 | index 998045165916..62b6fb4dedee 100644 27 | --- a/fs/xattr.c 28 | +++ b/fs/xattr.c 29 | @@ -120,16 +120,17 @@ xattr_permission(struct user_namespace *mnt_userns, struct inode *inode, 30 | } 31 | 32 | /* 33 | - * In the user.* namespace, only regular files and directories can have 34 | - * extended attributes. For sticky directories, only the owner and 35 | - * privileged users can write attributes. 36 | + * In the user.* namespace, only regular files, symbolic links, and 37 | + * directories can have extended attributes. For symbolic links and 38 | + * sticky directories, only the owner and privileged users can write 39 | + * attributes. 40 | */ 41 | if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { 42 | - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 43 | + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) 44 | return (mask & MAY_WRITE) ? 
-EPERM : -ENODATA; 45 | - if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && 46 | - (mask & MAY_WRITE) && 47 | - !inode_owner_or_capable(idmap, inode)) 48 | + if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX)) 49 | + || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE) 50 | + && !inode_owner_or_capable(idmap, inode)) 51 | return -EPERM; 52 | } 53 | 54 | -- 55 | https://clearlinux.org 56 | 57 | -------------------------------------------------------------------------------- /0118-add-scheduler-turbo3-patch.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Wed, 21 Nov 2018 21:21:44 +0000 4 | Subject: [PATCH] add scheduler turbo3 patch 5 | 6 | Small scheduler tweak to make the scheduler more turbo3 aware 7 | --- 8 | arch/x86/kernel/itmt.c | 14 ++++++++++++++ 9 | kernel/sched/fair.c | 19 +++++++++++++++++++ 10 | 2 files changed, 33 insertions(+) 11 | 12 | diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c 13 | index 9ff480e94511..57027bfed25f 100644 14 | --- a/arch/x86/kernel/itmt.c 15 | +++ b/arch/x86/kernel/itmt.c 16 | @@ -172,6 +172,11 @@ int arch_asym_cpu_priority(int cpu) 17 | return per_cpu(sched_core_priority, cpu); 18 | } 19 | 20 | +extern int best_core; 21 | +extern int second_best_core; 22 | +static int best_core_score; 23 | +static int second_best_core_score; 24 | + 25 | /** 26 | * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 27 | * @prio: Priority of cpu core 28 | @@ -201,5 +206,14 @@ void sched_set_itmt_core_prio(int prio, int core_cpu) 29 | smt_prio = prio * smp_num_siblings / (i * i); 30 | per_cpu(sched_core_priority, cpu) = smt_prio; 31 | i++; 32 | + 33 | + if (smt_prio > best_core_score) { 34 | + best_core = cpu; 35 | + best_core_score = smt_prio; 36 | + } else 37 | + if (smt_prio > second_best_core_score) { 38 | + second_best_core = cpu; 39 | + second_best_core_score 
= smt_prio; 40 | + } 41 | } 42 | } 43 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 44 | index 2f461f059278..632fcb22f4e2 100644 45 | --- a/kernel/sched/fair.c 46 | +++ b/kernel/sched/fair.c 47 | @@ -6854,6 +6854,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 48 | * 49 | * Returns the target CPU number. 50 | */ 51 | + 52 | +int best_core = -1; 53 | +int second_best_core = -1; 54 | + 55 | static int 56 | select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) 57 | { 58 | @@ -6882,6 +6886,21 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) 59 | want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); 60 | } 61 | 62 | + if (prev_cpu != best_core && prev_cpu != second_best_core && 63 | + cpu_rq(prev_cpu)->nr_running != 0) { 64 | + if (second_best_core != -1 && cpu_rq(second_best_core)->nr_running == 0 && 65 | + nr_iowait_cpu(second_best_core) < 2 && cpu_to_node(prev_cpu) == cpu_to_node(second_best_core)) 66 | + prev_cpu = second_best_core; 67 | + if (best_core != -1 && cpu_rq(best_core)->nr_running == 0 && 68 | + nr_iowait_cpu(best_core) < 2 && cpu_to_node(prev_cpu) == cpu_to_node(best_core)) 69 | + prev_cpu = best_core; 70 | + } 71 | +/* 72 | + if (prev_cpu > 0 && cpu_rq(prev_cpu)->nr_running != 0 && cpu_rq(prev_cpu - 1)->nr_running == 0) 73 | + prev_cpu = prev_cpu - 1; 74 | +*/ 75 | + 76 | + 77 | rcu_read_lock(); 78 | for_each_domain(cpu, tmp) { 79 | /* 80 | -- 81 | https://clearlinux.org 82 | 83 | -------------------------------------------------------------------------------- /0120-do-accept-in-LIFO-order-for-cache-efficiency.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 13 Dec 2018 01:00:49 +0000 4 | Subject: [PATCH] do accept() in LIFO order for cache efficiency 5 | 6 | --- 7 | include/linux/wait.h | 2 ++ 8 | 
kernel/sched/wait.c | 24 ++++++++++++++++++++++++ 9 | net/ipv4/inet_connection_sock.c | 2 +- 10 | 3 files changed, 27 insertions(+), 1 deletion(-) 11 | 12 | diff --git a/include/linux/wait.h b/include/linux/wait.h 13 | index 851e07da2583..85653fc33274 100644 14 | --- a/include/linux/wait.h 15 | +++ b/include/linux/wait.h 16 | @@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) 17 | 18 | extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 19 | extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 20 | +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 21 | extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 22 | extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 23 | 24 | @@ -1163,6 +1164,7 @@ do { \ 25 | */ 26 | void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 27 | bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 28 | +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 29 | long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); 30 | void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); 31 | long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); 32 | diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c 33 | index eca38107b32f..0306fa23b4f8 100644 34 | --- a/kernel/sched/wait.c 35 | +++ b/kernel/sched/wait.c 36 | @@ -48,6 +48,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ 37 | } 38 | EXPORT_SYMBOL_GPL(add_wait_queue_priority); 39 | 40 | +void 
add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) 41 | +{ 42 | + unsigned long flags; 43 | + 44 | + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; 45 | + spin_lock_irqsave(&wq_head->lock, flags); 46 | + __add_wait_queue(wq_head, wq_entry); 47 | + spin_unlock_irqrestore(&wq_head->lock, flags); 48 | +} 49 | +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); 50 | + 51 | void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) 52 | { 53 | unsigned long flags; 54 | @@ -290,6 +301,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent 55 | } 56 | EXPORT_SYMBOL(prepare_to_wait_exclusive); 57 | 58 | +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) 59 | +{ 60 | + unsigned long flags; 61 | + 62 | + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; 63 | + spin_lock_irqsave(&wq_head->lock, flags); 64 | + if (list_empty(&wq_entry->entry)) 65 | + __add_wait_queue(wq_head, wq_entry); 66 | + set_current_state(state); 67 | + spin_unlock_irqrestore(&wq_head->lock, flags); 68 | +} 69 | +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); 70 | + 71 | void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) 72 | { 73 | wq_entry->flags = flags; 74 | diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c 75 | index fc2a985f6064..02dc861de3d5 100644 76 | --- a/net/ipv4/inet_connection_sock.c 77 | +++ b/net/ipv4/inet_connection_sock.c 78 | @@ -441,7 +441,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) 79 | * having to remove and re-insert us on the wait queue. 
80 | */ 81 | for (;;) { 82 | - prepare_to_wait_exclusive(sk_sleep(sk), &wait, 83 | + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, 84 | TASK_INTERRUPTIBLE); 85 | release_sock(sk); 86 | if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 87 | -- 88 | https://clearlinux.org 89 | 90 | -------------------------------------------------------------------------------- /0121-locking-rwsem-spin-faster.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Sun, 18 Feb 2018 23:35:41 +0000 4 | Subject: [PATCH] locking: rwsem: spin faster 5 | 6 | tweak rwsem owner spinning a bit 7 | --- 8 | kernel/locking/rwsem.c | 4 +++- 9 | 1 file changed, 3 insertions(+), 1 deletion(-) 10 | 11 | diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c 12 | index 69aba4abe104..b4818ccad5de 100644 13 | --- a/kernel/locking/rwsem.c 14 | +++ b/kernel/locking/rwsem.c 15 | @@ -707,6 +707,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) 16 | struct task_struct *new, *owner; 17 | unsigned long flags, new_flags; 18 | enum owner_state state; 19 | + int i = 0; 20 | 21 | lockdep_assert_preemption_disabled(); 22 | 23 | @@ -743,7 +744,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) 24 | break; 25 | } 26 | 27 | - cpu_relax(); 28 | + if (i++ > 1000) 29 | + cpu_relax(); 30 | } 31 | 32 | return state; 33 | -- 34 | https://clearlinux.org 35 | 36 | -------------------------------------------------------------------------------- /0122-ata-libahci-ignore-staggered-spin-up.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Joe Konno 3 | Date: Tue, 25 Jun 2019 10:35:54 -0700 4 | Subject: [PATCH] ata: libahci: ignore staggered spin-up 5 | 6 | Change libahci to ignore firmware's staggered spin-up flag. 
End-users 7 | who wish to honor firmware's SSS flag can add the following kernel 8 | parameter to a new file at /etc/kernel/cmdline.d/ignore_sss.conf: 9 | libahci.ignore_sss=0 10 | 11 | And then run 12 | sudo clr-boot-manager update 13 | 14 | Signed-off-by: Joe Konno 15 | --- 16 | drivers/ata/libahci.c | 4 ++-- 17 | 1 file changed, 2 insertions(+), 2 deletions(-) 18 | 19 | diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c 20 | index 0ed484e04fd6..bbcb54da7a31 100644 21 | --- a/drivers/ata/libahci.c 22 | +++ b/drivers/ata/libahci.c 23 | @@ -33,14 +33,14 @@ 24 | #include "libata.h" 25 | 26 | static int ahci_skip_host_reset; 27 | -int ahci_ignore_sss; 28 | +int ahci_ignore_sss=1; 29 | EXPORT_SYMBOL_GPL(ahci_ignore_sss); 30 | 31 | module_param_named(skip_host_reset, ahci_skip_host_reset, int, 0444); 32 | MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)"); 33 | 34 | module_param_named(ignore_sss, ahci_ignore_sss, int, 0444); 35 | -MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore)"); 36 | +MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore [default])"); 37 | 38 | static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 39 | unsigned hints); 40 | -- 41 | https://clearlinux.org 42 | 43 | -------------------------------------------------------------------------------- /0123-print-CPU-that-faults.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Sat, 10 Aug 2019 03:19:04 +0000 4 | Subject: [PATCH] print CPU that faults 5 | 6 | print cpu number when we print a crash 7 | --- 8 | arch/x86/mm/fault.c | 4 ++-- 9 | 1 file changed, 2 insertions(+), 2 deletions(-) 10 | 11 | diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c 12 | index d0074c6ed31a..aeeae77fe5dd 100644 13 | --- a/arch/x86/mm/fault.c 14 | 
+++ b/arch/x86/mm/fault.c 15 | @@ -776,9 +776,9 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, 16 | if (!printk_ratelimit()) 17 | return; 18 | 19 | - printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", 20 | + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx cpu %i", 21 | loglvl, tsk->comm, task_pid_nr(tsk), address, 22 | - (void *)regs->ip, (void *)regs->sp, error_code); 23 | + (void *)regs->ip, (void *)regs->sp, error_code, raw_smp_processor_id()); 24 | 25 | print_vma_addr(KERN_CONT " in ", regs->ip); 26 | 27 | -- 28 | https://clearlinux.org 29 | 30 | -------------------------------------------------------------------------------- /0124-x86-microcode-Add-an-option-to-reload-microcode-even.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Ashok Raj 3 | Date: Thu, 19 Aug 2021 14:49:47 -0700 4 | Subject: [PATCH] x86/microcode: Add an option to reload microcode even if 5 | revision is the same 6 | 7 | This is POC to support rollback. This is a simple version, admin uses 8 | echo 2 instead of echo 1 to reload. We don't do the version checks. 9 | 10 | #echo 1 > /sys/devices/system/cpu/microcode/reload 11 | 12 | The following usage, writing 2 to reload file is helpful to reload 13 | the microcode again even if the revision is less than what is loaded. 
14 | 15 | #echo 2 > /sys/devices/system/cpu/microcode/reload 16 | 17 | Signed-off-by: Ashok Raj 18 | --- 19 | arch/x86/kernel/cpu/microcode/core.c | 40 ++++++++++++++++++++++++++- 20 | arch/x86/kernel/cpu/microcode/intel.c | 14 ++++++---- 21 | 2 files changed, 47 insertions(+), 7 deletions(-) 22 | 23 | diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c 24 | index 239ff5fcec6a..b096a43b2b9d 100644 25 | --- a/arch/x86/kernel/cpu/microcode/core.c 26 | +++ b/arch/x86/kernel/cpu/microcode/core.c 27 | @@ -44,6 +44,8 @@ 28 | 29 | static struct microcode_ops *microcode_ops; 30 | static bool dis_ucode_ldr = true; 31 | +bool ucode_rollback = false; 32 | +int enable_rollback = 0; 33 | 34 | bool initrd_gone; 35 | 36 | @@ -80,6 +82,26 @@ static u32 final_levels[] = { 37 | 0, /* T-101 terminator */ 38 | }; 39 | 40 | +static int __init ucode_setup(char *str) 41 | +{ 42 | + if (!str) 43 | + return -EINVAL; 44 | + 45 | + while (*str) { 46 | + if (!strncmp(str, "rollback", 8)) { 47 | + enable_rollback = 1; 48 | + pr_info("Microcode Rollback Enabled\n"); 49 | + } 50 | + str += strcspn(str, ","); 51 | + while (*str == ',') 52 | + str++; 53 | + } 54 | + return 0; 55 | +} 56 | + 57 | +__setup("ucode=", ucode_setup); 58 | + 59 | + 60 | /* 61 | * Check the current patch level on this CPU. 
62 | * 63 | @@ -600,6 +622,7 @@ static ssize_t reload_store(struct device *dev, 64 | struct device_attribute *attr, 65 | const char *buf, size_t size) 66 | { 67 | + struct cpuinfo_x86 *c = &boot_cpu_data; 68 | enum ucode_state tmp_ret = UCODE_OK; 69 | int bsp = boot_cpu_data.cpu_index; 70 | unsigned long val; 71 | @@ -609,7 +632,7 @@ static ssize_t reload_store(struct device *dev, 72 | if (ret) 73 | return ret; 74 | 75 | - if (val != 1) 76 | + if (!val || val > 2) 77 | return size; 78 | 79 | cpus_read_lock(); 80 | @@ -617,6 +640,20 @@ static ssize_t reload_store(struct device *dev, 81 | ret = check_online_cpus(); 82 | if (ret) 83 | goto put; 84 | + /* 85 | + * Check if the vendor is Intel to permit reloading 86 | + * microcode even if the revision is unchanged. 87 | + * This is typically used during development of microcode 88 | + * and changing rev is a pain. 89 | + */ 90 | + if ((val == 2) && ((c->x86_vendor != X86_VENDOR_INTEL) || 91 | + !enable_rollback)) 92 | + return size; 93 | + else if (val == 2) { 94 | + mutex_lock(µcode_mutex); 95 | + ucode_rollback = true; 96 | + mutex_unlock(µcode_mutex); 97 | + } 98 | 99 | tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); 100 | if (tmp_ret != UCODE_NEW) 101 | @@ -627,6 +664,7 @@ static ssize_t reload_store(struct device *dev, 102 | mutex_unlock(µcode_mutex); 103 | 104 | put: 105 | + ucode_rollback = false; 106 | cpus_read_unlock(); 107 | 108 | if (ret == 0) 109 | diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c 110 | index d28a9f8f3fec..02b506f52a13 100644 111 | --- a/arch/x86/kernel/cpu/microcode/intel.c 112 | +++ b/arch/x86/kernel/cpu/microcode/intel.c 113 | @@ -44,6 +44,7 @@ static struct microcode_intel *intel_ucode_patch; 114 | 115 | /* last level cache size per core */ 116 | static int llc_size_per_core; 117 | +extern bool ucode_rollback; 118 | 119 | static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, 120 | unsigned int s2, 
unsigned int p2) 121 | @@ -94,7 +95,7 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev 122 | { 123 | struct microcode_header_intel *mc_hdr = mc; 124 | 125 | - if (mc_hdr->rev <= new_rev) 126 | + if (!ucode_rollback && mc_hdr->rev <= new_rev) 127 | return 0; 128 | 129 | return find_matching_signature(mc, csig, cpf); 130 | @@ -134,7 +135,7 @@ static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigne 131 | if (find_matching_signature(data, sig, pf)) { 132 | prev_found = true; 133 | 134 | - if (mc_hdr->rev <= mc_saved_hdr->rev) 135 | + if (!ucode_rollback && mc_hdr->rev <= mc_saved_hdr->rev) 136 | continue; 137 | 138 | p = memdup_patch(data, size); 139 | @@ -694,7 +695,7 @@ static struct microcode_intel *find_patch(struct ucode_cpu_info *uci) 140 | 141 | phdr = (struct microcode_header_intel *)iter->data; 142 | 143 | - if (phdr->rev <= uci->cpu_sig.rev) 144 | + if (!ucode_rollback && phdr->rev <= uci->cpu_sig.rev) 145 | continue; 146 | 147 | if (!find_matching_signature(phdr, 148 | @@ -779,10 +780,11 @@ static enum ucode_state apply_microcode_intel(int cpu) 149 | * already. 
150 | */ 151 | rev = intel_get_microcode_revision(); 152 | - if (rev >= mc->hdr.rev) { 153 | + if (!ucode_rollback && rev >= mc->hdr.rev) { 154 | ret = UCODE_OK; 155 | goto out; 156 | - } 157 | + } else if (ucode_rollback) 158 | + ret = UCODE_OK; 159 | 160 | /* 161 | * Writeback and invalidate caches before updating microcode to avoid 162 | @@ -801,7 +803,7 @@ static enum ucode_state apply_microcode_intel(int cpu) 163 | return UCODE_ERROR; 164 | } 165 | 166 | - if (bsp && rev != prev_rev) { 167 | + if (bsp && ((rev != prev_rev) || ucode_rollback)) { 168 | pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", 169 | rev, 170 | mc->hdr.date & 0xffff, 171 | -- 172 | https://clearlinux.org 173 | 174 | -------------------------------------------------------------------------------- /0125-nvme-workaround.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 11 Nov 2019 23:12:11 +0000 4 | Subject: [PATCH] nvme workaround 5 | 6 | --- 7 | drivers/nvme/host/core.c | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c 11 | index 0abd772c57f0..9129a2179f25 100644 12 | --- a/drivers/nvme/host/core.c 13 | +++ b/drivers/nvme/host/core.c 14 | @@ -48,7 +48,7 @@ static u8 nvme_max_retries = 5; 15 | module_param_named(max_retries, nvme_max_retries, byte, 0644); 16 | MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); 17 | 18 | -static unsigned long default_ps_max_latency_us = 100000; 19 | +static unsigned long default_ps_max_latency_us = 200; 20 | module_param(default_ps_max_latency_us, ulong, 0644); 21 | MODULE_PARM_DESC(default_ps_max_latency_us, 22 | "max power saving latency for new devices; use PM QOS to change per device"); 23 | -- 24 | https://clearlinux.org 25 | 26 | 
-------------------------------------------------------------------------------- /0126-don-t-report-an-error-if-PowerClamp-run-on-other-CPU.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Alexander Koskovich 3 | Date: Wed, 12 Feb 2020 22:47:12 +0000 4 | Subject: [PATCH] don't report an error if PowerClamp run on other CPU 5 | 6 | --- 7 | drivers/thermal/intel/intel_powerclamp.c | 10 ++++++++++ 8 | 1 file changed, 10 insertions(+) 9 | 10 | diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c 11 | index 14256421d98c..8df2e604ceef 100644 12 | --- a/drivers/thermal/intel/intel_powerclamp.c 13 | +++ b/drivers/thermal/intel/intel_powerclamp.c 14 | @@ -647,6 +647,11 @@ static const struct thermal_cooling_device_ops powerclamp_cooling_ops = { 15 | .set_cur_state = powerclamp_set_cur_state, 16 | }; 17 | 18 | +static const struct x86_cpu_id amd_cpu[] = { 19 | + { X86_VENDOR_AMD }, 20 | + {}, 21 | +}; 22 | + 23 | static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = { 24 | X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL), 25 | {} 26 | @@ -656,6 +661,11 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); 27 | static int __init powerclamp_probe(void) 28 | { 29 | 30 | + if (x86_match_cpu(amd_cpu)){ 31 | + pr_info("Intel PowerClamp does not support AMD CPUs\n"); 32 | + return -ENODEV; 33 | + } 34 | + 35 | if (!x86_match_cpu(intel_powerclamp_ids)) { 36 | pr_err("CPU does not support MWAIT\n"); 37 | return -ENODEV; 38 | -- 39 | https://clearlinux.org 40 | 41 | -------------------------------------------------------------------------------- /0127-lib-raid6-add-patch.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Mon, 27 Sep 2021 17:43:01 +0000 
4 | Subject: [PATCH] lib/raid6: add patch 5 | 6 | --- 7 | lib/raid6/algos.c | 4 +++- 8 | 1 file changed, 3 insertions(+), 1 deletion(-) 9 | 10 | diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c 11 | index 39b74221f4a7..ec3eab8cd6b1 100644 12 | --- a/lib/raid6/algos.c 13 | +++ b/lib/raid6/algos.c 14 | @@ -128,8 +128,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) 15 | 16 | for (best = NULL, algo = raid6_recov_algos; *algo; algo++) 17 | if (!best || (*algo)->priority > best->priority) 18 | - if (!(*algo)->valid || (*algo)->valid()) 19 | + if (!(*algo)->valid || (*algo)->valid()) { 20 | best = *algo; 21 | + break; 22 | + } 23 | 24 | if (best) { 25 | raid6_2data_recov = best->data2; 26 | -- 27 | https://clearlinux.org 28 | 29 | -------------------------------------------------------------------------------- /0128-itmt_epb-use-epb-to-scale-itmt.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 16 Nov 2021 17:39:25 +0000 4 | Subject: [PATCH] itmt_epb: use epb to scale itmt 5 | 6 | --- 7 | arch/x86/include/asm/topology.h | 1 + 8 | arch/x86/kernel/cpu/intel_epb.c | 4 ++++ 9 | arch/x86/kernel/itmt.c | 29 ++++++++++++++++++++++++++++- 10 | 3 files changed, 33 insertions(+), 1 deletion(-) 11 | 12 | diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h 13 | index 2f0b6be8eaab..c31f81e2ea05 100644 14 | --- a/arch/x86/include/asm/topology.h 15 | +++ b/arch/x86/include/asm/topology.h 16 | @@ -174,6 +174,7 @@ extern unsigned int __read_mostly sysctl_sched_itmt_enabled; 17 | 18 | /* Interface to set priority of a cpu */ 19 | void sched_set_itmt_core_prio(int prio, int core_cpu); 20 | +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu); 21 | 22 | /* Interface to notify scheduler that system supports ITMT */ 23 | int sched_set_itmt_support(void); 24 | diff --git 
a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c 25 | index fbaf12e43f41..c8c2d6f1a8ac 100644 26 | --- a/arch/x86/kernel/cpu/intel_epb.c 27 | +++ b/arch/x86/kernel/cpu/intel_epb.c 28 | @@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev, 29 | if (ret < 0) 30 | return ret; 31 | 32 | + /* update the ITMT scheduler logic to use the power policy data */ 33 | + /* scale the val up by 2 so the range is 224 - 256 */ 34 | + sched_set_itmt_power_ratio(256 - val * 2, cpu); 35 | + 36 | return count; 37 | } 38 | 39 | diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c 40 | index 57027bfed25f..596fd7fb7847 100644 41 | --- a/arch/x86/kernel/itmt.c 42 | +++ b/arch/x86/kernel/itmt.c 43 | @@ -25,6 +25,7 @@ 44 | 45 | static DEFINE_MUTEX(itmt_update_mutex); 46 | DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 47 | +DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio); 48 | 49 | /* Boolean to track if system has ITMT capabilities */ 50 | static bool __read_mostly sched_itmt_capable; 51 | @@ -169,7 +170,12 @@ void sched_clear_itmt_support(void) 52 | 53 | int arch_asym_cpu_priority(int cpu) 54 | { 55 | - return per_cpu(sched_core_priority, cpu); 56 | + int power_ratio = per_cpu(sched_power_ratio, cpu); 57 | + 58 | + /* a power ratio of 0 (uninitialized) is assumed to be maximum */ 59 | + if (power_ratio == 0) 60 | + power_ratio = 256 - 2 * 6; 61 | + return per_cpu(sched_core_priority, cpu) * power_ratio / 256; 62 | } 63 | 64 | extern int best_core; 65 | @@ -217,3 +223,24 @@ void sched_set_itmt_core_prio(int prio, int core_cpu) 66 | } 67 | } 68 | } 69 | + 70 | +/** 71 | + * sched_set_itmt_power_ratio() - Set CPU priority based on ITMT 72 | + * @power_ratio: The power scaling ratio [1..256] for the core 73 | + * @core_cpu: The cpu number associated with the core 74 | + * 75 | + * Set a scaling to the cpu performance based on long term power 76 | + * settings (like EPB). 
77 | + * 78 | + * Note this is for the policy not for the actual dynamic frequency; 79 | + * the frequency will increase itself as workloads run on a core. 80 | + */ 81 | + 82 | +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu) 83 | +{ 84 | + int cpu; 85 | + 86 | + for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 87 | + per_cpu(sched_power_ratio, cpu) = power_ratio; 88 | + } 89 | +} 90 | -- 91 | https://clearlinux.org 92 | 93 | -------------------------------------------------------------------------------- /0129-mm-wakeups-remove-a-wakeup.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 16 Nov 2021 22:20:49 +0000 4 | Subject: [PATCH] mm: wakeups: remove a wakeup 5 | 6 | --- 7 | mm/compaction.c | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/mm/compaction.c b/mm/compaction.c 11 | index b4e94cda3019..e9a36942c1fa 100644 12 | --- a/mm/compaction.c 13 | +++ b/mm/compaction.c 14 | @@ -53,7 +53,7 @@ static inline void count_compact_events(enum vm_event_item item, long delta) 15 | /* 16 | * Fragmentation score check interval for proactive compaction purposes. 
17 | */ 18 | -static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; 19 | +static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 5000; 20 | 21 | /* 22 | * Page order with-respect-to which proactive compaction 23 | -- 24 | https://clearlinux.org 25 | 26 | -------------------------------------------------------------------------------- /0130-itmt2-ADL-fixes.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Srinivas Pandruvada 3 | Date: Thu, 18 Nov 2021 16:09:47 +0000 4 | Subject: [PATCH] itmt2 ADL fixes 5 | 6 | On systems with overclocking enabled, CPPC Highest Performance can be 7 | hard coded to 0xff. In this case even if we have cores with different 8 | highest performance, ITMT can't be enabled as the current implementation 9 | depends on CPPC Highest Performance. 10 | 11 | On such systems we can use MSR_HWP_CAPABILITIES maximum performance field 12 | when CPPC.Highest Performance is 0xff. 13 | 14 | Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES as 15 | in some older systems CPPC Highest Performance is the only way to identify 16 | different performing cores. 17 | 18 | Signed-off-by: Srinivas Pandruvada 19 | --- 20 | drivers/cpufreq/intel_pstate.c | 7 +++++++ 21 | 1 file changed, 7 insertions(+) 22 | 23 | diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c 24 | index bc7f7e6759bd..ee33ad7f6f28 100644 25 | --- a/drivers/cpufreq/intel_pstate.c 26 | +++ b/drivers/cpufreq/intel_pstate.c 27 | @@ -364,6 +364,13 @@ static void intel_pstate_set_itmt_prio(int cpu) 28 | * update them at any time after it has been called. 29 | */ 30 | sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu); 31 | + /* 32 | + * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. 33 | + * In this case we can't use CPPC.highest_perf to enable ITMT. 
34 | + * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. 35 | + */ 36 | + if (cppc_perf.highest_perf == 0xff) 37 | + cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); 38 | 39 | if (max_highest_perf <= min_highest_perf) { 40 | if (cppc_perf.highest_perf > max_highest_perf) 41 | -- 42 | https://clearlinux.org 43 | 44 | -------------------------------------------------------------------------------- /0131-add-a-per-cpu-minimum-high-watermark-an-tune-batch-s.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Tue, 23 Nov 2021 17:38:50 +0000 4 | Subject: [PATCH] add a per cpu minimum high watermark an tune batch size 5 | 6 | make sure there's at least 1024 per cpu pages... a reasonably small 7 | amount for todays system 8 | --- 9 | mm/page_alloc.c | 5 +++-- 10 | 1 file changed, 3 insertions(+), 2 deletions(-) 11 | 12 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c 13 | index e6f211dcf82e..0ea48434ac7d 100644 14 | --- a/mm/page_alloc.c 15 | +++ b/mm/page_alloc.c 16 | @@ -6836,11 +6836,11 @@ static int zone_batchsize(struct zone *zone) 17 | 18 | /* 19 | * The number of pages to batch allocate is either ~0.1% 20 | - * of the zone or 1MB, whichever is smaller. The batch 21 | + * of the zone or 4MB, whichever is smaller. The batch 22 | * size is striking a balance between allocation latency 23 | * and zone lock contention. 
24 | */ 25 | - batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE); 26 | + batch = min(zone_managed_pages(zone) >> 10, 4 * SZ_1M / PAGE_SIZE); 27 | batch /= 4; /* We effectively *= 4 below */ 28 | if (batch < 1) 29 | batch = 1; 30 | -- 31 | https://clearlinux.org 32 | 33 | -------------------------------------------------------------------------------- /0133-novector.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Fri, 15 Apr 2022 00:07:38 +0000 4 | Subject: [PATCH] novector 5 | 6 | gcc12/build workarounds 7 | 8 | --- 9 | arch/x86/Makefile | 2 +- 10 | 1 file changed, 1 insertion(+), 1 deletion(-) 11 | 12 | --- linux-6.9.1/arch/x86/Makefile~ 2024-05-17 10:18:09.000000000 +0000 13 | +++ linux-6.9.1/arch/x86/Makefile 2024-05-17 18:54:31.463961414 +0000 14 | @@ -70,7 +70,7 @@ 15 | # 16 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 17 | # 18 | -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx 19 | +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -O3 -fno-tree-vectorize -march=westmere -mpopcnt -fivopts -fmodulo-sched 20 | KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json 21 | KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 22 | 23 | -------------------------------------------------------------------------------- /0134-md-raid6-algorithms-scale-test-duration-for-speedier.patch: -------------------------------------------------------------------------------- 1 | From 1848e77c8d0356181344a7481f31eea42bf97f9e Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Fri, 28 Apr 2023 17:01:35 +0100 4 | Subject: [PATCH] md/raid6 algorithms: scale test duration for speedier boots 5 | 6 | Instead of using jiffies and waiting for jiffies to wrap before 7 | measuring use the higher precision local_time for 
benchmarking. 8 | Measure 2500 loops, which works out to be accurate enough for 9 | benchmarking the raid algo data rates. Also add division by zero 10 | checking in case timing measurements are bogus. 11 | 12 | Speeds up raid benchmarking from 48,000 usecs to 4000 usecs, saving 13 | 0.044 seconds on boot. 14 | 15 | Signed-off-by: Colin Ian King 16 | --- 17 | lib/raid6/algos.c | 53 ++++++++++++++++++++--------------------------- 18 | 1 file changed, 22 insertions(+), 31 deletions(-) 19 | 20 | diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c 21 | index a22a05c9af8a..e2ba261394f6 100644 22 | --- a/lib/raid6/algos.c 23 | +++ b/lib/raid6/algos.c 24 | @@ -18,6 +18,8 @@ 25 | #else 26 | #include 27 | #include 28 | +#include 29 | + 30 | /* In .bss so it's zeroed */ 31 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 32 | EXPORT_SYMBOL(raid6_empty_zero_page); 33 | @@ -143,12 +145,15 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) 34 | static inline const struct raid6_calls *raid6_choose_gen( 35 | void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) 36 | { 37 | - unsigned long perf, bestgenperf, j0, j1; 38 | + unsigned long perf; 39 | + const unsigned long max_perf = 2500; 40 | int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ 41 | const struct raid6_calls *const *algo; 42 | const struct raid6_calls *best; 43 | + const u64 ns_per_mb = 1000000000 >> 20; 44 | + u64 n, ns, t, ns_best = ~0ULL; 45 | 46 | - for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { 47 | + for (best = NULL, algo = raid6_algos; *algo; algo++) { 48 | if (!best || (*algo)->priority >= best->priority) { 49 | if ((*algo)->valid && !(*algo)->valid()) 50 | continue; 51 | @@ -158,26 +163,20 @@ static inline const struct raid6_calls *raid6_choose_gen( 52 | break; 53 | } 54 | 55 | - perf = 0; 56 | - 57 | preempt_disable(); 58 | - j0 = jiffies; 59 | - while ((j1 = jiffies) == j0) 60 | - cpu_relax(); 61 
| - while (time_before(jiffies, 62 | - j1 + (1<gen_syndrome(disks, PAGE_SIZE, *dptrs); 66 | - perf++; 67 | } 68 | + ns = local_clock() - t; 69 | preempt_enable(); 70 | 71 | - if (perf > bestgenperf) { 72 | - bestgenperf = perf; 73 | + if (ns < ns_best) { 74 | + ns_best = ns; 75 | best = *algo; 76 | } 77 | - pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, 78 | - (perf * HZ * (disks-2)) >> 79 | - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); 80 | + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); 81 | + pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns); 82 | } 83 | } 84 | 85 | @@ -194,31 +193,23 @@ static inline const struct raid6_calls *raid6_choose_gen( 86 | goto out; 87 | } 88 | 89 | - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", 90 | - best->name, 91 | - (bestgenperf * HZ * (disks - 2)) >> 92 | - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); 93 | + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); 94 | + pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n", 95 | + best->name, (ns_best > 0) ? n / ns_best : 0, ns_best); 96 | 97 | if (best->xor_syndrome) { 98 | - perf = 0; 99 | - 100 | preempt_disable(); 101 | - j0 = jiffies; 102 | - while ((j1 = jiffies) == j0) 103 | - cpu_relax(); 104 | - while (time_before(jiffies, 105 | - j1 + (1 << RAID6_TIME_JIFFIES_LG2))) { 106 | + t = local_clock(); 107 | + for (perf = 0; perf < max_perf; perf++) { 108 | best->xor_syndrome(disks, start, stop, 109 | PAGE_SIZE, *dptrs); 110 | - perf++; 111 | } 112 | + ns = local_clock() - t; 113 | preempt_enable(); 114 | 115 | - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", 116 | - (perf * HZ * (disks - 2)) >> 117 | - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); 118 | + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); 119 | + pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? 
n / ns : 0, ns); 120 | } 121 | - 122 | out: 123 | return best; 124 | } 125 | -- 126 | 2.42.0 127 | 128 | -------------------------------------------------------------------------------- /0135-initcall-only-print-non-zero-initcall-debug-to-speed.patch: -------------------------------------------------------------------------------- 1 | From 496f1bc8c53f359a2fe07204d3c5ffdba963994e Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Fri, 20 Jan 2023 11:16:42 +0000 4 | Subject: [PATCH] initcall: only print non-zero initcall debug to speed up boot 5 | 6 | Printing initcall timings that successfully return after 0 usecs 7 | provides not much useful information and takes a small amount of time 8 | to do so. Disable the initcall timings for these specific cases. On 9 | an Alderlake i9-12900 this reduces kernel boot time by 0.67% (timed 10 | up to the invocation of systemd starting) based on 10 boot measurements. 11 | 12 | Signed-off-by: Colin Ian King 13 | --- 14 | init/main.c | 7 +++++-- 15 | 1 file changed, 5 insertions(+), 2 deletions(-) 16 | 17 | diff --git a/init/main.c b/init/main.c 18 | index aa21add5f7c5..715d57f58895 100644 19 | --- a/init/main.c 20 | +++ b/init/main.c 21 | @@ -1254,10 +1254,13 @@ static __init_or_module void 22 | trace_initcall_finish_cb(void *data, initcall_t fn, int ret) 23 | { 24 | ktime_t rettime, *calltime = data; 25 | + long long delta; 26 | 27 | rettime = ktime_get(); 28 | - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", 29 | - fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); 30 | + delta = ktime_us_delta(rettime, *calltime); 31 | + if (ret || delta) 32 | + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", 33 | + fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); 34 | } 35 | 36 | static ktime_t initcall_calltime; 37 | -- 38 | 2.39.1 39 | 40 | -------------------------------------------------------------------------------- 
/0136-crypto-kdf-make-the-module-init-call-a-late-init-cal.patch: -------------------------------------------------------------------------------- 1 | From 0362ef59fd2c23816de8330e5ebeb2f66a4808c9 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Mon, 6 Mar 2023 12:25:29 +0000 4 | Subject: [PATCH] crypto: kdf: make the module init call a late init call 5 | 6 | Signed-off-by: Colin Ian King 7 | --- 8 | crypto/kdf_sp800108.c | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c 12 | index c3f9938e1ad2..e77478e064d8 100644 13 | --- a/crypto/kdf_sp800108.c 14 | +++ b/crypto/kdf_sp800108.c 15 | @@ -149,7 +149,7 @@ static int __init crypto_kdf108_init(void) 16 | 17 | static void __exit crypto_kdf108_exit(void) { } 18 | 19 | -module_init(crypto_kdf108_init); 20 | +late_initcall(crypto_kdf108_init); 21 | module_exit(crypto_kdf108_exit); 22 | 23 | MODULE_LICENSE("GPL v2"); 24 | -- 25 | 2.39.2 26 | 27 | -------------------------------------------------------------------------------- /0149-select-do_pollfd-add-unlikely-branch-hint-return-pat.patch: -------------------------------------------------------------------------------- 1 | From 5730609ffd7e558e1e3305d0c6839044e8f6591b Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 9 Apr 2025 16:55:10 +0100 4 | Subject: [PATCH] select: do_pollfd: add unlikely branch hint return path 5 | Content-Type: text/plain; charset="utf-8" 6 | Content-Transfer-Encoding: 8bit 7 | 8 | Adding an unlikely() hint on the fd < 0 comparison return path improves 9 | run-time performance of the poll() system call. gcov based coverage 10 | analysis based on running stress-ng and a kernel build shows that this 11 | path return path is highly unlikely. 
12 | 13 | Benchmarking on an Debian based Intel(R) Core(TM) Ultra 9 285K with 14 | a 6.15-rc1 kernel and a poll of 1024 file descriptors with zero timeout 15 | shows an call reduction from 32818 ns down to 32635 ns, which is a ~0.5% 16 | performance improvement. 17 | 18 | Results based on running 25 tests with turbo disabled (to reduce clock 19 | freq turbo changes), with 30 second run per test and comparing the number 20 | of poll() calls per second. The % standard deviation of the 25 tests 21 | was 0.08%, so results are reliable. 22 | 23 | Signed-off-by: Colin Ian King 24 | Link: https://lore.kernel.org/20250409155510.577490-1-colin.i.king@gmail.com 25 | Signed-off-by: Christian Brauner 26 | --- 27 | fs/select.c | 2 +- 28 | 1 file changed, 1 insertion(+), 1 deletion(-) 29 | 30 | diff --git a/fs/select.c b/fs/select.c 31 | index 7da531b1cf6b..0eaf3522abe9 100644 32 | --- a/fs/select.c 33 | +++ b/fs/select.c 34 | @@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, 35 | int fd = pollfd->fd; 36 | __poll_t mask, filter; 37 | 38 | - if (fd < 0) 39 | + if (unlikely(fd < 0)) 40 | return 0; 41 | 42 | CLASS(fd, f)(fd); 43 | -- 44 | 2.49.0 45 | 46 | -------------------------------------------------------------------------------- /0150-select-core_sys_select-add-unlikely-branch-hint-on-r.patch: -------------------------------------------------------------------------------- 1 | From 20a4684d124787c865c06c2bd36d6f938fa5e563 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Mon, 14 Apr 2025 09:45:43 +0100 4 | Subject: [PATCH] select: core_sys_select add unlikely branch hint on return 5 | path 6 | Content-Type: text/plain; charset="utf-8" 7 | Content-Transfer-Encoding: 8bit 8 | 9 | Adding an unlikely() hint on the n < 0 comparison return path improves 10 | run-time performance of the select() system call, the negative 11 | value of n is very uncommon in normal select usage. 
12 | 13 | Benchmarking on an Debian based Intel(R) Core(TM) Ultra 9 285K with 14 | a 6.15-rc1 kernel built with 14.2.0 using a select of 1000 file 15 | descriptors with zero timeout shows a consistent call reduction from 16 | 258 ns down to 254 ns, which is a ~1.5% performance improvement. 17 | 18 | Results based on running 25 tests with turbo disabled (to reduce clock 19 | freq turbo changes), with 30 second run per test and comparing the number 20 | of select() calls per second. The % standard deviation of the 25 tests 21 | was 0.24%, so results are reliable. 22 | 23 | Signed-off-by: Colin Ian King 24 | --- 25 | fs/select.c | 2 +- 26 | 1 file changed, 1 insertion(+), 1 deletion(-) 27 | 28 | diff --git a/fs/select.c b/fs/select.c 29 | index 0eaf3522abe9..9fb650d03d52 100644 30 | --- a/fs/select.c 31 | +++ b/fs/select.c 32 | @@ -630,7 +630,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 33 | long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; 34 | 35 | ret = -EINVAL; 36 | - if (n < 0) 37 | + if (unlikely(n < 0)) 38 | goto out_nofds; 39 | 40 | /* max_fds can increase, so grab it once to avoid race */ 41 | -- 42 | 2.49.0 43 | 44 | -------------------------------------------------------------------------------- /0158-clocksource-only-perform-extended-clocksource-checks.patch: -------------------------------------------------------------------------------- 1 | From a9b2afb45dbf18398c22d9504402dc1258859bec Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 3 May 2023 17:31:05 +0100 4 | Subject: [PATCH] clocksource: only perform extended clocksource checks for AMD 5 | systems 6 | 7 | Signed-off-by: Colin Ian King 8 | --- 9 | drivers/clocksource/acpi_pm.c | 7 +++++-- 10 | 1 file changed, 5 insertions(+), 2 deletions(-) 11 | 12 | diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c 13 | index 82338773602c..d84f0e29452e 100644 14 | --- a/drivers/clocksource/acpi_pm.c 15 | +++ b/drivers/clocksource/acpi_pm.c 16 | @@ 
-176,13 +176,16 @@ static int verify_pmtmr_rate(void) 17 | static int __init init_acpi_pm_clocksource(void) 18 | { 19 | u64 value1, value2; 20 | - unsigned int i, j = 0; 21 | + unsigned int i, j = 0, checks = 1; 22 | 23 | if (!pmtmr_ioport) 24 | return -ENODEV; 25 | 26 | + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 27 | + checks = ACPI_PM_MONOTONICITY_CHECKS; 28 | + 29 | /* "verify" this timing source: */ 30 | - for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) { 31 | + for (j = 0; j < checks; j++) { 32 | udelay(100 * j); 33 | value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm); 34 | for (i = 0; i < ACPI_PM_READ_CHECKS; i++) { 35 | -- 36 | 2.40.1 37 | 38 | -------------------------------------------------------------------------------- /0161-ACPI-align-slab-buffers-for-improved-memory-performa.patch: -------------------------------------------------------------------------------- 1 | From 9ed82ddb051444a60afcd85fde2c22c8e72ba943 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 27 Jun 2023 14:12:27 +0100 4 | Subject: [PATCH] ACPI: align slab for improved memory performance 5 | 6 | Enabling SLAB_HWCACHE_ALIGN for the ACPI object caches improves 7 | boot speed in the ACPICA core for object allocation and free'ing 8 | especially in the AML parsing and execution phases in boot. Testing 9 | with 100 boots shows an average boot saving in acpi_init of ~35000 10 | usecs compared to the unaligned version. Most of the ACPI objects 11 | being allocated and free'd are of very short life times in the 12 | critical paths for parsing and execution, so the extra memory used 13 | for alignment isn't too onerous. 
14 | 15 | Signed-off-by: Colin Ian King 16 | --- 17 | drivers/acpi/osl.c | 2 +- 18 | 1 file changed, 1 insertion(+), 1 deletion(-) 19 | 20 | diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c 21 | index 3269a888fb7a..72b2a750c258 100644 22 | --- a/drivers/acpi/osl.c 23 | +++ b/drivers/acpi/osl.c 24 | @@ -1556,7 +1556,7 @@ void acpi_os_release_lock(acpi_spinlock lockp, acpi_cpu_flags flags) 25 | acpi_status 26 | acpi_os_create_cache(char *name, u16 size, u16 depth, acpi_cache_t ** cache) 27 | { 28 | - *cache = kmem_cache_create(name, size, 0, 0, NULL); 29 | + *cache = kmem_cache_create(name, size, 0, SLAB_HWCACHE_ALIGN, NULL); 30 | if (*cache == NULL) 31 | return AE_ERROR; 32 | else 33 | -- 34 | 2.41.0 35 | 36 | -------------------------------------------------------------------------------- /0162-extra-optmization-flags.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 3 Aug 2023 16:52:11 +0100 4 | Subject: [PATCH] extra optmization flags 5 | 6 | Enable 2 extra optimimization flags: 7 | -fivopts 8 | Perform induction variable optimizations (strength reduction, 9 | induction variable merging and induction variable elimination) 10 | on trees. 11 | -fmodulo-sched 12 | Perform swing modulo scheduling immediately before the first 13 | scheduling pass. This pass looks at innermost loops and reorders 14 | their instructions by overlapping different iterations. 
15 | 16 | stress-ng microbenchmark improvements (average from 10 x 1min runs) on 17 | Alderlake with gcc 13.2.0: 18 | af-alg: 1.0% (kernel AF_ALG crypto) 19 | pipe: 1.5% (pipe + context switch) 20 | fork: 4.4% (process fork/exit) 21 | mmap: 3.0% (memory mapping) 22 | switch: 4.9% (context switching) 23 | 24 | KVM QEMU bootspeed improvements (based on average of 100 boots): 0.5% 25 | 26 | --- 27 | arch/x86/Makefile | 2 +- 28 | 1 file changed, 1 insertion(+), 1 deletion(-) 29 | 30 | diff --git a/arch/x86/Makefile b/arch/x86/Makefile 31 | index 415a5d138de4..6b91d1306a7c 100644 32 | --- a/arch/x86/Makefile 33 | +++ b/arch/x86/Makefile 34 | @@ -67,7 +67,7 @@ export BITS 35 | # 36 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 37 | # 38 | -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -O3 -fno-tree-vectorize -march=westmere -mpopcnt 39 | +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -O3 -fno-tree-vectorize -march=westmere -mpopcnt -fivopts -fmodulo-sched 40 | KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 41 | 42 | ifeq ($(CONFIG_X86_KERNEL_IBT),y) 43 | -- 44 | https://clearlinux.org 45 | 46 | -------------------------------------------------------------------------------- /0163-thermal-intel-powerclamp-check-MWAIT-first-use-pr_wa.patch: -------------------------------------------------------------------------------- 1 | From b323e51b3c5b536c6947541b02a9b5cdc7422343 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 19 Sep 2023 14:16:21 +0100 4 | Subject: [PATCH] thermal: intel: powerclamp: check MWAIT first, use pr_warn 5 | insteal of pr_err 6 | 7 | For x86 targets it's more pertinant to check for lack of MWAIT than AMD 8 | specific cpus, so swap the order of tests. Also make the pr_err a 9 | pr_warn to align with other ENODEV warning messages. 
10 | 11 | Signed-off-by: Colin Ian King 12 | --- 13 | drivers/thermal/intel/intel_powerclamp.c | 9 ++++----- 14 | 1 file changed, 4 insertions(+), 5 deletions(-) 15 | 16 | diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c 17 | index 4419ad0a3d4a..c787c1d2390f 100644 18 | --- a/drivers/thermal/intel/intel_powerclamp.c 19 | +++ b/drivers/thermal/intel/intel_powerclamp.c 20 | @@ -752,14 +752,13 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); 21 | 22 | static int __init powerclamp_probe(void) 23 | { 24 | - 25 | - if (x86_match_cpu(amd_cpu)){ 26 | - pr_info("Intel PowerClamp does not support AMD CPUs\n"); 27 | + if (!x86_match_cpu(intel_powerclamp_ids)) { 28 | + pr_info("CPU does not support MWAIT\n"); 29 | return -ENODEV; 30 | } 31 | 32 | - if (!x86_match_cpu(intel_powerclamp_ids)) { 33 | - pr_err("CPU does not support MWAIT\n"); 34 | + if (x86_match_cpu(amd_cpu)){ 35 | + pr_info("Intel PowerClamp does not support AMD CPUs\n"); 36 | return -ENODEV; 37 | } 38 | 39 | -- 40 | 2.42.0 41 | 42 | -------------------------------------------------------------------------------- /0164-KVM-VMX-make-vmx-init-a-late-init-call-to-get-to-ini.patch: -------------------------------------------------------------------------------- 1 | From 4e6585f34be8b87fe5258233aaa8c002ab561897 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 10 Oct 2023 12:41:00 +0100 4 | Subject: [PATCH] KVM: VMX: make vmx_init a late init call to get to init process faster 5 | 6 | Making vmx_init a late initcall improves QEMU kernel boot times to 7 | get to the init process. Average of 100 boots, QEMU boot average 8 | reduced from 0.776 seconds to 0.622 seconds (~19.8% faster) on 9 | Alderlake i9-12900 and ~0.5% faster for non-QEMU UEFI boots. 
10 | 11 | Signed-off-by: Colin Ian King 12 | --- 13 | arch/x86/kvm/vmx/vmx.c | 2 +- 14 | 1 file changed, 1 insertion(+), 1 deletion(-) 15 | 16 | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c 17 | index bc6f0fea48b4..e671fbe70d5a 100644 18 | --- a/arch/x86/kvm/vmx/vmx.c 19 | +++ b/arch/x86/kvm/vmx/vmx.c 20 | @@ -8702,4 +8702,4 @@ static int __init vmx_init(void) 21 | kvm_x86_vendor_exit(); 22 | return r; 23 | } 24 | -module_init(vmx_init); 25 | +late_initcall(vmx_init); 26 | -- 27 | 2.42.0 28 | 29 | -------------------------------------------------------------------------------- /0166-sched-fair-remove-upper-limit-on-cpu-number.patch: -------------------------------------------------------------------------------- 1 | From 362f86369d1930ad177acaa47225d24d26b02c8d Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 14 Nov 2023 13:29:45 +0000 4 | Subject: [PATCH] sched/fair: remove upper limit on cpu number 5 | 6 | Signed-off-by: Colin Ian King 7 | --- 8 | kernel/sched/fair.c | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 12 | index 2048138ce54b..903ead0afacb 100644 13 | --- a/kernel/sched/fair.c 14 | +++ b/kernel/sched/fair.c 15 | @@ -197,7 +197,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) 16 | */ 17 | static unsigned int get_update_sysctl_factor(void) 18 | { 19 | - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); 20 | + unsigned int cpus = num_online_cpus(); 21 | unsigned int factor; 22 | 23 | switch (sysctl_sched_tunable_scaling) { 24 | -- 25 | 2.42.1 26 | 27 | -------------------------------------------------------------------------------- /0167-net-sock-increase-default-number-of-_SK_MEM_PACKETS-.patch: -------------------------------------------------------------------------------- 1 | From 4ba5a01513a6b3487613e7186cac4f3f2f4c5091 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 24 Apr 2024 
16:45:47 +0100 4 | Subject: [PATCH] net: sock: increase default number of _SK_MEM_PACKETS to 1024 5 | 6 | scale these by a factor of 4 to improve socket performance 7 | 8 | Signed-off-by: Colin Ian King 9 | --- 10 | include/net/sock.h | 2 +- 11 | 1 file changed, 1 insertion(+), 1 deletion(-) 12 | 13 | diff --git a/include/net/sock.h b/include/net/sock.h 14 | index 54ca8dcbfb43..9adc51e8085b 100644 15 | --- a/include/net/sock.h 16 | +++ b/include/net/sock.h 17 | @@ -2903,7 +2903,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo); 18 | * platforms. This makes socket queueing behavior and performance 19 | * not depend upon such differences. 20 | */ 21 | -#define _SK_MEM_PACKETS 256 22 | +#define _SK_MEM_PACKETS 1024 23 | #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) 24 | #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 25 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 26 | -- 27 | 2.44.0 28 | 29 | -------------------------------------------------------------------------------- /0169-mm-mincore-improve-performance-by-adding-an-unlikely.patch: -------------------------------------------------------------------------------- 1 | From d2d33210a595ce1b1ec9ca94edc8bd40af0f66c5 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 19 Feb 2025 08:36:07 +0000 4 | Subject: [PATCH] mm/mincore: improve performance by adding an unlikely hint 5 | 6 | Adding an unlikely() hint on the masked start comparison error return path 7 | improves run-time performance of the mincore system call. 8 | 9 | Benchmarking on an i9-12900 shows an improvement of 7ns on mincore calls 10 | on a 256KB mmap'd region where 50% of the pages we resident. Improvement 11 | was from ~970 ns down to 963 ns, so a small ~0.7% improvement. 12 | 13 | Results based on running 20 tests with turbo disabled (to reduce clock 14 | freq turbo changes), with 10 second run per test and comparing the number 15 | of mincores calls per second. 
The % standard deviation of the 20 tests 16 | was ~0.10%, so results are reliable. 17 | 18 | Link: https://lkml.kernel.org/r/20250219083607.5183-1-colin.i.king@gmail.com 19 | Signed-off-by: Colin Ian King 20 | Cc: Matthew Wilcow 21 | Signed-off-by: Andrew Morton 22 | --- 23 | mm/mincore.c | 2 +- 24 | 1 file changed, 1 insertion(+), 1 deletion(-) 25 | 26 | diff --git a/mm/mincore.c b/mm/mincore.c 27 | index d6bd19e520fc..832f29f46767 100644 28 | --- a/mm/mincore.c 29 | +++ b/mm/mincore.c 30 | @@ -239,7 +239,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, 31 | start = untagged_addr(start); 32 | 33 | /* Check the start address: needs to be page-aligned.. */ 34 | - if (start & ~PAGE_MASK) 35 | + if (unlikely(start & ~PAGE_MASK)) 36 | return -EINVAL; 37 | 38 | /* ..and we need to be passed a valid user-space range */ 39 | -- 40 | 2.48.1 41 | 42 | -------------------------------------------------------------------------------- /0170-sched-Add-unlikey-branch-hints-to-several-system-cal.patch: -------------------------------------------------------------------------------- 1 | From 1a5d3492f8e14719184945893c610e0802c05533 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Wed, 19 Feb 2025 14:24:23 +0000 4 | Subject: [PATCH] sched: Add unlikey branch hints to several system calls 5 | 6 | Adding an unlikely() hint on early error return paths improves the 7 | run-time performance of several sched related system calls. 
8 | 9 | Benchmarking on an i9-12900 shows the following per system call 10 | performance improvements: 11 | 12 | before after improvement 13 | sched_getattr 182.4ns 170.6ns ~6.5% 14 | sched_setattr 284.3ns 267.6ns ~5.9% 15 | sched_getparam 161.6ns 148.1ns ~8.4% 16 | sched_setparam 1265.4ns 1227.6ns ~3.0% 17 | sched_getscheduler 129.4ns 118.2ns ~8.7% 18 | sched_setscheduler 1237.3ns 1216.7ns ~1.7% 19 | 20 | Results are based on running 20 tests with turbo disabled (to reduce 21 | clock freq turbo changes), with 10 second run per test based on the 22 | number of system calls per second. The % standard deviation of the 23 | measurements for the 20 tests was 0.05% to 0.40%, so the results are 24 | reliable. 25 | 26 | Tested on kernel build with gcc 14.2.1 27 | 28 | Signed-off-by: Colin Ian King 29 | Signed-off-by: Peter Zijlstra (Intel) 30 | Link: https://lkml.kernel.org/r/20250219142423.45516-1-colin.i.king@gmail.com 31 | --- 32 | kernel/sched/syscalls.c | 10 +++++----- 33 | 1 file changed, 5 insertions(+), 5 deletions(-) 34 | 35 | diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c 36 | index 456d339be98f..9f40348f1dc7 100644 37 | --- a/kernel/sched/syscalls.c 38 | +++ b/kernel/sched/syscalls.c 39 | @@ -875,7 +875,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) 40 | { 41 | struct sched_param lparam; 42 | 43 | - if (!param || pid < 0) 44 | + if (unlikely(!param || pid < 0)) 45 | return -EINVAL; 46 | if (copy_from_user(&lparam, param, sizeof(struct sched_param))) 47 | return -EFAULT; 48 | @@ -984,7 +984,7 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, 49 | struct sched_attr attr; 50 | int retval; 51 | 52 | - if (!uattr || pid < 0 || flags) 53 | + if (unlikely(!uattr || pid < 0 || flags)) 54 | return -EINVAL; 55 | 56 | retval = sched_copy_attr(uattr, &attr); 57 | @@ -1049,7 +1049,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) 58 | struct task_struct *p; 
59 | int retval; 60 | 61 | - if (!param || pid < 0) 62 | + if (unlikely(!param || pid < 0)) 63 | return -EINVAL; 64 | 65 | scoped_guard (rcu) { 66 | @@ -1085,8 +1085,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, 67 | struct task_struct *p; 68 | int retval; 69 | 70 | - if (!uattr || pid < 0 || usize > PAGE_SIZE || 71 | - usize < SCHED_ATTR_SIZE_VER0 || flags) 72 | + if (unlikely(!uattr || pid < 0 || usize > PAGE_SIZE || 73 | + usize < SCHED_ATTR_SIZE_VER0 || flags)) 74 | return -EINVAL; 75 | 76 | scoped_guard (rcu) { 77 | -- 78 | 2.48.1 79 | 80 | -------------------------------------------------------------------------------- /0171-kcmp-improve-performance-adding-an-unlikely-hint-to-.patch: -------------------------------------------------------------------------------- 1 | From d1c735d44c12544cea9b04ca88d65c12892c0539 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 13 Feb 2025 16:39:16 +0000 4 | Subject: [PATCH] kcmp: improve performance adding an unlikely hint to task 5 | comparisons 6 | 7 | Adding an unlikely() hint on task comparisons on an unlikely error 8 | return path improves run-time performance of the kcmp system call. 9 | 10 | Benchmarking on an i9-12900 shows an improvement of ~5.5% on kcmp(). 11 | Results based on running 20 tests with turbo disabled (to reduce 12 | clock freq turbo changes), with 10 second run per test and comparing 13 | the number of kcmp calls per second. The % Standard deviation of 20 14 | tests was ~0.25%, results are reliable. 
15 | 16 | Signed-off-by: Colin Ian King 17 | Link: https://lore.kernel.org/r/20250213163916.709392-1-colin.i.king@gmail.com 18 | Signed-off-by: Christian Brauner 19 | --- 20 | kernel/kcmp.c | 2 +- 21 | 1 file changed, 1 insertion(+), 1 deletion(-) 22 | 23 | diff --git a/kernel/kcmp.c b/kernel/kcmp.c 24 | index 2c596851f8a9..7c1a65bd5f8d 100644 25 | --- a/kernel/kcmp.c 26 | +++ b/kernel/kcmp.c 27 | @@ -145,7 +145,7 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, 28 | */ 29 | task1 = find_task_by_vpid(pid1); 30 | task2 = find_task_by_vpid(pid2); 31 | - if (!task1 || !task2) 32 | + if (unlikely(!task1 || !task2)) 33 | goto err_no_task; 34 | 35 | get_task_struct(task1); 36 | -- 37 | 2.48.1 38 | 39 | -------------------------------------------------------------------------------- /0174-memcg-increase-MEMCG_CHARGE_BATCH-to-128.patch: -------------------------------------------------------------------------------- 1 | From 0841fd2c59e7d4c4dc55bbdc4e69d08db775df68 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 22 May 2025 15:23:07 +0100 4 | Subject: [PATCH] memcg: increase MEMCG_CHARGE_BATCH to 128 5 | 6 | MEMCG_CHARGE_BATCH was last changed to 64 back in 2022, systems have 7 | grown in memory and speed and it's useful to increase this to 128. 8 | 9 | Benchmarking the stress-ng mmap stressor shows a performance improvement 10 | of ~7.4% and malloc stressor by 2.8%, tested on an Ultra 9 285K with 11 | turbo disabled to avoid test result jitter. 12 | 13 | Signed-off-by: Colin Ian King 14 | --- 15 | include/linux/memcontrol.h | 2 +- 16 | 1 file changed, 1 insertion(+), 1 deletion(-) 17 | 18 | diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h 19 | index 6e74b8254d9b..a47c977bea18 100644 20 | --- a/include/linux/memcontrol.h 21 | +++ b/include/linux/memcontrol.h 22 | @@ -316,7 +316,7 @@ struct mem_cgroup { 23 | * TODO: maybe necessary to use big numbers in big irons or dynamic based of the 24 | * workload. 
25 | */ 26 | -#define MEMCG_CHARGE_BATCH 64U 27 | +#define MEMCG_CHARGE_BATCH 128U 28 | 29 | extern struct mem_cgroup *root_mem_cgroup; 30 | 31 | -- 32 | 2.49.0 33 | 34 | -------------------------------------------------------------------------------- /0175-readdir-add-unlikely-hint-on-len-check.patch: -------------------------------------------------------------------------------- 1 | From 09c259399447ca0d828c65946b7d938e4692d593 Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Tue, 27 May 2025 15:12:58 +0100 4 | Subject: [PATCH] readdir: add unlikely hint on len check 5 | Content-Type: text/plain; charset="utf-8" 6 | Content-Transfer-Encoding: 8bit 7 | 8 | Currently the out of bounds check for the length is very unlikely 9 | to be false for valid name strings. Analysis with gcov coverage show 10 | this to be so. 11 | 12 | Add an unlikely hint on the error return path check. This improves 13 | performance when testing with single instance stress-ng dentry and 14 | dirent stressors. Tested with a 6.15 kernel, built with gcc 14.2.0 15 | on a Debian Ultra 9 285K system with turbo disabled to reduce test 16 | jitter on tmpfs. Each test case was run 25 times and the % standard 17 | deviation was less than 0.4%. 
Geometric mean of 25 results show the 18 | following stress-ng bogo-ops performance improvments: 19 | 20 | getdent: 1.1% 21 | dentry: 0.9% 22 | 23 | Signed-off-by: Colin Ian King 24 | --- 25 | fs/readdir.c | 2 +- 26 | 1 file changed, 1 insertion(+), 1 deletion(-) 27 | 28 | diff --git a/fs/readdir.c b/fs/readdir.c 29 | index 7764b8638978..c501155ed99a 100644 30 | --- a/fs/readdir.c 31 | +++ b/fs/readdir.c 32 | @@ -147,7 +147,7 @@ EXPORT_SYMBOL(iterate_dir); 33 | */ 34 | static int verify_dirent_name(const char *name, int len) 35 | { 36 | - if (len <= 0 || len >= PATH_MAX) 37 | + if (unlikely(len <= 0 || len >= PATH_MAX)) 38 | return -EIO; 39 | if (memchr(name, '/', len)) 40 | return -EIO; 41 | -- 42 | 2.49.0 43 | 44 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PKG_NAME := linux 2 | 3 | include ../common/Makefile.common 4 | -------------------------------------------------------------------------------- /Makefile.custom: -------------------------------------------------------------------------------- 1 | MOCK_COMMON_ARGS = -n --result=results/ --no-cleanup-after --uniqueext=$(PKG_NAME) 2 | 3 | #help oldconfig: Uses the current config file as input to `make oldconfig` and 4 | #help applies the results to the local copy. 
5 | oldconfig: $(SRPMFILE) config 6 | $(MOCK) $(MOCK_COMMON_ARGS) --installdeps $(SRPMFILE) 7 | $(MOCK) $(MOCK_COMMON_ARGS) --chroot 'rpmbuild -bp /builddir/build/SPECS/$(SPECFILE)' 8 | $(MOCK) $(MOCK_COMMON_ARGS) --copyin $(filter-out %.rpm,$^) /builddir/build/BUILD/.config 9 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --chroot 'mv .config linux-*/' 10 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --shell 'make -C linux-* oldconfig' 11 | $(MOCK) $(MOCK_COMMON_ARGS) --copyout /builddir/build/BUILD/linux-*/.config config 12 | 13 | #help menuconfig: Uses the current config file as input to `make menuconfig` and 14 | #help applies the results to the local copy. 15 | menuconfig: $(SRPMFILE) config 16 | $(MOCK) $(MOCK_COMMON_ARGS) --installdeps $(SRPMFILE) 17 | $(MOCK) $(MOCK_COMMON_ARGS) --install ncurses-dev 18 | $(MOCK) $(MOCK_COMMON_ARGS) --chroot 'rpmbuild -bp /builddir/build/SPECS/$(SPECFILE)' 19 | $(MOCK) $(MOCK_COMMON_ARGS) --copyin $(filter-out %.rpm,$^) /builddir/build/BUILD/.config 20 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --chroot 'mv .config linux-*/' 21 | $(MOCK) $(MOCK_COMMON_ARGS) --cwd=/builddir/build/BUILD --shell 'make -C linux-* menuconfig' 22 | $(MOCK) $(MOCK_COMMON_ARGS) --copyout /builddir/build/BUILD/linux-*/.config config 23 | 24 | #help kdevelop: Download and extract the package sources and apply the clear Linux 25 | #help patches on top of it using the git tool. You can use DESTDIR=target to 26 | #help extract to a specific target directory. i.e. 
"make kdevelop DESTDIR=/tmp" 27 | kdevelop: 28 | @scripts/develop.sh $(SPECFILE) $(DESTDIR) 29 | -------------------------------------------------------------------------------- /adlrdt.patch: -------------------------------------------------------------------------------- 1 | work around https://bugzilla.kernel.org/show_bug.cgi?id=215141 2 | 3 | --- linux-5.19.1/arch/x86/kernel/cpu/resctrl/core.c~ 2022-08-11 11:22:05.000000000 +0000 4 | +++ linux-5.19.1/arch/x86/kernel/cpu/resctrl/core.c 2022-08-18 21:06:56.235417914 +0000 5 | @@ -955,6 +955,22 @@ 6 | */ 7 | rdt_init_res_defs(); 8 | 9 | + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { 10 | + if (boot_cpu_data.x86_model == INTEL_FAM6_ALDERLAKE) 11 | + return -ENODEV; 12 | + if (boot_cpu_data.x86_model == INTEL_FAM6_ALDERLAKE_L) 13 | + return -ENODEV; 14 | + if (boot_cpu_data.x86_model == INTEL_FAM6_ALDERLAKE_N) 15 | + return -ENODEV; 16 | + if (boot_cpu_data.x86_model == INTEL_FAM6_RAPTORLAKE) 17 | + return -ENODEV; 18 | + if (boot_cpu_data.x86_model == INTEL_FAM6_RAPTORLAKE_P) 19 | + return -ENODEV; 20 | + if (boot_cpu_data.x86_model == INTEL_FAM6_RAPTORLAKE_S) 21 | + return -ENODEV; 22 | + } 23 | + 24 | + 25 | check_quirks(); 26 | 27 | if (!get_rdt_resources()) 28 | -------------------------------------------------------------------------------- /archive/0114-tweak-perfbias.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Sun, 22 Jan 2017 18:51:13 +0000 4 | Subject: [PATCH] tweak perfbias 5 | 6 | --- 7 | arch/x86/kernel/cpu/intel.c | 8 +++++--- 8 | 1 file changed, 5 insertions(+), 3 deletions(-) 9 | 10 | diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c 11 | index 3142fd7a9b32..41d5ccae7890 100644 12 | --- a/arch/x86/kernel/cpu/intel.c 13 | +++ b/arch/x86/kernel/cpu/intel.c 14 | @@ -608,13 +608,15 @@ static void 
init_intel_energy_perf(struct cpuinfo_x86 *c) 15 | return; 16 | 17 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); 18 | - if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE) 19 | + if ((epb & 0xF) >= ENERGY_PERF_BIAS_NORMAL) 20 | return; 21 | 22 | - pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); 23 | + pr_info_once("ENERGY_PERF_BIAS: Set to 'performance', was 'normal'\n"); 24 | pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); 25 | - epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; 26 | + epb = (epb & ~0xF) | ENERGY_PERF_BIAS_PERFORMANCE; 27 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); 28 | + 29 | + pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); 30 | } 31 | 32 | static void intel_bsp_resume(struct cpuinfo_x86 *c) 33 | -- 34 | https://clearlinux.org 35 | 36 | -------------------------------------------------------------------------------- /archive/0123-zero-extra-registers.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Arjan van de Ven 3 | Date: Thu, 8 Feb 2018 16:49:38 +0000 4 | Subject: [PATCH] zero extra registers 5 | 6 | This for Zero used caller-saved general registers upon function return. 7 | --- 8 | arch/x86/Makefile | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/arch/x86/Makefile b/arch/x86/Makefile 12 | index 513a555..984121f 100644 13 | --- a/arch/x86/Makefile 14 | +++ b/arch/x86/Makefile 15 | @@ -217,7 +217,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables 16 | 17 | # Avoid indirect branches in kernel to deal with Spectre 18 | ifdef CONFIG_RETPOLINE 19 | - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) 20 | + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -mzero-caller-saved-regs=used 21 | # Additionally, avoid generating expensive indirect jumps which 22 | # are subject to retpolines for small number of switch cases. 
23 | # clang turns off jump table generation by default when under 24 | -- 25 | https://clearlinux.org 26 | 27 | -------------------------------------------------------------------------------- /archive/0131-overload-on-wakeup.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: jplozi 3 | Date: Thu, 16 Apr 2020 14:23:27 -0500 4 | Subject: [PATCH] overload on wakeup 5 | 6 | As an experiment, apply the learnings from the wasted-cores paper 7 | and see how the performance works out. With the data from this we should 8 | be able to work with Peter and the rest of the scheduler folks on 9 | a more permanent/elegant solution. 10 | 11 | Source: https://github.com/jplozi/wastedcores 12 | --- 13 | kernel/sched/fair.c | 28 ++++++++++++++++++++++++++++ 14 | 1 file changed, 28 insertions(+) 15 | 16 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 17 | index e8815af..34a42a1 100644 18 | --- a/kernel/sched/fair.c 19 | +++ b/kernel/sched/fair.c 20 | @@ -6378,6 +6378,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) 21 | return -1; 22 | } 23 | 24 | + 25 | +static unsigned int once_in_a_while; 26 | /* 27 | * select_task_rq_fair: Select target runqueue for the waking task in domains 28 | * that have the 'sd_flag' flag set. 
In practice, this is SD_BALANCE_WAKE, 29 | @@ -6433,6 +6435,32 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f 30 | 31 | 32 | rcu_read_lock(); 33 | + 34 | + once_in_a_while++; 35 | + 36 | + if (cpu_rq(prev_cpu)->nr_running || (once_in_a_while & 15) == 0) { 37 | + int _cpu; 38 | + int bestprio = -5000; 39 | + int bestcpu = -1; 40 | + 41 | + for_each_online_cpu(_cpu) { 42 | + if (!cpumask_test_cpu(_cpu, p->cpus_ptr) 43 | + || cpu_rq(_cpu)->nr_running) 44 | + continue; 45 | + if (arch_asym_cpu_priority(_cpu) > bestprio 46 | + || (prev_cpu == _cpu 47 | + && bestprio == arch_asym_cpu_priority(_cpu))) { 48 | + bestcpu = _cpu; 49 | + bestprio = arch_asym_cpu_priority(_cpu); 50 | + } 51 | + } 52 | + 53 | + if (bestcpu >= 0) { 54 | + rcu_read_unlock(); 55 | + return bestcpu; 56 | + } 57 | + } 58 | + 59 | for_each_domain(cpu, tmp) { 60 | if (!(tmp->flags & SD_LOAD_BALANCE)) 61 | break; 62 | -- 63 | https://clearlinux.org 64 | 65 | -------------------------------------------------------------------------------- /archive/0151-mm-Export-do_madvise.patch-: -------------------------------------------------------------------------------- 1 | From fc4ee73f68d0e9da4ba61112416849c18d933882 Mon Sep 17 00:00:00 2001 2 | From: Sebastien Boeuf 3 | Date: Mon, 23 Jan 2017 15:03:52 -0800 4 | Subject: [PATCH 151/154] mm: Export do_madvise() 5 | 6 | Combined with some interesting flags madvise() system call 7 | allows to free memory more smartly and more efficiently than 8 | we could do with a simple free(). The issue is that is not 9 | available for kernel modules that could need it. 10 | 11 | In order to solve this lack of support, this patch exports 12 | do_madvise() so as to make it available to the entire kernel. 13 | The already existing madvise() system call is unchanged and 14 | now relies on this new do_madvise() function. 
15 | 16 | Suggested-by: Arjan van de Ven 17 | Signed-off-by: Sebastien Boeuf 18 | --- 19 | include/linux/mm.h | 2 ++ 20 | mm/madvise.c | 25 +++++++++++++++++++++---- 21 | 2 files changed, 23 insertions(+), 4 deletions(-) 22 | 23 | diff --git a/include/linux/mm.h b/include/linux/mm.h 24 | index 43edf659453b..c3153e9ee7ea 100644 25 | --- a/include/linux/mm.h 26 | +++ b/include/linux/mm.h 27 | @@ -2603,5 +2603,7 @@ void __init setup_nr_node_ids(void); 28 | static inline void setup_nr_node_ids(void) {} 29 | #endif 30 | 31 | +extern int do_madvise(unsigned long start, size_t len_in, int behavior); 32 | + 33 | #endif /* __KERNEL__ */ 34 | #endif /* _LINUX_MM_H */ 35 | diff --git a/mm/madvise.c b/mm/madvise.c 36 | index 375cf32087e4..3798dd68692e 100644 37 | --- a/mm/madvise.c 38 | +++ b/mm/madvise.c 39 | @@ -730,9 +730,7 @@ madvise_behavior_valid(int behavior) 40 | } 41 | 42 | /* 43 | - * The madvise(2) system call. 44 | - * 45 | - * Applications can use madvise() to advise the kernel how it should 46 | + * Kernel modules can use do_madvise() to advise the kernel how it should 47 | * handle paging I/O in this VM area. The idea is to help the kernel 48 | * use appropriate read-ahead and caching techniques. The information 49 | * provided is advisory only, and can be safely disregarded by the 50 | @@ -790,7 +788,7 @@ madvise_behavior_valid(int behavior) 51 | * -EBADF - map exists, but area maps something that isn't a file. 52 | * -EAGAIN - a kernel resource was temporarily unavailable. 53 | */ 54 | -SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) 55 | +int do_madvise(unsigned long start, size_t len_in, int behavior) 56 | { 57 | unsigned long end, tmp; 58 | struct vm_area_struct *vma, *prev; 59 | @@ -885,3 +883,22 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) 60 | 61 | return error; 62 | } 63 | +EXPORT_SYMBOL_GPL(do_madvise); 64 | + 65 | +/* 66 | + * The madvise(2) system call. 
67 | + * 68 | + * Applications can use madvise() system call to advise the kernel how 69 | + * it should handle paging I/O in this VM area. The idea is to help 70 | + * the kernel use appropriate read-ahead and caching techniques. The 71 | + * information provided is advisory only, and can be safely disregarded 72 | + * by the kernel without affecting the correct operation of the application. 73 | + * 74 | + * behavior values are the same than the ones defined in madvise() 75 | + * 76 | + * return values are the same than the ones defined in madvise() 77 | + */ 78 | +SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) 79 | +{ 80 | + return do_madvise(start, len_in, behavior); 81 | +} 82 | -- 83 | 2.15.0 84 | 85 | -------------------------------------------------------------------------------- /archive/0154-sysctl-vm-Fine-grained-cache-shrinking.patch-: -------------------------------------------------------------------------------- 1 | From 130d5d976b920aec243e0fa63273f3143660054b Mon Sep 17 00:00:00 2001 2 | From: Sebastien Boeuf 3 | Date: Mon, 23 Jan 2017 15:32:39 -0800 4 | Subject: [PATCH 154/154] sysctl: vm: Fine-grained cache shrinking 5 | 6 | Lots of virtual machines are let in idle state for days until they 7 | are terminated, and they can keep a large amount of memory in their 8 | cache, meaning this memory cannot be used by other processes. 9 | 10 | We tried to release this memory using existing drop_caches sysctl, 11 | but it led to the complete cache loss while it could have been used 12 | whether the idle process wakes up. Indeed, the process can't find any 13 | available cached data and it directly affects performances to rebuild 14 | it from scratch. 15 | 16 | Instead, the solution we want is based on shrinking gradually system 17 | cache over time. 
This patch adds a new sysctl shrink_caches_mb so as 18 | to allow userspace applications indicating the kernel it should shrink 19 | system cache up to the amount (in MiB) specified. 20 | 21 | There is an application called "memshrinker" which uses this new 22 | mechanism. It runs in the background and periodically releases a 23 | specified amount of cache. This amount is based on the remaining 24 | cache on the system, and period is computed to follow a shrinking 25 | model. It results in saving a lot of memory for other processes 26 | running on the system. 27 | 28 | Suggested-by: Arjan van de Ven 29 | Signed-off-by: Sebastien Boeuf 30 | --- 31 | fs/drop_caches.c | 25 +++++++++++++++++++++++++ 32 | include/linux/mm.h | 4 ++++ 33 | kernel/sysctl.c | 8 ++++++++ 34 | mm/vmscan.c | 2 -- 35 | 4 files changed, 37 insertions(+), 2 deletions(-) 36 | 37 | diff --git a/fs/drop_caches.c b/fs/drop_caches.c 38 | index 82377017130f..f8de1383498b 100644 39 | --- a/fs/drop_caches.c 40 | +++ b/fs/drop_caches.c 41 | @@ -9,10 +9,12 @@ 42 | #include 43 | #include 44 | #include 45 | +#include 46 | #include "internal.h" 47 | 48 | /* A global variable is a bit ugly, but it keeps the code simple */ 49 | int sysctl_drop_caches; 50 | +int sysctl_shrink_caches_mb; 51 | 52 | static void drop_pagecache_sb(struct super_block *sb, void *unused) 53 | { 54 | @@ -68,3 +70,26 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write, 55 | } 56 | return 0; 57 | } 58 | + 59 | +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, 60 | + void __user *buffer, size_t *length, loff_t *ppos) 61 | +{ 62 | + int ret; 63 | + unsigned long nr_to_reclaim, page_reclaimed; 64 | + 65 | + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); 66 | + if (ret) 67 | + return ret; 68 | + 69 | + nr_to_reclaim = sysctl_shrink_caches_mb * (1 << 20) / PAGE_SIZE; 70 | + if (write) { 71 | + page_reclaimed = shrink_all_memory(nr_to_reclaim); 72 | + if (page_reclaimed > 0) 73 | + 
lru_add_drain_all(); 74 | + 75 | + if (page_reclaimed != nr_to_reclaim) 76 | + return page_reclaimed; 77 | + } 78 | + 79 | + return 0; 80 | +} 81 | diff --git a/include/linux/mm.h b/include/linux/mm.h 82 | index 15e02bf3a6b3..9f9b967ad2c9 100644 83 | --- a/include/linux/mm.h 84 | +++ b/include/linux/mm.h 85 | @@ -2457,6 +2457,10 @@ extern int kvm_ret_mem_advice; 86 | int kvm_madv_instant_free_sysctl_handler(struct ctl_table *table, int write, 87 | void __user *buffer, size_t *length, 88 | loff_t *ppos); 89 | +extern int sysctl_shrink_caches_mb; 90 | +int shrink_caches_sysctl_handler(struct ctl_table *table, int write, 91 | + void __user *buffer, size_t *length, 92 | + loff_t *ppos); 93 | #endif 94 | 95 | void drop_slab(void); 96 | diff --git a/kernel/sysctl.c b/kernel/sysctl.c 97 | index 9a1611f92a2a..9b74b4f0251d 100644 98 | --- a/kernel/sysctl.c 99 | +++ b/kernel/sysctl.c 100 | @@ -1417,6 +1417,14 @@ static struct ctl_table vm_table[] = { 101 | .mode = 0644, 102 | .proc_handler = kvm_madv_instant_free_sysctl_handler, 103 | }, 104 | + { 105 | + .procname = "shrink_caches_mb", 106 | + .data = &sysctl_shrink_caches_mb, 107 | + .maxlen = sizeof(int), 108 | + .mode = 0644, 109 | + .proc_handler = shrink_caches_sysctl_handler, 110 | + .extra1 = &one, 111 | + }, 112 | #ifdef CONFIG_COMPACTION 113 | { 114 | .procname = "compact_memory", 115 | diff --git a/mm/vmscan.c b/mm/vmscan.c 116 | index eb2f0315b8c0..b16f327b0211 100644 117 | --- a/mm/vmscan.c 118 | +++ b/mm/vmscan.c 119 | @@ -3646,7 +3646,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) 120 | wake_up_interruptible(&pgdat->kswapd_wait); 121 | } 122 | 123 | -#ifdef CONFIG_HIBERNATION 124 | /* 125 | * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of 126 | * freed pages. 
127 | @@ -3686,7 +3685,6 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) 128 | 129 | return nr_reclaimed; 130 | } 131 | -#endif /* CONFIG_HIBERNATION */ 132 | 133 | /* It's optimal to keep kswapds on the same CPUs as their memory, but 134 | not required for correctness. So if the last cpu in a node goes 135 | -- 136 | 2.15.0 137 | 138 | -------------------------------------------------------------------------------- /archive/2002-opae-add-Kconfig-and-Makefile.patch-: -------------------------------------------------------------------------------- 1 | From 1cbc611a7b7fa5a61524179cd39792441f78a62b Mon Sep 17 00:00:00 2001 2 | From: Miguel Bernal Marin 3 | Date: Thu, 1 Feb 2018 11:08:18 -0600 4 | Subject: [PATCH 2002/2002] opae: add Kconfig and Makefile 5 | 6 | Signed-off-by: Miguel Bernal Marin 7 | --- 8 | drivers/Makefile | 1 + 9 | drivers/fpga/Kconfig | 1 + 10 | 2 files changed, 2 insertions(+) 11 | 12 | diff --git a/drivers/Makefile b/drivers/Makefile 13 | index 46053df6f069..48b6d693e752 100644 14 | --- a/drivers/Makefile 15 | +++ b/drivers/Makefile 16 | @@ -183,6 +183,7 @@ obj-$(CONFIG_ANDROID) += android/ 17 | obj-$(CONFIG_ANDROID) += android/ 18 | obj-$(CONFIG_NVMEM) += nvmem/ 19 | obj-$(CONFIG_FPGA) += fpga/ 20 | +obj-$(CONFIG_FPGA_INTEL_OPAE) += fpga/intel/ 21 | obj-$(CONFIG_FSI) += fsi/ 22 | obj-$(CONFIG_TEE) += tee/ 23 | obj-$(CONFIG_MULTIPLEXER) += mux/ 24 | --- linux-4.16.1/drivers/fpga/Kconfig~ 2018-04-08 12:29:52.000000000 +0000 25 | +++ linux-4.16.1/drivers/fpga/Kconfig 2018-04-08 16:32:22.109015136 +0000 26 | @@ -109,6 +109,8 @@ 27 | region of the FPGA from the busses while that region is 28 | being reprogrammed during partial reconfig. 
29 | 30 | +source "drivers/fpga/intel/Kconfig" 31 | + 32 | config FPGA_REGION 33 | tristate "FPGA Region" 34 | depends on FPGA_BRIDGE 35 | -------------------------------------------------------------------------------- /archive/3002Add-sysdig-to-kernel-build-system.patch-: -------------------------------------------------------------------------------- 1 | From 6ea3b762497fc02f6b79378cc950caf041a1b647 Mon Sep 17 00:00:00 2001 2 | From: Icarus Sparry 3 | Date: Sun, 25 Feb 2018 14:11:04 -0800 4 | Subject: [PATCH 2/2] patch in driver 5 | 6 | --- 7 | kernel/trace/Kconfig | 10 ++++++++++ 8 | kernel/trace/Makefile | 1 + 9 | 2 files changed, 11 insertions(+) 10 | 11 | diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig 12 | index f54dc62..b8bd7be 100644 13 | --- a/kernel/trace/Kconfig 14 | +++ b/kernel/trace/Kconfig 15 | @@ -714,6 +714,16 @@ config TRACING_EVENTS_GPIO 16 | help 17 | Enable tracing events for gpio subsystem 18 | 19 | +config SYSDIG 20 | + tristate "sysdig-probe kernel module" 21 | + depends on TRACEPOINTS 22 | + depends on HAVE_SYSCALL_TRACEPOINTS 23 | + default m 24 | + help 25 | + Build the kernel module to support sysdig. 
26 | + 27 | + If unsure, say M 28 | + 29 | endif # FTRACE 30 | 31 | endif # TRACING_SUPPORT 32 | diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile 33 | index e2538c7..70a4eb1 100644 34 | --- a/kernel/trace/Makefile 35 | +++ b/kernel/trace/Makefile 36 | @@ -73,3 +73,4 @@ obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o 37 | obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o 38 | 39 | libftrace-y := ftrace.o 40 | +obj-$(CONFIG_SYSDIG) += sysdig/ 41 | -- 42 | 2.16.2 43 | 44 | -------------------------------------------------------------------------------- /backport-ioboost.patch: -------------------------------------------------------------------------------- 1 | From mboxrd@z Thu Jan 1 00:00:00 1970 2 | Return-Path: 3 | X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on 4 | aws-us-west-2-korg-lkml-1.web.codeaurora.org 5 | Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) 6 | by smtp.lore.kernel.org (Postfix) with ESMTP id C293AC64EC4 7 | for ; Fri, 3 Mar 2023 04:14:19 +0000 (UTC) 8 | Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand 9 | id S229452AbjCCEOS (ORCPT ); 10 | Thu, 2 Mar 2023 23:14:18 -0500 11 | Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42550 "EHLO 12 | lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org 13 | with ESMTP id S229451AbjCCEOR (ORCPT 14 | ); Thu, 2 Mar 2023 23:14:17 -0500 15 | Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) 16 | by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 36FEA12BE6; 17 | Thu, 2 Mar 2023 20:14:17 -0800 (PST) 18 | DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; 19 | d=intel.com; i=@intel.com; q=dns/txt; s=Intel; 20 | t=1677816857; x=1709352857; 21 | h=from:to:cc:subject:date:message-id:mime-version: 22 | content-transfer-encoding; 23 | bh=vF/1poQJ5i2/fALYadh5wXENB15OfhGT7T0JvZIYi0I=; 24 | b=Bl0Lg5MRLFrwskFU/gAuNLdwv4PpTyiVrjfu3N5Zdxid4A2tB92dYd29 25 | RYXmVbhLiibvpaMQ8ha33UxEliZlZge5KZJS1W8cR0n4DsuUOpuZhEaMG 26 
| 3y4hMbWU2nC0xjisZdfqlK74peb/RVI7xKAQlDFrzCdAcdppr7G9cP9GZ 27 | DLpsIoNwFkTCeoe29VOWAIvf9Lv6nm/W6KYSvdNSImFLAj69VzRAkYfJy 28 | 5Kk73rkEoXJkShsKGkBWDxJsKIhD+XCTi7XgDMkX+FX9lVP0rRiVurabW 29 | D9MjJp3QJfPDwzwz159d6KSDp0C0kP/bRPhyBD7vNZSM//LkOzorTMNgT 30 | A==; 31 | X-IronPort-AV: E=McAfee;i="6500,9779,10637"; a="333671402" 32 | X-IronPort-AV: E=Sophos;i="5.98,229,1673942400"; 33 | d="scan'208";a="333671402" 34 | Received: from fmsmga004.fm.intel.com ([10.253.24.48]) 35 | by fmsmga104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 02 Mar 2023 20:14:16 -0800 36 | X-ExtLoop1: 1 37 | X-IronPort-AV: E=McAfee;i="6500,9779,10637"; a="744119122" 38 | X-IronPort-AV: E=Sophos;i="5.98,229,1673942400"; 39 | d="scan'208";a="744119122" 40 | Received: from spandruv-desk.jf.intel.com ([10.54.75.8]) 41 | by fmsmga004.fm.intel.com with ESMTP; 02 Mar 2023 20:14:16 -0800 42 | From: Srinivas Pandruvada 43 | To: rafael@kernel.org, lenb@kernel.org, viresh.kumar@linaro.org 44 | Cc: linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org, 45 | Srinivas Pandruvada 46 | Subject: [PATCH] cpufreq: intel_pstate: Enable HWP IO boost for all servers 47 | Date: Thu, 2 Mar 2023 20:14:11 -0800 48 | Message-Id: <20230303041411.3161780-1-srinivas.pandruvada@linux.intel.com> 49 | X-Mailer: git-send-email 2.39.1 50 | MIME-Version: 1.0 51 | Content-Transfer-Encoding: 8bit 52 | Precedence: bulk 53 | List-ID: 54 | X-Mailing-List: linux-pm@vger.kernel.org 55 | 56 | The HWP IO boost results in slight improvements for IO performance on 57 | both Ice Lake and Sapphire Rapid servers. 58 | 59 | Currently there is a CPU model check for Skylake desktop and server along 60 | with the ACPI PM profile for performance and enterprise servers to enable 61 | IO boost. 62 | 63 | Remove the CPU model check, so that all current server models enable HWP 64 | IO boost by default. 
65 | 66 | Signed-off-by: Srinivas Pandruvada 67 | --- 68 | drivers/cpufreq/intel_pstate.c | 11 +---------- 69 | 1 file changed, 1 insertion(+), 10 deletions(-) 70 | 71 | diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c 72 | index cb4beec27555..8edbc0856892 100644 73 | --- a/drivers/cpufreq/intel_pstate.c 74 | +++ b/drivers/cpufreq/intel_pstate.c 75 | @@ -2384,12 +2384,6 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { 76 | {} 77 | }; 78 | 79 | -static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { 80 | - X86_MATCH(SKYLAKE_X, core_funcs), 81 | - X86_MATCH(SKYLAKE, core_funcs), 82 | - {} 83 | -}; 84 | - 85 | static int intel_pstate_init_cpu(unsigned int cpunum) 86 | { 87 | struct cpudata *cpu; 88 | @@ -2408,12 +2402,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum) 89 | cpu->epp_default = -EINVAL; 90 | 91 | if (hwp_active) { 92 | - const struct x86_cpu_id *id; 93 | - 94 | intel_pstate_hwp_enable(cpu); 95 | 96 | - id = x86_match_cpu(intel_pstate_hwp_boost_ids); 97 | - if (id && intel_pstate_acpi_pm_profile_server()) 98 | + if (intel_pstate_acpi_pm_profile_server()) 99 | hwp_boost = true; 100 | } 101 | } else if (hwp_active) { 102 | -- 103 | 2.34.1 104 | 105 | 106 | -------------------------------------------------------------------------------- /better_idle_balance.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.3.1/kernel/sched/fair.c~ 2023-04-30 23:32:26.000000000 +0000 2 | +++ linux-6.3.1/kernel/sched/fair.c 2023-06-27 15:01:52.301844933 +0000 3 | @@ -11745,7 +11745,7 @@ 4 | 5 | update_next_balance(sd, &next_balance); 6 | 7 | - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) 8 | + if (this_rq->avg_idle/2 < curr_cost + sd->max_newidle_lb_cost) 9 | break; 10 | 11 | if (sd->flags & SD_BALANCE_NEWIDLE) { 12 | -------------------------------------------------------------------------------- /cmdline: 
-------------------------------------------------------------------------------- 1 | quiet 2 | console=tty0 3 | console=ttyS0,115200n8 4 | cryptomgr.notests 5 | init=/usr/bin/initra-desktop 6 | initcall_debug 7 | intel_iommu=igfx_off 8 | kvm-intel.nested=1 9 | no_timer_check 10 | noreplace-smp 11 | page_alloc.shuffle=1 12 | rcupdate.rcu_expedited=1 13 | rootfstype=ext4,btrfs,xfs,f2fs 14 | tsc=reliable 15 | rw 16 | -------------------------------------------------------------------------------- /epp-retune.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.1/arch/x86/include/asm/msr-index.h~ 2022-12-11 22:15:18.000000000 +0000 2 | +++ linux-6.1/arch/x86/include/asm/msr-index.h 2022-12-16 01:31:32.266119875 +0000 3 | @@ -472,7 +472,7 @@ 4 | #define HWP_MAX_PERF(x) ((x & 0xff) << 8) 5 | #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) 6 | #define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) 7 | -#define HWP_EPP_PERFORMANCE 0x00 8 | +#define HWP_EPP_PERFORMANCE 0x01 9 | #define HWP_EPP_BALANCE_PERFORMANCE 0x80 10 | #define HWP_EPP_BALANCE_POWERSAVE 0xC0 11 | #define HWP_EPP_POWERSAVE 0xFF 12 | -------------------------------------------------------------------------------- /filter-stable.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import json 4 | import sys 5 | 6 | if len(sys.argv) != 3: 7 | print("Usage: filter-stable STABLE-VER JSON-FILE") 8 | sys.exit(0) 9 | 10 | STABLE_VER = sys.argv[1] 11 | JSON_FILE = sys.argv[2] 12 | 13 | with open(JSON_FILE, "r", encoding="latin-1") as myfile: 14 | data = json.load(myfile) 15 | 16 | if "releases" not in data: 17 | print("Unexpected releases.json format", file=sys.stderr) 18 | sys.exit(1) 19 | 20 | stable_ver = "" 21 | for release in data["releases"]: 22 | if release["iseol"]: 23 | continue 24 | if release["moniker"] != "stable": 25 | continue 26 | ver = release["version"] 27 | if 
ver.startswith(f"{STABLE_VER}."): 28 | stable_ver = ver 29 | 30 | if not stable_ver: 31 | print(f"No version found for {STABLE_VER} series", file=sys.stderr) 32 | sys.exit(1) 33 | 34 | print(stable_ver) 35 | -------------------------------------------------------------------------------- /iommu.patch: -------------------------------------------------------------------------------- 1 | Subject: [PATCH v2] iommu/vt-d: Avoid superfluous IOTLB tracking in lazy mode 2 | Date: Wed, 8 Feb 2023 10:18:34 -0800 3 | Message-Id: <20230208181834.1601211-1-jacob.jun.pan@linux.intel.com> 4 | X-Mailer: git-send-email 2.25.1 5 | MIME-Version: 1.0 6 | Content-Transfer-Encoding: 8bit 7 | Precedence: bulk 8 | List-ID: 9 | X-Mailing-List: linux-kernel@vger.kernel.org 10 | 11 | Intel IOMMU driver implements IOTLB flush queue with domain selective 12 | or PASID selective invalidations. In this case there's no need to track 13 | IOVA page range and sync IOTLBs, which may cause significant performance 14 | hit. 15 | 16 | This patch adds a check to avoid IOVA gather page and IOTLB sync for 17 | the lazy path. 18 | 19 | The performance difference on Sapphire Rapids 100Gb NIC is improved by 20 | the following (as measured by iperf send): 21 | 22 | w/o this fix~48 Gbits/s. 
with this fix ~54 Gbits/s 23 | 24 | Cc: 25 | Fixes: 2a2b8eaa5b25 ("iommu: Handle freelists when using deferred flushing in iommu drivers") 26 | Reviewed-by: Robin Murphy 27 | Tested-by: Sanjay Kumar 28 | Signed-off-by: Sanjay Kumar 29 | Signed-off-by: Jacob Pan 30 | --- 31 | v2: use helper function iommu_iotlb_gather_queued() instead of open 32 | coding 33 | --- 34 | drivers/iommu/intel/iommu.c | 8 +++++++- 35 | 1 file changed, 7 insertions(+), 1 deletion(-) 36 | 37 | diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c 38 | index 161342e7149d..18265fa07828 100644 39 | --- a/drivers/iommu/intel/iommu.c 40 | +++ b/drivers/iommu/intel/iommu.c 41 | @@ -4348,7 +4348,13 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, 42 | if (dmar_domain->max_addr == iova + size) 43 | dmar_domain->max_addr = iova; 44 | 45 | - iommu_iotlb_gather_add_page(domain, gather, iova, size); 46 | + /* 47 | + * We do not use page-selective IOTLB invalidation in flush queue, 48 | + * There is no need to track page and sync iotlb. Domain-selective or 49 | + * PASID-selective validation are used in the flush queue. 
50 | + */ 51 | + if (!iommu_iotlb_gather_queued(gather)) 52 | + iommu_iotlb_gather_add_page(domain, gather, iova, size); 53 | 54 | return size; 55 | } 56 | -- 57 | 2.25.1 58 | 59 | 60 | -------------------------------------------------------------------------------- /kdf-boottime.patch: -------------------------------------------------------------------------------- 1 | --- linux-5.19.1/crypto/kdf_sp800108.c~ 2022-08-11 11:22:05.000000000 +0000 2 | +++ linux-5.19.1/crypto/kdf_sp800108.c 2022-08-11 16:16:31.178018142 +0000 3 | @@ -125,6 +125,7 @@ 4 | 5 | static int __init crypto_kdf108_init(void) 6 | { 7 | + return 0; 8 | int ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)", 9 | crypto_kdf108_setkey, crypto_kdf108_ctr_generate); 10 | 11 | -------------------------------------------------------------------------------- /kvm-printk.patch: -------------------------------------------------------------------------------- 1 | these error messages are causing support isseus 2 | 3 | --- linux-6.1/arch/x86/kvm/x86.c~ 2022-12-11 22:15:18.000000000 +0000 4 | +++ linux-6.1/arch/x86/kvm/x86.c 2022-12-15 22:15:07.085648692 +0000 5 | @@ -9340,13 +9340,9 @@ 6 | } 7 | 8 | if (!ops->cpu_has_kvm_support()) { 9 | - pr_err_ratelimited("kvm: no hardware support for '%s'\n", 10 | - ops->runtime_ops->name); 11 | return -EOPNOTSUPP; 12 | } 13 | if (ops->disabled_by_bios()) { 14 | - pr_err_ratelimited("kvm: support for '%s' disabled by bios\n", 15 | - ops->runtime_ops->name); 16 | return -EOPNOTSUPP; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /libsgrowdown.patch: -------------------------------------------------------------------------------- 1 | Place libraries right below the binary for PIE binaries, this helps code locality 2 | (and thus performance). 
3 | 4 | --- linux-5.18.2/fs/binfmt_elf.c~ 2022-06-06 06:49:00.000000000 +0000 5 | +++ linux-5.18.2/fs/binfmt_elf.c 2022-08-10 13:53:04.878633166 +0000 6 | @@ -1297,6 +1297,8 @@ 7 | mm = current->mm; 8 | mm->end_code = end_code; 9 | mm->start_code = start_code; 10 | + if (start_code >= ELF_ET_DYN_BASE) 11 | + mm->mmap_base = start_code; 12 | mm->start_data = start_data; 13 | mm->end_data = end_data; 14 | mm->start_stack = bprm->p; 15 | -------------------------------------------------------------------------------- /mm-lru_cache_disable-use-synchronize_rcu_expedited.patch: -------------------------------------------------------------------------------- 1 | From: Marcelo Tosatti 2 | Subject: mm: lru_cache_disable: use synchronize_rcu_expedited 3 | Date: Mon, 30 May 2022 12:51:56 -0300 4 | 5 | commit ff042f4a9b050 ("mm: lru_cache_disable: replace work queue 6 | synchronization with synchronize_rcu") replaced lru_cache_disable's usage 7 | of work queues with synchronize_rcu. 8 | 9 | Some users reported large performance regressions due to this commit, for 10 | example: 11 | https://lore.kernel.org/all/20220521234616.GO1790663@paulmck-ThinkPad-P17-Gen-1/T/ 12 | 13 | Switching to synchronize_rcu_expedited fixes the problem. 14 | 15 | Link: https://lkml.kernel.org/r/YpToHCmnx/HEcVyR@fuller.cnet 16 | Fixes: ff042f4a9b050 ("mm: lru_cache_disable: replace work queue synchronization with synchronize_rcu") 17 | Signed-off-by: Marcelo Tosatti 18 | Tested-by: Stefan Wahren 19 | Tested-by: Michael Larabel 20 | Cc: Sebastian Andrzej Siewior 21 | Cc: Nicolas Saenz Julienne 22 | Cc: Borislav Petkov 23 | Cc: Minchan Kim 24 | Cc: Matthew Wilcox 25 | Cc: Mel Gorman 26 | Cc: Juri Lelli 27 | Cc: Thomas Gleixner 28 | Cc: Paul E. 
McKenney 29 | Cc: Phil Elwell 30 | Cc: 31 | Signed-off-by: Andrew Morton 32 | --- 33 | 34 | mm/swap.c | 2 +- 35 | 1 file changed, 1 insertion(+), 1 deletion(-) 36 | 37 | --- a/mm/swap.c~mm-lru_cache_disable-use-synchronize_rcu_expedited 38 | +++ a/mm/swap.c 39 | @@ -881,7 +881,7 @@ void lru_cache_disable(void) 40 | * lru_disable_count = 0 will have exited the critical 41 | * section when synchronize_rcu() returns. 42 | */ 43 | - synchronize_rcu(); 44 | + synchronize_rcu_expedited(); 45 | #ifdef CONFIG_SMP 46 | __lru_add_drain_all(true); 47 | #else 48 | _ 49 | -------------------------------------------------------------------------------- /mmput_async.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.0/kernel/exit.c~ 2022-10-02 21:09:07.000000000 +0000 2 | +++ linux-6.0/kernel/exit.c 2022-11-18 17:25:22.445717556 +0000 3 | @@ -507,7 +507,7 @@ 4 | task_unlock(current); 5 | mmap_read_unlock(mm); 6 | mm_update_next_owner(mm); 7 | - mmput(mm); 8 | + mmput_async(mm); 9 | if (test_thread_flag(TIF_MEMDIE)) 10 | exit_oom_victim(); 11 | } 12 | --- linux-6.0/kernel/fork.c~ 2022-11-18 18:53:36.000000000 +0000 13 | +++ linux-6.0/kernel/fork.c 2022-11-18 19:01:44.058654439 +0000 14 | @@ -1222,7 +1222,7 @@ 15 | { 16 | if (atomic_dec_and_test(&mm->mm_users)) { 17 | INIT_WORK(&mm->async_put_work, mmput_async_fn); 18 | - schedule_work(&mm->async_put_work); 19 | + schedule_work_on(0, &mm->async_put_work); 20 | } 21 | } 22 | EXPORT_SYMBOL_GPL(mmput_async); 23 | -------------------------------------------------------------------------------- /netscale.patch: -------------------------------------------------------------------------------- 1 | Author: Wangyang Guo Guo, Wangyang 2 | 3 | 4 | The dst_entry struct has a reference count that gets touched 5 | quite a bit in scenarios where many connections happen 6 | from/to the same IP. 
This dirty cache line is shared 7 | with other members that are read (mostly) that are 8 | used quite a bit (for example, lwtstate) 9 | 10 | In addition, this struct is embedded in other structs 11 | and the __refcnt shows up as false sharing even in that 12 | context. 13 | 14 | An example workload is the phoronix pts/memcached benchmark 15 | (in 1:100 mode) that exaggerates the problem in its setup. 16 | 17 | Without the patch below the workload gets a score of 18 | 771377, while the patch below improves this to 19 | 1027113.. a 30%+ gain. 20 | 21 | It's likely possible to reorder some fields in the struct 22 | to reduce the size of the needed padding, but this is 23 | the simplest solution. 24 | 25 | Signed-off-by: Arjan van de Ven 26 | 27 | 28 | --- a/include/net/dst.h 2022-12-11 22:15:18.000000000 +0000 29 | +++ b/include/net/dst.h 2023-02-13 14:48:45.498505188 +0000 30 | @@ -66,6 +66,7 @@ 31 | */ 32 | #ifdef CONFIG_64BIT 33 | atomic_t __refcnt; /* 64-bit offset 64 */ 34 | + int __pad2[15]; 35 | #endif 36 | int __use; 37 | unsigned long lastuse; 38 | -------------------------------------------------------------------------------- /nonapi-realtek.patch: -------------------------------------------------------------------------------- 1 | see issue #3018 2 | 3 | --- linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c~ 2023-11-08 10:56:25.000000000 +0000 4 | +++ linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c 2024-01-04 00:15:54.348399491 +0000 5 | @@ -4648,7 +4648,7 @@ 6 | phy_init_hw(tp->phydev); 7 | phy_resume(tp->phydev); 8 | rtl8169_init_phy(tp); 9 | - napi_enable(&tp->napi); 10 | +// napi_enable(&tp->napi); 11 | set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); 12 | rtl_reset_work(tp); 13 | 14 | --- linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c~ 2024-01-04 00:15:54.000000000 +0000 15 | +++ linux-6.6.1/drivers/net/ethernet/realtek/r8169_main.c 2024-01-04 17:25:36.357314237 +0000 16 | @@ -3962,7 +3962,7 @@ 17 | for (i = 0; i < NUM_RX_DESC; i++) 18 | 
rtl8169_mark_to_asic(tp->RxDescArray + i); 19 | 20 | - napi_enable(&tp->napi); 21 | +// napi_enable(&tp->napi); 22 | rtl_hw_start(tp); 23 | } 24 | 25 | -------------------------------------------------------------------------------- /options.conf: -------------------------------------------------------------------------------- 1 | autoupdate = true 2 | -------------------------------------------------------------------------------- /posted_msi.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.12.1/drivers/iommu/irq_remapping.c~ 2024-11-22 14:30:26.000000000 +0000 2 | +++ linux-6.12.1/drivers/iommu/irq_remapping.c 2025-03-12 17:17:13.684425681 +0000 3 | @@ -24,7 +24,7 @@ 4 | 5 | int disable_irq_post = 0; 6 | 7 | -bool enable_posted_msi __ro_after_init; 8 | +bool enable_posted_msi __ro_after_init = true; 9 | 10 | static int disable_irq_remap; 11 | static struct irq_remap_ops *remap_ops; 12 | -------------------------------------------------------------------------------- /ratelimit-sched-yield.patch: -------------------------------------------------------------------------------- 1 | From 467904416b3786c9f2b29ca683d36cb2523ae7ce Mon Sep 17 00:00:00 2001 2 | From: Colin Ian King 3 | Date: Thu, 17 Oct 2024 16:29:50 +0100 4 | Subject: [PATCH] handle sched_yield gracefully when being hammered 5 | 6 | Some misguided apps hammer sched_yield() in a tight loop (they should be using futexes instead) 7 | which causes massive lock contention even if there is little work to do or to yield to. 
8 | rate limit yielding since the base scheduler does a pretty good job already about just 9 | running the right things 10 | 11 | Signed-off-by: Colin Ian King 12 | --- 13 | kernel/sched/syscalls.c | 12 ++++++++++++ 14 | 1 file changed, 12 insertions(+) 15 | 16 | diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c 17 | index ae1b42775ef9..441ac65f4f15 100644 18 | --- a/kernel/sched/syscalls.c 19 | +++ b/kernel/sched/syscalls.c 20 | @@ -1456,10 +1456,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, 21 | return ret; 22 | } 23 | 24 | +static DEFINE_PER_CPU(unsigned long, last_yield); 25 | + 26 | static void do_sched_yield(void) 27 | { 28 | struct rq_flags rf; 29 | struct rq *rq; 30 | + int cpu = raw_smp_processor_id(); 31 | + 32 | + cond_resched(); 33 | + 34 | + /* rate limit yielding to something sensible */ 35 | + 36 | + if (!time_after(jiffies, per_cpu(last_yield, cpu))) 37 | + return; 38 | + 39 | + per_cpu(last_yield, cpu) = jiffies; 40 | 41 | rq = this_rq_lock_irq(&rf); 42 | 43 | -- 44 | 2.46.2 45 | 46 | -------------------------------------------------------------------------------- /release: -------------------------------------------------------------------------------- 1 | 1574 2 | -------------------------------------------------------------------------------- /revert-regression.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.10.11/mm/mmap.c~ 2024-09-18 17:25:18.000000000 +0000 2 | +++ linux-6.10.11/mm/mmap.c 2024-09-26 23:45:20.399101364 +0000 3 | @@ -1881,10 +1881,11 @@ 4 | 5 | if (get_area) { 6 | addr = get_area(file, addr, len, pgoff, flags); 7 | - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 8 | +// } 9 | +//else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 10 | /* Ensures that larger anonymous mappings are THP aligned.
*/ 11 | - addr = thp_get_unmapped_area_vmflags(file, addr, len, 12 | - pgoff, flags, vm_flags); 13 | + //addr = thp_get_unmapped_area_vmflags(file, addr, len, 14 | +// pgoff, flags, vm_flags); 15 | } else { 16 | addr = mm_get_unmapped_area_vmflags(current->mm, file, addr, len, 17 | pgoff, flags, vm_flags); 18 | -------------------------------------------------------------------------------- /scale-net-alloc.patch: -------------------------------------------------------------------------------- 1 | diff --git a/include/net/sock.h b/include/net/sock.h 2 | index 4e787285fc66..3e045f6eb6ee 100644 3 | --- a/include/net/sock.h 4 | +++ b/include/net/sock.h 5 | @@ -1684,10 +1684,17 @@ static inline void sk_mem_charge(struct sock *sk, int size) 6 | 7 | static inline void sk_mem_uncharge(struct sock *sk, int size) 8 | { 9 | + int reclaimable, reclaim_threshold; 10 | + 11 | + reclaim_threshold = 64 * 1024; 12 | if (!sk_has_account(sk)) 13 | return; 14 | sk_forward_alloc_add(sk, size); 15 | - sk_mem_reclaim(sk); 16 | + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); 17 | + if (reclaimable > reclaim_threshold) { 18 | + reclaimable -= reclaim_threshold; 19 | + __sk_mem_reclaim(sk, reclaimable); 20 | + } 21 | } 22 | 23 | /* 24 | -------------------------------------------------------------------------------- /scale.patch: -------------------------------------------------------------------------------- 1 | From 0986b8be8b9fe5baad3d34fc9f687dfedf28e100 Mon Sep 17 00:00:00 2001 2 | From: "Brett T. 
Warden" 3 | Date: Mon, 19 Sep 2022 08:52:45 -0700 4 | Subject: [PATCH] scale 5 | 6 | --- 7 | include/linux/page_counter.h | 1 + 8 | mm/memcontrol.c | 2 +- 9 | 2 files changed, 2 insertions(+), 1 deletion(-) 10 | 11 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c 12 | index b69979c9ced5..7eadbafc006b 100644 13 | --- a/mm/memcontrol.c 14 | +++ b/mm/memcontrol.c 15 | @@ -625,7 +625,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) 16 | cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); 17 | 18 | x = __this_cpu_add_return(stats_updates, abs(val)); 19 | - if (x > MEMCG_CHARGE_BATCH) { 20 | + if (x > MEMCG_CHARGE_BATCH * 128) { 21 | /* 22 | * If stats_flush_threshold exceeds the threshold 23 | * (>num_online_cpus()), cgroup stats update will be triggered 24 | -- 25 | -------------------------------------------------------------------------------- /sched-hybrid3.patch: -------------------------------------------------------------------------------- 1 | From mboxrd@z Thu Jan 1 00:00:00 1970 2 | Return-Path: 3 | X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on 4 | aws-us-west-2-korg-lkml-1.web.codeaurora.org 5 | Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) 6 | by smtp.lore.kernel.org (Postfix) with ESMTP id DC516ECAAA3 7 | for ; Thu, 25 Aug 2022 22:49:59 +0000 (UTC) 8 | Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand 9 | id S244115AbiHYWt6 (ORCPT ); 10 | Thu, 25 Aug 2022 18:49:58 -0400 11 | Received: from lindbergh.monkeyblade.net ([23.128.96.19]:38550 "EHLO 12 | lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org 13 | with ESMTP id S243799AbiHYWtn (ORCPT 14 | ); 15 | Thu, 25 Aug 2022 18:49:43 -0400 16 | DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; 17 | d=intel.com; i=@intel.com; q=dns/txt; s=Intel; 18 | t=1661467782; x=1693003782; 19 | h=from:to:cc:subject:date:message-id:in-reply-to: 20 | references; 21 | bh=GkDXgCnuNRjnWESTDjDb3InPMKM7a1XvTOW385F9W9Q=; 
22 | b=RlqfFRV48+YgCLi3VvMumNkY8iTCQ+ZgMOi9qBiRWJQPbHoq8dp3elKs 23 | cO4ZrsipTyb7Aze7C01EWhkyyKGKN3ymsclD4XTGj3yl+szdhV9MXOviP 24 | JErGdVvnIz3bR1LEt2mWZ0ct+MJwYPz5b1uaENwlXPfBURNyErllrjI5A 25 | ULF/1vD/z+RiReQBOQ4OcZzBVDblX5sCtn7pREX91EWlqsXeNJwomHqp8 26 | zG0QiSdgk7wt8XFqLuvo2x8w77etQsXepcyFu5c/JN2uzY5iOm5v6rDuk 27 | lSpik8kjAHerCkLSJzyoxuzW0N+yLTYLbU93JcT4AHh37xp0gTZIVnEUC 28 | Q==; 29 | X-IronPort-AV: E=McAfee;i="6500,9779,10450"; a="295153922" 30 | X-IronPort-AV: E=Sophos;i="5.93,264,1654585200"; 31 | d="scan'208";a="295153922" 32 | X-ExtLoop1: 1 33 | X-IronPort-AV: E=Sophos;i="5.93,264,1654585200"; 34 | d="scan'208";a="678642681" 35 | From: Ricardo Neri 36 | To: "Peter Zijlstra (Intel)" , 37 | Juri Lelli , 38 | Vincent Guittot 39 | Cc: Ricardo Neri , 40 | "Ravi V. Shankar" , 41 | Ben Segall , 42 | Daniel Bristot de Oliveira , 43 | Dietmar Eggemann , 44 | Len Brown , Mel Gorman , 45 | "Rafael J. Wysocki" , 46 | Srinivas Pandruvada , 47 | Steven Rostedt , 48 | Tim Chen , 49 | Valentin Schneider , x86@kernel.org, 50 | linux-kernel@vger.kernel.org, 51 | Ricardo Neri , 52 | "Tim C . Chen" 53 | Subject: [PATCH 3/4] sched/fair: Let lower-priority CPUs do active balancing 54 | Date: Thu, 25 Aug 2022 15:55:28 -0700 55 | Message-Id: <20220825225529.26465-4-ricardo.neri-calderon@linux.intel.com> 56 | X-Mailer: git-send-email 2.17.1 57 | In-Reply-To: <20220825225529.26465-1-ricardo.neri-calderon@linux.intel.com> 58 | References: <20220825225529.26465-1-ricardo.neri-calderon@linux.intel.com> 59 | Precedence: bulk 60 | List-ID: 61 | X-Mailing-List: linux-kernel@vger.kernel.org 62 | 63 | When more than one SMT siblings of a physical core are busy, an idle CPU 64 | of lower priority can help. 65 | 66 | Indicate that the low priority CPU can do active balancing from the high- 67 | priority CPU only if they belong to separate cores. 68 | 69 | Cc: Ben Segall 70 | Cc: Daniel Bristot de Oliveira 71 | Cc: Dietmar Eggemann 72 | Cc: Len Brown 73 | Cc: Mel Gorman 74 | Cc: Rafael J. 
Wysocki 75 | Cc: Srinivas Pandruvada 76 | Cc: Steven Rostedt 77 | Cc: Tim C. Chen 78 | Cc: Valentin Schneider 79 | Cc: x86@kernel.org 80 | Cc: linux-kernel@vger.kernel.org 81 | Reviewed-by: Len Brown 82 | Signed-off-by: Ricardo Neri 83 | --- 84 | kernel/sched/fair.c | 7 ++++++- 85 | 1 file changed, 6 insertions(+), 1 deletion(-) 86 | 87 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c 88 | index 810645eb58ed..9b608b31080f 100644 89 | --- a/kernel/sched/fair.c 90 | +++ b/kernel/sched/fair.c 91 | @@ -9759,9 +9759,14 @@ asym_active_balance(struct lb_env *env) 92 | * ASYM_PACKING needs to force migrate tasks from busy but 93 | * lower priority CPUs in order to pack all tasks in the 94 | * highest priority CPUs. 95 | + * 96 | + * If the busy CPU has higher priority but is an SMT sibling 97 | + * in which other SMT siblings are also busy, a lower-priority 98 | + * CPU in a separate core can help. 99 | */ 100 | return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) && 101 | - sched_asym_prefer(env->dst_cpu, env->src_cpu); 102 | + (sched_asym_prefer(env->dst_cpu, env->src_cpu) || 103 | + !(env->sd->flags & SD_SHARE_CPUCAPACITY)); 104 | } 105 | 106 | static inline bool 107 | -- 108 | 2.25.1 109 | 110 | 111 | -------------------------------------------------------------------------------- /scripts/develop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- 3 | # ex: ts=8 sw=4 sts=4 et filetype=sh 4 | 5 | SPECFILE=$1 6 | DESTDIR=$2 7 | 8 | for c in grep cmp sha1sum make tar git 9 | do 10 | if ! command -v $c > /dev/null 11 | then 12 | echo >&2 "The script needs the \"$c\" command, and it was not found." 13 | exit 1 14 | fi 15 | done 16 | 17 | if [ ! ${SPECFILE} ] 18 | then 19 | echo >&2 "${SPECFILE} not found" 20 | exit 1 21 | fi 22 | 23 | if [ -z "${DESTDIR}" ] 24 | then 25 | DESTDIR=. 
26 | else 27 | mkdir -p ${DESTDIR} 28 | fi 29 | 30 | SRC_URL=$(grep "^Source0:" "${SPECFILE}" | cut -f 2- -d ':' | tr -d " ") 31 | SRC_FILE=${SRC_URL##*/} 32 | SRC_DIR=${SRC_FILE%*.tar.xz} 33 | SRC_VER=${SRC_DIR#*-} 34 | 35 | if [ ! -f ${SRC_FILE} ] 36 | then 37 | # Get upstream sources 38 | if ! curl --fail -LO ${SRC_URL} 39 | then 40 | echo >&2 "Cannot download ${SRC_FILE}" 41 | exit 3 42 | fi 43 | fi 44 | 45 | echo $(sha1sum ${SRC_FILE} | cut -d\ -f1)/${SRC_FILE} > upstream.check 46 | 47 | if ! cmp --quiet upstream upstream.check 48 | then 49 | echo >&2 "${SRC_FILE} checksum fails" 50 | rm upstream.check 51 | exit 2 52 | fi 53 | 54 | rm upstream.check 55 | 56 | rm -rf ${DESTDIR}/${SRC_DIR} 57 | tar xf ${SRC_FILE} -C ${DESTDIR} 58 | 59 | git -C ${DESTDIR}/${SRC_DIR} init --quiet 60 | git -C ${DESTDIR}/${SRC_DIR} config gc.auto 0 61 | git -C ${DESTDIR}/${SRC_DIR} add --all 62 | git -C ${DESTDIR}/${SRC_DIR} commit -m "${PKG_NAME} ${SRC_VER}" --quiet 63 | git -C ${DESTDIR}/${SRC_DIR} tag -a -m "v${SRC_VER}" "v${SRC_VER}" 64 | 65 | for p in CVE* [0-9]*.patch 66 | do 67 | if [ -f $p ] 68 | then 69 | if ! git -C ${DESTDIR}/${SRC_DIR} am --quiet $(realpath $p) 70 | then 71 | echo >&2 "Error at: ${p}" 72 | exit 4 73 | fi 74 | fi 75 | done 76 | 77 | cp config ${DESTDIR}/${SRC_DIR}/.config 78 | 79 | echo 80 | echo "The linux source plus Clear Linux patches is" 81 | echo "placed at: \"$(realpath ${DESTDIR}/${SRC_DIR})\"" 82 | -------------------------------------------------------------------------------- /scripts/port-to-current.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- 3 | # ex: ts=8 sw=4 sts=4 et filetype=sh 4 | # 5 | # SPDX-License-Identifier: GPL-2.0 6 | # 7 | # Port old patches to new base 8 | # 9 | # Usage: 10 | # 1. Goto current kernel source tree 11 | # 2. 
/path/to/this/script /path/to/old/patch/set/*.patch 12 | 13 | tmpdir=$(mktemp -d) 14 | rejf=${tmpdir}/rej 15 | 16 | for p in $* 17 | do 18 | if ! git am --quiet $p 2> /dev/null 19 | then 20 | rm -f ${rejf} 21 | if ! patch --quiet --reject-file=${rejf} --forward -p1 < $p 22 | then 23 | if [ -f ${rejf} ] 24 | then 25 | if [ -n "${DISPLAY}" ] 26 | then 27 | gvim -f ${rejf} 28 | else 29 | vim ${rejf} 30 | fi 31 | fi 32 | fi 33 | git status 34 | echo $p 35 | read dopause 36 | if git diff --no-ext-diff --quiet 37 | then 38 | git am --quiet --skip 39 | git -C ${p%/*} rm --quiet ${p#*/} 40 | else 41 | git add --all 42 | git am --quiet --continue 43 | fi 44 | fi 45 | done 46 | rm -rf ${tmpdir} 47 | -------------------------------------------------------------------------------- /scripts/to-spec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- 3 | # ex: ts=8 sw=4 sts=4 et filetype=sh 4 | # 5 | # SPDX-License-Identifier: GPL-3.0-or-later 6 | 7 | KLR_SPEC_FILE=$1 8 | repo_path=. 
9 | tmpd=$(mktemp -d /tmp/spec.XXX) 10 | 11 | sed -i '/PK XXXX/,/#END/{//!d}' ${KLR_SPEC_FILE} 12 | sed -i '/patchXXXX/,/End XXXX/{//!d}' ${KLR_SPEC_FILE} 13 | 14 | for patch in ${repo_path}/[01234]*.patch 15 | do 16 | P=${patch##*/} 17 | N=$(echo ${P} | cut -c 1-4) 18 | echo "Patch${N}: ${P}" >> ${tmpd}/PatchXXXX 19 | echo "%patch${N} -p1" >> ${tmpd}/patchYYYY 20 | done 21 | 22 | sed -i "/PK XXXX/r ${tmpd}/PatchXXXX" ${KLR_SPEC_FILE} 23 | sed -i "/patchXXXX/r ${tmpd}/patchYYYY" ${KLR_SPEC_FILE} 24 | 25 | rm -rf ${tmpd} 26 | -------------------------------------------------------------------------------- /slack.patch: -------------------------------------------------------------------------------- 1 | --- linux-6.5.1/init/init_task.c~ 2023-09-02 07:13:30.000000000 +0000 2 | +++ linux-6.5.1/init/init_task.c 2023-10-30 15:12:13.920976572 +0000 3 | @@ -130,7 +130,7 @@ 4 | .journal_info = NULL, 5 | INIT_CPU_TIMERS(init_task) 6 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), 7 | - .timer_slack_ns = 50000, /* 50 usec default slack */ 8 | + .timer_slack_ns = 50, /* 50 nsec default slack */ 9 | .thread_pid = &init_struct_pid, 10 | .thread_group = LIST_HEAD_INIT(init_task.thread_group), 11 | .thread_node = LIST_HEAD_INIT(init_signals.thread_head), 12 | -------------------------------------------------------------------------------- /testresults: -------------------------------------------------------------------------------- 1 | Total : 0 2 | Pass : 0 3 | Fail : 0 4 | Skip : 0 5 | XFail : 0 6 | -------------------------------------------------------------------------------- /update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | PKG=linux 6 | STABLE_VER=6.14 7 | SPEC=./$PKG.spec 8 | 9 | CUR_VER=$(rpmspec --srpm -q --qf="%{VERSION}" $SPEC) 10 | CUR_VER=${CUR_VER//./\\.} 11 | 12 | rm -f releases.json 13 | curl -sSf -O -L https://www.kernel.org/releases.json 14 | NEW_VER=$(python3 
./filter-stable.py $STABLE_VER releases.json) 15 | 16 | sed -i -e "s/$CUR_VER/$NEW_VER/g" $SPEC 17 | 18 | if ! git diff --quiet $SPEC; then 19 | make generateupstream 20 | make bumpnogit 21 | git add $SPEC upstream release 22 | git commit -m "Stable update to $NEW_VER" $SPEC upstream release 23 | make koji-nowait 24 | fi 25 | -------------------------------------------------------------------------------- /upstream: -------------------------------------------------------------------------------- 1 | f0d072bf33666a4fc96bc1a9246d0747371e4c88/linux-6.14.8.tar.xz 2 | --------------------------------------------------------------------------------