6.10: Add bore 5.2.4
Signed-off-by: Piotr Gorski <[email protected]>
sirlucjan committed Jul 4, 2024
1 parent be2d247 commit 3559f50
Showing 4 changed files with 275 additions and 239 deletions.
129 changes: 69 additions & 60 deletions 6.10/sched-dev/0001-bore-cachy-ext.patch
@@ -1,18 +1,19 @@
From 3b724414651c8a7a2e4b65978435ecedd42f164b Mon Sep 17 00:00:00 2001
From 6e01fbddcdf96c19bfc8b3f539ee44587fce98b6 Mon Sep 17 00:00:00 2001
From: Piotr Gorski <[email protected]>
Date: Thu, 4 Jul 2024 00:39:54 +0200
Date: Thu, 4 Jul 2024 21:34:03 +0200
Subject: [PATCH] bore-cachy-ext

Signed-off-by: Piotr Gorski <[email protected]>
---
include/linux/sched.h | 10 ++
init/Kconfig | 17 +++
kernel/sched/core.c | 143 ++++++++++++++++++
kernel/sched/debug.c | 60 +++++++-
kernel/sched/fair.c | 314 ++++++++++++++++++++++++++++++++++++----
kernel/sched/features.h | 20 ++-
kernel/Kconfig.hz | 16 +++
kernel/sched/core.c | 143 ++++++++++++++++++++
kernel/sched/debug.c | 60 ++++++++-
kernel/sched/fair.c | 288 ++++++++++++++++++++++++++++++++++++----
kernel/sched/features.h | 28 +++-
kernel/sched/sched.h | 7 +
7 files changed, 539 insertions(+), 32 deletions(-)
8 files changed, 538 insertions(+), 31 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55912a383..465a13656 100644
@@ -63,8 +64,34 @@ index aa28dc0ae..cdf81259f 100644
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select CGROUPS
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 0f78364ef..b50189ee5 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -79,5 +79,21 @@ config HZ
default 750 if HZ_750
default 1000 if HZ_1000

+config MIN_BASE_SLICE_NS
+ int "Default value for min_base_slice_ns"
+ default 2000000
+ help
+ The BORE Scheduler automatically calculates the optimal base
+ slice for the configured HZ using the following equation:
+
+ base_slice_ns = max(min_base_slice_ns, 1000000000/HZ)
+
+ This option sets the default lower bound limit of the base slice
+ to prevent the loss of task throughput due to overscheduling.
+
+ Setting this value too high can cause the system to boot with
+ an unnecessarily large base slice, resulting in high scheduling
+ latency and poor system responsiveness.
+
config SCHED_HRTICK
def_bool HIGH_RES_TIMERS
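
The help text above pins the mechanism down to one line of arithmetic: the effective base slice is whichever is larger, one tick (1 s/HZ) or the configured floor. A minimal userspace sketch of that formula (only the equation and the 2000000 ns default come from the patch; the program around them is illustrative):

/* base_slice_ns = max(min_base_slice_ns, 1000000000/HZ), as stated in the
 * MIN_BASE_SLICE_NS help text; everything else here is scaffolding. */
#include <stdio.h>
#include <stdint.h>

static uint64_t base_slice_ns(uint64_t min_base_slice_ns, uint64_t hz)
{
	uint64_t tick_ns = 1000000000ULL / hz;	/* one scheduler tick, in ns */
	return tick_ns > min_base_slice_ns ? tick_ns : min_base_slice_ns;
}

int main(void)
{
	const uint64_t min_ns = 2000000ULL;	/* the Kconfig default above */
	const uint64_t hz[] = { 100, 250, 300, 750, 1000 };
	for (size_t i = 0; i < sizeof(hz) / sizeof(hz[0]); i++)
		printf("HZ=%4llu -> base_slice_ns=%llu\n",
		       (unsigned long long)hz[i],
		       (unsigned long long)base_slice_ns(min_ns, hz[i]));
	return 0;
}

With the 2 ms default the floor only engages above HZ=500: at HZ=250 the slice stays at one 4 ms tick, while at HZ=1000 it is lifted from 1 ms to 2 ms, which is exactly the overscheduling guard the help text describes.
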
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6161dd192..bff12852b 100644
index 6161dd192..67850fee3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4547,6 +4547,138 @@ int wake_up_state(struct task_struct *p, unsigned int state)
@@ -232,7 +259,7 @@ index 6161dd192..bff12852b 100644

+#ifdef CONFIG_SCHED_BORE
+ sched_init_bore();
+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.2.2 by Masahito Suzuki");
+ printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.2.4 by Masahito Suzuki");
+#endif // CONFIG_SCHED_BORE
+
wait_bit_init();
@@ -346,7 +373,7 @@ index c057ef46c..3cab39e34 100644
P(se.avg.runnable_sum);
P(se.avg.util_sum);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 00fbaec60..b8ea63be3 100644
index 00fbaec60..902948476 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -19,6 +19,9 @@
*/
#include <linux/energy_model.h>
#include <linux/mmap_lock.h>
@@ -64,28 +67,126 @@
@@ -64,28 +67,124 @@
* SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
*
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_base_slice = 1000000000ULL / HZ;
+static unsigned int configured_sched_base_slice = 1000000000ULL / HZ;
+unsigned int sysctl_sched_min_base_slice = 2000000ULL;
+unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS;
+#else // !CONFIG_SCHED_BORE
unsigned int sysctl_sched_base_slice = 750000ULL;
static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
+u8 __read_mostly sched_burst_smoothness_long = 1;
+u8 __read_mostly sched_burst_smoothness_short = 0;
+u8 __read_mostly sched_burst_fork_atavistic = 2;
+u8 __read_mostly sched_burst_wakeup_boost = 0;
+u8 __read_mostly sched_burst_penalty_offset = 22;
+uint __read_mostly sched_burst_penalty_scale = 1280;
+uint __read_mostly sched_burst_cache_lifetime = 60000000;
+static int __maybe_unused seven = 7;
+static int __maybe_unused sixty_four = 64;
+static int __maybe_unused maxval_12_bits = 4095;
+

static int __init setup_sched_thermal_decay_shift(char *str)
{
@@ -130,12 +231,8 @@ int __weak arch_asym_cpu_priority(int cpu)
@@ -130,12 +229,8 @@ int __weak arch_asym_cpu_priority(int cpu)
*
* (default: 5 msec, units: microseconds)
*/

#ifdef CONFIG_NUMA_BALANCING
/* Restrict the NUMA promotion throughput (MB/s) for each target node. */
@@ -144,6 +241,78 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
@@ -144,6 +239,69 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;

#ifdef CONFIG_SYSCTL
static struct ctl_table sched_fair_sysctls[] = {
+ .extra2 = SYSCTL_THREE,
+ },
+ {
+ .procname = "sched_burst_wakeup_boost",
+ .data = &sched_burst_wakeup_boost,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &seven,
+ },
+ {
+ .procname = "sched_burst_penalty_offset",
+ .data = &sched_burst_penalty_offset,
+ .maxlen = sizeof(u8),
#ifdef CONFIG_CFS_BANDWIDTH
{
.procname = "sched_cfs_bandwidth_slice_us",
@@ -201,6 +370,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
@@ -201,6 +359,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
*
* This idea comes from the SD scheduler of Con Kolivas:
*/
static unsigned int get_update_sysctl_factor(void)
{
unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
@@ -231,6 +407,7 @@ static void update_sysctl(void)
@@ -231,6 +396,7 @@ static void update_sysctl(void)
SET_SYSCTL(sched_base_slice);
#undef SET_SYSCTL
}
+#endif // CONFIG_SCHED_BORE

void __init sched_init_granularity(void)
{
@@ -708,6 +885,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
@@ -708,6 +874,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)

vlag = avruntime - se->vruntime;
limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);

return clamp(vlag, -limit, limit);
}
@@ -868,6 +1048,39 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
@@ -868,6 +1037,39 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
return __node_2_se(left);
}

/*
* Earliest Eligible Virtual Deadline First
*
@@ -887,28 +1100,27 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
@@ -887,28 +1089,27 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
*
* Which allows tree pruning through eligibility.
*/
return curr;

/* Pick the leftmost entity if it's eligible */
@@ -967,6 +1179,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
@@ -967,6 +1168,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
* Scheduling class statistics methods:
*/
#ifdef CONFIG_SMP
+#if !defined(CONFIG_SCHED_BORE)
int sched_update_scaling(void)
{
unsigned int factor = get_update_sysctl_factor();
@@ -978,6 +1191,7 @@ int sched_update_scaling(void)
@@ -978,6 +1180,7 @@ int sched_update_scaling(void)

return 0;
}
+#endif // CONFIG_SCHED_BORE
#endif
#endif

@@ -1178,7 +1392,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
@@ -1178,7 +1381,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (unlikely(delta_exec <= 0))
return;

update_deadline(cfs_rq, curr);
update_min_vruntime(cfs_rq);

@@ -5203,6 +5423,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -5203,6 +5412,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*
* EEVDF: placement strategy #1 / #2
*/
if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
struct sched_entity *curr = cfs_rq->curr;
unsigned long load;
@@ -5278,8 +5501,22 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -5278,7 +5490,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* on average, halfway through their slice, as such start tasks
* off with half a slice to ease into the competition.
*/
- if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
- vslice /= 2;
+ bool half_flag;
+ if (flags & ENQUEUE_INITIAL) {
+ if (!(half_flag = sched_feat(PLACE_DEADLINE_INITIAL)))
+ goto no_boost;
+ }
+ else if (flags & ENQUEUE_WAKEUP)
+ half_flag = sched_burst_wakeup_boost & 0x4;
+ else
+ goto no_boost;
+
+ u64 orig_vslice = vslice;
+ vslice >>= half_flag;
+ u32 lag_flag = sched_burst_wakeup_boost & 0x3;
+ if (lag_flag && (orig_vslice > se->burst_time))
+ vslice -= (orig_vslice - se->burst_time) >> (lag_flag - 1);
+no_boost:
+ if ((sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL)) ||
+ (sched_feat(PLACE_DEADLINE_WAKEUP) && (flags & ENQUEUE_WAKEUP)))
vslice /= 2;

/*
* EEVDF: vd_i = ve_i + r_i/w_i
@@ -5492,7 +5729,7 @@ pick_next_entity(struct cfs_rq *cfs_rq)
@@ -5492,7 +5705,7 @@ pick_next_entity(struct cfs_rq *cfs_rq)
cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
return cfs_rq->next;

Expand All @@ -771,7 +772,7 @@ index 00fbaec60..b8ea63be3 100644
}

static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -6860,6 +7097,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
@@ -6860,6 +7073,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
bool was_sched_idle = sched_idle_rq(rq);

util_est_dequeue(&rq->cfs, p);
Expand All @@ -786,7 +787,7 @@ index 00fbaec60..b8ea63be3 100644

for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
@@ -8425,10 +8670,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
@@ -8425,10 +8646,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
cfs_rq = cfs_rq_of(se);
update_curr(cfs_rq);

Expand All @@ -798,7 +799,7 @@ index 00fbaec60..b8ea63be3 100644
goto preempt;

return;
@@ -8646,16 +8888,25 @@ static void yield_task_fair(struct rq *rq)
@@ -8646,16 +8864,25 @@ static void yield_task_fair(struct rq *rq)
/*
* Are we the only task in the tree?
*/
Expand All @@ -824,7 +825,7 @@ index 00fbaec60..b8ea63be3 100644
/*
* Tell update_rq_clock() that we've just updated,
* so we don't do microscopic update in schedule()
@@ -12719,6 +12970,9 @@ static void task_fork_fair(struct task_struct *p)
@@ -12719,6 +12946,9 @@ static void task_fork_fair(struct task_struct *p)
curr = cfs_rq->curr;
if (curr)
update_curr(cfs_rq);
Expand All @@ -835,10 +836,10 @@ index 00fbaec60..b8ea63be3 100644
rq_unlock(rq, &rf);
}
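
Taken together, the fair.c hunks above remove the experimental sched_burst_wakeup_boost tunable from 5.2.2 (its declaration and its sysctl entry both disappear) and replace the bit-twiddling in place_entity() with a plain feature test: the initial virtual slice is halved for newly forked entities and, when the new PLACE_DEADLINE_WAKEUP feature is set, for woken ones as well. A self-contained sketch of that decision, assuming illustrative flag values and plain booleans standing in for sched_feat():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ENQUEUE_WAKEUP	0x01	/* flag values illustrative, not the kernel's */
#define ENQUEUE_INITIAL	0x02

/* Stand-ins for sched_feat(PLACE_DEADLINE_INITIAL) and
 * sched_feat(PLACE_DEADLINE_WAKEUP); per the features.h hunk below,
 * the latter defaults to true only under CONFIG_SCHED_BORE. */
static bool place_deadline_initial = true;
static bool place_deadline_wakeup  = true;

static uint64_t scaled_vslice(uint64_t vslice, int flags)
{
	/* Newly forked tasks, and with BORE freshly woken ones too, start
	 * with half a virtual slice so they ease into the competition. */
	if ((place_deadline_initial && (flags & ENQUEUE_INITIAL)) ||
	    (place_deadline_wakeup  && (flags & ENQUEUE_WAKEUP)))
		return vslice / 2;
	return vslice;
}

int main(void)
{
	printf("initial: %llu\n", (unsigned long long)scaled_vslice(3000000, ENQUEUE_INITIAL));
	printf("plain:   %llu\n", (unsigned long long)scaled_vslice(3000000, 0));
	return 0;
}

The design point is worth noting: where 5.2.2 encoded the wakeup boost in a three-bit sysctl, 5.2.4 reuses the existing sched_feat() switchboard, so the behaviour stays runtime-toggleable without a dedicated tunable.
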
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 143f55df8..bfeb9f653 100644
index 143f55df8..9ad25e4e7 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -5,8 +5,26 @@
@@ -5,8 +5,34 @@
* sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
*/
SCHED_FEAT(PLACE_LAG, true)
SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
-SCHED_FEAT(RUN_TO_PARITY, true)
+/*
+ * Give woken tasks half a slice to ease into the competition.
+ */
+#ifdef CONFIG_SCHED_BORE
+SCHED_FEAT(PLACE_DEADLINE_WAKEUP, true)
+#else // !CONFIG_SCHED_BORE
+SCHED_FEAT(PLACE_DEADLINE_WAKEUP, false)
+#endif // CONFIG_SCHED_BORE
+/*
+ * Inhibit (wakeup) preemption until the current task has exhausted its slice.
+ */
+#ifdef CONFIG_SCHED_BORE
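
The features.h hunk is truncated by the page at this point, but the machinery these SCHED_FEAT() lines feed is small enough to model in miniature: the kernel expands the feature list into an enum plus a default bitmask, and sched_feat() tests a bit of that mask (through static keys in the real implementation). The X-macro sketch below imitates that construction; the names and the userspace scaffolding are illustrative:

#include <stdbool.h>
#include <stdio.h>

#define FEATURES(F)				\
	F(PLACE_LAG, true)			\
	F(PLACE_DEADLINE_INITIAL, true)		\
	F(PLACE_DEADLINE_WAKEUP, true)	/* BORE default, per the hunk above */

#define F_ENUM(name, def) FEAT_##name,
enum feat_bit { FEATURES(F_ENUM) FEAT_NR };

#define F_MASK(name, def) | ((def) ? 1u << FEAT_##name : 0u)
static const unsigned int feat_default_mask = 0u FEATURES(F_MASK);

#define feat_enabled(name) (!!(feat_default_mask & (1u << FEAT_##name)))

int main(void)
{
	printf("PLACE_DEADLINE_WAKEUP enabled: %d\n",
	       feat_enabled(PLACE_DEADLINE_WAKEUP));
	return 0;
}

At runtime the kernel's actual feature mask can be inspected and toggled through /sys/kernel/debug/sched/features, which is how a feature such as PLACE_DEADLINE_WAKEUP can be flipped without a rebuild.
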
