Commit
Signed-off-by: Piotr Gorski <[email protected]>
Showing 4 changed files with 275 additions and 239 deletions.
@@ -1,18 +1,19 @@
-From 3b724414651c8a7a2e4b65978435ecedd42f164b Mon Sep 17 00:00:00 2001
+From 6e01fbddcdf96c19bfc8b3f539ee44587fce98b6 Mon Sep 17 00:00:00 2001
 From: Piotr Gorski <[email protected]>
-Date: Thu, 4 Jul 2024 00:39:54 +0200
+Date: Thu, 4 Jul 2024 21:34:03 +0200
 Subject: [PATCH] bore-cachy-ext

 Signed-off-by: Piotr Gorski <[email protected]>
 ---
  include/linux/sched.h   |  10 ++
  init/Kconfig            |  17 +++
- kernel/sched/core.c     | 143 ++++++++++++++++++
- kernel/sched/debug.c    |  60 +++++++-
- kernel/sched/fair.c     | 314 ++++++++++++++++++++++++++++++++++++----
- kernel/sched/features.h |  20 ++-
+ kernel/Kconfig.hz       |  16 +++
+ kernel/sched/core.c     | 143 ++++++++++++++++++++
+ kernel/sched/debug.c    |  60 ++++++++-
+ kernel/sched/fair.c     | 288 ++++++++++++++++++++++++++++++++++++----
+ kernel/sched/features.h |  28 +++-
  kernel/sched/sched.h    |   7 +
- 7 files changed, 539 insertions(+), 32 deletions(-)
+ 8 files changed, 538 insertions(+), 31 deletions(-)

 diff --git a/include/linux/sched.h b/include/linux/sched.h
 index 55912a383..465a13656 100644
@@ -63,8 +64,34 @@ index aa28dc0ae..cdf81259f 100644
  config SCHED_AUTOGROUP
  	bool "Automatic process group scheduling"
  	select CGROUPS
+diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
+index 0f78364ef..b50189ee5 100644
+--- a/kernel/Kconfig.hz
++++ b/kernel/Kconfig.hz
+@@ -79,5 +79,21 @@ config HZ
+ 	default 750 if HZ_750
+ 	default 1000 if HZ_1000
+
++config MIN_BASE_SLICE_NS
++	int "Default value for min_base_slice_ns"
++	default 2000000
++	help
++	  The BORE Scheduler automatically calculates the optimal base
++	  slice for the configured HZ using the following equation:
++
++	  base_slice_ns = max(min_base_slice_ns, 1000000000/HZ)
++
++	  This option sets the default lower bound limit of the base slice
++	  to prevent the loss of task throughput due to overscheduling.
++
++	  Setting this value too high can cause the system to boot with
++	  an unnecessarily large base slice, resulting in high scheduling
++	  latency and poor system responsiveness.
++
+ config SCHED_HRTICK
+ 	def_bool HIGH_RES_TIMERS
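For context, a minimal user-space sketch of the equation quoted in the help text above (the HZ and floor values are illustrative, not taken from any particular kernel config):

#include <stdint.h>
#include <stdio.h>

/* base_slice_ns = max(min_base_slice_ns, 1000000000 / HZ),
 * per the CONFIG_MIN_BASE_SLICE_NS help text. */
static uint64_t base_slice_ns(uint64_t min_base_slice_ns, unsigned int hz)
{
	uint64_t tick_ns = 1000000000ULL / hz;
	return tick_ns > min_base_slice_ns ? tick_ns : min_base_slice_ns;
}

int main(void)
{
	/* With HZ=1000 a tick is 1 ms, so the 2 ms default floor wins;
	 * with HZ=250 the 4 ms tick exceeds the floor and is used as-is. */
	printf("HZ=1000: %llu ns\n", (unsigned long long)base_slice_ns(2000000, 1000));
	printf("HZ=250:  %llu ns\n", (unsigned long long)base_slice_ns(2000000, 250));
	return 0;
}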
 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 6161dd192..bff12852b 100644
+index 6161dd192..67850fee3 100644
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
 @@ -4547,6 +4547,138 @@ int wake_up_state(struct task_struct *p, unsigned int state)

@@ -232,7 +259,7 @@ index 6161dd192..bff12852b 100644
 +#ifdef CONFIG_SCHED_BORE
 +	sched_init_bore();
-+	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.2.2 by Masahito Suzuki");
++	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.2.4 by Masahito Suzuki");
 +#endif // CONFIG_SCHED_BORE
 +
 	wait_bit_init();
@@ -346,7 +373,7 @@ index c057ef46c..3cab39e34 100644
 	P(se.avg.runnable_sum);
 	P(se.avg.util_sum);
 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index 00fbaec60..b8ea63be3 100644
+index 00fbaec60..902948476 100644
 --- a/kernel/sched/fair.c
 +++ b/kernel/sched/fair.c
 @@ -19,6 +19,9 @@

@@ -359,7 +386,7 @@ index 00fbaec60..b8ea63be3 100644
  */
 #include <linux/energy_model.h>
 #include <linux/mmap_lock.h>
-@@ -64,28 +67,126 @@
+@@ -64,28 +67,124 @@
  * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
  * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
  *

@@ -387,7 +414,7 @@ index 00fbaec60..b8ea63be3 100644
 +#ifdef CONFIG_SCHED_BORE
 +unsigned int sysctl_sched_base_slice = 1000000000ULL / HZ;
 +static unsigned int configured_sched_base_slice = 1000000000ULL / HZ;
-+unsigned int sysctl_sched_min_base_slice = 2000000ULL;
++unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS;
 +#else // !CONFIG_SCHED_BORE
  unsigned int sysctl_sched_base_slice = 750000ULL;
  static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
@@ -405,11 +432,9 @@ index 00fbaec60..b8ea63be3 100644
 +u8   __read_mostly sched_burst_smoothness_long = 1;
 +u8   __read_mostly sched_burst_smoothness_short = 0;
 +u8   __read_mostly sched_burst_fork_atavistic = 2;
-+u8   __read_mostly sched_burst_wakeup_boost = 0;
 +u8   __read_mostly sched_burst_penalty_offset = 22;
 +uint __read_mostly sched_burst_penalty_scale = 1280;
 +uint __read_mostly sched_burst_cache_lifetime = 60000000;
-+static int __maybe_unused seven = 7;
 +static int __maybe_unused sixty_four = 64;
 +static int __maybe_unused maxval_12_bits = 4095;
 +
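The offset, scale and smoothness knobs above feed BORE's burst-penalty math. The following is a rough user-space paraphrase of the algorithm as published in the BORE sources; it is a sketch, not code from this diff, and the exact shift widths and the cap applied by the kernel are assumptions:

#include <stdint.h>

/* floor(log2(v)) + 1 with an 8-bit fractional part (Q8). */
static uint32_t log2plus1_u64_q8(uint64_t v)
{
	uint32_t msb = 0;
	for (uint64_t t = v; t; t >>= 1)
		msb++;                                  /* behaves like fls64() */
	int32_t excess = (int32_t)msb - 9;
	uint8_t frac = (excess >= 0) ? (uint8_t)(v >> excess)
	                             : (uint8_t)(v << -excess);
	return ((uint32_t)msb << 8) | frac;
}

/* Burst time below roughly 2^offset ns is tolerated; beyond that the
 * penalty grows with log2(burst_time), scaled by penalty_scale.
 * The kernel additionally clamps the result to a maximum penalty. */
static uint32_t burst_penalty(uint64_t burst_time_ns, uint8_t offset, uint32_t scale)
{
	uint32_t greed = log2plus1_u64_q8(burst_time_ns);
	uint32_t tolerance = (uint32_t)offset << 8;
	uint32_t penalty = (greed > tolerance) ? greed - tolerance : 0;
	return (uint32_t)(((uint64_t)penalty * scale) >> 16);
}

/* Smoothing: with smoothness_long=1 and smoothness_short=0 (the defaults
 * above) the score rises by half of any increase but falls immediately. */
static uint32_t binary_smooth(uint32_t target, uint32_t old,
                              uint8_t smooth_long, uint8_t smooth_short)
{
	return (target >= old) ? old + ((target - old) >> smooth_long)
	                       : old - ((old - target) >> smooth_short);
}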
@@ -497,7 +522,7 @@ index 00fbaec60..b8ea63be3 100644

 static int __init setup_sched_thermal_decay_shift(char *str)
 {
-@@ -130,12 +231,8 @@ int __weak arch_asym_cpu_priority(int cpu)
+@@ -130,12 +229,8 @@ int __weak arch_asym_cpu_priority(int cpu)
  *
  * (default: 5 msec, units: microseconds)
  */

@@ -510,7 +535,7 @@ index 00fbaec60..b8ea63be3 100644

 #ifdef CONFIG_NUMA_BALANCING
 /* Restrict the NUMA promotion throughput (MB/s) for each target node. */
-@@ -144,6 +241,78 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
+@@ -144,6 +239,69 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;

 #ifdef CONFIG_SYSCTL
 static struct ctl_table sched_fair_sysctls[] = {
@@ -552,15 +577,6 @@ index 00fbaec60..b8ea63be3 100644
 +		.extra2 = SYSCTL_THREE,
 +	},
-+	{
-+		.procname = "sched_burst_wakeup_boost",
-+		.data = &sched_burst_wakeup_boost,
-+		.maxlen = sizeof(u8),
-+		.mode = 0644,
-+		.proc_handler = proc_dou8vec_minmax,
-+		.extra1 = SYSCTL_ZERO,
-+		.extra2 = &seven,
-+	},
 +	{
 +		.procname = "sched_burst_penalty_offset",
 +		.data = &sched_burst_penalty_offset,
 +		.maxlen = sizeof(u8),
@@ -589,7 +605,7 @@ index 00fbaec60..b8ea63be3 100644
 #ifdef CONFIG_CFS_BANDWIDTH
 	{
 		.procname = "sched_cfs_bandwidth_slice_us",
-@@ -201,6 +370,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
+@@ -201,6 +359,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
  *
  * This idea comes from the SD scheduler of Con Kolivas:
  */

@@ -603,15 +619,15 @@ index 00fbaec60..b8ea63be3 100644
 static unsigned int get_update_sysctl_factor(void)
 {
 	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
-@@ -231,6 +407,7 @@ static void update_sysctl(void)
+@@ -231,6 +396,7 @@ static void update_sysctl(void)
 	SET_SYSCTL(sched_base_slice);
 #undef SET_SYSCTL
 }
 +#endif // CONFIG_SCHED_BORE

 void __init sched_init_granularity(void)
 {
-@@ -708,6 +885,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
+@@ -708,6 +874,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)

 	vlag = avruntime - se->vruntime;
 	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);

@@ -621,7 +637,7 @@ index 00fbaec60..b8ea63be3 100644

 	return clamp(vlag, -limit, limit);
 }
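For readers skimming the context above: entity_lag() caps a task's accumulated lag at two slices of virtual time. A simplified stand-alone sketch follows; the weight scaling done by calc_delta_fair() is omitted and the helper name is mine, not the kernel's:

#include <stdint.h>

/* Lag is the distance of a task's vruntime from the queue average,
 * clamped to +/- two slices (at least one tick), so no task can bank
 * unbounded credit or debt across sleep/wake cycles. */
static int64_t clamped_lag(uint64_t avruntime, uint64_t vruntime,
                           uint64_t slice_ns, uint64_t tick_ns)
{
	int64_t vlag = (int64_t)(avruntime - vruntime);
	uint64_t span = (2 * slice_ns > tick_ns) ? 2 * slice_ns : tick_ns;
	int64_t limit = (int64_t)span;

	if (vlag > limit)
		vlag = limit;
	if (vlag < -limit)
		vlag = -limit;
	return vlag;
}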
-@@ -868,6 +1048,39 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
+@@ -868,6 +1037,39 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
 	return __node_2_se(left);
 }

@@ -661,7 +677,7 @@ index 00fbaec60..b8ea63be3 100644
 /*
  * Earliest Eligible Virtual Deadline First
  *
-@@ -887,28 +1100,27 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
+@@ -887,28 +1089,27 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
  *
  * Which allows tree pruning through eligibility.
  */

@@ -697,23 +713,23 @@ index 00fbaec60..b8ea63be3 100644
 	return curr;

 	/* Pick the leftmost entity if it's eligible */
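The "tree pruning through eligibility" that the comment refers to rests on one inequality. A hedged sketch of the eligibility test, paraphrasing the upstream vruntime_eligible() idea (variable names here are mine):

#include <stdint.h>
#include <stdbool.h>

/* EEVDF eligibility: an entity may be picked only if its vruntime is
 * not ahead of the load-weighted average vruntime of the queue. The
 * kernel keeps avg_sum = sum of (vruntime_i - min_vruntime) * weight_i,
 * so "vruntime <= average" becomes a multiply instead of a divide:
 *   (vruntime - min_vruntime) * total_weight <= avg_sum. */
static bool entity_eligible(int64_t avg_sum, uint64_t total_weight,
                            uint64_t vruntime, uint64_t min_vruntime)
{
	int64_t key = (int64_t)(vruntime - min_vruntime);
	return key * (int64_t)total_weight <= avg_sum;
}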
-@@ -967,6 +1179,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+@@ -967,6 +1168,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  * Scheduling class statistics methods:
  */
 #ifdef CONFIG_SMP
 +#if !defined(CONFIG_SCHED_BORE)
 int sched_update_scaling(void)
 {
 	unsigned int factor = get_update_sysctl_factor();
-@@ -978,6 +1191,7 @@ int sched_update_scaling(void)
+@@ -978,6 +1180,7 @@ int sched_update_scaling(void)

 	return 0;
 }
 +#endif // CONFIG_SCHED_BORE
 #endif
 #endif
-@@ -1178,7 +1392,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
+@@ -1178,7 +1381,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (unlikely(delta_exec <= 0))
 		return;

@@ -727,7 +743,7 @@ index 00fbaec60..b8ea63be3 100644
 	update_deadline(cfs_rq, curr);
 	update_min_vruntime(cfs_rq);

-@@ -5203,6 +5423,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -5203,6 +5412,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  *
  * EEVDF: placement strategy #1 / #2
  */

@@ -737,32 +753,17 @@ index 00fbaec60..b8ea63be3 100644
 	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
 		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
-@@ -5278,8 +5501,22 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+@@ -5278,7 +5490,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * on average, halfway through their slice, as such start tasks
 	 * off with half a slice to ease into the competition.
 	 */
 -	if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
--		vslice /= 2;
-+	bool half_flag;
-+	if (flags & ENQUEUE_INITIAL) {
-+		if (!(half_flag = sched_feat(PLACE_DEADLINE_INITIAL)))
-+			goto no_boost;
-+	}
-+	else if (flags & ENQUEUE_WAKEUP)
-+		half_flag = sched_burst_wakeup_boost & 0x4;
-+	else
-+		goto no_boost;
-+
-+	u64 orig_vslice = vslice;
-+	vslice >>= half_flag;
-+	u32 lag_flag = sched_burst_wakeup_boost & 0x3;
-+	if (lag_flag && (orig_vslice > se->burst_time))
-+		vslice -= (orig_vslice - se->burst_time) >> (lag_flag - 1);
-+no_boost:
++	if ((sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL)) ||
++	    (sched_feat(PLACE_DEADLINE_WAKEUP) && (flags & ENQUEUE_WAKEUP)))
+ 		vslice /= 2;

 	/*
 	 * EEVDF: vd_i = ve_i + r_i/w_i
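The net effect of this hunk: BORE 5.2.4 drops the 5.2.2 wakeup-boost bitmask (half_flag/lag_flag) in favor of a plain feature test, halving the virtual slice at wakeup exactly as at fork. A toy user-space model of the resulting deadline placement (constants and names here are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

enum { ENQUEUE_INITIAL = 1, ENQUEUE_WAKEUP = 2 };

/* EEVDF places the virtual deadline at vruntime + vslice; halving
 * vslice on fork (and, with PLACE_DEADLINE_WAKEUP, on wakeup) pulls
 * the deadline earlier so the task competes sooner. */
static uint64_t place_deadline(uint64_t vruntime, uint64_t vslice,
                               int flags, int feat_initial, int feat_wakeup)
{
	if ((feat_initial && (flags & ENQUEUE_INITIAL)) ||
	    (feat_wakeup && (flags & ENQUEUE_WAKEUP)))
		vslice /= 2;   /* ease into the competition with half a slice */
	return vruntime + vslice;
}

int main(void)
{
	printf("fork:   vd = %llu\n", (unsigned long long)place_deadline(1000, 600, ENQUEUE_INITIAL, 1, 1));
	printf("wakeup: vd = %llu\n", (unsigned long long)place_deadline(1000, 600, ENQUEUE_WAKEUP, 1, 1));
	printf("plain:  vd = %llu\n", (unsigned long long)place_deadline(1000, 600, 0, 1, 1));
	return 0;
}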
-@@ -5492,7 +5729,7 @@ pick_next_entity(struct cfs_rq *cfs_rq)
+@@ -5492,7 +5705,7 @@ pick_next_entity(struct cfs_rq *cfs_rq)
 	    cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
 		return cfs_rq->next;

@@ -771,7 +772,7 @@ index 00fbaec60..b8ea63be3 100644
 }

 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
-@@ -6860,6 +7097,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+@@ -6860,6 +7073,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	bool was_sched_idle = sched_idle_rq(rq);

 	util_est_dequeue(&rq->cfs, p);

@@ -786,7 +787,7 @@ index 00fbaec60..b8ea63be3 100644

 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
-@@ -8425,10 +8670,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
+@@ -8425,10 +8646,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 	cfs_rq = cfs_rq_of(se);
 	update_curr(cfs_rq);

@@ -798,7 +799,7 @@ index 00fbaec60..b8ea63be3 100644
 		goto preempt;

 	return;
-@@ -8646,16 +8888,25 @@ static void yield_task_fair(struct rq *rq)
+@@ -8646,16 +8864,25 @@ static void yield_task_fair(struct rq *rq)
 	/*
 	 * Are we the only task in the tree?
 	 */

@@ -824,7 +825,7 @@ index 00fbaec60..b8ea63be3 100644
 	/*
 	 * Tell update_rq_clock() that we've just updated,
 	 * so we don't do microscopic update in schedule()
-@@ -12719,6 +12970,9 @@ static void task_fork_fair(struct task_struct *p)
+@@ -12719,6 +12946,9 @@ static void task_fork_fair(struct task_struct *p)
 	curr = cfs_rq->curr;
 	if (curr)
 		update_curr(cfs_rq);

@@ -835,10 +836,10 @@ index 00fbaec60..b8ea63be3 100644
 	rq_unlock(rq, &rf);
 }
 diff --git a/kernel/sched/features.h b/kernel/sched/features.h
-index 143f55df8..bfeb9f653 100644
+index 143f55df8..9ad25e4e7 100644
 --- a/kernel/sched/features.h
 +++ b/kernel/sched/features.h
-@@ -5,8 +5,26 @@
+@@ -5,8 +5,34 @@
  * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
  */
 SCHED_FEAT(PLACE_LAG, true)

@@ -848,6 +849,14 @@ index 143f55df8..bfeb9f653 100644
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
 -SCHED_FEAT(RUN_TO_PARITY, true)
++/*
++ * Give waken tasks half a slice to ease into the competition.
++ */
++#ifdef CONFIG_SCHED_BORE
++SCHED_FEAT(PLACE_DEADLINE_WAKEUP, true)
++#else // !CONFIG_SCHED_BORE
++SCHED_FEAT(PLACE_DEADLINE_WAKEUP, false)
++#endif // CONFIG_SCHED_BORE
 +/*
 + * Inhibit (wakeup) preemption until the current task has exhausted its slice.
 + */
 +#ifdef CONFIG_SCHED_BORE
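To close, a compact sketch of the SCHED_FEAT idiom these lines feed into, paraphrased from kernel/sched/sched.h and simplified (the CONFIG_SCHED_DEBUG runtime-toggle path is omitted). It shows why wrapping a SCHED_FEAT line in #ifdef CONFIG_SCHED_BORE is enough to flip the default:

/* Each SCHED_FEAT(name, enabled) line in features.h is expanded twice:
 * once into an enum of bit indices, once into a default bitmask. */
#define SCHED_FEAT(name, enabled) __SCHED_FEAT_##name,
enum {
#include "features.h"
	__SCHED_FEAT_NR,
};
#undef SCHED_FEAT

#define SCHED_FEAT(name, enabled) \
	(1UL << __SCHED_FEAT_##name) * enabled |
static const unsigned long sysctl_sched_features =
#include "features.h"
	0;
#undef SCHED_FEAT

/* sched_feat(PLACE_DEADLINE_WAKEUP) then reduces to a constant test: */
#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))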