Skip to content

Commit

Permalink
watchdog: allow nmi watchdog to use raw perf event
Browse files Browse the repository at this point in the history
The NMI watchdog permanently consumes one hardware counter per CPU on the
system.  For systems that use many hardware counters, this causes more
aggressive time multiplexing of perf events.

OTOH, some CPUs (mostly Intel) support the "ref-cycles" event, which is rarely
used.  Add the kernel cmdline arg nmi_watchdog=rNNN to configure the watchdog
to use a raw event.  For example, on Intel CPUs, we can use "r300" to
configure the watchdog to use the ref-cycles event.

If the raw event does not work, fall back to using "cycles".

[[email protected]: fix kerneldoc]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Song Liu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: "Matthew Wilcox (Oracle)" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
  • Loading branch information
liu-song-6 authored and akpm00 committed May 8, 2024
1 parent 602ba77 commit 393fb31
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 2 deletions.
5 changes: 3 additions & 2 deletions Documentation/admin-guide/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3773,10 +3773,12 @@
Format: [state][,regs][,debounce][,die]

nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
Format: [panic,][nopanic,][num]
Format: [panic,][nopanic,][rNNN,][num]
Valid num: 0 or 1
0 - turn hardlockup detector in nmi_watchdog off
1 - turn hardlockup detector in nmi_watchdog on
rNNN - configure the watchdog with raw perf event 0xNNN

When panic is specified, panic when an NMI watchdog
timeout occurs (or 'nopanic' to not panic on an NMI
watchdog, if CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is set)
Expand Down Expand Up @@ -7464,4 +7466,3 @@
memory, and other data can't be written using
xmon commands.
off xmon is disabled.

2 changes: 2 additions & 0 deletions include/linux/nmi.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
extern void hardlockup_detector_perf_stop(void);
extern void hardlockup_detector_perf_restart(void);
extern void hardlockup_detector_perf_cleanup(void);
extern void hardlockup_config_perf_event(const char *str);
#else
static inline void hardlockup_detector_perf_stop(void) { }
static inline void hardlockup_detector_perf_restart(void) { }
static inline void hardlockup_detector_perf_cleanup(void) { }
static inline void hardlockup_config_perf_event(const char *str) { }
#endif

void watchdog_hardlockup_stop(void);
Expand Down
2 changes: 2 additions & 0 deletions kernel/watchdog.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ static int __init hardlockup_panic_setup(char *str)
watchdog_hardlockup_user_enabled = 0;
else if (!strncmp(str, "1", 1))
watchdog_hardlockup_user_enabled = 1;
else if (!strncmp(str, "r", 1))
hardlockup_config_perf_event(str + 1);
while (*(str++)) {
if (*str == ',') {
str++;
Expand Down
46 changes: 46 additions & 0 deletions kernel/watchdog_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ static struct perf_event_attr wd_hw_attr = {
.disabled = 1,
};

/*
 * Fallback event attributes: a plain hardware CPU-cycles counter.
 * hardlockup_detector_event_create() switches to these attributes when its
 * first perf_event_create_kernel_counter() attempt fails, and fills in
 * sample_period at that point.
 */
static struct perf_event_attr fallback_wd_hw_attr = {
	.size		= sizeof(struct perf_event_attr),
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.disabled	= 1,
	.pinned		= 1,
};

/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
Expand Down Expand Up @@ -122,6 +130,13 @@ static int hardlockup_detector_event_create(void)
/* Try to register using hardware perf events */
evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
watchdog_overflow_callback, NULL);
if (IS_ERR(evt)) {
wd_attr = &fallback_wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
watchdog_overflow_callback, NULL);
}

if (IS_ERR(evt)) {
pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
PTR_ERR(evt));
Expand Down Expand Up @@ -259,3 +274,34 @@ int __init watchdog_hardlockup_probe(void)
}
return ret;
}

/**
 * hardlockup_config_perf_event - Overwrite config of wd_hw_attr.
 *
 * @str: hexadecimal number which identifies the raw perf event to use.
 *       It may be followed by ',' and further options; everything from
 *       the first comma on is ignored here.
 *
 * On success wd_hw_attr is switched to PERF_TYPE_RAW with the parsed
 * value as its config.  If the number is empty, malformed, or does not
 * fit in the local buffer, wd_hw_attr is left untouched.
 */
void __init hardlockup_config_perf_event(const char *str)
{
	u64 config;
	char buf[24];
	const char *comma = strchr(str, ',');

	if (!comma) {
		/* The event number is the whole string: parse it in place. */
		if (kstrtoull(str, 16, &config))
			return;
	} else {
		size_t len = comma - str;

		if (len >= sizeof(buf))
			return;

		/*
		 * Copy only the token in front of the comma.  Copying the
		 * whole remaining option string would reject a valid event
		 * number merely because the options that follow it do not
		 * fit in buf.
		 */
		memcpy(buf, str, len);
		buf[len] = '\0';
		if (kstrtoull(buf, 16, &config))
			return;
	}

	wd_hw_attr.type = PERF_TYPE_RAW;
	wd_hw_attr.config = config;
}

0 comments on commit 393fb31

Please sign in to comment.