Skip to content

Commit

Permalink
Kernel: Fix deadlock cancelling timer
Browse files Browse the repository at this point in the history
It's possible that a timer may have been queued to be executed by
the timer irq handler, but if we're in a critical section on the
same processor and are trying to cancel that timer, we would spin
forever waiting for it to be executed.
  • Loading branch information
tomuta authored and awesomekling committed Jul 12, 2021
1 parent 6938be0 commit d9fb93c
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 58 deletions.
124 changes: 69 additions & 55 deletions Kernel/TimerQueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ void TimerQueue::add_timer_locked(NonnullRefPtr<Timer> timer)
{
Time timer_expiration = timer->m_expires;

VERIFY(!timer->is_queued());
timer->clear_cancelled();
timer->clear_callback_finished();

auto& queue = queue_for_timer(*timer);
if (queue.list.is_empty()) {
Expand Down Expand Up @@ -148,75 +149,83 @@ bool TimerQueue::cancel_timer(TimerId id)
};
}

if (!found_timer) {
// The timer may be executing right now, if it is then it should
// be in m_timers_executing. If it is then release the lock
// briefly to allow it to finish by removing itself
// NOTE: This can only happen with multiple processors!
while (true) {
for (auto& timer : m_timers_executing) {
if (timer.m_id == id) {
found_timer = &timer;
break;
}
}
if (found_timer) {
VERIFY(timer_queue);
remove_timer_locked(*timer_queue, *found_timer);
return true;
}

if (found_timer) {
// NOTE: This isn't the most efficient way to wait, but
// it should only happen when multiple processors are used.
// Also, the timers should execute pretty quickly, so it
// should not loop here for very long. But we can't yield.
lock.unlock();
Processor::wait_check();
lock.lock();
found_timer = nullptr;
} else {
// We were not able to cancel the timer, but at this point
// the handler should have completed if it was running!
return false;
}
// The timer may be executing right now, if it is then it should
// be in m_timers_executing. This is the case when the deferred
// call has been queued but not yet executed.
for (auto& timer : m_timers_executing) {
if (timer.m_id == id) {
found_timer = &timer;
break;
}
}

VERIFY(found_timer);
VERIFY(timer_queue);
remove_timer_locked(*timer_queue, *found_timer);
if (!found_timer)
return false;

// Keep a reference while we unlock
NonnullRefPtr<Timer> executing_timer(*found_timer);
lock.unlock();

if (!found_timer->set_cancelled()) {
// We cancelled it even though the deferred call has been queued already.
// We do not unref the timer here because the deferred call is still going
// too need it!
lock.lock();
VERIFY(m_timers_executing.contains(*found_timer));
m_timers_executing.remove(*found_timer);
return true;
}

// At this point the deferred call is queued and is being executed
// on another processor. We need to wait until it's complete!
while (!found_timer->is_callback_finished())
Processor::wait_check();

return true;
}

bool TimerQueue::cancel_timer(Timer& timer)
{
bool did_already_run = timer.set_cancelled();
auto& timer_queue = queue_for_timer(timer);
ScopedSpinLock lock(g_timerqueue_lock);
if (!timer_queue.list.contains(timer)) {
// The timer may be executing right now, if it is then it should
// be in m_timers_executing. If it is then release the lock
// briefly to allow it to finish by removing itself
// NOTE: This can only happen with multiple processors!
while (m_timers_executing.contains(timer)) {
// NOTE: This isn't the most efficient way to wait, but
// it should only happen when multiple processors are used.
// Also, the timers should execute pretty quickly, so it
// should not loop here for very long. But we can't yield.
lock.unlock();
Processor::wait_check();
lock.lock();

if (!did_already_run) {
ScopedSpinLock lock(g_timerqueue_lock);
if (timer_queue.list.contains(timer)) {
// The timer has not fired, remove it
VERIFY(timer.ref_count() > 1);
remove_timer_locked(timer_queue, timer);
return true;
}
// We were not able to cancel the timer, but at this point
// the handler should have completed if it was running!
return false;

// The timer was queued to execute but hasn't had a chance
// to run. In this case, it should still be in m_timers_executing
// and we don't need to spin. It still holds a reference
// that will be dropped when it does get a chance to run,
// but since we called set_cancelled it will only drop its reference
VERIFY(m_timers_executing.contains(timer));
m_timers_executing.remove(timer);
return true;
}

VERIFY(timer.ref_count() > 1);
remove_timer_locked(timer_queue, timer);
// At this point the deferred call is queued and is being executed
// on another processor. We need to wait until it's complete!
while (!timer.is_callback_finished())
Processor::wait_check();

return true;
}

void TimerQueue::remove_timer_locked(Queue& queue, Timer& timer)
{
bool was_next_timer = (queue.list.first() == &timer);
queue.list.remove(timer);
timer.set_queued(false);
auto now = timer.now(false);
if (timer.m_expires > now)
timer.m_remaining = timer.m_expires - now;
Expand All @@ -240,7 +249,6 @@ void TimerQueue::fire()

while (timer && timer->now(true) > timer->m_expires) {
queue.list.remove(*timer);
timer->set_queued(false);

m_timers_executing.append(*timer);

Expand All @@ -249,10 +257,16 @@ void TimerQueue::fire()
lock.unlock();

// Defer executing the timer outside of the irq handler
Processor::current().deferred_call_queue([this, timer]() {
timer->m_callback();
ScopedSpinLock lock(g_timerqueue_lock);
m_timers_executing.remove(*timer);
Processor::deferred_call_queue([this, timer]() {
// Check if we were cancelled in between being triggered
// by the timer irq handler and now. If so, just drop
// our reference and don't execute the callback.
if (!timer->set_cancelled()) {
timer->m_callback();
ScopedSpinLock lock(g_timerqueue_lock);
m_timers_executing.remove(*timer);
}
timer->set_callback_finished();
// Drop the reference we added when queueing the timer
timer->unref();
});
Expand Down
12 changes: 9 additions & 3 deletions Kernel/TimerQueue.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ class Timer : public RefCounted<Timer> {
Time m_expires;
Time m_remaining {};
Function<void()> m_callback;
Atomic<bool, AK::MemoryOrder::memory_order_relaxed> m_queued { false };
Atomic<bool> m_cancelled { false };
Atomic<bool> m_callback_finished { false };

bool operator<(const Timer& rhs) const
{
Expand All @@ -57,10 +58,15 @@ class Timer : public RefCounted<Timer> {
{
return m_id == rhs.m_id;
}
bool is_queued() const { return m_queued; }
void set_queued(bool queued) { m_queued = queued; }
void clear_cancelled() { return m_cancelled.store(false, AK::memory_order_release); }
bool set_cancelled() { return m_cancelled.exchange(true, AK::memory_order_acq_rel); }
bool is_callback_finished() const { return m_callback_finished.load(AK::memory_order_acquire); }
void clear_callback_finished() { m_callback_finished.store(false, AK::memory_order_release); }
void set_callback_finished() { m_callback_finished.store(true, AK::memory_order_release); }
Time now(bool) const;

bool is_queued() const { return m_list_node.is_in_list(); }

public:
IntrusiveListNode<Timer> m_list_node;
using List = IntrusiveList<Timer, RawPtr<Timer>, &Timer::m_list_node>;
Expand Down

0 comments on commit d9fb93c

Please sign in to comment.