Kernel: Fix some issues related to fixes and block conditions
Fix some problems with join blocks where the joining thread's block
condition was added twice, which led to a crash when trying to
unblock that condition a second time.
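
To illustrate the first fix, a minimal sketch of the invariant being restored, with std:: containers standing in for the kernel's own types and all names hypothetical: a blocker must be registered with a block condition at most once, otherwise a later unblock fires twice for a single wait.

#include <algorithm>
#include <cassert>
#include <vector>

struct Blocker;

class BlockConditionSketch {
public:
    void add_blocker(Blocker& blocker)
    {
        // Registering the same blocker twice would make a later
        // unblock fire twice for one wait and corrupt its state.
        assert(std::find(m_blockers.begin(), m_blockers.end(), &blocker) == m_blockers.end());
        m_blockers.push_back(&blocker);
    }

    void remove_blocker(Blocker& blocker)
    {
        m_blockers.erase(std::remove(m_blockers.begin(), m_blockers.end(), &blocker), m_blockers.end());
    }

private:
    std::vector<Blocker*> m_blockers;
};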

Deferred block condition evaluation by File objects was also not
properly keeping the File object alive, which led to random
crashes and corruption problems.
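
The lifetime fix follows a standard weak-pointer pattern. A sketch using std::weak_ptr in place of the kernel's WeakPtr (illustrative only; assumes the object is owned by a shared_ptr): the deferred callback captures a weak reference and promotes it to a strong one before touching the object, so a File deleted in the meantime is skipped rather than dereferenced.

#include <functional>
#include <memory>
#include <vector>

struct FileSketch : std::enable_shared_from_this<FileSketch> {
    void evaluate_block_conditions(std::vector<std::function<void()>>& deferred_queue)
    {
        // Capture only a weak reference; this object may be deleted
        // before the deferred call actually runs.
        deferred_queue.push_back([self = weak_from_this()] {
            if (auto file = self.lock()) // promote to a strong reference
                file->do_evaluate_block_conditions();
        });
    }

    void do_evaluate_block_conditions() { /* unblock waiting threads */ }
};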

Other problems were caused by the fact that the Queued state didn't
handle signals/interruptions consistently. To solve these issues, we
remove this state entirely, along with Thread::wait_on, and change
WaitQueue into a BlockCondition instead.

Also, deliver signals even if there isn't going to be a context switch
to another thread.

Fixes SerenityOS#4336 and SerenityOS#4330
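
The resulting call-site change is mechanical, as the diffs below show: waiting moves from the thread onto the queue itself, with a null timeout meaning wait forever.

// Before this commit:
Thread::current()->wait_on(m_irq_queue, "SB16");

// After this commit:
m_irq_queue.wait_on(nullptr, "SB16");
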
tomuta authored and awesomekling committed Dec 12, 2020
1 parent 0918d8b commit da5cc34
Showing 22 changed files with 475 additions and 435 deletions.
137 changes: 102 additions & 35 deletions Kernel/Arch/i386/CPU.cpp
@@ -45,6 +45,7 @@
#include <Kernel/Thread.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/VM/ProcessPagingScope.h>
#include <LibC/mallocdefs.h>

//#define PAGE_FAULT_DEBUG
@@ -1282,54 +1283,120 @@ const DescriptorTablePointer& Processor::get_gdtr()
return m_gdtr;
}

bool Processor::get_context_frame_ptr(Thread& thread, u32& frame_ptr, u32& eip, bool from_other_processor)
Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
{
bool ret = true;
ScopedCritical critical;
FlatPtr frame_ptr = 0, eip = 0;
Vector<FlatPtr, 32> stack_trace;

auto walk_stack = [&](FlatPtr stack_ptr)
{
stack_trace.append(eip);
size_t count = 1;
while (stack_ptr) {
FlatPtr retaddr;

count++;
if (max_frames != 0 && count > max_frames)
break;

if (is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
if (!copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]) || !retaddr)
break;
stack_trace.append(retaddr);
if (!copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr))
break;
} else {
void* fault_at;
if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
break;
stack_trace.append(retaddr);
if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
break;
}
}
};
auto capture_current_thread = [&]()
{
frame_ptr = (FlatPtr)__builtin_frame_address(0);
eip = (FlatPtr)__builtin_return_address(0);

walk_stack(frame_ptr);
};

// Since the thread may be running on another processor, there
// is a chance a context switch may happen while we're trying
// to get it. It also won't be entirely accurate and merely
// reflect the status at the last context switch.
ScopedSpinLock lock(g_scheduler_lock);
auto& proc = Processor::current();
if (&thread == proc.current_thread()) {
ASSERT(thread.state() == Thread::Running);
asm volatile("movl %%ebp, %%eax"
: "=g"(frame_ptr));
// Leave the scheduler lock. If we trigger page faults we may
// need to be preempted. Since this is our own thread it won't
// cause any problems as the stack won't change below this frame.
lock.unlock();
capture_current_thread();
} else if (thread.is_active()) {
ASSERT(thread.cpu() != proc.id());
// If this is the case, the thread is currently running
// on another processor. We can't trust the kernel stack as
// it may be changing at any time. We probably need to send
// an IPI to that processor, have it walk the stack and wait
// until it returns the data back to us
smp_unicast(thread.cpu(),
[&]() {
dbg() << "CPU[" << Processor::current().id() << "] getting stack for cpu #" << proc.id();
ProcessPagingScope paging_scope(thread.process());
auto& target_proc = Processor::current();
ASSERT(&target_proc != &proc);
ASSERT(&thread == target_proc.current_thread());
// NOTE: Because the other processor is still holding the
// scheduler lock while waiting for this callback to finish,
// the current thread on the target processor cannot change

// TODO: What to do about page faults here? We might deadlock
// because the other processor is still holding the
// scheduler lock...
capture_current_thread();
}, false);
} else {
// If this triggered from another processor, we should never
// hit this code path because the other processor is still holding
// the scheduler lock, which should prevent us from switching
// contexts
ASSERT(!from_other_processor);

// Since the thread may be running on another processor, there
// is a chance a context switch may happen while we're trying
// to get it. It also won't be entirely accurate and merely
// reflect the status at the last context switch.
ScopedSpinLock lock(g_scheduler_lock);
if (thread.state() == Thread::Running) {
ASSERT(thread.cpu() != proc.id());
// If this is the case, the thread is currently running
// on another processor. We can't trust the kernel stack as
// it may be changing at any time. We probably need to send
// an IPI to that processor, have it walk the stack and wait
// until it returns the data back to us
smp_unicast(thread.cpu(),
[&]() {
dbg() << "CPU[" << Processor::current().id() << "] getting stack for cpu #" << proc.id();
// NOTE: Because we are holding the scheduler lock while
// waiting for this callback to finish, the current thread
// on the target processor cannot change
ret = get_context_frame_ptr(thread, frame_ptr, eip, true);
}, false);
} else {
switch (thread.state()) {
case Thread::Running:
ASSERT_NOT_REACHED(); // should have been handled above
case Thread::Runnable:
case Thread::Stopped:
case Thread::Blocked:
case Thread::Dying:
case Thread::Dead: {
// We need to retrieve ebp from what was last pushed to the kernel
// stack. Before switching out of that thread, switch_context
// pushed the callee-saved registers, and the last of them happens
// to be ebp.
ProcessPagingScope paging_scope(thread.process());
auto& tss = thread.tss();
u32* stack_top = reinterpret_cast<u32*>(tss.esp);
frame_ptr = stack_top[0];
if (is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
if (!copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]))
frame_ptr = 0;
} else {
void* fault_at;
if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
frame_ptr = 0;
}
eip = tss.eip;
// TODO: We need to leave the scheduler lock here, but we also
// need to prevent the target thread from being run while
// we walk the stack
lock.unlock();
walk_stack(frame_ptr);
break;
}
default:
dbg() << "Cannot capture stack trace for thread " << thread << " in state " << thread.state_string();
break;
}
}
return true;
return stack_trace;
}

extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
@@ -1435,7 +1502,7 @@ extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapF

ASSERT(to_thread == Thread::current());

Scheduler::enter_current(*from_thread);
Scheduler::enter_current(*from_thread, true);

// Since we got here and don't have Scheduler::context_switch in the
// call stack (because this is the first time we switched into this
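
For context, the new capture_stack_trace() walks the chain of saved frame pointers: on x86, each frame begins with the caller's saved EBP followed by the return address. A self-contained user-space sketch of that walk, assuming code built with frame pointers (the raw memory reads stand in for the kernel's copy_from_user/safe_memcpy probes):

#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uintptr_t> walk_frames(uintptr_t frame_ptr, size_t max_frames)
{
    std::vector<uintptr_t> trace;
    while (frame_ptr && trace.size() < max_frames) {
        auto* frame = reinterpret_cast<uintptr_t*>(frame_ptr);
        uintptr_t retaddr = frame[1]; // return address sits above the saved EBP
        if (!retaddr)
            break;
        trace.push_back(retaddr);
        frame_ptr = frame[0]; // follow the saved EBP to the caller's frame
    }
    return trace;
}

// Usage (from within the thread being traced):
//   auto trace = walk_frames((uintptr_t)__builtin_frame_address(0), 32);
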
2 changes: 1 addition & 1 deletion Kernel/Arch/i386/CPU.h
@@ -1018,7 +1018,7 @@ class Processor {
void switch_context(Thread*& from_thread, Thread*& to_thread);
[[noreturn]] static void assume_context(Thread& thread, u32 flags);
u32 init_context(Thread& thread, bool leave_crit);
static bool get_context_frame_ptr(Thread& thread, u32& frame_ptr, u32& eip, bool = false);
static Vector<FlatPtr> capture_stack_trace(Thread& thread, size_t max_frames = 0);

void set_thread_specific(u8* data, size_t len);
};
2 changes: 1 addition & 1 deletion Kernel/Devices/AsyncDeviceRequest.cpp
@@ -74,7 +74,7 @@ auto AsyncDeviceRequest::wait(timeval* timeout) -> RequestWaitResult
auto request_result = get_request_result();
if (is_completed_result(request_result))
return { request_result, Thread::BlockResult::NotBlocked };
auto wait_result = Thread::current()->wait_on(m_queue, name(), Thread::BlockTimeout(false, timeout));
auto wait_result = m_queue.wait_on(Thread::BlockTimeout(false, timeout), name());
return { get_request_result(), wait_result };
}

2 changes: 1 addition & 1 deletion Kernel/Devices/SB16.cpp
@@ -227,7 +227,7 @@ void SB16::handle_irq(const RegisterState&)

void SB16::wait_for_irq()
{
Thread::current()->wait_on(m_irq_queue, "SB16");
m_irq_queue.wait_on(nullptr, "SB16");
disable_irq();
}

4 changes: 2 additions & 2 deletions Kernel/FileSystem/FIFO.cpp
@@ -71,7 +71,7 @@ NonnullRefPtr<FileDescription> FIFO::open_direction_blocking(FIFO::Direction dir

if (m_writers == 0) {
locker.unlock();
Thread::current()->wait_on(m_write_open_queue, "FIFO");
m_write_open_queue.wait_on(nullptr, "FIFO");
locker.lock();
}
}
@@ -81,7 +81,7 @@ NonnullRefPtr<FileDescription> FIFO::open_direction_blocking(FIFO::Direction dir

if (m_readers == 0) {
locker.unlock();
Thread::current()->wait_on(m_read_open_queue, "FIFO");
m_read_open_queue.wait_on(nullptr, "FIFO");
locker.lock();
}
}
17 changes: 12 additions & 5 deletions Kernel/FileSystem/File.h
@@ -142,17 +142,24 @@ class File
{
if (Processor::current().in_irq()) {
// If called from an IRQ handler we need to delay evaluation
// and unblocking of waiting threads
Processor::deferred_call_queue([this]() {
ASSERT(!Processor::current().in_irq());
evaluate_block_conditions();
// and unblocking of waiting threads. Note that this File
// instance may be deleted before the deferred call is executed!
Processor::deferred_call_queue([self = make_weak_ptr()]() {
if (auto file = self.strong_ref())
file->do_evaluate_block_conditions();
});
} else {
block_condition().unblock();
do_evaluate_block_conditions();
}
}

private:
ALWAYS_INLINE void do_evaluate_block_conditions()
{
ASSERT(!Processor::current().in_irq());
block_condition().unblock();
}

FileBlockCondition m_block_condition;
};

12 changes: 7 additions & 5 deletions Kernel/Lock.cpp
@@ -78,7 +78,8 @@ void Lock::lock(Mode mode)
m_lock.store(false, AK::memory_order_release);
return;
}
} while (current_thread->wait_on(m_queue, m_name, nullptr, &m_lock, m_holder) == Thread::BlockResult::NotBlocked);
m_lock.store(false, AK::memory_order_release);
} while (m_queue.wait_on(nullptr, m_name) == Thread::BlockResult::NotBlocked);
} else {
// I don't know *who* is using "m_lock", so just yield.
Scheduler::yield_from_critical();
@@ -114,7 +115,8 @@ void Lock::unlock()
return;
}
m_mode = Mode::Unlocked;
m_queue.wake_one(&m_lock);
m_lock.store(false, AK::memory_order_release);
m_queue.wake_one();
return;
}
// I don't know *who* is using "m_lock", so just yield.
@@ -142,7 +144,8 @@ bool Lock::force_unlock_if_locked()
m_holder = nullptr;
m_mode = Mode::Unlocked;
m_times_locked = 0;
m_queue.wake_one(&m_lock);
m_lock.store(false, AK::memory_order_release);
m_queue.wake_one();
break;
}
// I don't know *who* is using "m_lock", so just yield.
@@ -154,8 +157,7 @@
void Lock::clear_waiters()
{
ASSERT(m_mode != Mode::Shared);
ScopedCritical critical;
m_queue.clear();
m_queue.wake_all();
}

}
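
Note how the Lock changes above release m_lock before sleeping or waking instead of handing it to the wait queue. A user-space analogue of that retry shape, using standard primitives (illustrative, not the kernel implementation): the state is re-checked on every wakeup, so a wakeup that races with the release is never lost.

#include <condition_variable>
#include <mutex>

class SimpleLock {
public:
    void lock()
    {
        std::unique_lock guard(m_state_lock);
        // Re-check the predicate on every wakeup, like the do/while
        // retry around m_queue.wait_on() in Lock::lock() above.
        m_queue.wait(guard, [this] { return !m_held; });
        m_held = true;
    }

    void unlock()
    {
        {
            std::lock_guard guard(m_state_lock);
            m_held = false; // release the state first...
        }
        m_queue.notify_one(); // ...then wake one waiter, like wake_one()
    }

private:
    std::mutex m_state_lock;
    std::condition_variable m_queue;
    bool m_held { false };
};
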
2 changes: 1 addition & 1 deletion Kernel/Net/E1000NetworkAdapter.cpp
@@ -419,7 +419,7 @@ void E1000NetworkAdapter::send_raw(ReadonlyBytes payload)
sti();
break;
}
Thread::current()->wait_on(m_wait_queue, "E1000NetworkAdapter");
m_wait_queue.wait_on(nullptr, "E1000NetworkAdapter");
}
#ifdef E1000_DEBUG
klog() << "E1000: Sent packet, status is now " << String::format("%b", descriptor.status) << "!";
2 changes: 1 addition & 1 deletion Kernel/Net/NetworkTask.cpp
@@ -114,7 +114,7 @@ void NetworkTask_main(void*)
for (;;) {
size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
if (!packet_size) {
Thread::current()->wait_on(packet_wait_queue, "NetworkTask");
packet_wait_queue.wait_on(nullptr, "NetworkTask");
continue;
}
if (packet_size < sizeof(EthernetFrameHeader)) {
1 change: 1 addition & 0 deletions Kernel/Process.cpp
@@ -750,6 +750,7 @@ void Process::terminate_due_to_signal(u8 signal)
{
ASSERT_INTERRUPTS_DISABLED();
ASSERT(signal < 32);
ASSERT(Process::current() == this);
dbg() << "Terminating " << *this << " due to signal " << signal;
m_termination_status = 0;
m_termination_signal = signal;
2 changes: 1 addition & 1 deletion Kernel/Random.cpp
@@ -69,7 +69,7 @@ KernelRng::KernelRng()
void KernelRng::wait_for_entropy()
{
if (!resource().is_ready()) {
Thread::current()->wait_on(m_seed_queue, "KernelRng");
m_seed_queue.wait_on(nullptr, "KernelRng");
}
}

28 changes: 14 additions & 14 deletions Kernel/Scheduler.cpp
@@ -140,9 +140,7 @@ bool Scheduler::pick_next()
#ifdef SCHEDULER_RUNNABLE_DEBUG
dbg() << "Scheduler[" << Processor::current().id() << "]: Non-runnables:";
Scheduler::for_each_nonrunnable([&](Thread& thread) -> IterationDecision {
if (thread.state() == Thread::Queued)
dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip) << " Reason: " << (thread.wait_reason() ? thread.wait_reason() : "none");
else if (thread.state() == Thread::Dying)
if (thread.state() == Thread::Dying)
dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip) << " Finalizable: " << thread.is_finalizable();
else
dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip);
@@ -324,14 +322,6 @@ bool Scheduler::context_switch(Thread* thread)
thread->did_schedule();

auto from_thread = Thread::current();

// Check if we have any signals we should deliver (even if we don't
// end up switching to another thread)
if (from_thread && from_thread->state() == Thread::Running && from_thread->pending_signals_for_state()) {
ScopedSpinLock lock(from_thread->get_lock());
from_thread->dispatch_one_pending_signal();
}

if (from_thread == thread)
return false;

@@ -364,21 +354,31 @@

// NOTE: from_thread at this point reflects the thread we were
// switched from, and thread reflects Thread::current()
enter_current(*from_thread);
enter_current(*from_thread, false);
ASSERT(thread == Thread::current());

return true;
}

void Scheduler::enter_current(Thread& prev_thread)
void Scheduler::enter_current(Thread& prev_thread, bool is_first)
{
ASSERT(g_scheduler_lock.is_locked());
ASSERT(g_scheduler_lock.own_lock());
prev_thread.set_active(false);
if (prev_thread.state() == Thread::Dying) {
// If the thread we switched from is marked as dying, then notify
// the finalizer. Note that as soon as we leave the scheduler lock
// the finalizer may free from_thread!
notify_finalizer();
} else if (!is_first) {
// Check if we have any signals we should deliver (even if we don't
// end up switching to another thread).
auto current_thread = Thread::current();
if (!current_thread->is_in_block()) {
ScopedSpinLock lock(current_thread->get_lock());
if (current_thread->state() == Thread::Running && current_thread->pending_signals_for_state()) {
current_thread->dispatch_one_pending_signal();
}
}
}
}

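The enter_current() change above implements the last point of the commit message: pending signals are now checked on every scheduler entry rather than only when a different thread is picked. A stubbed, self-contained sketch of that decision (all types illustrative):

#include <cstdio>

struct ThreadSketch {
    bool running { true };
    bool in_block { false };
    int pending_signals { 0 };

    void dispatch_one_pending_signal()
    {
        --pending_signals;
        std::puts("dispatched one pending signal");
    }
};

void enter_current_sketch(ThreadSketch& current, bool is_first)
{
    // Mirrors enter_current(prev_thread, is_first): skip the very
    // first switch into a thread, and let a thread sitting in a
    // blocker handle its signals there instead.
    if (is_first || current.in_block)
        return;
    if (current.running && current.pending_signals > 0)
        current.dispatch_one_pending_signal();
}
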
2 changes: 1 addition & 1 deletion Kernel/Scheduler.h
@@ -61,7 +61,7 @@ class Scheduler {
static bool donate_to_and_switch(Thread*, const char* reason);
static bool donate_to(RefPtr<Thread>&, const char* reason);
static bool context_switch(Thread*);
static void enter_current(Thread& prev_thread);
static void enter_current(Thread& prev_thread, bool is_first);
static void leave_on_first_switch(u32 flags);
static void prepare_after_exec();
static void prepare_for_idle_loop();