Skip to content

Commit

Permalink
Kernel: Move kernel above the 3GB virtual address mark
Browse files Browse the repository at this point in the history
The kernel and its static data structures are no longer identity-mapped
in the bottom 8MB of the address space, but instead move above 3GB.

The first 8MB above 3GB are pseudo-identity-mapped to the bottom 8MB of
the physical address space. But things don't have to stay this way!

Thanks to Jesse who made an earlier attempt at this, it was really easy
to get device drivers working once the page tables were in place! :^)

Fixes SerenityOS#734.
  • Loading branch information
awesomekling committed Jan 17, 2020
1 parent cee597a commit e362b56
Show file tree
Hide file tree
Showing 17 changed files with 325 additions and 125 deletions.
1 change: 0 additions & 1 deletion Kernel/Arch/i386/APIC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ bool init()
g_apic_base = apic_base.as_ptr();

// copy ap init code to P8000
MM.map_for_kernel(VirtualAddress(0x8000), PhysicalAddress(0x8000));
memcpy(reinterpret_cast<u8*>(0x8000), reinterpret_cast<const u8*>(apic_ap_start), apic_ap_start_size);
return true;
}
Expand Down
126 changes: 120 additions & 6 deletions Kernel/Arch/i386/Boot/boot.S
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,20 @@ stack_bottom:
.skip 32768
stack_top:

.section .page_tables
.section .page_tables, "aw", @nobits
.align 4096
page_tables_start:
.skip 4096*9
.global boot_pdpt
boot_pdpt:
.skip 4096
.global boot_pd0
boot_pd0:
.skip 4096
.global boot_pd3
boot_pd3:
.skip 4096
.global boot_pd3_pde1023_pt
boot_pd3_pde1023_pt:
.skip 4096

.section .text

Expand All @@ -47,17 +57,121 @@ page_tables_start:
.extern multiboot_info_ptr
.type multiboot_info_ptr, @object

/*
construct the following (32-bit PAE) page table layout:
pdpt
0: boot_pd0 (0-1GB)
1: n/a (1-2GB)
2: n/a (2-3GB)
3: boot_pd3 (3-4GB)
boot_pd0 : 512 pde's
0: (0-2MB) (id 2MB page)
1: (2-4MB) (id 2MB page)
2: (4-6MB) (id 2MB page)
3: (6-8MB) (id 2MB page)
boot_pd3 : 512 pde's
0: boot_pd3_pde0 (3072-3074MB) (pseudo)
1: boot_pd3_pde1 (3074-3076MB) (pseudo)
2: boot_pd3_pde2 (3076-3078MB) (pseudo)
3: boot_pd3_pde3 (3078-3080MB) (pseudo)
511: boot_pd3_pde1023_pt (4094-4096MB) (for page table mappings; last of the 512 PDEs, byte offset 4088)
*/

start:
cli
cld

/* clear pdpt */
movl $(boot_pdpt - 0xc0000000), %edi
movl $1024, %ecx
xorl %eax, %eax
rep stosl

/* set up pdpt[0] and pdpt[3] */
movl $(boot_pdpt - 0xc0000000), %edi
movl $((boot_pd0 - 0xc0000000) + 1), 0(%edi)
movl $((boot_pd3 - 0xc0000000) + 1), 24(%edi)

/* clear pd0 */
movl $(boot_pd0 - 0xc0000000), %edi
movl $1024, %ecx
xorl %eax, %eax
rep stosl

/* identity map bottom 8MB using 2MB pages (only PDE, no PTE) */
movl $4, %ecx
xorl %eax, %eax
movl $(boot_pd0 - 0xc0000000), %edi
1:
movl %eax, 0(%edi)
/* PS(2MB) + R/W + Present */
orl $0x83, 0(%edi)

addl $8, %edi
addl $(1048576 * 2), %eax
loop 1b

/* clear pd3 */
movl $(boot_pd3 - 0xc0000000), %edi
movl $1024, %ecx
xorl %eax, %eax
rep stosl

/* pseudo-identity map first 8MB above 3GB mark using 2MB pages again */
movl $4, %ecx
xorl %eax, %eax
movl $(boot_pd3 - 0xc0000000), %edi
1:
movl %eax, 0(%edi)
/* PS(2MB) + R/W + Present */
orl $0x83, 0(%edi)

addl $8, %edi
addl $(1048576 * 2), %eax
loop 1b

/* create an empty page table for the top 2MB at the 4GB mark */
movl $(boot_pd3 - 0xc0000000), %edi
movl $(boot_pd3_pde1023_pt - 0xc0000000), 4088(%edi)
orl $0x3, 4088(%edi)
movl $0, 4092(%edi)

/* point CR3 to PDPT */
movl $(boot_pdpt - 0xc0000000), %eax
movl %eax, %cr3

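/* enable PAE (CR4 bit 5); NOTE: 0x60 also sets bit 6 (MCE), not PSE (bit 4) - 2MB pages under PAE do not depend on PSE */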
movl %cr4, %eax
orl $0x60, %eax
movl %eax, %cr4

/* enable PG */
movl %cr0, %eax
orl $0x80000000, %eax
movl %eax, %cr0

/* jmp to an address above the 3GB mark */
push %cs
push $1f
retf
1:

movl %cr3, %eax
movl %eax, %cr3

/* set up initial stack and jump into C++ land */
mov $stack_top, %esp

and $-16, %esp

mov %ebx, multiboot_info_ptr
addl $0xc0000000, %ebx
movl %ebx, multiboot_info_ptr

pushl $page_tables_start
call init
add $4, %esp

Expand Down
19 changes: 17 additions & 2 deletions Kernel/Arch/i386/CPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,20 @@ static void dump(const RegisterDump& regs)
kprintf("eax=%08x ebx=%08x ecx=%08x edx=%08x\n", regs.eax, regs.ebx, regs.ecx, regs.edx);
kprintf("ebp=%08x esp=%08x esi=%08x edi=%08x\n", regs.ebp, esp, regs.esi, regs.edi);

u32 cr0;
asm("movl %%cr0, %%eax"
: "=a"(cr0));
u32 cr2;
asm("movl %%cr2, %%eax"
: "=a"(cr2));
u32 cr3;
asm("movl %%cr3, %%eax"
: "=a"(cr3));
u32 cr4;
asm("movl %%cr4, %%eax"
: "=a"(cr4));
kprintf("cr0=%08x cr2=%08x cr3=%08x cr4=%08x\n", cr0, cr2, cr3, cr4);

if (current && current->process().validate_read((void*)regs.eip, 8)) {
SmapDisabler disabler;
u8* codeptr = (u8*)regs.eip;
Expand Down Expand Up @@ -221,7 +235,7 @@ EH_ENTRY(14, page_fault);
void page_fault_handler(RegisterDump regs)
{
clac();
ASSERT(current);
//ASSERT(current);

u32 fault_address;
asm("movl %%cr2, %%eax"
Expand All @@ -232,12 +246,13 @@ void page_fault_handler(RegisterDump regs)
: "=a"(fault_page_directory));

#ifdef PAGE_FAULT_DEBUG
dbgprintf("%s(%u): ring%u %s page fault in PD=%x, %s V%08x\n",
dbgprintf("%s(%u): ring%u %s page fault in PD=%x, %s%s V%08x\n",
current ? current->process().name().characters() : "(none)",
current ? current->pid() : 0,
regs.cs & 3,
regs.exception_code & 1 ? "PV" : "NP",
fault_page_directory,
regs.exception_code & 8 ? "reserved-bit " : "",
regs.exception_code & 2 ? "write" : "read",
fault_address);
#endif
Expand Down
6 changes: 6 additions & 0 deletions Kernel/Arch/i386/CPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ class PageDirectoryEntry {
m_raw |= value & 0xfffff000;
}

void clear() { m_raw = 0; }

u64 raw() const { return m_raw; }
void copy_from(Badge<PageDirectory>, const PageDirectoryEntry& other) { m_raw = other.m_raw; }

Expand All @@ -104,6 +106,7 @@ class PageDirectoryEntry {
UserSupervisor = 1 << 2,
WriteThrough = 1 << 3,
CacheDisabled = 1 << 4,
Huge = 1 << 7,
Global = 1 << 8,
NoExecute = 0x8000000000000000ULL,
};
Expand All @@ -114,6 +117,9 @@ class PageDirectoryEntry {
bool is_user_allowed() const { return raw() & UserSupervisor; }
void set_user_allowed(bool b) { set_bit(UserSupervisor, b); }

bool is_huge() const { return raw() & Huge; }
void set_huge(bool b) { set_bit(Huge, b); }

bool is_writable() const { return raw() & ReadWrite; }
void set_writable(bool b) { set_bit(ReadWrite, b); }

Expand Down
24 changes: 13 additions & 11 deletions Kernel/Devices/PATAChannel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ PATAChannel::PATAChannel(ChannelType type, bool force_pio)
m_dma_enabled.resource() = true;
ProcFS::add_sys_bool("ide_dma", m_dma_enabled);

m_prdt_page = MM.allocate_supervisor_physical_page();

initialize(force_pio);
detect_disks();
}
Expand Down Expand Up @@ -131,7 +133,7 @@ void PATAChannel::initialize(bool force_pio)

// Let's try to set up DMA transfers.
PCI::enable_bus_mastering(m_pci_address);
m_prdt.end_of_table = 0x8000;
prdt().end_of_table = 0x8000;
m_bus_master_base = PCI::get_BAR4(m_pci_address) & 0xfffc;
m_dma_buffer_page = MM.allocate_supervisor_physical_page();
kprintf("PATAChannel: Bus master IDE: I/O @ %x\n", m_bus_master_base);
Expand Down Expand Up @@ -253,16 +255,16 @@ bool PATAChannel::ata_read_sectors_with_dma(u32 lba, u16 count, u8* outbuf, bool
current->pid(), lba, count, outbuf);
#endif

m_prdt.offset = m_dma_buffer_page->paddr();
m_prdt.size = 512 * count;
prdt().offset = m_dma_buffer_page->paddr();
prdt().size = 512 * count;

ASSERT(m_prdt.size <= PAGE_SIZE);
ASSERT(prdt().size <= PAGE_SIZE);

// Stop bus master
IO::out8(m_bus_master_base, 0);

// Write the PRDT location
IO::out32(m_bus_master_base + 4, (u32)&m_prdt);
IO::out32(m_bus_master_base + 4, m_prdt_page->paddr().get());

// Turn on "Interrupt" and "Error" flag. The error flag should be cleared by hardware.
IO::out8(m_bus_master_base + 2, IO::in8(m_bus_master_base + 2) | 0x6);
Expand Down Expand Up @@ -310,7 +312,7 @@ bool PATAChannel::ata_read_sectors_with_dma(u32 lba, u16 count, u8* outbuf, bool
if (m_device_error)
return false;

memcpy(outbuf, m_dma_buffer_page->paddr().as_ptr(), 512 * count);
memcpy(outbuf, m_dma_buffer_page->paddr().offset(0xc0000000).as_ptr(), 512 * count);

// I read somewhere that this may trigger a cache flush so let's do it.
IO::out8(m_bus_master_base + 2, IO::in8(m_bus_master_base + 2) | 0x6);
Expand All @@ -326,18 +328,18 @@ bool PATAChannel::ata_write_sectors_with_dma(u32 lba, u16 count, const u8* inbuf
current->pid(), lba, count, inbuf);
#endif

m_prdt.offset = m_dma_buffer_page->paddr();
m_prdt.size = 512 * count;
prdt().offset = m_dma_buffer_page->paddr();
prdt().size = 512 * count;

memcpy(m_dma_buffer_page->paddr().as_ptr(), inbuf, 512 * count);
memcpy(m_dma_buffer_page->paddr().offset(0xc0000000).as_ptr(), inbuf, 512 * count);

ASSERT(m_prdt.size <= PAGE_SIZE);
ASSERT(prdt().size <= PAGE_SIZE);

// Stop bus master
IO::out8(m_bus_master_base, 0);

// Write the PRDT location
IO::out32(m_bus_master_base + 4, (u32)&m_prdt);
IO::out32(m_bus_master_base + 4, m_prdt_page->paddr().get());

// Turn on "Interrupt" and "Error" flag. The error flag should be cleared by hardware.
IO::out8(m_bus_master_base + 2, IO::in8(m_bus_master_base + 2) | 0x6);
Expand Down
3 changes: 2 additions & 1 deletion Kernel/Devices/PATAChannel.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class PATAChannel final : public IRQHandler {
WaitQueue m_irq_queue;

PCI::Address m_pci_address;
PhysicalRegionDescriptor m_prdt;
PhysicalRegionDescriptor& prdt() { return *reinterpret_cast<PhysicalRegionDescriptor*>(m_prdt_page->paddr().offset(0xc0000000).as_ptr()); }
RefPtr<PhysicalPage> m_prdt_page;
RefPtr<PhysicalPage> m_dma_buffer_page;
u16 m_bus_master_base { 0 };
Lockable<bool> m_dma_enabled;
Expand Down
4 changes: 2 additions & 2 deletions Kernel/Heap/kmalloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ struct [[gnu::packed]] allocation_t
size_t nchunk;
};

#define BASE_PHYSICAL (4 * MB)
#define BASE_PHYSICAL (0xc0000000 + (4 * MB))
#define CHUNK_SIZE 8
#define POOL_SIZE (3 * MB)

#define ETERNAL_BASE_PHYSICAL (2 * MB)
#define ETERNAL_BASE_PHYSICAL (0xc0000000 + (2 * MB))
#define ETERNAL_RANGE_SIZE (2 * MB)

static u8 alloc_map[POOL_SIZE / CHUNK_SIZE / 8];
Expand Down
2 changes: 1 addition & 1 deletion Kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ SUBPROJECT_CXXFLAGS += -nostdlib -nostdinc -nostdinc++ -g3
SUBPROJECT_CXXFLAGS += -I../Toolchain/Local/i686-pc-serenity/include/c++/9.2.0/
SUBPROJECT_CXXFLAGS += -I../Toolchain/Local/i686-pc-serenity/include/c++/9.2.0/i686-pc-serenity/

LDFLAGS += -Ttext 0x100000 -Wl,-T linker.ld -nostdlib -lgcc -lstdc++ -g3
LDFLAGS += -Wl,-T linker.ld -nostdlib -lgcc -lstdc++ -g3

all: $(PROGRAM) $(MODULE_OBJS) kernel.map

Expand Down
19 changes: 13 additions & 6 deletions Kernel/Net/E1000NetworkAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,14 @@ void E1000NetworkAdapter::initialize_rx_descriptors()
m_rx_descriptors = (e1000_rx_desc*)ptr;
for (int i = 0; i < number_of_rx_descriptors; ++i) {
auto& descriptor = m_rx_descriptors[i];
descriptor.addr = (u64)kmalloc_eternal(8192 + 16);
auto addr = (u32)kmalloc_eternal(8192 + 16);
if (addr % 16)
addr = (addr + 16) - (addr % 16);
descriptor.addr = addr - 0xc0000000;
descriptor.status = 0;
}

out32(REG_RXDESCLO, ptr);
out32(REG_RXDESCLO, (u32)ptr - 0xc0000000);
out32(REG_RXDESCHI, 0);
out32(REG_RXDESCLEN, number_of_rx_descriptors * sizeof(e1000_rx_desc));
out32(REG_RXDESCHEAD, 0);
Expand All @@ -256,11 +259,14 @@ void E1000NetworkAdapter::initialize_tx_descriptors()
m_tx_descriptors = (e1000_tx_desc*)ptr;
for (int i = 0; i < number_of_tx_descriptors; ++i) {
auto& descriptor = m_tx_descriptors[i];
descriptor.addr = (u64)kmalloc_eternal(8192 + 16);
auto addr = (u32)kmalloc_eternal(8192 + 16);
if (addr % 16)
addr = (addr + 16) - (addr % 16);
descriptor.addr = addr - 0xc0000000;
descriptor.cmd = 0;
}

out32(REG_TXDESCLO, ptr);
out32(REG_TXDESCLO, (u32)ptr - 0xc0000000);
out32(REG_TXDESCHI, 0);
out32(REG_TXDESCLEN, number_of_tx_descriptors * sizeof(e1000_tx_desc));
out32(REG_TXDESCHEAD, 0);
Expand Down Expand Up @@ -348,7 +354,8 @@ void E1000NetworkAdapter::send_raw(const u8* data, int length)
#endif
auto& descriptor = m_tx_descriptors[tx_current];
ASSERT(length <= 8192);
memcpy((void*)descriptor.addr, data, length);
auto *vptr = (void*)(descriptor.addr + 0xc0000000);
memcpy(vptr, data, length);
descriptor.length = length;
descriptor.status = 0;
descriptor.cmd = CMD_EOP | CMD_IFCS | CMD_RS;
Expand Down Expand Up @@ -381,7 +388,7 @@ void E1000NetworkAdapter::receive()
rx_current = (rx_current + 1) % number_of_rx_descriptors;
if (!(m_rx_descriptors[rx_current].status & 1))
break;
auto* buffer = (u8*)m_rx_descriptors[rx_current].addr;
auto* buffer = (u8*)(m_rx_descriptors[rx_current].addr + 0xc0000000);
u16 length = m_rx_descriptors[rx_current].length;
#ifdef E1000_DEBUG
kprintf("E1000: Received 1 packet @ %p (%u) bytes!\n", buffer, length);
Expand Down
2 changes: 1 addition & 1 deletion Kernel/TTY/VirtualConsole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void VirtualConsole::flush_vga_cursor()

void VirtualConsole::initialize()
{
s_vga_buffer = (u8*)0xb8000;
s_vga_buffer = (u8*)0xc00b8000;
memset(s_consoles, 0, sizeof(s_consoles));
s_active_console = -1;
}
Expand Down
Loading

0 comments on commit e362b56

Please sign in to comment.