Files
KirkOS/src/mm/vmm.c
T
kaguya db352f7ef4 sched: add POSIX signal support
We have added POSIX signals to KirkOS

It is very much experimental

Alongside, we have PCI support fully, and we have imported sbase coreutils, I'm not too sure if all of them work, likely not, but a good few should be okay.

Signed-off-by: kaguya <kaguya3311@national.shitposting.agency>
2026-05-19 00:48:13 -04:00

604 lines
17 KiB
C

#include "vmm.h"
#include "pmm.h"
#include "memory.h"
#include "libk/errno.h"
#include "mp/mp.h"
#include "libk/debug.h"
#include "arch/x86_64/cpu/reg.h"
#include <limine.h>
#include "mp/spinlock.h"
#include "fs/elf.h"
#include "sched/sched_types.h"
#include "mmap.h"
#include "arch/x86_64/sys/halt.h"
#include "sched/sched.h"
#include "arch/x86_64/cpu/cr.h"
#include "arch/x86_64/boot/isr.h"
#include "libk/misc.h"
// Page-fault hook defined outside this file. vmm_page_fault_handler calls it
// first and returns immediately when it reports true.
bool mmap_handle_pf(registers_t *reg);
// The kernel's own page map. Allocated and filled in by vmm_init; its
// top-level entries 256..511 are copied into every map made by vmm_new_pagemap.
struct pagemap *kernel_pagemap = NULL;
// Start/end symbols of the kernel's text, rodata and data sections
// (presumably emitted by the linker script - confirm). vmm_init page-aligns
// them and maps each section with its own permissions.
extern char text_start_addr[], text_end_addr[];
extern char rodata_start_addr[], rodata_end_addr[];
extern char data_start_addr[], data_end_addr[];
// Limine HHDM request. Its response is not read anywhere in this file
// (presumably consumed where MEM_PHYS_OFFSET is defined - confirm).
volatile struct limine_hhdm_request hhdm_request = {.id = LIMINE_HHDM_REQUEST,
.revision = 0};
// Limine kernel-address request. vmm_init reads physical_base/virtual_base
// from the response to translate kernel section addresses.
volatile struct limine_kernel_address_request kernel_address_request = {
.id = LIMINE_KERNEL_ADDRESS_REQUEST, .revision = 0};
// Limine paging-mode request asking for 5-level paging. The page-table walkers
// in this file check response->mode on every call to decide whether the
// top-level table is a PML5 or a PML4.
static volatile struct limine_paging_mode_request paging_mode_request = {
.id = LIMINE_PAGING_MODE_REQUEST,
.revision = 0,
.response = NULL,
.mode = LIMINE_PAGING_MODE_X86_64_5LVL};
/*
 * Returns a kernel-virtual pointer to the table referenced by top_level[idx].
 *
 * top_level: kernel-virtual pointer to a paging table.
 * idx:       index of the entry to follow.
 * allocate:  when the entry is not present, allocate a zeroed table with
 *            pmm_allocz and install it (present | writable | user).
 *
 * Returns NULL when the entry is not present and either allocate is false or
 * the physical allocator is out of memory.
 */
static uint64_t *get_next_level(uint64_t *top_level, size_t idx,
                                bool allocate) {
    uint64_t entry = top_level[idx];

    if (entry & 1) {
        // Keep only the physical-address bits (12..51). The previous mask
        // (~0xFFF) also kept bits 52..63, so an entry carrying NX or any
        // other high bit produced a non-canonical pointer. This matches the
        // 0xffffffffff000 mask used for PTEs in the rest of the file.
        uint64_t table_phys = entry & 0x000ffffffffff000ull;
        return (uint64_t *)((uintptr_t)table_phys + MEM_PHYS_OFFSET);
    }

    if (!allocate) {
        return NULL;
    }

    void *next_level = pmm_allocz(1);
    if (next_level == NULL) {
        return NULL;
    }

    top_level[idx] = (uint64_t)next_level | 0b111;
    return (uint64_t *)((uintptr_t)next_level + MEM_PHYS_OFFSET);
}
/*
 * Maps the page-aligned virtual range [start, end) into the kernel page map
 * with 4 KiB pages, translating each address through the kernel's
 * virtual/physical load bases.
 */
static void map_kernel_section(uintptr_t start, uintptr_t end,
                               uint64_t virt_base, uint64_t phys_base,
                               uint64_t flags) {
    for (uintptr_t va = start; va < end; va += PAGE_SIZE) {
        uintptr_t pa = va - virt_base + phys_base;
        vmm_map_page(kernel_pagemap, va, pa, flags, Size4KiB);
    }
}

/*
 * Builds the kernel page map, switches to it and installs the page-fault
 * handler.
 *
 * memmap / memmap_entries: the bootloader memory map; every entry (or the
 * part of it) at or above 4 GiB is mapped at MEM_PHYS_OFFSET with 4 KiB pages.
 *
 * Steps, in order:
 *   1. allocate the pagemap and its top-level table;
 *   2. pre-create top-level entries 256..511 (vmm_new_pagemap copies exactly
 *      these entries into every new page map);
 *   3. map the first 4 GiB of physical memory at MEM_PHYS_OFFSET with 2 MiB
 *      pages;
 *   4. map memory-map entries above 4 GiB;
 *   5. map kernel text, rodata and data with their own flags;
 *   6. load the new page map and register the #PF (vector 0xe) handler.
 */
void vmm_init(struct limine_memmap_entry **memmap, size_t memmap_entries) {
    kernel_pagemap = kmalloc(sizeof(struct pagemap));
    spinlock_init(kernel_pagemap->lock);
    kernel_pagemap->top_level =
        (uint64_t *)((uintptr_t)pmm_allocz(1) + MEM_PHYS_OFFSET);

    for (uint64_t slot = 256; slot < 512; slot++) {
        get_next_level(kernel_pagemap->top_level, slot, true);
    }

    for (uint64_t phys = 0; phys < 4096UL * 1024 * 1024; phys += 0x200000) {
        vmm_map_page(kernel_pagemap, phys + MEM_PHYS_OFFSET, phys, 0b11,
                     Size2MiB);
    }

    for (size_t entry = 0; entry < (memmap_entries); entry++) {
        uint64_t region_base = memmap[entry]->base;
        uint64_t region_top = region_base + memmap[entry]->length;

        // Everything below 4 GiB is already covered by the 2 MiB mappings.
        if (region_base < 0x100000000) {
            region_base = 0x100000000;
        }
        if (region_base >= region_top) {
            continue;
        }

        uint64_t first_page = ALIGN_DOWN(region_base, PAGE_SIZE);
        uint64_t span = ALIGN_UP(region_top, PAGE_SIZE) - first_page;
        for (uint64_t off = 0; off < span; off += PAGE_SIZE) {
            uint64_t phys = first_page + off;
            vmm_map_page(kernel_pagemap, phys + MEM_PHYS_OFFSET, phys, 0b11,
                         Size4KiB);
        }
    }

    uintptr_t text_start = ALIGN_DOWN((uintptr_t)text_start_addr, PAGE_SIZE);
    uintptr_t rodata_start =
        ALIGN_DOWN((uintptr_t)rodata_start_addr, PAGE_SIZE);
    uintptr_t data_start = ALIGN_DOWN((uintptr_t)data_start_addr, PAGE_SIZE);
    uintptr_t text_end = ALIGN_UP((uintptr_t)text_end_addr, PAGE_SIZE);
    uintptr_t rodata_end = ALIGN_UP((uintptr_t)rodata_end_addr, PAGE_SIZE);
    uintptr_t data_end = ALIGN_UP((uintptr_t)data_end_addr, PAGE_SIZE);

    uint64_t kernel_phys = kernel_address_request.response->physical_base;
    uint64_t kernel_virt = kernel_address_request.response->virtual_base;

    // text: present only; rodata: present + bit 63; data: present + writable
    // + bit 63.
    map_kernel_section(text_start, text_end, kernel_virt, kernel_phys, 1);
    map_kernel_section(rodata_start, rodata_end, kernel_virt, kernel_phys,
                       1 | 1ull << 63ull);
    map_kernel_section(data_start, data_end, kernel_virt, kernel_phys,
                       0b11 | 1ull << 63ull);

    // Leave the bootloader's page tables behind and run on our own.
    kprintf("Switching to our pagemaps now!");
    vmm_switch_pagemap(kernel_pagemap);
    isr_register_handler(0xe, vmm_page_fault_handler);
}
void vmm_switch_pagemap(struct pagemap *pagemap) {
asm volatile("mov cr3, %0"
:
: "r"((void *)((uint64_t)pagemap->top_level - MEM_PHYS_OFFSET))
: "memory");
}
/*
 * Creates a new dynamically allocated page map.
 *
 * The new map gets a zeroed top-level table whose entries 256..511 are copied
 * from kernel_pagemap, and an empty mmap_ranges vector.
 *
 * Returns the new page map, or NULL when either the pagemap structure or its
 * top-level table cannot be allocated (nothing is leaked in that case).
 */
struct pagemap *vmm_new_pagemap(void) {
    struct pagemap *pagemap = kmalloc(sizeof(struct pagemap));
    if (pagemap == NULL) {
        return NULL;
    }

    // Check the frame before biasing it by MEM_PHYS_OFFSET; a failed
    // allocation would otherwise turn into a valid-looking pointer.
    void *top_level_phys = pmm_allocz(1);
    if (top_level_phys == NULL) {
        kfree(pagemap);
        return NULL;
    }

    spinlock_init(pagemap->lock);
    pagemap->top_level =
        (uint64_t *)((uintptr_t)top_level_phys + MEM_PHYS_OFFSET);

    for (size_t i = 256; i < 512; i++) {
        pagemap->top_level[i] = kernel_pagemap->top_level[i];
    }

    vec_init(&pagemap->mmap_ranges);
    return pagemap;
}
/*
 * Maps virt_addr to phys_addr in `pagemap`, creating any missing
 * intermediate tables on the way.
 *
 * flags:   bits OR-ed into the final entry alongside phys_addr.
 * pg_size: Size2MiB writes a level-2 entry with bit 7 set; any other value
 *          writes a level-1 (4 KiB) entry.
 *
 * Takes pagemap->lock for the duration of the call.
 * Returns true on success, false when an intermediate table could not be
 * obtained.
 */
bool vmm_map_page(struct pagemap *pagemap, uint64_t virt_addr,
                  uint64_t phys_addr, uint64_t flags, enum page_size pg_size) {
    spinlock_acquire_or_wait(&pagemap->lock);

    // Each level is indexed by a 9-bit slice of the virtual address.
    const size_t idx5 = (virt_addr >> 48) & 0x1FF;
    const size_t idx4 = (virt_addr >> 39) & 0x1FF;
    const size_t idx3 = (virt_addr >> 30) & 0x1FF;
    const size_t idx2 = (virt_addr >> 21) & 0x1FF;
    const size_t idx1 = (virt_addr >> 12) & 0x1FF;

    bool mapped = false;
    uint64_t *table = pagemap->top_level;

    // With 5-level paging the top-level table is a PML5; step down to the
    // PML4 first. Otherwise the top-level table already is the PML4.
    if (paging_mode_request.response->mode == LIMINE_PAGING_MODE_X86_64_5LVL) {
        table = get_next_level(table, idx5, true);
    }
    if (table != NULL) {
        table = get_next_level(table, idx4, true); // -> level 3
    }
    if (table != NULL) {
        table = get_next_level(table, idx3, true); // -> level 2
    }

    if (table != NULL) {
        if (pg_size == Size2MiB) {
            table[idx2] = phys_addr | flags | (1 << 7);
            mapped = true;
        } else {
            table = get_next_level(table, idx2, true); // -> level 1
            if (table != NULL) {
                table[idx1] = phys_addr | flags;
                mapped = true;
            }
        }
    }

    spinlock_drop(&pagemap->lock);
    return mapped;
}
/*
 * Replaces the flag bits of an existing 4 KiB mapping, keeping its physical
 * address, and invalidates the TLB entry for `virt` on the executing CPU.
 *
 * locked: true when the caller already holds pagemap->lock; the lock is then
 *         neither taken nor released here.
 *
 * Returns true when the entry was rewritten, false when any table on the path
 * is missing or the level-1 entry is not present. No tables are allocated.
 */
bool vmm_remap_page(struct pagemap *pagemap, uintptr_t virt, uint64_t flags,
                    bool locked) {
    if (!locked) {
        spinlock_acquire_or_wait(&pagemap->lock);
    }

    const size_t idx5 = (virt >> 48) & 0x1FF;
    const size_t idx4 = (virt >> 39) & 0x1FF;
    const size_t idx3 = (virt >> 30) & 0x1FF;
    const size_t idx2 = (virt >> 21) & 0x1FF;
    const size_t idx1 = (virt >> 12) & 0x1FF;

    bool remapped = false;
    uint64_t *table = pagemap->top_level;

    // Only descend through the PML5 when 5-level paging is active.
    if (paging_mode_request.response->mode == LIMINE_PAGING_MODE_X86_64_5LVL) {
        table = get_next_level(table, idx5, false);
    }
    if (table != NULL) {
        table = get_next_level(table, idx4, false);
    }
    if (table != NULL) {
        table = get_next_level(table, idx3, false);
    }
    if (table != NULL) {
        table = get_next_level(table, idx2, false);
    }

    if (table != NULL && (table[idx1] & 1) != 0) {
        table[idx1] = (table[idx1] & 0xffffffffff000) | flags;
        asm volatile("invlpg [%0]" : : "r"(virt) : "memory");
        remapped = true;
    }

    if (!locked) {
        spinlock_drop(&pagemap->lock);
    }
    return remapped;
}
/*
 * Clears the 4 KiB mapping for `virt` and invalidates its TLB entry on the
 * executing CPU. The backing frame and the page tables are left alone.
 *
 * locked: true when the caller already holds pagemap->lock; the lock is then
 *         neither taken nor released here.
 *
 * Returns true when a present entry was cleared, false when any table on the
 * path is missing or the level-1 entry is not present.
 */
bool vmm_unmap_page(struct pagemap *pagemap, uintptr_t virt, bool locked) {
    if (!locked) {
        spinlock_acquire_or_wait(&pagemap->lock);
    }

    const size_t idx5 = (virt >> 48) & 0x1FF;
    const size_t idx4 = (virt >> 39) & 0x1FF;
    const size_t idx3 = (virt >> 30) & 0x1FF;
    const size_t idx2 = (virt >> 21) & 0x1FF;
    const size_t idx1 = (virt >> 12) & 0x1FF;

    bool unmapped = false;
    uint64_t *table = pagemap->top_level;

    // Only descend through the PML5 when 5-level paging is active.
    if (paging_mode_request.response->mode == LIMINE_PAGING_MODE_X86_64_5LVL) {
        table = get_next_level(table, idx5, false);
    }
    if (table != NULL) {
        table = get_next_level(table, idx4, false);
    }
    if (table != NULL) {
        table = get_next_level(table, idx3, false);
    }
    if (table != NULL) {
        table = get_next_level(table, idx2, false);
    }

    if (table != NULL && (table[idx1] & 1) != 0) {
        table[idx1] = 0;
        asm volatile("invlpg [%0]" : : "r"(virt) : "memory");
        unmapped = true;
    }

    if (!locked) {
        spinlock_drop(&pagemap->lock);
    }
    return unmapped;
}
/*
 * Returns a pointer to the level-1 page-table entry for virt_addr.
 *
 * allocate: when true, missing intermediate tables are created; when false
 *           the walk stops at the first missing table.
 *
 * Returns NULL when a table on the path is missing (allocate == false) or
 * could not be allocated. The returned entry itself may be non-present.
 *
 * Locking: this function neither takes nor releases pagemap->lock;
 * synchronisation is the caller's responsibility. The failure path used to
 * call spinlock_drop() without a matching acquire, which released (or
 * double-released) a lock owned by the caller.
 */
uint64_t *vmm_virt_to_pte(struct pagemap *pagemap, uintptr_t virt_addr,
                          bool allocate) {
    const size_t pml5_entry = (virt_addr >> 48) & 0x1FF;
    const size_t pml4_entry = (virt_addr >> 39) & 0x1FF;
    const size_t pml3_entry = (virt_addr >> 30) & 0x1FF;
    const size_t pml2_entry = (virt_addr >> 21) & 0x1FF;
    const size_t pml1_entry = (virt_addr >> 12) & 0x1FF;

    uint64_t *pml4 = pagemap->top_level;

    // With 5-level paging the top-level table is a PML5; step down first.
    if (paging_mode_request.response->mode == LIMINE_PAGING_MODE_X86_64_5LVL) {
        pml4 = get_next_level(pagemap->top_level, pml5_entry, allocate);
        if (pml4 == NULL) {
            return NULL;
        }
    }

    uint64_t *pml3 = get_next_level(pml4, pml4_entry, allocate);
    if (pml3 == NULL) {
        return NULL;
    }

    uint64_t *pml2 = get_next_level(pml3, pml3_entry, allocate);
    if (pml2 == NULL) {
        return NULL;
    }

    uint64_t *pml1 = get_next_level(pml2, pml2_entry, allocate);
    if (pml1 == NULL) {
        return NULL;
    }

    return &pml1[pml1_entry];
}
/*
 * Translates virt_addr to the physical address of the 4 KiB frame it is
 * mapped to in `pagemap`.
 *
 * Returns the page-aligned physical address (the offset within the page is
 * NOT added), or INVALID_PHYS when no present level-1 entry exists.
 */
uint64_t vmm_virt_to_phys(struct pagemap *pagemap, uint64_t virt_addr) {
    uint64_t entry = 0;

    spinlock_acquire_or_wait(&pagemap->lock);
    uint64_t *pte = vmm_virt_to_pte(pagemap, virt_addr, false);
    if (pte != NULL) {
        // Snapshot the entry while the lock is held. Reading *pte after the
        // drop (and reading it twice) could observe a concurrent update
        // between the present check and the address extraction.
        entry = *pte;
    }
    spinlock_drop(&pagemap->lock);

    if ((entry & 1) == 0) {
        return INVALID_PHYS;
    }
    return entry & 0xffffffffff000;
}
/*
 * Converts an address mapped in `pagemap` into the matching address inside
 * the MEM_PHYS_OFFSET window, preserving the offset within the page.
 *
 * Returns 0 when virt_addr has no valid translation.
 */
uint64_t vmm_virt_to_kernel(struct pagemap *pagemap, uint64_t virt_addr) {
    uint64_t frame = vmm_virt_to_phys(pagemap, virt_addr);
    if (frame == INVALID_PHYS) {
        return 0;
    }

    // vmm_virt_to_phys yields the frame base; add back the in-page offset.
    uint64_t offset_in_page = virt_addr - ALIGN_DOWN(virt_addr, PAGE_SIZE);
    return frame + MEM_PHYS_OFFSET + offset_in_page;
}
/*
 * Page-fault (vector 0xe) handler.
 *
 * The fault is first offered to mmap_handle_pf; when that reports true the
 * handler returns and the faulting instruction is retried. Otherwise:
 *   - a fault raised from user mode (CS RPL != 0) kills the running thread;
 *   - a fault raised from kernel mode halts the other CPUs, dumps the
 *     register state and panics.
 *
 * Error-code bits decoded below (x86 page-fault error code):
 *   bit 0: 1 = protection violation on a present page, 0 = page not present
 *   bit 1: 1 = write access, 0 = read access
 *   bit 2: 1 = access originated in user mode
 *   bit 3: 1 = reserved bit set in a paging structure
 *   bit 4: 1 = instruction fetch
 */
void vmm_page_fault_handler(registers_t *reg) {
    // Capture CR2 before running any other code: a nested page fault taken
    // while servicing this one would overwrite it.
    uint64_t faulting_address = read_cr("2");

    if (mmap_handle_pf(reg)) {
        return;
    }

    bool present = reg->errorCode & 0x1;
    bool read_write = reg->errorCode & 0x2;
    bool user_supervisor = reg->errorCode & 0x4;
    bool reserved = reg->errorCode & 0x8;
    bool execute = reg->errorCode & 0x10;

    if (reg->cs & 0x3) {
        struct thread *thrd = sched_get_running_thread();
        kprintf("Killing user thread tid %d under process %s for Page Fault\n",
                thrd->tid, thrd->mother_proc->name);
        kprintf("User thread crashed at address: %p\n", reg->rip);
        // backtrace_unsafe((void *)reg->rbp);
#if 0
        kprintf("RIP: %p RBP: %p RSP: %p\n", reg->rip, reg->rbp, reg->rsp);
        kprintf("RAX: %p RBX: %p RCX: %p\n", reg->rax, reg->rbx, reg->rcx);
        kprintf("RDX: %p RDI: %p RSI: %p\n", reg->rdx, reg->rdi, reg->rsi);
        kprintf("R8 : %p R9 : %p R10: %p\n", reg->r8, reg->r9, reg->r10);
        kprintf("R11: %p R12: %p R13: %p\n", reg->r11, reg->r12, reg->r13);
        kprintf("R14: %p R15: %p ERR: 0b%b\n", reg->r14, reg->r15,
                reg->errorCode);
        kprintf("CS : %p SS : %p RFLAGS: %p\n", reg->cs, reg->ss, reg->rflags);
        kprintf("FS: %p UGS: %p KGS: %p\n", read_fs_base(), read_user_gs(),
                read_kernel_gs());
        kprintf("Page fault at %p present: %s, read/write: %s, "
                "user/supervisor: %s, reserved: %s, execute: %s\n",
                faulting_address, present ? "P" : "NP", read_write ? "W" : "R",
                user_supervisor ? "U" : "S", reserved ? "R" : "NR",
                execute ? "X" : "NX");
#endif
        thread_kill(thrd, true);
    } else {
        halt_other_cpus();
        kprintffos(0, "AH! UNHANDLED EXCEPTION!\n");
        kprintffos(0, "RIP: %p RBP: %p RSP: %p\n", reg->rip, reg->rbp,
                   reg->rsp);
        kprintffos(0, "RAX: %p RBX: %p RCX: %p\n", reg->rax, reg->rbx,
                   reg->rcx);
        kprintffos(0, "RDX: %p RDI: %p RSI: %p\n", reg->rdx, reg->rdi,
                   reg->rsi);
        kprintffos(0, "R8 : %p R9 : %p R10: %p\n", reg->r8, reg->r9, reg->r10);
        kprintffos(0, "R11: %p R12: %p R13: %p\n", reg->r11, reg->r12,
                   reg->r13);
        kprintffos(0, "R14: %p R15: %p ERR: 0b%b\n", reg->r14, reg->r15,
                   reg->errorCode);
        kprintffos(0, "CS : %p SS : %p RFLAGS: %p\n", reg->cs, reg->ss,
                   reg->rflags);
        kprintffos(0, "FS : %p UGS: %p KGS: %p\n", read_fs_base(),
                   read_user_gs(), read_kernel_gs());
        put_to_fb = true;
        // Bit 1 set means the faulting access was a write; the label used to
        // print "R" for writes and "RW" for reads.
        panic_((void *)reg->rip, (void *)reg->rbp,
               "Page fault at %p present: %s, read/write: %s, "
               "user/supervisor: %s, reserved: %s, execute: %s\n",
               faulting_address, present ? "P" : "NP", read_write ? "W" : "R",
               user_supervisor ? "U" : "S", reserved ? "R" : "NR",
               execute ? "X" : "NX");
    }
}
/*
 * Builds a new page map that duplicates every mmap range of `pagemap`.
 *
 * For each local range of the source:
 *   - MAP_SHARED: the new local range is appended to the existing global
 *     range's `locals`, and the source PTE values are copied verbatim, so
 *     both maps point at the same frames.
 *   - otherwise: a new global range with its own shadow pagemap is created;
 *     every present source page is copied into a freshly allocated frame,
 *     which is entered in both the new pagemap and the shadow pagemap with
 *     the source entry's low 12 flag bits.
 *
 * pagemap->lock is held for the whole operation.
 * Returns the new page map, or NULL on any allocation failure (the partially
 * built map is passed to vmm_destroy_pagemap).
 *
 * NOTE(review): on the cleanup path the new_local_range / new_global_range /
 * shadow pagemap allocated in the failing iteration are not freed here, and
 * the res->refcount increment is not undone - confirm whether
 * vmm_destroy_pagemap/munmap reaches them or they leak.
 */
struct pagemap *vmm_fork_pagemap(struct pagemap *pagemap) {
spinlock_acquire_or_wait(&pagemap->lock);
struct pagemap *new_pagemap = vmm_new_pagemap();
if (new_pagemap == NULL) {
goto cleanup;
}
struct mmap_range_local *local_range = NULL;
int idxn = 0;
vec_foreach(&pagemap->mmap_ranges, local_range, idxn) {
struct mmap_range_global *global_range = local_range->global;
struct mmap_range_local *new_local_range =
kmalloc(sizeof(struct mmap_range_local));
if (new_local_range == NULL) {
goto cleanup;
}
// Start from a field-for-field copy of the parent's range, then point it
// at the child's page map.
// NOTE(review): `global` is copied too and is not repointed to
// new_global_range in the private branch below - confirm this is intended.
*new_local_range = *local_range;
new_local_range->pagemap = new_pagemap;
// The backing resource (if any) gains one more user.
if (global_range->res != NULL) {
global_range->res->refcount++;
}
if ((local_range->flags & MAP_SHARED) != 0) {
// Shared mapping: parent and child reference the same global range and the
// same physical frames.
vec_push(&global_range->locals, new_local_range);
for (uintptr_t i = local_range->base;
i < local_range->base + local_range->length; i += PAGE_SIZE) {
uint64_t *old_pte = vmm_virt_to_pte(pagemap, i, false);
if (old_pte == NULL) {
continue;
}
uint64_t *new_pte = vmm_virt_to_pte(new_pagemap, i, true);
if (new_pte == NULL) {
goto cleanup;
}
// Entry is copied as-is, whether or not it is present.
*new_pte = *old_pte;
}
} else {
// Private mapping: the child gets its own global range and its own copy of
// every present page.
struct mmap_range_global *new_global_range =
kmalloc(sizeof(struct mmap_range_global));
if (new_global_range == NULL) {
goto cleanup;
}
new_global_range->shadow_pagemap = vmm_new_pagemap();
if (new_global_range->shadow_pagemap == NULL) {
goto cleanup;
}
new_global_range->base = global_range->base;
new_global_range->length = global_range->length;
new_global_range->res = global_range->res;
new_global_range->offset = global_range->offset;
// NOTE(review): no vec_init on new_global_range->locals is visible before
// this push; this presumably relies on kmalloc returning zeroed memory -
// confirm.
vec_push(&new_global_range->locals, new_local_range);
// TODO: CoW for MAP_PRIVATE?
// if ((local_range->flags & MAP_ANONYMOUS) != 0) {
for (uintptr_t i = local_range->base;
i < local_range->base + local_range->length; i += PAGE_SIZE) {
uint64_t *old_pte = vmm_virt_to_pte(pagemap, i, false);
// Skip pages the parent has not populated.
if (old_pte == NULL || (((*old_pte) & 0xfff) & 1) == 0) {
continue;
}
spinlock_acquire_or_wait(&new_pagemap->lock);
uint64_t *new_pte = vmm_virt_to_pte(new_pagemap, i, true);
spinlock_drop(&new_pagemap->lock);
if (new_pte == NULL) {
goto cleanup;
}
spinlock_acquire_or_wait(
&new_global_range->shadow_pagemap->lock);
uint64_t *new_spte =
vmm_virt_to_pte(new_global_range->shadow_pagemap, i, true);
spinlock_drop(&new_global_range->shadow_pagemap->lock);
if (new_spte == NULL) {
goto cleanup;
}
// Eagerly duplicate the frame through the MEM_PHYS_OFFSET window.
void *old_page = (void *)((*old_pte) & 0xffffffffff000);
void *page = pmm_alloc(1);
if (page == NULL) {
goto cleanup;
}
memcpy((void *)((uintptr_t)page + MEM_PHYS_OFFSET),
(void *)((uintptr_t)old_page + MEM_PHYS_OFFSET),
PAGE_SIZE);
// Only the low 12 flag bits are carried over; bit 63 of the source entry
// is dropped. NOTE(review): confirm that losing bit 63 is intended.
*new_pte = ((*old_pte) & 0xfff) | (uint64_t)page;
*new_spte = *new_pte;
}
// } else {
// kprintf("WARNING: Non anon fork\n");
// }
}
vec_push(&new_pagemap->mmap_ranges, new_local_range);
}
spinlock_drop(&pagemap->lock);
return new_pagemap;
cleanup:
// Failure: release the parent's lock and tear down whatever was built.
spinlock_drop(&pagemap->lock);
if (new_pagemap != NULL) {
vmm_destroy_pagemap(new_pagemap);
}
return NULL;
}
/*
 * Recursively frees the paging table `pml` and every table reachable from
 * its entries [start, end).
 *
 * level: height of `pml` above the mapped frames (1 = page table whose
 *        entries point at data frames). Level 0 is a data frame and is never
 *        freed here.
 *
 * Only paging structures are released; mapped frames are left alone.
 */
static void destroy_level(uint64_t *pml, size_t start, size_t end, int level) {
    if (level == 0) {
        return;
    }

    // A level-1 table has only data frames below it, so there is nothing to
    // recurse into; skipping the loop saves 512 lookups whose results were
    // discarded anyway.
    if (level > 1) {
        for (size_t i = start; i < end; i++) {
            // Bit 7 (PS) marks a huge-page mapping (vmm_map_page sets it for
            // Size2MiB). Such an entry points at data, not at a table;
            // following it would free the data frame as if it were a page
            // table.
            if (pml[i] & (1ull << 7)) {
                continue;
            }
            uint64_t *next_level = get_next_level(pml, i, false);
            if (next_level == NULL) {
                continue;
            }
            destroy_level(next_level, 0, 512, level - 1);
        }
    }

    pmm_free((void *)((uintptr_t)pml - MEM_PHYS_OFFSET), 1);
}
/*
 * Tears down a page map: unmaps every mmap range, frees the paging tables
 * reachable from top-level entries 0..255, then frees the pagemap structure.
 * Entries 256..511 are left alone (vmm_new_pagemap copies those from
 * kernel_pagemap).
 *
 * NOTE(review): the loop always unmaps the first range and relies on munmap
 * removing it from mmap_ranges; if munmap can fail without doing so, this
 * never terminates - confirm.
 */
void vmm_destroy_pagemap(struct pagemap *pagemap) {
    while (pagemap->mmap_ranges.length > 0) {
        struct mmap_range_local *first = pagemap->mmap_ranges.data[0];
        munmap(pagemap, first->base, first->length);
    }

    // The top-level table is a PML5 under 5-level paging, otherwise a PML4.
    int levels = 4;
    if (paging_mode_request.response->mode == LIMINE_PAGING_MODE_X86_64_5LVL) {
        levels = 5;
    }

    destroy_level(pagemap->top_level, 0, 256, levels);
    kfree(pagemap);
}