user: implement mlibc as the libc, finally.
It's finally done. Signed-off-by: kaguya <vpshinomiya@protonmail.com>
This commit is contained in:
@@ -55,14 +55,14 @@ void x86_64_ISR_Initialize(void)
|
||||
x86_64_ISR_InitializeGates();
|
||||
for (int i = 0; i < 256; i++)
|
||||
x86_64_IDT_EnableGate(i);
|
||||
x86_64_IDT_DisableGate(0x80); // syscall gate if you want
|
||||
x86_64_IDT_DisableGate(0x80); // syscall gate
|
||||
}
|
||||
|
||||
void page_fault_handler(Registers* regs, uint64_t cr2)
|
||||
{
|
||||
|
||||
|
||||
// You can decode error bits here:
|
||||
|
||||
// bit 0: present
|
||||
// bit 1: write
|
||||
// bit 2: user-mode
|
||||
|
||||
@@ -40,10 +40,14 @@ struct pagemap *create_user_pagemap(void)
|
||||
}
|
||||
|
||||
/* Copy kernel higher-half mappings (kernel + HHDM) */
|
||||
for (size_t i = 256; i < 512; i++) {
|
||||
for (size_t i = 0; i < 512; i++) {
|
||||
pm->top_level[i] = kernel_pagemap->top_level[i];
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 256; i++) {
|
||||
pm->top_level[i] = 0;
|
||||
}
|
||||
|
||||
/* Lower half remains zero (user address space) */
|
||||
printf("[usermode] user pagemap created (PML4 phys = 0x%lx)\n",
|
||||
(uint64_t)pm->top_level - MEM_PHYS_OFFSET);
|
||||
@@ -53,7 +57,7 @@ struct pagemap *create_user_pagemap(void)
|
||||
|
||||
uintptr_t setup_user_stack(struct pagemap *pagemap)
|
||||
{
|
||||
user_stack_phys_base = (uint64_t)pmm_alloc(USER_STACK_PAGES);
|
||||
user_stack_phys_base = (uint64_t)pmm_allocz(USER_STACK_PAGES);
|
||||
|
||||
if (!user_stack_phys_base) {
|
||||
printf("Failed to allocate user stack pages!\n");
|
||||
@@ -83,45 +87,9 @@ uintptr_t setup_user_stack(struct pagemap *pagemap)
|
||||
return rsp;
|
||||
}
|
||||
|
||||
__attribute__((naked))
|
||||
void enter_user_mode(uint64_t rip, uint64_t rsp)
|
||||
{
|
||||
asm volatile(
|
||||
"cli\n\t"
|
||||
"mov $0x1B, %%ax\n\t"
|
||||
"mov %%ax, %%ds\n\t"
|
||||
"mov %%ax, %%es\n\t"
|
||||
"mov %%ax, %%fs\n\t"
|
||||
"mov %%ax, %%gs\n\t"
|
||||
|
||||
// SS
|
||||
"pushq $0x1B\n\t"
|
||||
|
||||
// RSP
|
||||
"pushq %1\n\t"
|
||||
|
||||
// RFLAGS
|
||||
"pushfq\n\t"
|
||||
"pop %%rax\n\t"
|
||||
"or $0x200, %%rax\n\t"
|
||||
"push %%rax\n\t"
|
||||
|
||||
// CS
|
||||
"pushq $0x23\n\t"
|
||||
|
||||
// RIP
|
||||
"pushq %0\n\t"
|
||||
|
||||
"iretq\n\t"
|
||||
:
|
||||
: "r"(rip), "r"(rsp)
|
||||
: "rax", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void start_userspace(void)
|
||||
{
|
||||
|
||||
struct pagemap *user_pagemap = create_user_pagemap();
|
||||
if (!user_pagemap) {
|
||||
printf("Failed to create user pagemap\n");
|
||||
@@ -129,19 +97,35 @@ void start_userspace(void)
|
||||
}
|
||||
|
||||
void *elf_entry = NULL;
|
||||
if (!ELF_Read("init.elf", &elf_entry, user_pagemap)) {
|
||||
printf("Failed to load init.elf\n");
|
||||
uint64_t tls_fs_base = 0;
|
||||
uint64_t phdr_va = 0;
|
||||
uint16_t phent = 0;
|
||||
uint16_t phnum = 0;
|
||||
|
||||
if (!ELF_Read("helloworld.elf",
|
||||
&elf_entry,
|
||||
user_pagemap,
|
||||
&tls_fs_base,
|
||||
&phdr_va,
|
||||
&phent,
|
||||
&phnum)) {
|
||||
printf("Failed to load helloworld.elf\n");
|
||||
for(;;);
|
||||
}
|
||||
|
||||
if (!elf_entry) {
|
||||
printf("ELF has no entry point\n");
|
||||
for(;;);
|
||||
}
|
||||
printf("ELF: entry=0x%lx TLS_FS=0x%lx PHDR=0x%lx PHENT=0x%x PHNUM=%u\n",
|
||||
(uint64_t)elf_entry, tls_fs_base, phdr_va, phent, phnum);
|
||||
|
||||
uintptr_t user_rsp = setup_user_stack(user_pagemap);
|
||||
|
||||
printf("Entering usermode RIP=%p RSP=%p\n", elf_entry, (void*)user_rsp);
|
||||
|
||||
sched_create_user_task("init", (uint64_t)elf_entry, user_rsp, user_pagemap);
|
||||
sched_create_user_task("init",
|
||||
(uint64_t)elf_entry,
|
||||
user_rsp,
|
||||
user_pagemap,
|
||||
tls_fs_base,
|
||||
phdr_va,
|
||||
phent,
|
||||
phnum);
|
||||
}
|
||||
@@ -138,7 +138,7 @@ void lapic_init(void) {
|
||||
* ── Step 8: Set Task Priority to 0 ───────────────────────────────────
|
||||
*
|
||||
* TPR = 0 means the CPU will accept all interrupt priorities.
|
||||
* Raise this later if you need to block lower-priority interrupts.
|
||||
* Raise this later if lower-priority interrupts need to be blocked.
|
||||
*/
|
||||
lapic_write(LAPIC_TPR, 0);
|
||||
|
||||
|
||||
@@ -64,9 +64,6 @@ void lapic_init(void);
|
||||
|
||||
/**
|
||||
* lapic_eoi - Signal end-of-interrupt to the LAPIC.
|
||||
* Must be called from interrupt handlers that go through the LAPIC
|
||||
* (i.e. IOAPIC-routed interrupts). ExtINT (i8259) interrupts only
|
||||
* need the i8259 EOI, which your existing irq.c already sends.
|
||||
*/
|
||||
void lapic_eoi(void);
|
||||
|
||||
|
||||
@@ -237,7 +237,7 @@ void ioapic_init(void) {
|
||||
* MEM_PHYS_OFFSET. Two MMIO registers are accessed (offsets 0 and
|
||||
* 0x10) so one 4 KiB page is sufficient.
|
||||
*
|
||||
* TODO: Mark the page UC (cache-disable) in the PTE when your VMM
|
||||
* TODO: Mark the page UC (cache-disable) in the PTE when the VMM
|
||||
* gains support for PAT / PCD flags.
|
||||
*/
|
||||
uint64_t phys = (uint64_t)e->address;
|
||||
@@ -343,7 +343,7 @@ void irq_redirect_to_apic(uint8_t isa_irq, uint8_t vector,
|
||||
|
||||
/* Mask in the 8259 so it stops firing through LINT0 */
|
||||
if (g_Driver) {
|
||||
g_Driver->Mask(isa_irq); // from your irq.c / i8259
|
||||
g_Driver->Mask(isa_irq);
|
||||
}
|
||||
|
||||
/* Programme IOAPIC redirection entry */
|
||||
|
||||
@@ -22,6 +22,8 @@ extern bool g_IOAPIC;
|
||||
void x86_64_IRQ_Handler(Registers *regs)
|
||||
{
|
||||
int irq = regs->interrupt - PIC_REMAP_OFFSET;
|
||||
|
||||
g_Driver->SendEndOfInterrupt(irq);
|
||||
|
||||
if (g_IRQHandlers[irq] != NULL)
|
||||
{
|
||||
@@ -33,7 +35,7 @@ void x86_64_IRQ_Handler(Registers *regs)
|
||||
log_warn(MODULE, "Unhandled IRQ %d...", irq);
|
||||
}
|
||||
|
||||
g_Driver->SendEndOfInterrupt(irq);
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -42,6 +44,7 @@ void x86_64_IRQ_Handler(Registers *regs)
|
||||
void x86_64_APIC_IRQ_Handler(Registers* regs)
|
||||
{
|
||||
uint8_t vector = regs->interrupt;
|
||||
lapic_eoi();
|
||||
|
||||
if (g_APICHandlers[vector] != NULL) {
|
||||
g_APICHandlers[vector](regs);
|
||||
@@ -49,7 +52,7 @@ void x86_64_APIC_IRQ_Handler(Registers* regs)
|
||||
log_warn("APIC", "Unhandled vector 0x%02x", vector);
|
||||
}
|
||||
|
||||
lapic_eoi(); // ← This is the key difference from PIC!
|
||||
// The lapic_eoi() call at the top of this handler is the key difference from the PIC path.
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ typedef struct __attribute__((packed)) {
|
||||
* • WAV – any PCM WAV (no compression): sample rate / channels / bit depth
|
||||
* are read from the "fmt " chunk automatically.
|
||||
* • Raw – no RIFF header; audio is assumed to be 48 000 Hz, 16-bit, stereo.
|
||||
* Override with pcm_play_raw() if your file has a different format.
|
||||
* Override with pcm_play_raw() if the file has a different format.
|
||||
*
|
||||
* The function allocates a physically-contiguous DMA buffer, reads the file,
|
||||
* starts playback, blocks until complete, then frees the buffer.
|
||||
|
||||
+182
-106
@@ -1,3 +1,4 @@
|
||||
// elf.c (now extracts AT_PHDR / AT_PHENT / AT_PHNUM + minor cleanups)
|
||||
#include "elf.h"
|
||||
#include "libk/stdio.h"
|
||||
#include "libk/string.h"
|
||||
@@ -8,148 +9,223 @@
|
||||
|
||||
extern uintptr_t g_hhdm_offset;
|
||||
|
||||
#define ELF_BUFFER_SIZE (1024 * 1024)
|
||||
|
||||
|
||||
|
||||
|
||||
bool ELF_Read(const char* path, void** entryPoint, struct pagemap *target_pagemap)
|
||||
bool ELF_Read(const char* path,
|
||||
void** entryPoint,
|
||||
struct pagemap *target_pagemap,
|
||||
uint64_t *out_tls_fs_base,
|
||||
uint64_t *out_phdr_va,
|
||||
uint16_t *out_phent,
|
||||
uint16_t *out_phnum)
|
||||
{
|
||||
uint32_t size;
|
||||
*out_tls_fs_base = 0;
|
||||
*out_phdr_va = 0;
|
||||
*out_phent = 0;
|
||||
*out_phnum = 0;
|
||||
|
||||
uint8_t* elf_buffer = kmalloc(ELF_BUFFER_SIZE);
|
||||
if (!elf_buffer) {
|
||||
printf("ELF: kmalloc failed\n");
|
||||
uint32_t inum = ext2_resolve_path(path);
|
||||
if (!inum) {
|
||||
printf("ELF: file not found: %s\n", path);
|
||||
return false;
|
||||
}
|
||||
|
||||
// ── load file ─────────────────────────────────────
|
||||
if (!ext2_read_file_from_root(path, elf_buffer, &size)) {
|
||||
printf("ELF: failed to read file\n");
|
||||
kfree(elf_buffer);
|
||||
ext2_inode_t inode;
|
||||
if (!ext2_read_inode(inum, &inode)) {
|
||||
printf("ELF: failed to read inode\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (size < sizeof(ELFHeader)) {
|
||||
uint64_t file_size = inode.i_size;
|
||||
if (file_size < sizeof(ELFHeader)) {
|
||||
printf("ELF: file too small\n");
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t buf_pages = ALIGN_UP(file_size, PAGE_SIZE) / PAGE_SIZE;
|
||||
void* buffer_phys = pmm_allocz(buf_pages);
|
||||
if (!buffer_phys) {
|
||||
printf("ELF: failed to allocate %lu pages for file buffer\n", buf_pages);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t* elf_buffer = (uint8_t*)((uintptr_t)buffer_phys + MEM_PHYS_OFFSET);
|
||||
|
||||
if (!ext2_read_file(&inode, elf_buffer)) {
|
||||
pmm_free(buffer_phys, buf_pages);
|
||||
return false;
|
||||
}
|
||||
|
||||
ELFHeader* header = (ELFHeader*)elf_buffer;
|
||||
|
||||
printf("=== ELF DEBUG ===\n");
|
||||
printf("Entry point VA = 0x%lx\n", header->ProgramEntryPosition);
|
||||
printf("PHDR offset = 0x%lx\n", header->ProgramHeaderTablePosition);
|
||||
printf("PHDR count = %u\n", header->ProgramHeaderTableEntryCount);
|
||||
printf("=== ELF DEBUG ===\n"
|
||||
"Entry=0x%lx PHDR@0x%lx count=%u type=0x%x arch=0x%x\n"
|
||||
"=== END ===\n",
|
||||
header->ProgramEntryPosition,
|
||||
header->ProgramHeaderTablePosition,
|
||||
header->ProgramHeaderTableEntryCount,
|
||||
header->Type,
|
||||
header->InstructionSet);
|
||||
|
||||
if (memcmp(header->Magic, ELF_MAGIC, 4) != 0 ||
|
||||
header->Bitness != ELF_BITNESS_64BIT ||
|
||||
header->Endianness != ELF_ENDIANNESS_LITTLE ||
|
||||
(header->Type != ELF_TYPE_EXECUTABLE && header->Type != ELF_TYPE_SHARED) ||
|
||||
header->InstructionSet != ELF_INSTRUCTION_SET_X64) {
|
||||
|
||||
|
||||
printf("=== END ELF DEBUG ===\n");
|
||||
|
||||
// ── validate ELF ──────────────────────────────────
|
||||
if (memcmp(header->Magic, ELF_MAGIC, 4) != 0) {
|
||||
printf("ELF: bad magic\n");
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header->Bitness != ELF_BITNESS_64BIT) {
|
||||
printf("ELF: not 64-bit\n");
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header->Endianness != ELF_ENDIANNESS_LITTLE) {
|
||||
printf("ELF: wrong endianness\n");
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header->Type != ELF_TYPE_EXECUTABLE) {
|
||||
printf("ELF: not executable\n");
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (header->InstructionSet != ELF_INSTRUCTION_SET_X64) {
|
||||
printf("ELF: wrong arch\n");
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
printf("ELF: unsupported/invalid header\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
*entryPoint = (void*)header->ProgramEntryPosition;
|
||||
|
||||
// ── program headers ───────────────────────────────
|
||||
// ------------------------------------------------------------------
|
||||
// Parse program headers – LOAD, TLS, and PHDR
|
||||
// ------------------------------------------------------------------
|
||||
uint64_t tls_vaddr = 0, tls_filesz = 0, tls_memsz = 0, tls_align = 8;
|
||||
uint8_t* tls_src = NULL;
|
||||
uint64_t phdr_vaddr = 0;
|
||||
|
||||
uint8_t* ph_table = elf_buffer + header->ProgramHeaderTablePosition;
|
||||
uint64_t phdr_table_end = header->ProgramHeaderTablePosition +
|
||||
(uint64_t)header->ProgramHeaderTableEntryCount *
|
||||
header->ProgramHeaderTableEntrySize;
|
||||
|
||||
for (uint32_t i = 0; i < header->ProgramHeaderTableEntryCount; i++)
|
||||
{
|
||||
ELFProgramHeader* ph = (ELFProgramHeader*)(ph_table +
|
||||
i * header->ProgramHeaderTableEntrySize);
|
||||
if (phdr_table_end > file_size) {
|
||||
printf("ELF: program header table extends beyond file\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (ph->Type != ELF_PROGRAM_TYPE_LOAD) {
|
||||
printf("LOAD segment: VA=0x%lx FileSz=0x%lx MemSz=0x%lx\n",
|
||||
ph->VirtualAddress, ph->FileSize, ph->MemorySize);
|
||||
for (uint32_t i = 0; i < header->ProgramHeaderTableEntryCount; i++) {
|
||||
ELFProgramHeader* ph = (ELFProgramHeader*)(ph_table + i * header->ProgramHeaderTableEntrySize);
|
||||
|
||||
// PT_PHDR
|
||||
if (ph->Type == ELF_PROGRAM_TYPE_PHDR) {
|
||||
phdr_vaddr = ph->VirtualAddress;
|
||||
printf("ELF: Found PT_PHDR VA=0x%lx\n", phdr_vaddr);
|
||||
// fall through
|
||||
}
|
||||
|
||||
// PT_TLS
|
||||
if (ph->Type == ELF_PROGRAM_TYPE_TLS) {
|
||||
tls_vaddr = ph->VirtualAddress;
|
||||
tls_filesz = ph->FileSize;
|
||||
tls_memsz = ph->MemorySize;
|
||||
tls_align = ph->Align ? ph->Align : 8;
|
||||
tls_src = elf_buffer + ph->Offset;
|
||||
|
||||
if (ph->Offset + ph->FileSize > file_size) {
|
||||
printf("ELF: PT_TLS segment data out of file bounds\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
printf("ELF: Found PT_TLS VA=0x%lx FileSz=0x%lx MemSz=0x%lx Align=0x%lx\n",
|
||||
tls_vaddr, tls_filesz, tls_memsz, tls_align);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t virt = ph->VirtualAddress;
|
||||
uint64_t offset = ph->Offset;
|
||||
uint64_t memsz = ph->MemorySize;
|
||||
uint64_t filesz = ph->FileSize;
|
||||
|
||||
if (memsz == 0)
|
||||
if (ph->Type != ELF_PROGRAM_TYPE_LOAD || ph->MemorySize == 0)
|
||||
continue;
|
||||
|
||||
// ── align to page boundary ─────────────────────
|
||||
uint64_t aligned_virt = ALIGN_DOWN(virt, PAGE_SIZE);
|
||||
uint64_t page_offset = virt & 0xFFF;
|
||||
uint64_t aligned_memsz = ALIGN_UP(memsz + page_offset, PAGE_SIZE);
|
||||
|
||||
uint64_t pages = aligned_memsz / PAGE_SIZE;
|
||||
|
||||
// Allocate physical pages
|
||||
uint64_t phys_base = (uint64_t)pmm_alloc(pages);
|
||||
if (!phys_base) {
|
||||
printf("ELF: pmm_alloc failed for %lu pages\n", pages);
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
if (ph->Offset + ph->FileSize > file_size) {
|
||||
printf("ELF: PT_LOAD segment data out of file bounds\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// Map with exact permissions
|
||||
uint64_t map_flags = PAGE_USER;
|
||||
if (ph->Flags & PF_R) map_flags |= PAGE_READ;
|
||||
if (ph->Flags & PF_W) map_flags |= PAGE_WRITE;
|
||||
if (!(ph->Flags & PF_X)) map_flags |= PAGE_NO_EXECUTE;
|
||||
|
||||
uint64_t virt = ph->VirtualAddress;
|
||||
uint64_t aligned_virt = ALIGN_DOWN(virt, PAGE_SIZE);
|
||||
uint64_t page_offset = virt & (PAGE_SIZE - 1);
|
||||
uint64_t aligned_memsz = ALIGN_UP(ph->MemorySize + page_offset, PAGE_SIZE);
|
||||
uint64_t pages = aligned_memsz / PAGE_SIZE;
|
||||
|
||||
void* seg_phys = pmm_allocz(pages);
|
||||
if (!seg_phys) {
|
||||
printf("ELF: failed to allocate physical pages for LOAD segment\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// ── map each page individually using new vmm_map_page ─────
|
||||
for (uint64_t p = 0; p < pages; p++) {
|
||||
uint64_t virt_addr = aligned_virt + p * PAGE_SIZE;
|
||||
uint64_t phys_addr = phys_base + p * PAGE_SIZE;
|
||||
|
||||
bool success = vmm_map_page(
|
||||
target_pagemap,
|
||||
virt_addr,
|
||||
phys_addr,
|
||||
PAGE_READ | PAGE_WRITE | PAGE_USER, // RW + User mode
|
||||
Size4KiB
|
||||
);
|
||||
|
||||
if (!success) {
|
||||
printf("ELF: failed to map page at 0x%lx\n", virt_addr);
|
||||
// TODO: cleanup previously mapped pages + free phys
|
||||
kfree(elf_buffer);
|
||||
return false;
|
||||
if (!vmm_map_page(target_pagemap,
|
||||
aligned_virt + p * PAGE_SIZE,
|
||||
(uintptr_t)seg_phys + p * PAGE_SIZE,
|
||||
map_flags,
|
||||
Size4KiB)) {
|
||||
pmm_free(seg_phys, pages);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
// ── copy segment data ───────────────────────────────
|
||||
uint8_t* dst = (uint8_t*)(phys_base + MEM_PHYS_OFFSET); // via HHDM
|
||||
uint8_t* src = elf_buffer + offset;
|
||||
|
||||
memcpy(dst + page_offset, src, filesz);
|
||||
|
||||
// ── zero BSS section ────────────────────────────────
|
||||
if (memsz > filesz) {
|
||||
memset(dst + page_offset + filesz, 0, memsz - filesz);
|
||||
uint8_t* dst = (uint8_t*)((uintptr_t)seg_phys + MEM_PHYS_OFFSET);
|
||||
memcpy(dst + page_offset, elf_buffer + ph->Offset, ph->FileSize);
|
||||
if (ph->MemorySize > ph->FileSize) {
|
||||
memset(dst + page_offset + ph->FileSize, 0,
|
||||
ph->MemorySize - ph->FileSize);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(elf_buffer);
|
||||
|
||||
uint64_t tls_size = tls_memsz ? ALIGN_UP(tls_memsz, tls_align) : 0ULL;
|
||||
|
||||
uint64_t tcb_va = TLS_BASE_VA + PAGE_SIZE;
|
||||
uint64_t tls_va = tcb_va - tls_size;
|
||||
|
||||
uint64_t page_va = ALIGN_DOWN(tls_va, PAGE_SIZE);
|
||||
uint64_t tcb_size = sizeof(TCB);
|
||||
uint64_t block_end_va = tcb_va + tcb_size;
|
||||
uint64_t block_end_page = ALIGN_UP(block_end_va, PAGE_SIZE);
|
||||
uint64_t map_pages = ((block_end_page - page_va) / PAGE_SIZE) + 8;
|
||||
if (map_pages == 0) map_pages = 1;
|
||||
|
||||
void* tls_phys = pmm_allocz(map_pages);
|
||||
if (!tls_phys) {
|
||||
printf("ELF: failed to allocate TLS/TCB pages\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
uint64_t tls_map_flags = PAGE_USER | PAGE_READ | PAGE_WRITE;
|
||||
for (uint64_t p = 0; p < map_pages; p++) {
|
||||
if (!vmm_map_page(target_pagemap,
|
||||
page_va + p * PAGE_SIZE,
|
||||
(uintptr_t)tls_phys + p * PAGE_SIZE,
|
||||
tls_map_flags,
|
||||
Size4KiB)) {
|
||||
pmm_free(tls_phys, map_pages);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t* base_hhdm = (uint8_t*)((uintptr_t)tls_phys + MEM_PHYS_OFFSET);
|
||||
|
||||
if (tls_size > 0) {
|
||||
uint8_t* tls_dst = base_hhdm + (tls_va - page_va);
|
||||
if (tls_filesz) memcpy(tls_dst, tls_src, tls_filesz);
|
||||
if (tls_memsz > tls_filesz)
|
||||
memset(tls_dst + tls_filesz, 0, tls_memsz - tls_filesz);
|
||||
}
|
||||
|
||||
TCB* tcb = (TCB*)(base_hhdm + (tcb_va - page_va));
|
||||
memset(tcb, 0, sizeof(TCB));
|
||||
tcb->self = (void*)tcb_va;
|
||||
tcb->tid = 1;
|
||||
|
||||
*out_tls_fs_base = tcb_va;
|
||||
|
||||
*out_phdr_va = tls_vaddr ? tls_vaddr : phdr_vaddr;
|
||||
*out_phent = header->ProgramHeaderTableEntrySize;
|
||||
*out_phnum = header->ProgramHeaderTableEntryCount;
|
||||
|
||||
printf("ELF: TLS/TCB setup complete TCB@0x%lx TLS@0x%lx FS=0x%lx\n"
|
||||
" PHDR@0x%lx PHENT=0x%x PHNUM=%u\n",
|
||||
tcb_va, tls_va, tcb_va, *out_phdr_va, *out_phent, *out_phnum);
|
||||
|
||||
pmm_free(buffer_phys, buf_pages);
|
||||
return true;
|
||||
|
||||
cleanup:
|
||||
pmm_free(buffer_phys, buf_pages);
|
||||
return false;
|
||||
}
|
||||
+64
-52
@@ -1,11 +1,22 @@
|
||||
// elf.h
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h> // size_t for TCB
|
||||
#include "mm/vmm.h"
|
||||
|
||||
// ELF magic and basic constants
|
||||
#define ELF_MAGIC ("\x7F" "ELF")
|
||||
|
||||
#include <stdint.h>
|
||||
// Standard ELF program header flags (bitfield)
|
||||
#define PF_X 0x00000001 // Execute
|
||||
#define PF_W 0x00000002 // Write
|
||||
#define PF_R 0x00000004 // Read
|
||||
|
||||
// Fixed canonical address for the initial thread's TLS + TCB block.
|
||||
// This lives in the upper half of the 47-bit user address space and
|
||||
// is not expected to overlap with normal LOAD segments (which are usually low).
|
||||
#define TLS_BASE_VA 0x00007FFF00000000ULL
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -18,13 +29,13 @@ typedef struct
|
||||
uint8_t _Padding[7];
|
||||
|
||||
uint16_t Type; // relocatable, executable, shared, core
|
||||
uint16_t InstructionSet; // architecture (was too small for real ELF, but kept)
|
||||
uint16_t InstructionSet; // architecture
|
||||
|
||||
uint32_t ELFVersion;
|
||||
|
||||
uint64_t ProgramEntryPosition; // FIXED (was 32-bit)
|
||||
uint64_t ProgramHeaderTablePosition; // FIXED
|
||||
uint64_t SectionHeaderTablePosition; // FIXED
|
||||
uint64_t ProgramEntryPosition;
|
||||
uint64_t ProgramHeaderTablePosition;
|
||||
uint64_t SectionHeaderTablePosition;
|
||||
|
||||
uint32_t Flags;
|
||||
|
||||
@@ -39,80 +50,81 @@ typedef struct
|
||||
|
||||
enum ELFBitness
|
||||
{
|
||||
ELF_BITNESS_32BIT = 1,
|
||||
ELF_BITNESS_64BIT = 2,
|
||||
ELF_BITNESS_32BIT = 1,
|
||||
ELF_BITNESS_64BIT = 2,
|
||||
};
|
||||
|
||||
enum ELFEndianness
|
||||
{
|
||||
ELF_ENDIANNESS_LITTLE = 1,
|
||||
ELF_ENDIANNESS_BIG = 2,
|
||||
ELF_ENDIANNESS_LITTLE = 1,
|
||||
ELF_ENDIANNESS_BIG = 2,
|
||||
};
|
||||
|
||||
enum ELFInstructionSet
|
||||
{
|
||||
ELF_INSTRUCTION_SET_NONE = 0,
|
||||
ELF_INSTRUCTION_SET_X86 = 3,
|
||||
ELF_INSTRUCTION_SET_ARM = 0x28,
|
||||
ELF_INSTRUCTION_SET_X64 = 0x3E,
|
||||
ELF_INSTRUCTION_SET_ARM64 = 0xB7,
|
||||
ELF_INSTRUCTION_SET_RISCV = 0xF3,
|
||||
ELF_INSTRUCTION_SET_NONE = 0,
|
||||
ELF_INSTRUCTION_SET_X86 = 3,
|
||||
ELF_INSTRUCTION_SET_ARM = 0x28,
|
||||
ELF_INSTRUCTION_SET_X64 = 0x3E,
|
||||
ELF_INSTRUCTION_SET_ARM64 = 0xB7,
|
||||
ELF_INSTRUCTION_SET_RISCV = 0xF3,
|
||||
};
|
||||
|
||||
enum ELFType
|
||||
{
|
||||
ELF_TYPE_RELOCATABLE = 1,
|
||||
ELF_TYPE_EXECUTABLE = 2,
|
||||
ELF_TYPE_SHARED = 3,
|
||||
ELF_TYPE_CORE = 4,
|
||||
ELF_TYPE_RELOCATABLE = 1,
|
||||
ELF_TYPE_EXECUTABLE = 2,
|
||||
ELF_TYPE_SHARED = 3,
|
||||
ELF_TYPE_CORE = 4,
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t Type;
|
||||
uint32_t Flags;
|
||||
uint64_t Offset;
|
||||
uint64_t VirtualAddress;
|
||||
uint64_t PhysicalAddress;
|
||||
uint64_t FileSize;
|
||||
uint64_t MemorySize;
|
||||
uint32_t Flags;
|
||||
uint64_t Align;
|
||||
|
||||
} ELFProgramHeader;
|
||||
} __attribute__((packed)) ELFProgramHeader;
|
||||
|
||||
enum ELFProgramType {
|
||||
// Program header table entry unused.
|
||||
ELF_PROGRAM_TYPE_NULL = 0,
|
||||
ELF_PROGRAM_TYPE_NULL = 0,
|
||||
ELF_PROGRAM_TYPE_LOAD = 1,
|
||||
ELF_PROGRAM_TYPE_DYNAMIC = 2,
|
||||
ELF_PROGRAM_TYPE_INTERP = 3,
|
||||
ELF_PROGRAM_TYPE_NOTE = 4,
|
||||
ELF_PROGRAM_TYPE_SHLIB = 5,
|
||||
ELF_PROGRAM_TYPE_PHDR = 6,
|
||||
ELF_PROGRAM_TYPE_TLS = 7,
|
||||
|
||||
// Loadable segment.
|
||||
ELF_PROGRAM_TYPE_LOAD = 1,
|
||||
|
||||
// Dynamic linking information.
|
||||
ELF_PROGRAM_TYPE_DYNAMIC = 2,
|
||||
|
||||
// Interpreter information.
|
||||
ELF_PROGRAM_TYPE_INTERP = 3,
|
||||
|
||||
// Auxiliary information.
|
||||
ELF_PROGRAM_TYPE_NOTE = 4,
|
||||
|
||||
// Reserved
|
||||
ELF_PROGRAM_TYPE_SHLIB = 5,
|
||||
|
||||
// Segment containing program header table itself.
|
||||
ELF_PROGRAM_TYPE_PHDR = 6,
|
||||
|
||||
// Thread-Local Storage template.
|
||||
ELF_PROGRAM_TYPE_TLS = 7,
|
||||
|
||||
// Reserved inclusive range. Operating system specific.
|
||||
ELF_PROGRAM_TYPE_LOOS = 0x60000000,
|
||||
ELF_PROGRAM_TYPE_HIOS = 0x6FFFFFFF,
|
||||
|
||||
// Reserved inclusive range. Processor specific.
|
||||
ELF_PROGRAM_TYPE_LOPROC = 0x70000000,
|
||||
ELF_PROGRAM_TYPE_HIPROC = 0x7FFFFFFF,
|
||||
// OS/processor reserved ranges (we ignore them)
|
||||
ELF_PROGRAM_TYPE_LOOS = 0x60000000,
|
||||
ELF_PROGRAM_TYPE_HIOS = 0x6FFFFFFF,
|
||||
ELF_PROGRAM_TYPE_LOPROC = 0x70000000,
|
||||
ELF_PROGRAM_TYPE_HIPROC = 0x7FFFFFFF,
|
||||
};
|
||||
|
||||
// Thread Control Block layout expected by mlibc.
|
||||
// Only the fields mlibc actually reads are populated; the rest stay zero.
|
||||
typedef struct {
|
||||
void* self; // 0x00 fs:0 (TCB self-pointer)
|
||||
size_t dtvSize; // 0x08
|
||||
void** dtvPointers; // 0x10
|
||||
int tid; // 0x18
|
||||
int didExit; // 0x1C
|
||||
uint8_t padding[8]; // 0x20
|
||||
uintptr_t stackCanary; // 0x28
|
||||
int cancelBits; // 0x30
|
||||
} TCB;
|
||||
|
||||
bool ELF_Read(const char* path, void** entryPoint, struct pagemap *target_pagemap);
|
||||
bool ELF_Read(const char* path,
|
||||
void** entryPoint,
|
||||
struct pagemap *target_pagemap,
|
||||
uint64_t *out_tls_fs_base,
|
||||
uint64_t *out_phdr_va, // AT_PHDR
|
||||
uint16_t *out_phent, // AT_PHENT
|
||||
uint16_t *out_phnum); // AT_PHNUM
|
||||
@@ -344,6 +344,8 @@ bool ext2_read_inode_internal(uint32_t inum, ext2_inode_t* out) {
|
||||
if (!ext2_read_block_raw(gdt[g].bg_inode_table + block_off, buf)) {
|
||||
kfree(buf); return false;
|
||||
}
|
||||
printf("ext2_read_inode: inum=%u group=%u idx=%u block_off=%u inode_off=%u\n",
|
||||
inum + 1, g, idx, block_off, inode_off);
|
||||
memcpy(out, buf + inode_off * sb.s_inode_size, sizeof(ext2_inode_t));
|
||||
kfree(buf);
|
||||
return true;
|
||||
@@ -950,16 +952,21 @@ bool ext2_read_root_dir(void) {
|
||||
|
||||
bool ext2_read_file_from_root_internal(const char* name, uint8_t* buf, uint32_t* size) {
|
||||
ext2_inode_t root;
|
||||
printf("EXT2: reading file from root: %s\n", name);
|
||||
if (!ext2_read_inode_internal(2, &root)) return false;
|
||||
printf("EXT2: root inode: size=%u blocks=%u\n", root.i_size, root.i_blocks);
|
||||
|
||||
uint32_t inum;
|
||||
if (!ext2_find_in_dir_internal(&root, name, &inum)) {
|
||||
printf("EXT2: not found: %s\n", name);
|
||||
return false;
|
||||
}
|
||||
printf("EXT2: found in root: inum=%u\n", inum);
|
||||
ext2_inode_t fi;
|
||||
if (!ext2_read_inode_internal(inum, &fi)) return false;
|
||||
printf("EXT2: file inode: size=%u blocks=%u\n", fi.i_size, fi.i_blocks);
|
||||
*size = fi.i_size;
|
||||
printf("EXT2: read file: size=%u\n", *size);
|
||||
return ext2_read_file_internal(&fi, buf);
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -9,7 +9,7 @@ static const uint32_t g_LogSeverityColors[] =
|
||||
[LVL_INFO] = 0xFFFFFF, // white
|
||||
[LVL_WARN] = 0xFFFF00, // yellow
|
||||
[LVL_ERROR] = 0xFF0000, // red
|
||||
[LVL_CRITICAL] = 0xFFFFFF, // white (can do red background separately if you want)
|
||||
[LVL_CRITICAL] = 0xFFFFFF, // white
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,143 @@
|
||||
#define EPERM 1 /* Operation not permitted */
|
||||
#define ENOENT 2 /* No such file or directory */
|
||||
#define ESRCH 3 /* No such process */
|
||||
#define EINTR 4 /* Interrupted system call */
|
||||
#define EIO 5 /* I/O error */
|
||||
#define ENXIO 6 /* No such device or address */
|
||||
#define E2BIG 7 /* Argument list too long */
|
||||
#define ENOEXEC 8 /* Exec format error */
|
||||
#define EBADF 9 /* Bad file descriptor */
|
||||
#define ECHILD 10 /* No child processes */
|
||||
#define EAGAIN 11 /* Try again / resource temporarily unavailable */
|
||||
#define ENOMEM 12 /* Out of memory */
|
||||
#define EACCES 13 /* Permission denied */
|
||||
#define EFAULT 14 /* Bad address */
|
||||
#define ENOTBLK 15 /* Block device required */
|
||||
#define EBUSY 16 /* Device or resource busy */
|
||||
#define EEXIST 17 /* File exists */
|
||||
#define EXDEV 18 /* Cross-device link */
|
||||
#define ENODEV 19 /* No such device */
|
||||
#define ENOTDIR 20 /* Not a directory */
|
||||
#define EISDIR 21 /* Is a directory */
|
||||
#define EINVAL 22 /* Invalid argument */
|
||||
#define ENFILE 23 /* File table overflow */
|
||||
#define EMFILE 24 /* Too many open files */
|
||||
#define ENOTTY 25 /* Not a typewriter / inappropriate ioctl */
|
||||
#define ETXTBSY 26 /* Text file busy */
|
||||
#define EFBIG 27 /* File too large */
|
||||
#define ENOSPC 28 /* No space left on device */
|
||||
#define ESPIPE 29 /* Illegal seek */
|
||||
#define EROFS 30 /* Read-only file system */
|
||||
#define EMLINK 31 /* Too many links */
|
||||
#define EPIPE 32 /* Broken pipe */
|
||||
#define EDOM 33 /* Math argument out of domain of func */
|
||||
#define ERANGE 34 /* Math result not representable */
|
||||
#define EDEADLK 35 /* Resource deadlock would occur */
|
||||
#define ENAMETOOLONG 36 /* File name too long */
|
||||
#define ENOLCK 37 /* No record locks available */
|
||||
#define ENOSYS 38 /* Function not implemented */
|
||||
#define ENOTEMPTY 39 /* Directory not empty */
|
||||
#define ELOOP 40 /* Too many symbolic links encountered */
|
||||
|
||||
#define EWOULDBLOCK EAGAIN /* Operation would block */
|
||||
|
||||
#define ENOMSG 42 /* No message of desired type */
|
||||
#define EIDRM 43 /* Identifier removed */
|
||||
#define ECHRNG 44 /* Channel number out of range */
|
||||
#define EL2NSYNC 45 /* Level 2 not synchronized */
|
||||
#define EL3HLT 46 /* Level 3 halted */
|
||||
#define EL3RST 47 /* Level 3 reset */
|
||||
#define ELNRNG 48 /* Link number out of range */
|
||||
#define EUNATCH 49 /* Protocol driver not attached */
|
||||
#define ENOCSI 50 /* No CSI structure available */
|
||||
#define EL2HLT 51 /* Level 2 halted */
|
||||
#define EBADE 52 /* Invalid exchange */
|
||||
#define EBADR 53 /* Invalid request descriptor */
|
||||
#define EXFULL 54 /* Exchange full */
|
||||
#define ENOANO 55 /* No anode */
|
||||
#define EBADRQC 56 /* Invalid request code */
|
||||
#define EBADSLT 57 /* Invalid slot */
|
||||
|
||||
#define EDEADLOCK EDEADLK /* Alias for deadlock */
|
||||
|
||||
#define EBFONT 59 /* Bad font file format */
|
||||
#define ENOSTR 60 /* Device not a stream */
|
||||
#define ENODATA 61 /* No data available */
|
||||
#define ETIME 62 /* Timer expired */
|
||||
#define ENOSR 63 /* Out of streams resources */
|
||||
#define ENONET 64 /* Machine is not on the network */
|
||||
#define ENOPKG 65 /* Package not installed */
|
||||
#define EREMOTE 66 /* Object is remote */
|
||||
#define ENOLINK 67 /* Link has been severed */
|
||||
#define EADV 68 /* Advertise error */
|
||||
#define ESRMNT 69 /* Srmount error */
|
||||
#define ECOMM 70 /* Communication error on send */
|
||||
#define EPROTO 71 /* Protocol error */
|
||||
#define EMULTIHOP 72 /* Multihop attempted */
|
||||
#define EDOTDOT 73 /* RFS specific error */
|
||||
#define EBADMSG 74 /* Not a data message */
|
||||
#define EOVERFLOW 75 /* Value too large for defined data type */
|
||||
#define ENOTUNIQ 76 /* Name not unique on network */
|
||||
#define EBADFD 77 /* File descriptor in bad state */
|
||||
#define EREMCHG 78 /* Remote address changed */
|
||||
|
||||
#define ELIBACC 79 /* Can not access a needed shared library */
|
||||
#define ELIBBAD 80 /* Accessing a corrupted shared library */
|
||||
#define ELIBSCN 81 /* lib section in a.out corrupted */
|
||||
#define ELIBMAX 82 /* Attempting to link in too many libs */
|
||||
#define ELIBEXEC 83 /* Cannot exec a shared library directly */
|
||||
|
||||
#define EILSEQ 84 /* Illegal byte sequence */
|
||||
#define ERESTART 85 /* Interrupted system call should be restarted */
|
||||
#define ESTRPIPE 86 /* Streams pipe error */
|
||||
#define EUSERS 87 /* Too many users */
|
||||
|
||||
#define ENOTSOCK 88 /* Socket operation on non-socket */
|
||||
#define EDESTADDRREQ 89 /* Destination address required */
|
||||
#define EMSGSIZE 90 /* Message too long */
|
||||
#define EPROTOTYPE 91 /* Protocol wrong type for socket */
|
||||
#define ENOPROTOOPT 92 /* Protocol not available */
|
||||
#define EPROTONOSUPPORT 93 /* Protocol not supported */
|
||||
#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
|
||||
#define EOPNOTSUPP 95 /* Operation not supported */
|
||||
#define ENOTSUP EOPNOTSUPP
|
||||
|
||||
#define EPFNOSUPPORT 96 /* Protocol family not supported */
|
||||
#define EAFNOSUPPORT 97 /* Address family not supported */
|
||||
#define EADDRINUSE 98 /* Address already in use */
|
||||
#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
|
||||
#define ENETDOWN 100 /* Network is down */
|
||||
#define ENETUNREACH 101 /* Network is unreachable */
|
||||
#define ENETRESET 102 /* Network dropped connection */
|
||||
#define ECONNABORTED 103 /* Software caused connection abort */
|
||||
#define ECONNRESET 104 /* Connection reset by peer */
|
||||
#define ENOBUFS 105 /* No buffer space available */
|
||||
#define EISCONN 106 /* Transport endpoint is already connected */
|
||||
#define ENOTCONN 107 /* Transport endpoint is not connected */
|
||||
#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
|
||||
#define ETOOMANYREFS 109 /* Too many references */
|
||||
#define ETIMEDOUT 110 /* Connection timed out */
|
||||
#define ECONNREFUSED 111 /* Connection refused */
|
||||
#define EHOSTDOWN 112 /* Host is down */
|
||||
#define EHOSTUNREACH 113 /* No route to host */
|
||||
#define EALREADY 114 /* Operation already in progress */
|
||||
#define EINPROGRESS 115 /* Operation now in progress */
|
||||
#define ESTALE 116 /* Stale file handle */
|
||||
#define EUCLEAN 117 /* Structure needs cleaning */
|
||||
#define ENOTNAM 118 /* Not a XENIX named type file */
|
||||
#define ENAVAIL 119 /* No XENIX semaphores available */
|
||||
#define EISNAM 120 /* Is a named type file */
|
||||
#define EREMOTEIO 121 /* Remote I/O error */
|
||||
#define EDQUOT 122 /* Quota exceeded */
|
||||
#define ENOMEDIUM 123 /* No medium found */
|
||||
#define EMEDIUMTYPE 124 /* Wrong medium type */
|
||||
#define ECANCELED 125 /* Operation canceled */
|
||||
#define ENOKEY 126 /* Required key not available */
|
||||
#define EKEYEXPIRED 127 /* Key has expired */
|
||||
#define EKEYREVOKED 128 /* Key has been revoked */
|
||||
#define EKEYREJECTED 129 /* Key was rejected by service */
|
||||
|
||||
#define EOWNERDEAD 130 /* Owner died */
|
||||
#define ENOTRECOVERABLE 131 /* State not recoverable */
|
||||
#define ERFKILL 132 /* Operation not possible due to RF-kill */
|
||||
#define EHWPOISON 133 /* Memory page has hardware error */
|
||||
+1
-1
@@ -14,7 +14,7 @@ static const uint32_t g_LogSeverityColors[] =
|
||||
[LVL_INFO] = 0xFFFFFF, // white
|
||||
[LVL_WARN] = 0xFFFF00, // yellow
|
||||
[LVL_ERROR] = 0xFF0000, // red
|
||||
[LVL_CRITICAL] = 0xFFFFFF, // white (can do red background separately if you want)
|
||||
[LVL_CRITICAL] = 0xFFFFFF, // white
|
||||
};
|
||||
|
||||
static spinlock_t s_printf_lock = SPINLOCK_INIT;
|
||||
|
||||
+90
@@ -98,6 +98,68 @@ static void hcf(void) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Execute the CPUID instruction and hand back all four result registers.
 *
 * leaf     value placed in EAX before the instruction
 * subleaf  value placed in ECX before the instruction
 * eax, ebx, ecx, edx
 *          out-parameters receiving the corresponding registers; each must
 *          point to writable storage (they are dereferenced unconditionally)
 */
static inline void cpuid(uint32_t leaf, uint32_t subleaf,
                         uint32_t *eax, uint32_t *ebx,
                         uint32_t *ecx, uint32_t *edx) {
    /* Spelled __asm__ rather than asm so the helper also builds in strict
     * ISO C modes, matching the __asm__ spelling used elsewhere in this file. */
    __asm__ volatile ("cpuid"
                      : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
                      : "a"(leaf), "c"(subleaf));
}
|
||||
|
||||
/*
 * Tell whether CPUID leaf 7 may be queried on this CPU.
 *
 * Returns 1 when the EAX value reported by CPUID leaf 0 is at least 7,
 * otherwise 0.
 */
int cpu_has_leaf7() {
    uint32_t max_leaf, ignored_b, ignored_c, ignored_d;

    cpuid(0, 0, &max_leaf, &ignored_b, &ignored_c, &ignored_d);

    if (max_leaf < 7)
        return 0;
    return 1;
}
|
||||
|
||||
/*
 * Tell whether the CPU advertises FSGSBASE.
 *
 * Returns 0 when leaf 7 is unavailable; otherwise returns 1 or 0 according
 * to bit 0 of EBX from CPUID leaf 7, subleaf 0.
 */
int cpu_has_fsgsbase() {
    uint32_t eax, ebx, ecx, edx;

    if (cpu_has_leaf7() == 0)
        return 0;

    cpuid(7, 0, &eax, &ebx, &ecx, &edx);

    return (ebx & 1u) ? 1 : 0;
}
|
||||
|
||||
/* Return the current contents of control register CR4. */
static inline uint64_t read_cr4(void) {
    uint64_t cr4;

    __asm__ volatile ("mov %%cr4, %0" : "=r"(cr4));

    return cr4;
}
|
||||
|
||||
|
||||
/*
 * Load `val` into control register CR4.
 *
 * The "memory" clobber stops the compiler from moving memory accesses across
 * the write, since changing CR4 alters how subsequent instructions behave.
 * Spelled __asm__ to match read_cr4() and to build in strict ISO C modes.
 */
static inline void write_cr4(uint64_t val) {
    __asm__ volatile ("mov %0, %%cr4" :: "r"(val) : "memory");
}
|
||||
|
||||
/* Return the current contents of control register CR0. */
static inline uint64_t read_cr0(void) {
    uint64_t cr0;

    __asm__ volatile ("mov %%cr0, %0" : "=r"(cr0));

    return cr0;
}
|
||||
|
||||
/*
 * Load `val` into control register CR0.
 *
 * The "memory" clobber stops the compiler from moving memory accesses across
 * the write, since changing CR0 alters how subsequent instructions behave.
 */
static inline void write_cr0(uint64_t val) {
    __asm__ volatile ("mov %0, %%cr0" :: "r"(val) : "memory");
}
|
||||
|
||||
|
||||
#define CR4_FSGSBASE (1ULL << 16)
|
||||
|
||||
|
||||
|
||||
void enable_fsgsbase_if_supported() {
|
||||
if (!cpu_has_fsgsbase()) {
|
||||
// fallback: don't use wrfsbase
|
||||
printf("FSGSBASE not supported, skipping wrfsbase/wrgsbase\n");
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t cr4 = read_cr4();
|
||||
cr4 |= CR4_FSGSBASE;
|
||||
write_cr4(cr4);
|
||||
}
|
||||
|
||||
extern struct kernel_pagemap;
|
||||
uint64_t g_rsdp_phys;
|
||||
|
||||
@@ -128,6 +190,31 @@ static uacpi_interrupt_ret handle_power_button(uacpi_handle ctx) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Configure CR0/CR4 so x87 and SSE instructions can execute, then reset
 * the x87 FPU.
 *
 * CR0: EM (bit 2) and TS (bit 3) are cleared, MP (bit 1) is set.
 * CR4: OSFXSR (bit 9) and OSXMMEXCPT (bit 10) are set.
 */
void init_simd(void) {
    enum {
        CR0_MP_BIT         = 1,
        CR0_EM_BIT         = 2,
        CR0_TS_BIT         = 3,
        CR4_OSFXSR_BIT     = 9,
        CR4_OSXMMEXCPT_BIT = 10
    };

    uint64_t cr0 = read_cr0();
    uint64_t cr4 = read_cr4();

    /* No emulation, no pending task-switch trap, monitor coprocessor on. */
    cr0 &= ~((1ULL << CR0_EM_BIT) | (1ULL << CR0_TS_BIT));
    cr0 |= 1ULL << CR0_MP_BIT;

    /* FXSAVE/FXRSTOR + SSE, and SSE floating-point exceptions. */
    cr4 |= (1ULL << CR4_OSFXSR_BIT) | (1ULL << CR4_OSXMMEXCPT_BIT);

    write_cr0(cr0);
    write_cr4(cr4);

    /* Reset the x87 FPU now that it is usable. */
    __asm__ volatile ("fninit");
}
|
||||
|
||||
|
||||
void kmain(void) {
|
||||
if (LIMINE_BASE_REVISION_SUPPORTED(limine_base_revision) == false) {
|
||||
hcf();
|
||||
@@ -354,6 +441,9 @@ void kmain(void) {
|
||||
|
||||
sched_init();
|
||||
|
||||
enable_fsgsbase_if_supported();
|
||||
init_simd();
|
||||
|
||||
start_userspace();
|
||||
|
||||
sched_yield();
|
||||
|
||||
+13
-3
@@ -83,10 +83,20 @@ void *pmm_alloc(size_t pages) {
|
||||
|
||||
void *pmm_allocz(size_t pages) {
|
||||
void *ret = pmm_alloc(pages);
|
||||
|
||||
if (ret) {
|
||||
memset((void *)((uintptr_t)ret + MEM_PHYS_OFFSET), 0, pages * PAGE_SIZE); // this is at fault for the page fault
|
||||
if (!ret) return NULL;
|
||||
|
||||
uintptr_t vaddr = (uintptr_t)ret + MEM_PHYS_OFFSET;
|
||||
// Sanity: make sure we're not zeroing something ridiculous
|
||||
if (vaddr < MEM_PHYS_OFFSET || vaddr > MEM_PHYS_OFFSET + 0x8000000000ULL) {
|
||||
printf("PMM: allocz addr 0x%lx looks wrong!\n", vaddr);
|
||||
pmm_free(ret, pages);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uint64_t *p = (uint64_t *)vaddr;
|
||||
for (size_t i = 0; i < (pages * PAGE_SIZE) / 8; i++)
|
||||
p[i] = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -36,7 +36,7 @@ static inline struct slab *slab_for(size_t size) {
|
||||
|
||||
static void create_slab(struct slab *slab, size_t ent_size) {
|
||||
spinlock_init(&slab->lock);
|
||||
slab->first_free = (void **)((uint64_t)pmm_alloc(1) + MEM_PHYS_OFFSET);
|
||||
slab->first_free = (void **)((uint64_t)pmm_allocz(1) + MEM_PHYS_OFFSET);
|
||||
slab->ent_size = ent_size;
|
||||
|
||||
size_t header_offset = ALIGN_UP(sizeof(struct slab_header), ent_size);
|
||||
|
||||
+10
-1
@@ -320,4 +320,13 @@ fail:
|
||||
spinlock_drop(&pagemap->lock);
|
||||
printf("Invalid Phys!\n");
|
||||
return INVALID_PHYS;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Hand out a virtual address range of at least `len` bytes.
 *
 * This is a simple bump allocator: a single static cursor starts at
 * 0x700050000000 and advances by `len` rounded up to 16 MiB on every call.
 * `pm` is not consulted, so the cursor is shared by every caller.
 *
 * NOTE(review): nothing here checks the cursor against an upper bound or
 * against existing mappings — confirm callers can tolerate that.
 */
uintptr_t find_free_vaddr(struct pagemap *pm, size_t len) {
    static uintptr_t cursor = 0x700050000000ULL;

    const uintptr_t start = cursor;

    cursor = start + ALIGN_UP(len, 0x1000000ULL);

    return start;
}
|
||||
|
||||
+3
-1
@@ -42,4 +42,6 @@ bool vmm_map_page(struct pagemap *pagemap, uint64_t virt, uint64_t phys,
|
||||
uint64_t flags, enum page_size pg_size);
|
||||
uint64_t vmm_virt_to_phys(struct pagemap *pagemap, uint64_t virt);
|
||||
uint64_t *vmm_virt_to_pte(struct pagemap *pagemap, uintptr_t virt_addr,
|
||||
bool allocate);
|
||||
bool allocate);
|
||||
bool vmm_unmap_page(struct pagemap *pagemap, uintptr_t virt, bool locked);
|
||||
uintptr_t find_free_vaddr(struct pagemap *pm, size_t len);
|
||||
@@ -0,0 +1,84 @@
|
||||
#include "futex.h"
|
||||
#include "mm/memory.h"
|
||||
#include "string.h"
|
||||
#include "sched/scheduler.h"
|
||||
#include "libk/stdio.h"
|
||||
|
||||
#define FUTEX_BUCKETS 256
|
||||
|
||||
struct futex_waiter {
|
||||
task_t *task;
|
||||
int *uaddr;
|
||||
struct futex_waiter *next;
|
||||
};
|
||||
|
||||
static struct futex_waiter *g_futex_table[FUTEX_BUCKETS];
|
||||
|
||||
static inline uint32_t futex_hash(int *uaddr) {
|
||||
return ((uintptr_t)uaddr >> 3) & (FUTEX_BUCKETS - 1);
|
||||
}
|
||||
|
||||
int futex_wait(int *uaddr, int expected)
|
||||
{
|
||||
if (!uaddr) return -1;
|
||||
|
||||
/* 1. check value in user memory */
|
||||
if (*uaddr != expected)
|
||||
return -1;
|
||||
|
||||
uint32_t h = futex_hash(uaddr);
|
||||
|
||||
struct futex_waiter *w = kmalloc(sizeof(*w));
|
||||
if (!w) return -1;
|
||||
|
||||
w->task = sched_current();
|
||||
w->uaddr = uaddr;
|
||||
|
||||
/* 2. insert into bucket */
|
||||
w->next = g_futex_table[h];
|
||||
g_futex_table[h] = w;
|
||||
|
||||
/* 3. block task */
|
||||
sched_block(TASK_INTERRUPTIBLE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int futex_wake(int *uaddr, int count)
|
||||
{
|
||||
if (!uaddr || count <= 0)
|
||||
return 0;
|
||||
|
||||
uint32_t h = futex_hash(uaddr);
|
||||
|
||||
struct futex_waiter **prev = &g_futex_table[h];
|
||||
struct futex_waiter *cur = g_futex_table[h];
|
||||
|
||||
int woken = 0;
|
||||
|
||||
while (cur && woken < count) {
|
||||
if (cur->uaddr == uaddr) {
|
||||
|
||||
task_t *task = cur->task;
|
||||
|
||||
/* remove from list */
|
||||
*prev = cur->next;
|
||||
|
||||
struct futex_waiter *tmp = cur;
|
||||
cur = cur->next;
|
||||
|
||||
kfree(tmp);
|
||||
|
||||
/* wake task */
|
||||
sched_wake(task);
|
||||
|
||||
woken++;
|
||||
continue;
|
||||
}
|
||||
|
||||
prev = &cur->next;
|
||||
cur = cur->next;
|
||||
}
|
||||
|
||||
return woken;
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "sched/scheduler.h"
|
||||
|
||||
#define FUTEX_WAIT 0
|
||||
#define FUTEX_WAKE 1
|
||||
|
||||
int futex_wait(int *uaddr, int expected);
|
||||
int futex_wake(int *uaddr, int count);
|
||||
+70
-75
@@ -6,6 +6,8 @@
|
||||
#include "arch/x86_64/sys/pit.h"
|
||||
#include "string.h"
|
||||
|
||||
#define IA32_FS_BASE 0xC0000100
|
||||
|
||||
/* =====================================================================
|
||||
* Forward declarations for GDT/TSS (defined in gdt.c)
|
||||
* ===================================================================== */
|
||||
@@ -329,43 +331,22 @@ static void kthread_trampoline(void) {
|
||||
sched_exit(0);
|
||||
}
|
||||
|
||||
static void user_task_trampoline(void) {
|
||||
x86_64_EnableInterrupts();
|
||||
/*
 * Set the FS segment base of the current CPU to `base`.
 *
 * The value is first made canonical by sign-extending bit 47 into bits
 * 63..48; a non-canonical FS base would raise #GP. Checking only whether
 * the top 16 bits are all-zero or all-one is not enough, because e.g.
 * 0x0000800000000000 has a zero top word yet is still non-canonical.
 *
 * The base is written through the IA32_FS_BASE MSR rather than WRFSBASE:
 * WRFSBASE raises #UD unless CR4.FSGSBASE is set, and that bit is only
 * enabled on CPUs that advertise the feature, whereas the MSR write works
 * on every x86_64 CPU in ring 0.
 */
void set_fs_base(uint64_t base) {
    const uint32_t fs_base_msr = 0xC0000100u;   /* IA32_FS_BASE */

    /* Sign-extend from bit 47 (common for user-space TLS pointers). */
    if (base & (1ULL << 47))
        base |= 0xFFFFULL << 48;        /* negative canonical */
    else
        base &= (1ULL << 48) - 1;       /* positive canonical */

    /* WRMSR takes the MSR index in ECX and the value in EDX:EAX. */
    __asm__ volatile ("wrmsr"
                      :
                      : "c"(fs_base_msr),
                        "a"((uint32_t)base),
                        "d"((uint32_t)(base >> 32))
                      : "memory");
}
|
||||
|
||||
|
||||
/* =====================================================================
|
||||
* Kernel stack setup for a new task
|
||||
*
|
||||
@@ -444,13 +425,18 @@ task_t *sched_create_kthread(const char *name,
|
||||
task->vruntime = g_runqueue.min_vruntime;
|
||||
|
||||
sched_enqueue(task);
|
||||
printf("[sched] kthread '%s' pid=%d created\n", task->name, task->pid);
|
||||
//printf("[sched] kthread '%s' pid=%d created\n", task->name, task->pid);
|
||||
return task;
|
||||
}
|
||||
|
||||
task_t *sched_create_user_task(const char *name,
|
||||
uint64_t entry_rip, uint64_t user_rsp,
|
||||
struct pagemap *pm)
|
||||
uint64_t entry_rip,
|
||||
uint64_t user_rsp,
|
||||
struct pagemap *pm,
|
||||
uint64_t tls_fs_base,
|
||||
uint64_t phdr_va,
|
||||
uint16_t phent,
|
||||
uint16_t phnum)
|
||||
{
|
||||
task_t *task = alloc_task(name, true);
|
||||
if (!task) return NULL;
|
||||
@@ -458,8 +444,11 @@ task_t *sched_create_user_task(const char *name,
|
||||
task->pagemap = pm;
|
||||
task->user_entry = entry_rip;
|
||||
task->user_stack_top= user_rsp;
|
||||
task->tls_fs_base = tls_fs_base;
|
||||
task->phdr_va = phdr_va;
|
||||
task->phent = phent;
|
||||
task->phnum = phnum;
|
||||
|
||||
/* CR3 = physical address of PML4 */
|
||||
task->ctx.cr3 = (uint64_t)pm->top_level - MEM_PHYS_OFFSET;
|
||||
|
||||
setup_initial_kstack(task, user_task_trampoline);
|
||||
@@ -467,9 +456,11 @@ task_t *sched_create_user_task(const char *name,
|
||||
task->time_slice = calc_timeslice(task);
|
||||
task->vruntime = g_runqueue.min_vruntime;
|
||||
|
||||
for (size_t i = 256; i < 512; i++) {
|
||||
pm->top_level[i] = kernel_pagemap->top_level[i];
|
||||
}
|
||||
|
||||
sched_enqueue(task);
|
||||
printf("[sched] user task '%s' pid=%d created, entry=0x%lx\n",
|
||||
task->name, task->pid, entry_rip);
|
||||
return task;
|
||||
}
|
||||
|
||||
@@ -556,6 +547,15 @@ void schedule(void) {
|
||||
task_t *prev = g_runqueue.current;
|
||||
task_t *next = pick_next_task(&g_runqueue);
|
||||
|
||||
if (next->is_user) {
|
||||
if (next->tls_fs_base != 0) {
|
||||
set_fs_base(next->tls_fs_base);
|
||||
} else {
|
||||
printf("Warning: user task '%s' has no TLS FS base set; leaving FS at 0\n", next->name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (next == prev || next == NULL) {
|
||||
/* Nothing to switch to; keep running current task. */
|
||||
spinlock_drop(&g_runqueue.lock);
|
||||
@@ -600,6 +600,11 @@ void schedule(void) {
|
||||
spinlock_drop(&g_runqueue.lock);
|
||||
|
||||
/* ---- Context switch -------------------------------------------- */
|
||||
//printf("[sched] switching from '%s' (pid=%d) to '%s' (pid=%d)\n",
|
||||
// prev->name, prev->pid, next->name, next->pid);
|
||||
if (next->is_user) {
|
||||
//printf("switching to user task, fs_base=0x%lx\n", next->tls_fs_base);
|
||||
}
|
||||
sched_context_switch(&prev->ctx, &next->ctx);
|
||||
|
||||
/*
|
||||
@@ -705,8 +710,8 @@ void sched_exit(int exit_code) {
|
||||
if (self->parent)
|
||||
task_send_signal(self->parent, SIGCHLD);
|
||||
|
||||
printf("[sched] task '%s' pid=%d exited with code %d\n",
|
||||
self->name, self->pid, exit_code);
|
||||
//printf("[sched] task '%s' pid=%d exited with code %d\n",
|
||||
// self->name, self->pid, exit_code);
|
||||
|
||||
/* Hand off to someone else; we will never return. */
|
||||
schedule();
|
||||
@@ -791,65 +796,55 @@ void task_handle_pending_signals(void) {
|
||||
task_t *self = g_current_task;
|
||||
if (!self) return;
|
||||
|
||||
/* Only run signal handling when we are about to return to user mode.
|
||||
* Kernel threads can still get synchronous handlers, but we avoid
|
||||
* unnecessary work / possible recursion on kernel tasks. */
|
||||
if (!self->is_user && !(self->pending_signals & ~self->signal_mask))
|
||||
return;
|
||||
|
||||
while (self->pending_signals & ~self->signal_mask) {
|
||||
/* Find the lowest-numbered pending, unblocked signal */
|
||||
uint64_t deliverable = self->pending_signals & ~self->signal_mask;
|
||||
int signum = __builtin_ctzll(deliverable) + 1; /* +1: bit 0 = sig 1 */
|
||||
int signum = __builtin_ctzll(deliverable) + 1;
|
||||
if (signum >= _NSIG) break;
|
||||
|
||||
/* Clear the pending bit */
|
||||
self->pending_signals &= ~(1ULL << (signum - 1));
|
||||
|
||||
sighandler_t handler = self->sigactions[signum].sa_handler;
|
||||
|
||||
if (handler == SIG_IGN) {
|
||||
/* Explicitly ignored */
|
||||
if (signum == SIGCHLD) continue; /* common: reap silently */
|
||||
if (signum == SIGCHLD) continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
} else if (handler != SIG_DFL) {
|
||||
/*
|
||||
* User-defined handler.
|
||||
*
|
||||
* A full POSIX implementation would build a signal frame on
|
||||
* the user stack and set registers so that iretq delivers
|
||||
* the signal; that requires knowing the saved RFLAGS/RIP
|
||||
* from the ISR frame. We leave this as a TODO and just
|
||||
* call the handler directly for kernel threads.
|
||||
*
|
||||
* For user tasks this is the point where you would push a
|
||||
* ucontext_t / sigframe onto the user stack and adjust the
|
||||
* saved user RIP in the ISR frame.
|
||||
*/
|
||||
if (handler != SIG_DFL) {
|
||||
if (!self->is_user) {
|
||||
handler(signum);
|
||||
handler(signum); /* kernel thread */
|
||||
} else {
|
||||
/* TODO: build user-space signal frame */
|
||||
printf("[signal] TODO: deliver signal %d to user task '%s'\n",
|
||||
signum, self->name);
|
||||
/* TODO: proper sigframe + adjust trap frame on kernel stack */
|
||||
printf("[signal] TODO: deliver signal %d to user task '%s' (pid=%d)\n",
|
||||
signum, self->name, self->pid);
|
||||
/* For now fall through to default action so we don't silently ignore */
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
/* SIG_DFL */
|
||||
/* Default action (also used for user tasks when no handler is installed) */
|
||||
if (handler == SIG_DFL || self->is_user) {
|
||||
switch (default_action(signum)) {
|
||||
case SIG_ACTION_TERM:
|
||||
case SIG_ACTION_CORE:
|
||||
printf("[signal] task '%s' pid=%d killed by signal %d\n",
|
||||
self->name, self->pid, signum);
|
||||
sched_exit(128 + signum);
|
||||
break; /* unreachable */
|
||||
sched_exit(128 + signum); /* does not return */
|
||||
break;
|
||||
|
||||
case SIG_ACTION_STOP:
|
||||
self->state = TASK_STOPPED;
|
||||
/* Notify parent */
|
||||
if (self->parent) task_send_signal(self->parent, SIGCHLD);
|
||||
sched_block(TASK_STOPPED);
|
||||
if (self->parent)
|
||||
task_send_signal(self->parent, SIGCHLD);
|
||||
sched_block(TASK_STOPPED); /* does not return until CONT */
|
||||
break;
|
||||
|
||||
case SIG_ACTION_CONT:
|
||||
/* Already running (we were woken to handle this) */
|
||||
break;
|
||||
|
||||
case SIG_ACTION_IGN:
|
||||
break;
|
||||
}
|
||||
|
||||
+16
-2
@@ -160,6 +160,10 @@ struct task {
|
||||
uint64_t user_stack_top; /* User-space RSP for user tasks */
|
||||
void (*kthread_entry)(void *arg); /* Kernel thread entry point */
|
||||
void *kthread_arg;
|
||||
uint64_t tls_fs_base; /* FS base for user tasks (TLS support) */
|
||||
uint64_t phdr_va;
|
||||
uint16_t phent;
|
||||
uint16_t phnum;
|
||||
|
||||
/* ---- Signals ----------------------------------------------------- */
|
||||
uint64_t pending_signals; /* Bitmask of unhandled signals */
|
||||
@@ -248,8 +252,14 @@ task_t *sched_create_kthread(const char *name,
|
||||
|
||||
/* Create a user-space task and enqueue it immediately */
|
||||
task_t *sched_create_user_task(const char *name,
|
||||
uint64_t entry_rip, uint64_t user_rsp,
|
||||
struct pagemap *pm);
|
||||
uint64_t entry_rip,
|
||||
uint64_t user_rsp,
|
||||
struct pagemap *pm,
|
||||
uint64_t tls_fs_base,
|
||||
uint64_t phdr_va,
|
||||
uint16_t phent,
|
||||
uint16_t phnum);
|
||||
|
||||
|
||||
/* Add a task to the appropriate run queue */
|
||||
void sched_enqueue(task_t *task);
|
||||
@@ -308,5 +318,9 @@ static inline task_t *sched_current(void) { return g_current_task; }
|
||||
void sched_context_switch(struct cpu_context *from,
|
||||
struct cpu_context *to);
|
||||
|
||||
void set_fs_base(uint64_t base);
|
||||
|
||||
extern void user_task_trampoline(void); /* Defined in user_task_trampoline.S */
|
||||
|
||||
/* Kernel stack size for each task */
|
||||
#define KSTACK_SIZE (32 * 1024) /* 32 KiB — comfortable headroom */
|
||||
@@ -0,0 +1,118 @@
|
||||
|
||||
/* ── struct task offsets ─────────────────────────────────────────────────── */
|
||||
.equ TASK_KERNEL_STACK, 160
|
||||
.equ TASK_KERNEL_STACK_SIZE, 168
|
||||
.equ TASK_USER_ENTRY, 176
|
||||
.equ TASK_USER_STACK_TOP, 184
|
||||
.equ TASK_TLS_FS_BASE, 208
|
||||
|
||||
.equ TASK_PHDR_VA, 216
|
||||
.equ TASK_PHENT, 224
|
||||
.equ TASK_PHNUM, 226
|
||||
|
||||
/* ── GDT selectors ───────────────────────────────────────────────────────── */
|
||||
.equ SEL_USER_DS, 0x1B /* ring-3 data (index 3, RPL 3) */
|
||||
.equ SEL_USER_CS, 0x23 /* ring-3 code (index 4, RPL 3) */
|
||||
|
||||
/* ── ELF auxiliary-vector types ──────────────────────────────────────────── */
|
||||
.equ AT_NULL, 0
|
||||
.equ AT_PAGESZ, 6
|
||||
.equ AT_ENTRY, 9
|
||||
.equ AT_PHDR, 3
|
||||
.equ AT_PHENT, 4
|
||||
.equ AT_PHNUM, 5
|
||||
.equ AT_BASE, 7
|
||||
|
||||
/* ═══════════════════════════════════════════════════════════════════════════
|
||||
* user_task_trampoline
|
||||
* ═══════════════════════════════════════════════════════════════════════════ */
|
||||
/*
 * user_task_trampoline
 *
 * Loads g_current_task, applies its TLS FS base (when non-zero) through
 * set_fs_base, writes an initial argc/argv/envp/auxv block just below the
 * task's user stack top, then builds an iretq frame at the top of the
 * task's kernel stack and drops to ring 3 at the task's entry point.
 *
 * Layout written relative to the user stack top (T):
 *   T-0xB0  argc = 1                 <- initial user RSP
 *   T-0xA8  argv[0] -> T-0x20
 *   T-0xA0  argv[1] = NULL
 *   T-0x98  envp[0] = NULL
 *   T-0x90  AT_PAGESZ, 4096
 *   T-0x80  AT_ENTRY,  user entry
 *   T-0x70  AT_PHDR,   phdr_va
 *   T-0x60  AT_PHENT,  phent
 *   T-0x50  AT_PHNUM,  phnum
 *   T-0x40  AT_BASE,   0
 *   T-0x30  AT_NULL,   0
 *   T-0x20  "helloworld\0" (16 bytes, zero padded)
 *
 * NOTE(review): the stores below go straight to the user stack address, so
 * this presumably runs with the task's page tables already loaded — confirm.
 */
    .section .text
    .global user_task_trampoline
    .type user_task_trampoline, @function

user_task_trampoline:
    movq    g_current_task(%rip), %rbx

    /* ── TLS FS base ───────────────────────────────────────────────────── */
    movq    TASK_TLS_FS_BASE(%rbx), %rdi
    testq   %rdi, %rdi
    jz      .Lno_tls
    call    set_fs_base
    movq    g_current_task(%rip), %rbx      /* reload task pointer after the call */

.Lno_tls:
    /* ── Stash values we need after we switch stacks ───────────────────── */
    movq    TASK_USER_STACK_TOP(%rbx), %r15
    movq    TASK_USER_ENTRY(%rbx), %r14
    movq    TASK_KERNEL_STACK(%rbx), %r13
    addq    TASK_KERNEL_STACK_SIZE(%rbx), %r13      /* r13 = kernel stack top */

    /* ── Load auxv values ──────────────────────────────────────────────── */
    movq    TASK_PHDR_VA(%rbx), %r11
    movzwq  TASK_PHENT(%rbx), %r10
    movzwq  TASK_PHNUM(%rbx), %r9

    /* ── Build initial user stack ──────────────────────────────────────── */
    /* program name string: "hellowor" + "ld\0..." */
    movabsq $0x726f776f6c6c6568, %rax
    movq    %rax, -0x20(%r15)
    movabsq $0x000000000000646c, %rax
    movq    %rax, -0x18(%r15)

    /* argc / argv / envp */
    movq    $1, -0xB0(%r15)                 /* argc = 1 */
    leaq    -0x20(%r15), %rax
    movq    %rax, -0xA8(%r15)               /* argv[0] */
    movq    $0, -0xA0(%r15)                 /* argv[1] = NULL */
    movq    $0, -0x98(%r15)                 /* envp[0] = NULL */

    /* auxv */
    movq    $AT_PAGESZ, -0x90(%r15)
    movq    $4096, -0x88(%r15)

    movq    $AT_ENTRY, -0x80(%r15)
    movq    %r14, -0x78(%r15)

    movq    $AT_PHDR, -0x70(%r15)
    movq    %r11, -0x68(%r15)

    movq    $AT_PHENT, -0x60(%r15)
    movq    %r10, -0x58(%r15)

    movq    $AT_PHNUM, -0x50(%r15)
    movq    %r9, -0x48(%r15)

    movq    $AT_BASE, -0x40(%r15)
    movq    $0, -0x38(%r15)

    movq    $AT_NULL, -0x30(%r15)
    movq    $0, -0x28(%r15)

    leaq    -0xB0(%r15), %r12               /* user RSP */

    /* ── Pivot to kernel stack top and build iretq frame ───────────────── */
    movq    %r13, %rsp

    pushq   $SEL_USER_DS                    /* SS */
    pushq   %r12                            /* RSP */
    pushfq                                  /* RFLAGS ... */
    orq     $0x200, (%rsp)                  /* ... with IF set */
    pushq   $SEL_USER_CS                    /* CS */
    pushq   %r14                            /* RIP */

    /* Zero every GPR so no kernel value reaches ring 3. r12-r15 held the
     * user RSP, the kernel stack top, the entry point and the user stack
     * top; they are no longer needed once the frame is on the stack. */
    xorq    %rax, %rax
    xorq    %rbx, %rbx
    xorq    %rcx, %rcx
    xorq    %rdx, %rdx
    xorq    %rsi, %rsi
    xorq    %rdi, %rdi
    xorq    %rbp, %rbp
    xorq    %r8,  %r8
    xorq    %r9,  %r9
    xorq    %r10, %r10
    xorq    %r11, %r11
    xorq    %r12, %r12
    xorq    %r13, %r13
    xorq    %r14, %r14
    xorq    %r15, %r15

    iretq

    .size user_task_trampoline, . - user_task_trampoline
|
||||
+108
-1
@@ -4,12 +4,18 @@
|
||||
#include "fs/vfs.h"
|
||||
#include "syscall.h"
|
||||
#include "sched/scheduler.h"
|
||||
#include "mm/vmm.h"
|
||||
#include "mm/pmm.h"
|
||||
#include "mm/memory.h"
|
||||
#include "libk/errno.h"
|
||||
#include "mp/futex.h"
|
||||
|
||||
#define MSR_EFER 0xC0000080
|
||||
#define MSR_STAR 0xC0000081
|
||||
#define MSR_LSTAR 0xC0000082
|
||||
#define MSR_SFMASK 0xC0000084
|
||||
#define MSR_KERNEL_GSBASE 0xC0000102
|
||||
#define MSR_KERNEL_FSBASE 0xC0000100
|
||||
|
||||
#define EFER_SCE (1 << 0)
|
||||
|
||||
@@ -76,6 +82,76 @@ uint64_t syscall_handler(uint64_t num,
|
||||
case SYS_EXIT_GROUP:
|
||||
sched_exit((int)arg1);
|
||||
//noreturn
|
||||
|
||||
case SYS_MMAP:
|
||||
{
|
||||
uintptr_t addr = (uintptr_t)arg1;
|
||||
size_t len = (size_t)arg2;
|
||||
int prot = (int)arg3;
|
||||
int flags = (int)arg4;
|
||||
int fd = (int)arg5;
|
||||
off_t offset = (off_t)arg6;
|
||||
|
||||
(void)fd; (void)offset; // we only support anonymous for now
|
||||
|
||||
if (len == 0)
|
||||
return (uint64_t)MAP_FAILED;
|
||||
|
||||
len = ALIGN_UP(len, PAGE_SIZE);
|
||||
|
||||
if (!(flags & MAP_ANONYMOUS)) {
|
||||
return (uint64_t)MAP_FAILED; // todo: file backed later
|
||||
}
|
||||
|
||||
struct pagemap *pm = sched_current()->pagemap;
|
||||
if (!pm) pm = kernel_pagemap;
|
||||
|
||||
if (!(flags & MAP_FIXED)) {
|
||||
addr = find_free_vaddr(pm, len);
|
||||
}
|
||||
|
||||
uint64_t vmm_flags = PAGE_USER | PAGE_READ;
|
||||
if (prot & PROT_WRITE) vmm_flags |= PAGE_WRITE;
|
||||
if (prot & PROT_EXEC) vmm_flags |= PAGE_NO_EXECUTE;
|
||||
|
||||
size_t page_count = len / PAGE_SIZE;
|
||||
void *phys = pmm_allocz(page_count);
|
||||
if (!phys) {
|
||||
return (uint64_t)MAP_FAILED;
|
||||
}
|
||||
|
||||
// Map them
|
||||
for (size_t i = 0; i < page_count; i++) {
|
||||
uint64_t va = addr + i * PAGE_SIZE;
|
||||
uint64_t pa = (uint64_t)phys + i * PAGE_SIZE;
|
||||
if (!vmm_map_page(pm, va, pa, vmm_flags | PAGE_USER, Size4KiB)) {
|
||||
pmm_free(phys, page_count);
|
||||
return (uint64_t)MAP_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
case SYS_MUNMAP:
|
||||
{
|
||||
uintptr_t addr = (uintptr_t)arg1;
|
||||
size_t len = (size_t)arg2;
|
||||
|
||||
if (len == 0 || addr == 0)
|
||||
return 0;
|
||||
|
||||
len = ALIGN_UP(len, PAGE_SIZE);
|
||||
|
||||
struct pagemap *pm = sched_current()->pagemap ?: kernel_pagemap;
|
||||
|
||||
for (size_t i = 0; i < len; i += PAGE_SIZE) {
|
||||
vmm_unmap_page(pm, addr + i, false);
|
||||
// TODO: also free the physical page (will need page refcounting or virt_to_phys + pmm_free)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
case SYS_SCHED_YIELD:
|
||||
sched_yield();
|
||||
@@ -159,8 +235,39 @@ uint64_t syscall_handler(uint64_t num,
|
||||
return (uint64_t)task_set_scheduler(t, policy, rt_prio);
|
||||
}
|
||||
|
||||
case SYS_TCB_SET:
|
||||
{
|
||||
void *pointer = (void*)arg1;
|
||||
if (pointer == NULL) {
|
||||
return (uint64_t)-EINVAL;
|
||||
}
|
||||
|
||||
set_fs_base((uint64_t)pointer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
case SYS_FUTEX:
|
||||
{
|
||||
int *uaddr = (int*)arg1;
|
||||
int op = (int)arg2;
|
||||
int val = (int)arg3;
|
||||
|
||||
switch (op) {
|
||||
case FUTEX_WAIT:
|
||||
return (uint64_t)futex_wait(uaddr, val);
|
||||
case FUTEX_WAKE:
|
||||
return (uint64_t)futex_wake(uaddr, val);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return (uint64_t)-1;
|
||||
{
|
||||
printf("Unknown syscall: %lu\n", num);
|
||||
return (uint64_t)-ENOSYS;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+35
-11
@@ -1,22 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#define SYS_READ 0
|
||||
#define SYS_WRITE 1
|
||||
#define SYS_OPEN 2
|
||||
#define SYS_CLOSE 3
|
||||
|
||||
#define SYS_SCHED_YIELD 24
|
||||
#define SYS_GETPID 39
|
||||
#define SYS_GETPPID 110
|
||||
#define SYS_NICE 34
|
||||
#define SYS_KILL 62
|
||||
#define SYS_READ 0
|
||||
#define SYS_WRITE 1
|
||||
#define SYS_OPEN 2
|
||||
#define SYS_CLOSE 3
|
||||
#define SYS_MMAP 9
|
||||
#define SYS_MUNMAP 11
|
||||
#define SYS_BRK 12
|
||||
#define SYS_SIGACTION 13 /* rt_sigaction on Linux */
|
||||
#define SYS_SIGPROCMASK 14 /* rt_sigprocmask on Linux */
|
||||
#define SYS_SCHED_YIELD 24
|
||||
#define SYS_GETPID 39
|
||||
|
||||
#define SYS_NICE 34
|
||||
#define SYS_FORK 57
|
||||
#define SYS_EXECVE 59
|
||||
#define SYS_EXIT 60
|
||||
#define SYS_EXIT_GROUP 231
|
||||
#define SYS_KILL 62
|
||||
|
||||
#define SYS_GETPPID 110
|
||||
#define SYS_SCHED_GETSCHEDULER 138
|
||||
#define SYS_SCHED_SETSCHEDULER 139
|
||||
#define SYS_FUTEX 202
|
||||
#define SYS_EXIT_GROUP 231
|
||||
#define SYS_TCB_SET 300
|
||||
|
||||
typedef int64_t off_t;
|
||||
|
||||
|
||||
// Memory protection flags (Linux compatible)
|
||||
#define PROT_NONE 0x0
|
||||
#define PROT_READ 0x1
|
||||
#define PROT_WRITE 0x2
|
||||
#define PROT_EXEC 0x4
|
||||
|
||||
// mmap flags
|
||||
#define MAP_PRIVATE 0x02
|
||||
#define MAP_SHARED 0x01
|
||||
#define MAP_ANONYMOUS 0x20
|
||||
#define MAP_FIXED 0x10
|
||||
#define MAP_FAILED ((void*)-1)
|
||||
|
||||
|
||||
void syscall_init(void);
|
||||
Reference in New Issue
Block a user