user: implement mlibc as the libc, finally.

It's finally done..

Signed-off-by: kaguya <vpshinomiya@protonmail.com>
This commit is contained in:
kaguya
2026-05-02 03:31:49 -04:00
parent 2fa39ad85a
commit 9a9b91c940
2387 changed files with 152741 additions and 315 deletions
+2 -2
View File
@@ -55,14 +55,14 @@ void x86_64_ISR_Initialize(void)
x86_64_ISR_InitializeGates();
for (int i = 0; i < 256; i++)
x86_64_IDT_EnableGate(i);
x86_64_IDT_DisableGate(0x80); // syscall gate if you want
x86_64_IDT_DisableGate(0x80); // syscall gate
}
void page_fault_handler(Registers* regs, uint64_t cr2)
{
// You can decode error bits here:
// bit 0: present
// bit 1: write
// bit 2: user-mode
+29 -45
View File
@@ -40,10 +40,14 @@ struct pagemap *create_user_pagemap(void)
}
/* Copy kernel higher-half mappings (kernel + HHDM) */
for (size_t i = 256; i < 512; i++) {
for (size_t i = 0; i < 512; i++) {
pm->top_level[i] = kernel_pagemap->top_level[i];
}
for (size_t i = 0; i < 256; i++) {
pm->top_level[i] = 0;
}
/* Lower half remains zero (user address space) */
printf("[usermode] user pagemap created (PML4 phys = 0x%lx)\n",
(uint64_t)pm->top_level - MEM_PHYS_OFFSET);
@@ -53,7 +57,7 @@ struct pagemap *create_user_pagemap(void)
uintptr_t setup_user_stack(struct pagemap *pagemap)
{
user_stack_phys_base = (uint64_t)pmm_alloc(USER_STACK_PAGES);
user_stack_phys_base = (uint64_t)pmm_allocz(USER_STACK_PAGES);
if (!user_stack_phys_base) {
printf("Failed to allocate user stack pages!\n");
@@ -83,45 +87,9 @@ uintptr_t setup_user_stack(struct pagemap *pagemap)
return rsp;
}
/*
 * enter_user_mode - build an iretq frame and switch to ring 3.
 *
 * rip: user-mode instruction pointer to resume at.
 * rsp: user-mode stack pointer.
 *
 * The function ends in iretq and contains no ret, so control does not come
 * back to the caller.
 *
 * Because the function is `naked`, the compiler emits no prologue/epilogue
 * and GCC only supports basic asm (no operands) inside it.  The arguments
 * are therefore read directly from the registers in which the System V
 * AMD64 calling convention passes them: rip in %rdi, rsp in %rsi.
 * NOTE(review): assumes the kernel is built with the System V AMD64 ABI.
 */
__attribute__((naked))
void enter_user_mode(uint64_t rip, uint64_t rsp)
{
    __asm__ volatile(
        "cli\n\t"

        /* Load selector 0x1B into every data-segment register. */
        "mov $0x1B, %ax\n\t"
        "mov %ax, %ds\n\t"
        "mov %ax, %es\n\t"
        "mov %ax, %fs\n\t"
        "mov %ax, %gs\n\t"

        /* iretq frame, pushed in the order SS, RSP, RFLAGS, CS, RIP. */
        "pushq $0x1B\n\t"           /* SS */
        "pushq %rsi\n\t"            /* RSP (second argument) */

        /* RFLAGS: current flags with IF (0x200) set, so interrupts are
         * enabled again once iretq has loaded them. */
        "pushfq\n\t"
        "pop %rax\n\t"
        "or $0x200, %rax\n\t"
        "push %rax\n\t"

        "pushq $0x23\n\t"           /* CS */
        "pushq %rdi\n\t"            /* RIP (first argument) */

        "iretq\n\t"
    );
}
void start_userspace(void)
{
struct pagemap *user_pagemap = create_user_pagemap();
if (!user_pagemap) {
printf("Failed to create user pagemap\n");
@@ -129,19 +97,35 @@ void start_userspace(void)
}
void *elf_entry = NULL;
if (!ELF_Read("init.elf", &elf_entry, user_pagemap)) {
printf("Failed to load init.elf\n");
uint64_t tls_fs_base = 0;
uint64_t phdr_va = 0;
uint16_t phent = 0;
uint16_t phnum = 0;
if (!ELF_Read("helloworld.elf",
&elf_entry,
user_pagemap,
&tls_fs_base,
&phdr_va,
&phent,
&phnum)) {
printf("Failed to load helloworld.elf\n");
for(;;);
}
if (!elf_entry) {
printf("ELF has no entry point\n");
for(;;);
}
printf("ELF: entry=0x%lx TLS_FS=0x%lx PHDR=0x%lx PHENT=0x%x PHNUM=%u\n",
(uint64_t)elf_entry, tls_fs_base, phdr_va, phent, phnum);
uintptr_t user_rsp = setup_user_stack(user_pagemap);
printf("Entering usermode RIP=%p RSP=%p\n", elf_entry, (void*)user_rsp);
sched_create_user_task("init", (uint64_t)elf_entry, user_rsp, user_pagemap);
sched_create_user_task("init",
(uint64_t)elf_entry,
user_rsp,
user_pagemap,
tls_fs_base,
phdr_va,
phent,
phnum);
}
+1 -1
View File
@@ -138,7 +138,7 @@ void lapic_init(void) {
* ── Step 8: Set Task Priority to 0 ───────────────────────────────────
*
* TPR = 0 means the CPU will accept all interrupt priorities.
* Raise this later if you need to block lower-priority interrupts.
* Raise this later if lower-priority interrupts need to be blocked.
*/
lapic_write(LAPIC_TPR, 0);
-3
View File
@@ -64,9 +64,6 @@ void lapic_init(void);
/**
* lapic_eoi - Signal end-of-interrupt to the LAPIC.
* Must be called from interrupt handlers that go through the LAPIC
* (i.e. IOAPIC-routed interrupts). ExtINT (i8259) interrupts only
* need the i8259 EOI, which your existing irq.c already sends.
*/
void lapic_eoi(void);
+2 -2
View File
@@ -237,7 +237,7 @@ void ioapic_init(void) {
* MEM_PHYS_OFFSET. Two MMIO registers are accessed (offsets 0 and
* 0x10) so one 4 KiB page is sufficient.
*
* TODO: Mark the page UC (cache-disable) in the PTE when your VMM
* TODO: Mark the page UC (cache-disable) in the PTE when the VMM
* gains support for PAT / PCD flags.
*/
uint64_t phys = (uint64_t)e->address;
@@ -343,7 +343,7 @@ void irq_redirect_to_apic(uint8_t isa_irq, uint8_t vector,
/* Mask in the 8259 so it stops firing through LINT0 */
if (g_Driver) {
g_Driver->Mask(isa_irq); // from your irq.c / i8259
g_Driver->Mask(isa_irq);
}
/* Programme IOAPIC redirection entry */
+5 -2
View File
@@ -22,6 +22,8 @@ extern bool g_IOAPIC;
void x86_64_IRQ_Handler(Registers *regs)
{
int irq = regs->interrupt - PIC_REMAP_OFFSET;
g_Driver->SendEndOfInterrupt(irq);
if (g_IRQHandlers[irq] != NULL)
{
@@ -33,7 +35,7 @@ void x86_64_IRQ_Handler(Registers *regs)
log_warn(MODULE, "Unhandled IRQ %d...", irq);
}
g_Driver->SendEndOfInterrupt(irq);
@@ -42,6 +44,7 @@ void x86_64_IRQ_Handler(Registers *regs)
void x86_64_APIC_IRQ_Handler(Registers* regs)
{
uint8_t vector = regs->interrupt;
lapic_eoi();
if (g_APICHandlers[vector] != NULL) {
g_APICHandlers[vector](regs);
@@ -49,7 +52,7 @@ void x86_64_APIC_IRQ_Handler(Registers* regs)
log_warn("APIC", "Unhandled vector 0x%02x", vector);
}
lapic_eoi(); // ← This is the key difference from PIC!
// ← This is the key difference from PIC!
}
+1 -1
View File
@@ -33,7 +33,7 @@ typedef struct __attribute__((packed)) {
* • WAV any PCM WAV (no compression): sample rate / channels / bit depth
* are read from the "fmt " chunk automatically.
* • Raw no RIFF header; audio is assumed to be 48 000 Hz, 16-bit, stereo.
* Override with pcm_play_raw() if your file has a different format.
* Override with pcm_play_raw() if the file has a different format.
*
* The function allocates a physically-contiguous DMA buffer, reads the file,
* starts playback, blocks until complete, then frees the buffer.
+182 -106
View File
@@ -1,3 +1,4 @@
// elf.c (now extracts AT_PHDR / AT_PHENT / AT_PHNUM + minor cleanups)
#include "elf.h"
#include "libk/stdio.h"
#include "libk/string.h"
@@ -8,148 +9,223 @@
extern uintptr_t g_hhdm_offset;
#define ELF_BUFFER_SIZE (1024 * 1024)
bool ELF_Read(const char* path, void** entryPoint, struct pagemap *target_pagemap)
bool ELF_Read(const char* path,
void** entryPoint,
struct pagemap *target_pagemap,
uint64_t *out_tls_fs_base,
uint64_t *out_phdr_va,
uint16_t *out_phent,
uint16_t *out_phnum)
{
uint32_t size;
*out_tls_fs_base = 0;
*out_phdr_va = 0;
*out_phent = 0;
*out_phnum = 0;
uint8_t* elf_buffer = kmalloc(ELF_BUFFER_SIZE);
if (!elf_buffer) {
printf("ELF: kmalloc failed\n");
uint32_t inum = ext2_resolve_path(path);
if (!inum) {
printf("ELF: file not found: %s\n", path);
return false;
}
// ── load file ─────────────────────────────────────
if (!ext2_read_file_from_root(path, elf_buffer, &size)) {
printf("ELF: failed to read file\n");
kfree(elf_buffer);
ext2_inode_t inode;
if (!ext2_read_inode(inum, &inode)) {
printf("ELF: failed to read inode\n");
return false;
}
if (size < sizeof(ELFHeader)) {
uint64_t file_size = inode.i_size;
if (file_size < sizeof(ELFHeader)) {
printf("ELF: file too small\n");
kfree(elf_buffer);
return false;
}
uint64_t buf_pages = ALIGN_UP(file_size, PAGE_SIZE) / PAGE_SIZE;
void* buffer_phys = pmm_allocz(buf_pages);
if (!buffer_phys) {
printf("ELF: failed to allocate %lu pages for file buffer\n", buf_pages);
return false;
}
uint8_t* elf_buffer = (uint8_t*)((uintptr_t)buffer_phys + MEM_PHYS_OFFSET);
if (!ext2_read_file(&inode, elf_buffer)) {
pmm_free(buffer_phys, buf_pages);
return false;
}
ELFHeader* header = (ELFHeader*)elf_buffer;
printf("=== ELF DEBUG ===\n");
printf("Entry point VA = 0x%lx\n", header->ProgramEntryPosition);
printf("PHDR offset = 0x%lx\n", header->ProgramHeaderTablePosition);
printf("PHDR count = %u\n", header->ProgramHeaderTableEntryCount);
printf("=== ELF DEBUG ===\n"
"Entry=0x%lx PHDR@0x%lx count=%u type=0x%x arch=0x%x\n"
"=== END ===\n",
header->ProgramEntryPosition,
header->ProgramHeaderTablePosition,
header->ProgramHeaderTableEntryCount,
header->Type,
header->InstructionSet);
if (memcmp(header->Magic, ELF_MAGIC, 4) != 0 ||
header->Bitness != ELF_BITNESS_64BIT ||
header->Endianness != ELF_ENDIANNESS_LITTLE ||
(header->Type != ELF_TYPE_EXECUTABLE && header->Type != ELF_TYPE_SHARED) ||
header->InstructionSet != ELF_INSTRUCTION_SET_X64) {
printf("=== END ELF DEBUG ===\n");
// ── validate ELF ──────────────────────────────────
if (memcmp(header->Magic, ELF_MAGIC, 4) != 0) {
printf("ELF: bad magic\n");
kfree(elf_buffer);
return false;
}
if (header->Bitness != ELF_BITNESS_64BIT) {
printf("ELF: not 64-bit\n");
kfree(elf_buffer);
return false;
}
if (header->Endianness != ELF_ENDIANNESS_LITTLE) {
printf("ELF: wrong endianness\n");
kfree(elf_buffer);
return false;
}
if (header->Type != ELF_TYPE_EXECUTABLE) {
printf("ELF: not executable\n");
kfree(elf_buffer);
return false;
}
if (header->InstructionSet != ELF_INSTRUCTION_SET_X64) {
printf("ELF: wrong arch\n");
kfree(elf_buffer);
return false;
printf("ELF: unsupported/invalid header\n");
goto cleanup;
}
*entryPoint = (void*)header->ProgramEntryPosition;
// ── program headers ───────────────────────────────
// ------------------------------------------------------------------
// Parse program headers LOAD, TLS, and PHDR
// ------------------------------------------------------------------
uint64_t tls_vaddr = 0, tls_filesz = 0, tls_memsz = 0, tls_align = 8;
uint8_t* tls_src = NULL;
uint64_t phdr_vaddr = 0;
uint8_t* ph_table = elf_buffer + header->ProgramHeaderTablePosition;
uint64_t phdr_table_end = header->ProgramHeaderTablePosition +
(uint64_t)header->ProgramHeaderTableEntryCount *
header->ProgramHeaderTableEntrySize;
for (uint32_t i = 0; i < header->ProgramHeaderTableEntryCount; i++)
{
ELFProgramHeader* ph = (ELFProgramHeader*)(ph_table +
i * header->ProgramHeaderTableEntrySize);
if (phdr_table_end > file_size) {
printf("ELF: program header table extends beyond file\n");
goto cleanup;
}
if (ph->Type != ELF_PROGRAM_TYPE_LOAD) {
printf("LOAD segment: VA=0x%lx FileSz=0x%lx MemSz=0x%lx\n",
ph->VirtualAddress, ph->FileSize, ph->MemorySize);
for (uint32_t i = 0; i < header->ProgramHeaderTableEntryCount; i++) {
ELFProgramHeader* ph = (ELFProgramHeader*)(ph_table + i * header->ProgramHeaderTableEntrySize);
// PT_PHDR
if (ph->Type == ELF_PROGRAM_TYPE_PHDR) {
phdr_vaddr = ph->VirtualAddress;
printf("ELF: Found PT_PHDR VA=0x%lx\n", phdr_vaddr);
// fall through
}
// PT_TLS
if (ph->Type == ELF_PROGRAM_TYPE_TLS) {
tls_vaddr = ph->VirtualAddress;
tls_filesz = ph->FileSize;
tls_memsz = ph->MemorySize;
tls_align = ph->Align ? ph->Align : 8;
tls_src = elf_buffer + ph->Offset;
if (ph->Offset + ph->FileSize > file_size) {
printf("ELF: PT_TLS segment data out of file bounds\n");
goto cleanup;
}
printf("ELF: Found PT_TLS VA=0x%lx FileSz=0x%lx MemSz=0x%lx Align=0x%lx\n",
tls_vaddr, tls_filesz, tls_memsz, tls_align);
continue;
}
uint64_t virt = ph->VirtualAddress;
uint64_t offset = ph->Offset;
uint64_t memsz = ph->MemorySize;
uint64_t filesz = ph->FileSize;
if (memsz == 0)
if (ph->Type != ELF_PROGRAM_TYPE_LOAD || ph->MemorySize == 0)
continue;
// ── align to page boundary ─────────────────────
uint64_t aligned_virt = ALIGN_DOWN(virt, PAGE_SIZE);
uint64_t page_offset = virt & 0xFFF;
uint64_t aligned_memsz = ALIGN_UP(memsz + page_offset, PAGE_SIZE);
uint64_t pages = aligned_memsz / PAGE_SIZE;
// Allocate physical pages
uint64_t phys_base = (uint64_t)pmm_alloc(pages);
if (!phys_base) {
printf("ELF: pmm_alloc failed for %lu pages\n", pages);
kfree(elf_buffer);
return false;
if (ph->Offset + ph->FileSize > file_size) {
printf("ELF: PT_LOAD segment data out of file bounds\n");
goto cleanup;
}
// Map with exact permissions
uint64_t map_flags = PAGE_USER;
if (ph->Flags & PF_R) map_flags |= PAGE_READ;
if (ph->Flags & PF_W) map_flags |= PAGE_WRITE;
if (!(ph->Flags & PF_X)) map_flags |= PAGE_NO_EXECUTE;
uint64_t virt = ph->VirtualAddress;
uint64_t aligned_virt = ALIGN_DOWN(virt, PAGE_SIZE);
uint64_t page_offset = virt & (PAGE_SIZE - 1);
uint64_t aligned_memsz = ALIGN_UP(ph->MemorySize + page_offset, PAGE_SIZE);
uint64_t pages = aligned_memsz / PAGE_SIZE;
void* seg_phys = pmm_allocz(pages);
if (!seg_phys) {
printf("ELF: failed to allocate physical pages for LOAD segment\n");
goto cleanup;
}
// ── map each page individually using new vmm_map_page ─────
for (uint64_t p = 0; p < pages; p++) {
uint64_t virt_addr = aligned_virt + p * PAGE_SIZE;
uint64_t phys_addr = phys_base + p * PAGE_SIZE;
bool success = vmm_map_page(
target_pagemap,
virt_addr,
phys_addr,
PAGE_READ | PAGE_WRITE | PAGE_USER, // RW + User mode
Size4KiB
);
if (!success) {
printf("ELF: failed to map page at 0x%lx\n", virt_addr);
// TODO: cleanup previously mapped pages + free phys
kfree(elf_buffer);
return false;
if (!vmm_map_page(target_pagemap,
aligned_virt + p * PAGE_SIZE,
(uintptr_t)seg_phys + p * PAGE_SIZE,
map_flags,
Size4KiB)) {
pmm_free(seg_phys, pages);
goto cleanup;
}
}
// ── copy segment data ───────────────────────────────
uint8_t* dst = (uint8_t*)(phys_base + MEM_PHYS_OFFSET); // via HHDM
uint8_t* src = elf_buffer + offset;
memcpy(dst + page_offset, src, filesz);
// ── zero BSS section ────────────────────────────────
if (memsz > filesz) {
memset(dst + page_offset + filesz, 0, memsz - filesz);
uint8_t* dst = (uint8_t*)((uintptr_t)seg_phys + MEM_PHYS_OFFSET);
memcpy(dst + page_offset, elf_buffer + ph->Offset, ph->FileSize);
if (ph->MemorySize > ph->FileSize) {
memset(dst + page_offset + ph->FileSize, 0,
ph->MemorySize - ph->FileSize);
}
}
kfree(elf_buffer);
uint64_t tls_size = tls_memsz ? ALIGN_UP(tls_memsz, tls_align) : 0ULL;
uint64_t tcb_va = TLS_BASE_VA + PAGE_SIZE;
uint64_t tls_va = tcb_va - tls_size;
uint64_t page_va = ALIGN_DOWN(tls_va, PAGE_SIZE);
uint64_t tcb_size = sizeof(TCB);
uint64_t block_end_va = tcb_va + tcb_size;
uint64_t block_end_page = ALIGN_UP(block_end_va, PAGE_SIZE);
uint64_t map_pages = ((block_end_page - page_va) / PAGE_SIZE) + 8;
if (map_pages == 0) map_pages = 1;
void* tls_phys = pmm_allocz(map_pages);
if (!tls_phys) {
printf("ELF: failed to allocate TLS/TCB pages\n");
goto cleanup;
}
uint64_t tls_map_flags = PAGE_USER | PAGE_READ | PAGE_WRITE;
for (uint64_t p = 0; p < map_pages; p++) {
if (!vmm_map_page(target_pagemap,
page_va + p * PAGE_SIZE,
(uintptr_t)tls_phys + p * PAGE_SIZE,
tls_map_flags,
Size4KiB)) {
pmm_free(tls_phys, map_pages);
goto cleanup;
}
}
uint8_t* base_hhdm = (uint8_t*)((uintptr_t)tls_phys + MEM_PHYS_OFFSET);
if (tls_size > 0) {
uint8_t* tls_dst = base_hhdm + (tls_va - page_va);
if (tls_filesz) memcpy(tls_dst, tls_src, tls_filesz);
if (tls_memsz > tls_filesz)
memset(tls_dst + tls_filesz, 0, tls_memsz - tls_filesz);
}
TCB* tcb = (TCB*)(base_hhdm + (tcb_va - page_va));
memset(tcb, 0, sizeof(TCB));
tcb->self = (void*)tcb_va;
tcb->tid = 1;
*out_tls_fs_base = tcb_va;
*out_phdr_va = tls_vaddr ? tls_vaddr : phdr_vaddr;
*out_phent = header->ProgramHeaderTableEntrySize;
*out_phnum = header->ProgramHeaderTableEntryCount;
printf("ELF: TLS/TCB setup complete TCB@0x%lx TLS@0x%lx FS=0x%lx\n"
" PHDR@0x%lx PHENT=0x%x PHNUM=%u\n",
tcb_va, tls_va, tcb_va, *out_phdr_va, *out_phent, *out_phnum);
pmm_free(buffer_phys, buf_pages);
return true;
cleanup:
pmm_free(buffer_phys, buf_pages);
return false;
}
+64 -52
View File
@@ -1,11 +1,22 @@
// elf.h
#pragma once
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h> // size_t for TCB
#include "mm/vmm.h"
// ELF magic and basic constants
#define ELF_MAGIC ("\x7F" "ELF")
#include <stdint.h>
// Standard ELF program header flags (bitfield)
#define PF_X 0x00000001 // Execute
#define PF_W 0x00000002 // Write
#define PF_R 0x00000004 // Read
// Fixed canonical address for the initial thread's TLS + TCB block.
// This lives in the upper half of the 47-bit user address space and
// will never overlap with normal LOAD segments (which are usually low).
#define TLS_BASE_VA 0x00007FFF00000000ULL
typedef struct
{
@@ -18,13 +29,13 @@ typedef struct
uint8_t _Padding[7];
uint16_t Type; // relocatable, executable, shared, core
uint16_t InstructionSet; // architecture (was too small for real ELF, but kept)
uint16_t InstructionSet; // architecture
uint32_t ELFVersion;
uint64_t ProgramEntryPosition; // FIXED (was 32-bit)
uint64_t ProgramHeaderTablePosition; // FIXED
uint64_t SectionHeaderTablePosition; // FIXED
uint64_t ProgramEntryPosition;
uint64_t ProgramHeaderTablePosition;
uint64_t SectionHeaderTablePosition;
uint32_t Flags;
@@ -39,80 +50,81 @@ typedef struct
enum ELFBitness
{
ELF_BITNESS_32BIT = 1,
ELF_BITNESS_64BIT = 2,
ELF_BITNESS_32BIT = 1,
ELF_BITNESS_64BIT = 2,
};
enum ELFEndianness
{
ELF_ENDIANNESS_LITTLE = 1,
ELF_ENDIANNESS_BIG = 2,
ELF_ENDIANNESS_LITTLE = 1,
ELF_ENDIANNESS_BIG = 2,
};
enum ELFInstructionSet
{
ELF_INSTRUCTION_SET_NONE = 0,
ELF_INSTRUCTION_SET_X86 = 3,
ELF_INSTRUCTION_SET_ARM = 0x28,
ELF_INSTRUCTION_SET_X64 = 0x3E,
ELF_INSTRUCTION_SET_ARM64 = 0xB7,
ELF_INSTRUCTION_SET_RISCV = 0xF3,
ELF_INSTRUCTION_SET_NONE = 0,
ELF_INSTRUCTION_SET_X86 = 3,
ELF_INSTRUCTION_SET_ARM = 0x28,
ELF_INSTRUCTION_SET_X64 = 0x3E,
ELF_INSTRUCTION_SET_ARM64 = 0xB7,
ELF_INSTRUCTION_SET_RISCV = 0xF3,
};
enum ELFType
{
ELF_TYPE_RELOCATABLE = 1,
ELF_TYPE_EXECUTABLE = 2,
ELF_TYPE_SHARED = 3,
ELF_TYPE_CORE = 4,
ELF_TYPE_RELOCATABLE = 1,
ELF_TYPE_EXECUTABLE = 2,
ELF_TYPE_SHARED = 3,
ELF_TYPE_CORE = 4,
};
typedef struct
{
uint32_t Type;
uint32_t Flags;
uint64_t Offset;
uint64_t VirtualAddress;
uint64_t PhysicalAddress;
uint64_t FileSize;
uint64_t MemorySize;
uint32_t Flags;
uint64_t Align;
} ELFProgramHeader;
} __attribute__((packed)) ELFProgramHeader;
enum ELFProgramType {
// Program header table entry unused.
ELF_PROGRAM_TYPE_NULL = 0,
ELF_PROGRAM_TYPE_NULL = 0,
ELF_PROGRAM_TYPE_LOAD = 1,
ELF_PROGRAM_TYPE_DYNAMIC = 2,
ELF_PROGRAM_TYPE_INTERP = 3,
ELF_PROGRAM_TYPE_NOTE = 4,
ELF_PROGRAM_TYPE_SHLIB = 5,
ELF_PROGRAM_TYPE_PHDR = 6,
ELF_PROGRAM_TYPE_TLS = 7,
// Loadable segment.
ELF_PROGRAM_TYPE_LOAD = 1,
// Dynamic linking information.
ELF_PROGRAM_TYPE_DYNAMIC = 2,
// Interpreter information.
ELF_PROGRAM_TYPE_INTERP = 3,
// Auxiliary information.
ELF_PROGRAM_TYPE_NOTE = 4,
// Reserved
ELF_PROGRAM_TYPE_SHLIB = 5,
// Segment containing program header table itself.
ELF_PROGRAM_TYPE_PHDR = 6,
// Thread-Local Storage template.
ELF_PROGRAM_TYPE_TLS = 7,
// Reserved inclusive range. Operating system specific.
ELF_PROGRAM_TYPE_LOOS = 0x60000000,
ELF_PROGRAM_TYPE_HIOS = 0x6FFFFFFF,
// Reserved inclusive range. Processor specific.
ELF_PROGRAM_TYPE_LOPROC = 0x70000000,
ELF_PROGRAM_TYPE_HIPROC = 0x7FFFFFFF,
// OS/processor reserved ranges (we ignore them)
ELF_PROGRAM_TYPE_LOOS = 0x60000000,
ELF_PROGRAM_TYPE_HIOS = 0x6FFFFFFF,
ELF_PROGRAM_TYPE_LOPROC = 0x70000000,
ELF_PROGRAM_TYPE_HIPROC = 0x7FFFFFFF,
};
/*
 * Thread Control Block, laid out the way mlibc expects to find it.
 *
 * Only the fields mlibc actually reads are given values; every other
 * field is left zero.  The offsets listed below are for an LP64 target
 * (8-byte pointers and size_t, 4-byte int).
 */
typedef struct {
    void      *self;         /* +0x00  TCB self-pointer, read via fs:0 */
    size_t     dtvSize;      /* +0x08 */
    void     **dtvPointers;  /* +0x10 */
    int        tid;          /* +0x18 */
    int        didExit;      /* +0x1C */
    uint8_t    padding[8];   /* +0x20  keeps stackCanary at +0x28 */
    uintptr_t  stackCanary;  /* +0x28 */
    int        cancelBits;   /* +0x30 */
} TCB;
bool ELF_Read(const char* path, void** entryPoint, struct pagemap *target_pagemap);
bool ELF_Read(const char* path,
void** entryPoint,
struct pagemap *target_pagemap,
uint64_t *out_tls_fs_base,
uint64_t *out_phdr_va, // AT_PHDR
uint16_t *out_phent, // AT_PHENT
uint16_t *out_phnum); // AT_PHNUM
+7
View File
@@ -344,6 +344,8 @@ bool ext2_read_inode_internal(uint32_t inum, ext2_inode_t* out) {
if (!ext2_read_block_raw(gdt[g].bg_inode_table + block_off, buf)) {
kfree(buf); return false;
}
printf("ext2_read_inode: inum=%u group=%u idx=%u block_off=%u inode_off=%u\n",
inum + 1, g, idx, block_off, inode_off);
memcpy(out, buf + inode_off * sb.s_inode_size, sizeof(ext2_inode_t));
kfree(buf);
return true;
@@ -950,16 +952,21 @@ bool ext2_read_root_dir(void) {
/*
 * Look up `name` in the root directory and read the whole file into `buf`.
 *
 * name: entry name to search for in the root directory.
 * buf:  destination buffer; its capacity is not checked here, so the caller
 *       must make it large enough for the file.
 * size: out-parameter, set to the file inode's i_size before the data is read.
 *
 * Returns false if the root inode or the file inode cannot be read or the
 * name is not found; otherwise returns the result of
 * ext2_read_file_internal().  Each step is traced with printf().
 */
bool ext2_read_file_from_root_internal(const char* name, uint8_t* buf, uint32_t* size) {
    ext2_inode_t root_inode;
    ext2_inode_t file_inode;
    uint32_t file_inum;

    printf("EXT2: reading file from root: %s\n", name);

    /* Inode 2 is read as the root directory. */
    if (!ext2_read_inode_internal(2, &root_inode)) {
        return false;
    }
    printf("EXT2: root inode: size=%u blocks=%u\n",
           root_inode.i_size, root_inode.i_blocks);

    if (!ext2_find_in_dir_internal(&root_inode, name, &file_inum)) {
        printf("EXT2: not found: %s\n", name);
        return false;
    }
    printf("EXT2: found in root: inum=%u\n", file_inum);

    if (!ext2_read_inode_internal(file_inum, &file_inode)) {
        return false;
    }
    printf("EXT2: file inode: size=%u blocks=%u\n",
           file_inode.i_size, file_inode.i_blocks);

    *size = file_inode.i_size;
    printf("EXT2: read file: size=%u\n", *size);

    return ext2_read_file_internal(&file_inode, buf);
}
+1 -1
View File
@@ -9,7 +9,7 @@ static const uint32_t g_LogSeverityColors[] =
[LVL_INFO] = 0xFFFFFF, // white
[LVL_WARN] = 0xFFFF00, // yellow
[LVL_ERROR] = 0xFF0000, // red
[LVL_CRITICAL] = 0xFFFFFF, // white (can do red background separately if you want)
[LVL_CRITICAL] = 0xFFFFFF, // white
};
+143
View File
@@ -0,0 +1,143 @@
#define EPERM 1 /* Operation not permitted */
#define ENOENT 2 /* No such file or directory */
#define ESRCH 3 /* No such process */
#define EINTR 4 /* Interrupted system call */
#define EIO 5 /* I/O error */
#define ENXIO 6 /* No such device or address */
#define E2BIG 7 /* Argument list too long */
#define ENOEXEC 8 /* Exec format error */
#define EBADF 9 /* Bad file descriptor */
#define ECHILD 10 /* No child processes */
#define EAGAIN 11 /* Try again / resource temporarily unavailable */
#define ENOMEM 12 /* Out of memory */
#define EACCES 13 /* Permission denied */
#define EFAULT 14 /* Bad address */
#define ENOTBLK 15 /* Block device required */
#define EBUSY 16 /* Device or resource busy */
#define EEXIST 17 /* File exists */
#define EXDEV 18 /* Cross-device link */
#define ENODEV 19 /* No such device */
#define ENOTDIR 20 /* Not a directory */
#define EISDIR 21 /* Is a directory */
#define EINVAL 22 /* Invalid argument */
#define ENFILE 23 /* File table overflow */
#define EMFILE 24 /* Too many open files */
#define ENOTTY 25 /* Not a typewriter / inappropriate ioctl */
#define ETXTBSY 26 /* Text file busy */
#define EFBIG 27 /* File too large */
#define ENOSPC 28 /* No space left on device */
#define ESPIPE 29 /* Illegal seek */
#define EROFS 30 /* Read-only file system */
#define EMLINK 31 /* Too many links */
#define EPIPE 32 /* Broken pipe */
#define EDOM 33 /* Math argument out of domain of func */
#define ERANGE 34 /* Math result not representable */
#define EDEADLK 35 /* Resource deadlock would occur */
#define ENAMETOOLONG 36 /* File name too long */
#define ENOLCK 37 /* No record locks available */
#define ENOSYS 38 /* Function not implemented */
#define ENOTEMPTY 39 /* Directory not empty */
#define ELOOP 40 /* Too many symbolic links encountered */
/* EWOULDBLOCK aliases EAGAIN (11); the value 41 is not assigned in this table. */
#define EWOULDBLOCK EAGAIN /* Operation would block */
#define ENOMSG 42 /* No message of desired type */
#define EIDRM 43 /* Identifier removed */
#define ECHRNG 44 /* Channel number out of range */
#define EL2NSYNC 45 /* Level 2 not synchronized */
#define EL3HLT 46 /* Level 3 halted */
#define EL3RST 47 /* Level 3 reset */
#define ELNRNG 48 /* Link number out of range */
#define EUNATCH 49 /* Protocol driver not attached */
#define ENOCSI 50 /* No CSI structure available */
#define EL2HLT 51 /* Level 2 halted */
#define EBADE 52 /* Invalid exchange */
#define EBADR 53 /* Invalid request descriptor */
#define EXFULL 54 /* Exchange full */
#define ENOANO 55 /* No anode */
#define EBADRQC 56 /* Invalid request code */
#define EBADSLT 57 /* Invalid slot */
/* EDEADLOCK aliases EDEADLK (35); the value 58 is not assigned in this table. */
#define EDEADLOCK EDEADLK /* Alias for deadlock */
#define EBFONT 59 /* Bad font file format */
#define ENOSTR 60 /* Device not a stream */
#define ENODATA 61 /* No data available */
#define ETIME 62 /* Timer expired */
#define ENOSR 63 /* Out of streams resources */
#define ENONET 64 /* Machine is not on the network */
#define ENOPKG 65 /* Package not installed */
#define EREMOTE 66 /* Object is remote */
#define ENOLINK 67 /* Link has been severed */
#define EADV 68 /* Advertise error */
#define ESRMNT 69 /* Srmount error */
#define ECOMM 70 /* Communication error on send */
#define EPROTO 71 /* Protocol error */
#define EMULTIHOP 72 /* Multihop attempted */
#define EDOTDOT 73 /* RFS specific error */
#define EBADMSG 74 /* Not a data message */
#define EOVERFLOW 75 /* Value too large for defined data type */
#define ENOTUNIQ 76 /* Name not unique on network */
#define EBADFD 77 /* File descriptor in bad state */
#define EREMCHG 78 /* Remote address changed */
#define ELIBACC 79 /* Can not access a needed shared library */
#define ELIBBAD 80 /* Accessing a corrupted shared library */
#define ELIBSCN 81 /* lib section in a.out corrupted */
#define ELIBMAX 82 /* Attempting to link in too many libs */
#define ELIBEXEC 83 /* Cannot exec a shared library directly */
#define EILSEQ 84 /* Illegal byte sequence */
#define ERESTART 85 /* Interrupted system call should be restarted */
#define ESTRPIPE 86 /* Streams pipe error */
#define EUSERS 87 /* Too many users */
#define ENOTSOCK 88 /* Socket operation on non-socket */
#define EDESTADDRREQ 89 /* Destination address required */
#define EMSGSIZE 90 /* Message too long */
#define EPROTOTYPE 91 /* Protocol wrong type for socket */
#define ENOPROTOOPT 92 /* Protocol not available */
#define EPROTONOSUPPORT 93 /* Protocol not supported */
#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
#define EOPNOTSUPP 95 /* Operation not supported */
#define ENOTSUP EOPNOTSUPP
#define EPFNOSUPPORT 96 /* Protocol family not supported */
#define EAFNOSUPPORT 97 /* Address family not supported */
#define EADDRINUSE 98 /* Address already in use */
#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
#define ENETDOWN 100 /* Network is down */
#define ENETUNREACH 101 /* Network is unreachable */
#define ENETRESET 102 /* Network dropped connection */
#define ECONNABORTED 103 /* Software caused connection abort */
#define ECONNRESET 104 /* Connection reset by peer */
#define ENOBUFS 105 /* No buffer space available */
#define EISCONN 106 /* Transport endpoint is already connected */
#define ENOTCONN 107 /* Transport endpoint is not connected */
#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
#define ETOOMANYREFS 109 /* Too many references */
#define ETIMEDOUT 110 /* Connection timed out */
#define ECONNREFUSED 111 /* Connection refused */
#define EHOSTDOWN 112 /* Host is down */
#define EHOSTUNREACH 113 /* No route to host */
#define EALREADY 114 /* Operation already in progress */
#define EINPROGRESS 115 /* Operation now in progress */
#define ESTALE 116 /* Stale file handle */
#define EUCLEAN 117 /* Structure needs cleaning */
#define ENOTNAM 118 /* Not a XENIX named type file */
#define ENAVAIL 119 /* No XENIX semaphores available */
#define EISNAM 120 /* Is a named type file */
#define EREMOTEIO 121 /* Remote I/O error */
#define EDQUOT 122 /* Quota exceeded */
#define ENOMEDIUM 123 /* No medium found */
#define EMEDIUMTYPE 124 /* Wrong medium type */
#define ECANCELED 125 /* Operation canceled */
#define ENOKEY 126 /* Required key not available */
#define EKEYEXPIRED 127 /* Key has expired */
#define EKEYREVOKED 128 /* Key has been revoked */
#define EKEYREJECTED 129 /* Key was rejected by service */
#define EOWNERDEAD 130 /* Owner died */
#define ENOTRECOVERABLE 131 /* State not recoverable */
#define ERFKILL 132 /* Operation not possible due to RF-kill */
#define EHWPOISON 133 /* Memory page has hardware error */
+1 -1
View File
@@ -14,7 +14,7 @@ static const uint32_t g_LogSeverityColors[] =
[LVL_INFO] = 0xFFFFFF, // white
[LVL_WARN] = 0xFFFF00, // yellow
[LVL_ERROR] = 0xFF0000, // red
[LVL_CRITICAL] = 0xFFFFFF, // white (can do red background separately if you want)
[LVL_CRITICAL] = 0xFFFFFF, // white
};
static spinlock_t s_printf_lock = SPINLOCK_INIT;
+90
View File
@@ -98,6 +98,68 @@ static void hcf(void) {
}
}
/*
 * cpuid - execute the CPUID instruction.
 *
 * leaf:    value loaded into EAX before the instruction.
 * subleaf: value loaded into ECX before the instruction.
 * eax, ebx, ecx, edx: receive the four result registers; all four pointers
 *          are written unconditionally and must not be NULL.
 *
 * Uses the `__asm__` spelling so the helper also compiles in strict ISO
 * mode, matching the control-register accessors in this file.
 */
static inline void cpuid(uint32_t leaf, uint32_t subleaf,
                         uint32_t *eax, uint32_t *ebx,
                         uint32_t *ecx, uint32_t *edx) {
    __asm__ volatile ("cpuid"
                      : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
                      : "a"(leaf), "c"(subleaf));
}
/*
 * Returns nonzero when the highest basic CPUID leaf (EAX of leaf 0) is at
 * least 7, i.e. leaf 7 may be queried.
 *
 * Declared with a (void) parameter list so it is a real prototype; an empty
 * `()` leaves the arguments unchecked before C23.
 */
int cpu_has_leaf7(void) {
    uint32_t max_leaf, ebx, ecx, edx;

    cpuid(0, 0, &max_leaf, &ebx, &ecx, &edx);
    return max_leaf >= 7;
}
/*
 * Returns nonzero when CPUID leaf 7, subleaf 0 reports EBX bit 0 set
 * (the FSGSBASE feature flag), and 0 when leaf 7 is unavailable or the
 * bit is clear.
 *
 * Declared with a (void) parameter list so it is a real prototype.
 */
int cpu_has_fsgsbase(void) {
    uint32_t eax, ebx, ecx, edx;

    if (!cpu_has_leaf7())
        return 0;

    cpuid(7, 0, &eax, &ebx, &ecx, &edx);
    return (ebx & (1u << 0)) != 0;
}
/* Returns the current value of control register CR4. */
static inline uint64_t read_cr4(void) {
    uint64_t cr4;

    __asm__ volatile ("mov %%cr4, %0" : "=r"(cr4));
    return cr4;
}
/*
 * Loads `val` into control register CR4.
 *
 * The "memory" clobber stops the compiler from moving memory accesses
 * across the write, since changing CR4 can alter how later instructions
 * behave.
 */
static inline void write_cr4(uint64_t val) {
    __asm__ volatile ("mov %0, %%cr4" :: "r"(val) : "memory");
}
/* Returns the current value of control register CR0. */
static inline uint64_t read_cr0(void) {
    uint64_t cr0;

    __asm__ volatile ("mov %%cr0, %0" : "=r"(cr0));
    return cr0;
}
/*
 * Loads `val` into control register CR0.
 *
 * The "memory" clobber stops the compiler from moving memory accesses
 * across the write, since changing CR0 can alter how later instructions
 * behave.
 */
static inline void write_cr0(uint64_t val) {
    __asm__ volatile ("mov %0, %%cr0" :: "r"(val) : "memory");
}
/* CR4 bit 16: the FSGSBASE enable bit set by enable_fsgsbase_if_supported(). */
#define CR4_FSGSBASE (1ULL << 16)

/*
 * Sets CR4.FSGSBASE when cpu_has_fsgsbase() reports the feature.
 *
 * When the feature is missing, CR4 is left untouched and a message is
 * printed.
 *
 * Declared with a (void) parameter list so it is a real prototype.
 */
void enable_fsgsbase_if_supported(void) {
    if (!cpu_has_fsgsbase()) {
        // fallback: don't use wrfsbase
        printf("FSGSBASE not supported, skipping wrfsbase/wrgsbase\n");
        return;
    }

    write_cr4(read_cr4() | CR4_FSGSBASE);
}
extern struct kernel_pagemap;
uint64_t g_rsdp_phys;
@@ -128,6 +190,31 @@ static uacpi_interrupt_ret handle_power_button(uacpi_handle ctx) {
}
/*
 * init_simd - configure CR0/CR4 for FPU and SSE use, then reset the x87 FPU.
 *
 * CR0: EM and TS are cleared, MP is set.
 * CR4: OSFXSR and OSXMMEXCPT are set.
 * CR0 is written first, then CR4, then `fninit` is executed.
 */
void init_simd(void) {
    /* Bit masks as 64-bit constants, matching the width of the registers. */
    const uint64_t cr0_mp         = 1ULL << 1;   /* Monitor Coprocessor */
    const uint64_t cr0_em         = 1ULL << 2;   /* Emulation */
    const uint64_t cr0_ts         = 1ULL << 3;   /* Task Switched */
    const uint64_t cr4_osfxsr     = 1ULL << 9;   /* FXSAVE/FXRSTOR + SSE */
    const uint64_t cr4_osxmmexcpt = 1ULL << 10;  /* SSE exceptions */

    /* EM clear allows FPU/SSE instructions; TS clear avoids #NM. */
    uint64_t new_cr0 = (read_cr0() & ~(cr0_em | cr0_ts)) | cr0_mp;
    uint64_t new_cr4 = read_cr4() | cr4_osfxsr | cr4_osxmmexcpt;

    write_cr0(new_cr0);
    write_cr4(new_cr4);

    /* Put the x87 FPU into its initial state. */
    __asm__ volatile ("fninit");
}
void kmain(void) {
if (LIMINE_BASE_REVISION_SUPPORTED(limine_base_revision) == false) {
hcf();
@@ -354,6 +441,9 @@ void kmain(void) {
sched_init();
enable_fsgsbase_if_supported();
init_simd();
start_userspace();
sched_yield();
+13 -3
View File
@@ -83,10 +83,20 @@ void *pmm_alloc(size_t pages) {
void *pmm_allocz(size_t pages) {
void *ret = pmm_alloc(pages);
if (ret) {
memset((void *)((uintptr_t)ret + MEM_PHYS_OFFSET), 0, pages * PAGE_SIZE); // this is at fault for the page fault
if (!ret) return NULL;
uintptr_t vaddr = (uintptr_t)ret + MEM_PHYS_OFFSET;
// Sanity: make sure we're not zeroing something ridiculous
if (vaddr < MEM_PHYS_OFFSET || vaddr > MEM_PHYS_OFFSET + 0x8000000000ULL) {
printf("PMM: allocz addr 0x%lx looks wrong!\n", vaddr);
pmm_free(ret, pages);
return NULL;
}
uint64_t *p = (uint64_t *)vaddr;
for (size_t i = 0; i < (pages * PAGE_SIZE) / 8; i++)
p[i] = 0;
return ret;
}
+1 -1
View File
@@ -36,7 +36,7 @@ static inline struct slab *slab_for(size_t size) {
static void create_slab(struct slab *slab, size_t ent_size) {
spinlock_init(&slab->lock);
slab->first_free = (void **)((uint64_t)pmm_alloc(1) + MEM_PHYS_OFFSET);
slab->first_free = (void **)((uint64_t)pmm_allocz(1) + MEM_PHYS_OFFSET);
slab->ent_size = ent_size;
size_t header_offset = ALIGN_UP(sizeof(struct slab_header), ent_size);
+10 -1
View File
@@ -320,4 +320,13 @@ fail:
spinlock_drop(&pagemap->lock);
printf("Invalid Phys!\n");
return INVALID_PHYS;
}
}
/*
 * find_free_vaddr - hand out a fresh, never-before-returned virtual range.
 *
 * pm:  target pagemap.  Currently ignored: the cursor is one function-local
 *      static shared by every caller, so ranges are unique across all
 *      pagemaps.
 * len: requested size in bytes.  A zero length still consumes one 16 MiB
 *      granule so that two calls can never return the same address.
 *
 * Returns the base address of a range of at least `len` bytes, or 0 when the
 * request does not fit below the end of the 47-bit user address space.  A
 * failed request does not move the cursor.
 *
 * Very naive bump allocator: ranges start at 0x700050000000, are rounded up
 * to 16 MiB, and are never reused.  Nothing is mapped here and existing
 * mappings are not consulted.
 * NOTE(review): the cursor can eventually reach TLS_BASE_VA
 * (0x00007FFF00000000) and any other fixed user mappings — confirm whether
 * the limit should be lowered to keep clear of them.
 * NOTE(review): the static cursor is updated without a lock — confirm
 * callers are serialized.
 */
uintptr_t find_free_vaddr(struct pagemap *pm, size_t len) {
    const uintptr_t granule = 0x1000000ULL;         /* 16 MiB */
    const uintptr_t limit   = 0x0000800000000000ULL; /* end of the user half */
    static uintptr_t next   = 0x700050000000ULL;

    (void)pm;

    if (len == 0)
        len = 1;

    /* `next` and `limit` are both granule-aligned, so a length that fits in
     * the remaining room still fits after rounding up, and the rounding
     * itself cannot overflow. */
    if (len > limit - next)
        return 0;

    uintptr_t span = ((uintptr_t)len + granule - 1) & ~(granule - 1);
    uintptr_t addr = next;

    next += span;
    return addr;
}
+3 -1
View File
@@ -42,4 +42,6 @@ bool vmm_map_page(struct pagemap *pagemap, uint64_t virt, uint64_t phys,
uint64_t flags, enum page_size pg_size);
uint64_t vmm_virt_to_phys(struct pagemap *pagemap, uint64_t virt);
uint64_t *vmm_virt_to_pte(struct pagemap *pagemap, uintptr_t virt_addr,
bool allocate);
bool allocate);
bool vmm_unmap_page(struct pagemap *pagemap, uintptr_t virt, bool locked);
uintptr_t find_free_vaddr(struct pagemap *pm, size_t len);
+84
View File
@@ -0,0 +1,84 @@
#include "futex.h"
#include "mm/memory.h"
#include "string.h"
#include "sched/scheduler.h"
#include "libk/stdio.h"
/* Number of hash buckets. futex_hash() masks with (FUTEX_BUCKETS - 1), so
 * this must remain a power of two. */
#define FUTEX_BUCKETS 256
/* One task parked in futex_wait(). Nodes are kmalloc'd by futex_wait() and
 * unlinked and kfree'd by futex_wake(). */
struct futex_waiter {
/* Task that called futex_wait(); passed to sched_wake() on wake-up. */
task_t *task;
/* Futex word address the task is waiting on; compared by pointer value. */
int *uaddr;
/* Next waiter in the same bucket (singly linked, newest first). */
struct futex_waiter *next;
};
/* Bucket heads, indexed by futex_hash(uaddr). */
static struct futex_waiter *g_futex_table[FUTEX_BUCKETS];
/*
 * Maps a futex word address to a bucket index in [0, FUTEX_BUCKETS):
 * the low three address bits are dropped and the rest is masked.
 */
static inline uint32_t futex_hash(int *uaddr) {
    uintptr_t key = (uintptr_t)uaddr >> 3;

    return key & (FUTEX_BUCKETS - 1);
}
int futex_wait(int *uaddr, int expected)
{
if (!uaddr) return -1;
/* 1. check value in user memory */
if (*uaddr != expected)
return -1;
uint32_t h = futex_hash(uaddr);
struct futex_waiter *w = kmalloc(sizeof(*w));
if (!w) return -1;
w->task = sched_current();
w->uaddr = uaddr;
/* 2. insert into bucket */
w->next = g_futex_table[h];
g_futex_table[h] = w;
/* 3. block task */
sched_block(TASK_INTERRUPTIBLE);
return 0;
}
/*
 * futex_wake - wake up to `count` tasks waiting on `uaddr`.
 *
 * Walks the bucket for `uaddr`, unlinking and freeing each matching waiter
 * before handing its task to sched_wake().  Returns the number of tasks
 * woken; a NULL uaddr or a non-positive count wakes nothing.
 */
int futex_wake(int *uaddr, int count)
{
    if (uaddr == NULL || count <= 0)
        return 0;

    /* `link` always points at the pointer that references the current node. */
    struct futex_waiter **link = &g_futex_table[futex_hash(uaddr)];
    int released = 0;

    while (*link != NULL && released < count) {
        struct futex_waiter *node = *link;

        if (node->uaddr != uaddr) {
            /* different futex sharing this bucket: skip it */
            link = &node->next;
            continue;
        }

        task_t *sleeper = node->task;

        /* unlink, release the node, then wake its task */
        *link = node->next;
        kfree(node);
        sched_wake(sleeper);
        released++;
    }

    return released;
}
+10
View File
@@ -0,0 +1,10 @@
#pragma once
#include <stdint.h>
#include <stddef.h>
#include "sched/scheduler.h"
/* Operation codes: the SYS_FUTEX handler dispatches on these values. */
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
/*
 * Block the calling task if *uaddr == expected.
 * Returns 0 after the task has been blocked and resumed; -1 if uaddr is
 * NULL, the value differs, or the waiter node cannot be allocated.
 */
int futex_wait(int *uaddr, int expected);
/*
 * Wake up to `count` tasks waiting on uaddr.
 * Returns the number of tasks woken (0 for a NULL uaddr or count <= 0).
 */
int futex_wake(int *uaddr, int count);
+70 -75
View File
@@ -6,6 +6,8 @@
#include "arch/x86_64/sys/pit.h"
#include "string.h"
#define IA32_FS_BASE 0xC0000100
/* =====================================================================
* Forward declarations for GDT/TSS (defined in gdt.c)
* ===================================================================== */
@@ -329,43 +331,22 @@ static void kthread_trampoline(void) {
sched_exit(0);
}
static void user_task_trampoline(void) {
x86_64_EnableInterrupts();
void set_fs_base(uint64_t base) {
/* Make sure the address is canonical (bits 63..48 all 0 or all 1).
* Non-canonical FS base + any fs: access from user code = #GP. */
uint64_t high = base >> 48;
if (high != 0 && high != 0xFFFFULL) {
/* Simple sign-extension from bit 47 (common for user-space TLS) */
if (base & (1ULL << 47))
base |= 0xFFFFULL << 48; /* negative canonical */
else
base &= (1ULL << 48) - 1; /* positive canonical */
}
task_t *self = g_current_task;
/*
* Build an iretq frame on the current (kernel) stack and enter
* user mode. We reset the stack pointer to the very top of the
* kernel stack first, so the iretq frame doesn't sit below a
* pile of stale context-switch frames.
*
* Segment selectors (from your GDT / STAR setup):
* User CS = 0x23 (GDT index 4, RPL 3)
* User SS = 0x1B (GDT index 3, RPL 3)
*/
uint64_t kstack_top = (uint64_t)self->kernel_stack + self->kernel_stack_size;
uint64_t user_rip = self->user_entry;
uint64_t user_rsp = self->user_stack_top;
asm volatile(
"movq %0, %%rsp\n\t" /* Reset kernel RSP to stack top */
"pushq $0x1B\n\t" /* SS user data segment */
"pushq %1\n\t" /* RSP user stack pointer */
"pushfq\n\t" /* RFLAGS */
"orq $0x200, (%%rsp)\n\t" /* Set IF so user code runs with */
/* interrupts enabled */
"pushq $0x23\n\t" /* CS user code segment */
"pushq %2\n\t" /* RIP user entry point */
"iretq\n\t"
:
: "r"(kstack_top), "r"(user_rsp), "r"(user_rip)
: "memory"
);
__builtin_unreachable();
asm volatile("wrfsbase %0" : : "r"(base) : "memory");
}
/* =====================================================================
* Kernel stack setup for a new task
*
@@ -444,13 +425,18 @@ task_t *sched_create_kthread(const char *name,
task->vruntime = g_runqueue.min_vruntime;
sched_enqueue(task);
printf("[sched] kthread '%s' pid=%d created\n", task->name, task->pid);
//printf("[sched] kthread '%s' pid=%d created\n", task->name, task->pid);
return task;
}
task_t *sched_create_user_task(const char *name,
uint64_t entry_rip, uint64_t user_rsp,
struct pagemap *pm)
uint64_t entry_rip,
uint64_t user_rsp,
struct pagemap *pm,
uint64_t tls_fs_base,
uint64_t phdr_va,
uint16_t phent,
uint16_t phnum)
{
task_t *task = alloc_task(name, true);
if (!task) return NULL;
@@ -458,8 +444,11 @@ task_t *sched_create_user_task(const char *name,
task->pagemap = pm;
task->user_entry = entry_rip;
task->user_stack_top= user_rsp;
task->tls_fs_base = tls_fs_base;
task->phdr_va = phdr_va;
task->phent = phent;
task->phnum = phnum;
/* CR3 = physical address of PML4 */
task->ctx.cr3 = (uint64_t)pm->top_level - MEM_PHYS_OFFSET;
setup_initial_kstack(task, user_task_trampoline);
@@ -467,9 +456,11 @@ task_t *sched_create_user_task(const char *name,
task->time_slice = calc_timeslice(task);
task->vruntime = g_runqueue.min_vruntime;
for (size_t i = 256; i < 512; i++) {
pm->top_level[i] = kernel_pagemap->top_level[i];
}
sched_enqueue(task);
printf("[sched] user task '%s' pid=%d created, entry=0x%lx\n",
task->name, task->pid, entry_rip);
return task;
}
@@ -556,6 +547,15 @@ void schedule(void) {
task_t *prev = g_runqueue.current;
task_t *next = pick_next_task(&g_runqueue);
/*
 * Restore the incoming user task's TLS base.  pick_next_task() may hand
 * back NULL (the check that follows handles that case), so `next` must
 * not be dereferenced before it has been tested.
 */
if (next != NULL && next->is_user) {
    if (next->tls_fs_base != 0) {
        set_fs_base(next->tls_fs_base);
    } else {
        printf("Warning: user task '%s' has no TLS FS base set; leaving FS at 0\n", next->name);
    }
}
if (next == prev || next == NULL) {
/* Nothing to switch to; keep running current task. */
spinlock_drop(&g_runqueue.lock);
@@ -600,6 +600,11 @@ void schedule(void) {
spinlock_drop(&g_runqueue.lock);
/* ---- Context switch -------------------------------------------- */
//printf("[sched] switching from '%s' (pid=%d) to '%s' (pid=%d)\n",
// prev->name, prev->pid, next->name, next->pid);
if (next->is_user) {
//printf("switching to user task, fs_base=0x%lx\n", next->tls_fs_base);
}
sched_context_switch(&prev->ctx, &next->ctx);
/*
@@ -705,8 +710,8 @@ void sched_exit(int exit_code) {
if (self->parent)
task_send_signal(self->parent, SIGCHLD);
printf("[sched] task '%s' pid=%d exited with code %d\n",
self->name, self->pid, exit_code);
//printf("[sched] task '%s' pid=%d exited with code %d\n",
// self->name, self->pid, exit_code);
/* Hand off to someone else; we will never return. */
schedule();
@@ -791,65 +796,55 @@ void task_handle_pending_signals(void) {
task_t *self = g_current_task;
if (!self) return;
/* Only run signal handling when we are about to return to user mode.
* Kernel threads can still get synchronous handlers, but we avoid
* unnecessary work / possible recursion on kernel tasks. */
if (!self->is_user && !(self->pending_signals & ~self->signal_mask))
return;
while (self->pending_signals & ~self->signal_mask) {
/* Find the lowest-numbered pending, unblocked signal */
uint64_t deliverable = self->pending_signals & ~self->signal_mask;
int signum = __builtin_ctzll(deliverable) + 1; /* +1: bit 0 = sig 1 */
int signum = __builtin_ctzll(deliverable) + 1;
if (signum >= _NSIG) break;
/* Clear the pending bit */
self->pending_signals &= ~(1ULL << (signum - 1));
sighandler_t handler = self->sigactions[signum].sa_handler;
if (handler == SIG_IGN) {
/* Explicitly ignored */
if (signum == SIGCHLD) continue; /* common: reap silently */
if (signum == SIGCHLD) continue;
continue;
}
} else if (handler != SIG_DFL) {
/*
* User-defined handler.
*
* A full POSIX implementation would build a signal frame on
* the user stack and set registers so that iretq delivers
* the signal; that requires knowing the saved RFLAGS/RIP
* from the ISR frame. We leave this as a TODO and just
* call the handler directly for kernel threads.
*
* For user tasks this is the point where you would push a
* ucontext_t / sigframe onto the user stack and adjust the
* saved user RIP in the ISR frame.
*/
if (handler != SIG_DFL) {
if (!self->is_user) {
handler(signum);
handler(signum); /* kernel thread */
} else {
/* TODO: build user-space signal frame */
printf("[signal] TODO: deliver signal %d to user task '%s'\n",
signum, self->name);
/* TODO: proper sigframe + adjust trap frame on kernel stack */
printf("[signal] TODO: deliver signal %d to user task '%s' (pid=%d)\n",
signum, self->name, self->pid);
/* For now fall through to default action so we don't silently ignore */
}
}
} else {
/* SIG_DFL */
/* Default action (also used for user tasks when no handler is installed) */
if (handler == SIG_DFL || self->is_user) {
switch (default_action(signum)) {
case SIG_ACTION_TERM:
case SIG_ACTION_CORE:
printf("[signal] task '%s' pid=%d killed by signal %d\n",
self->name, self->pid, signum);
sched_exit(128 + signum);
break; /* unreachable */
sched_exit(128 + signum); /* does not return */
break;
case SIG_ACTION_STOP:
self->state = TASK_STOPPED;
/* Notify parent */
if (self->parent) task_send_signal(self->parent, SIGCHLD);
sched_block(TASK_STOPPED);
if (self->parent)
task_send_signal(self->parent, SIGCHLD);
sched_block(TASK_STOPPED); /* does not return until CONT */
break;
case SIG_ACTION_CONT:
/* Already running (we were woken to handle this) */
break;
case SIG_ACTION_IGN:
break;
}
+16 -2
View File
@@ -160,6 +160,10 @@ struct task {
uint64_t user_stack_top; /* User-space RSP for user tasks */
void (*kthread_entry)(void *arg); /* Kernel thread entry point */
void *kthread_arg;
uint64_t tls_fs_base; /* FS base for user tasks (TLS support) */
uint64_t phdr_va;
uint16_t phent;
uint16_t phnum;
/* ---- Signals ----------------------------------------------------- */
uint64_t pending_signals; /* Bitmask of unhandled signals */
@@ -248,8 +252,14 @@ task_t *sched_create_kthread(const char *name,
/* Create a user-space task and enqueue it immediately */
task_t *sched_create_user_task(const char *name,
uint64_t entry_rip, uint64_t user_rsp,
struct pagemap *pm);
uint64_t entry_rip,
uint64_t user_rsp,
struct pagemap *pm,
uint64_t tls_fs_base,
uint64_t phdr_va,
uint16_t phent,
uint16_t phnum);
/* Add a task to the appropriate run queue */
void sched_enqueue(task_t *task);
@@ -308,5 +318,9 @@ static inline task_t *sched_current(void) { return g_current_task; }
void sched_context_switch(struct cpu_context *from,
struct cpu_context *to);
void set_fs_base(uint64_t base);
extern void user_task_trampoline(void); /* Defined in user_task_trampoline.S */
/* Kernel stack size for each task */
#define KSTACK_SIZE (32 * 1024) /* 32 KiB — comfortable headroom */
+118
View File
@@ -0,0 +1,118 @@
/* ── struct task offsets ─────────────────────────────────────────────────── */
/*
 * Byte offsets of the fields the trampoline reads from the current task.
 * NOTE(review): these are hard-coded here and presumably must be kept in
 * sync by hand with the layout of struct task in the scheduler header --
 * confirm after every change to that struct.
 */
.equ TASK_KERNEL_STACK, 160
.equ TASK_KERNEL_STACK_SIZE, 168
.equ TASK_USER_ENTRY, 176
.equ TASK_USER_STACK_TOP, 184
.equ TASK_TLS_FS_BASE, 208
.equ TASK_PHDR_VA, 216
.equ TASK_PHENT, 224
.equ TASK_PHNUM, 226
/* ── GDT selectors ───────────────────────────────────────────────────────── */
/* Pushed as SS and CS of the iretq frame that enters ring 3. */
.equ SEL_USER_DS, 0x1B /* ring-3 data (index 3, RPL 3) */
.equ SEL_USER_CS, 0x23 /* ring-3 code (index 4, RPL 3) */
/* ── ELF auxiliary-vector types ──────────────────────────────────────────── */
/* Tag values written as the first word of each auxv pair on the user stack. */
.equ AT_NULL, 0
.equ AT_PAGESZ, 6
.equ AT_ENTRY, 9
.equ AT_PHDR, 3
.equ AT_PHENT, 4
.equ AT_PHNUM, 5
.equ AT_BASE, 7
/*
 * user_task_trampoline
 *
 * First code run by a freshly created user task (the scheduler installs it
 * with setup_initial_kstack()).  It
 *   1. loads the task's TLS FS base, if one is set,
 *   2. writes argc/argv/envp/auxv onto the user stack,
 *   3. pivots to the top of the task's kernel stack, builds an iretq frame
 *      and drops to ring 3 at the task's entry point.
 * Never returns.
 *
 * The user stack is written through its user virtual addresses, so the
 * task's address space must already be active when this runs.
 *
 * User stack layout relative to T = task->user_stack_top:
 *   T-0xB0  argc = 1                 <- initial user RSP
 *   T-0xA8  argv[0] -> T-0x20
 *   T-0xA0  argv[1] = NULL
 *   T-0x98  envp[0] = NULL
 *   T-0x90  auxv: PAGESZ, ENTRY, PHDR, PHENT, PHNUM, BASE, NULL (7 pairs)
 *   T-0x20  "helloworld\0" (hard-coded program name)
 */
    .section .text
    .global user_task_trampoline
    .type user_task_trampoline, @function
user_task_trampoline:
    movq    g_current_task(%rip), %rbx

    /* ── TLS FS base ───────────────────────────────────────────────────── */
    movq    TASK_TLS_FS_BASE(%rbx), %rdi
    testq   %rdi, %rdi
    jz      .Lno_tls
    call    set_fs_base
    movq    g_current_task(%rip), %rbx      /* reload the task pointer after the call */
.Lno_tls:

    /* ── Stash values we need after we switch stacks ───────────────────── */
    movq    TASK_USER_STACK_TOP(%rbx), %r15
    movq    TASK_USER_ENTRY(%rbx), %r14
    movq    TASK_KERNEL_STACK(%rbx), %r13
    addq    TASK_KERNEL_STACK_SIZE(%rbx), %r13      /* r13 = kernel stack top */

    /* ── Load auxv values ──────── */
    movq    TASK_PHDR_VA(%rbx), %r11
    movzwq  TASK_PHENT(%rbx), %r10
    movzwq  TASK_PHNUM(%rbx), %r9

    /* ── Build initial user stack ─────────────────────── */
    /* program name string: "hellowor" + "ld\0" */
    movabsq $0x726f776f6c6c6568, %rax
    movq    %rax, -0x20(%r15)
    movabsq $0x000000000000646c, %rax
    movq    %rax, -0x18(%r15)

    /* argc / argv / envp */
    movq    $1, -0xB0(%r15)                 /* argc = 1 */
    leaq    -0x20(%r15), %rax
    movq    %rax, -0xA8(%r15)               /* argv[0] */
    movq    $0, -0xA0(%r15)                 /* argv[1] = NULL */
    movq    $0, -0x98(%r15)                 /* envp[0] = NULL */

    /* auxv */
    movq    $AT_PAGESZ, -0x90(%r15)
    movq    $4096, -0x88(%r15)
    movq    $AT_ENTRY, -0x80(%r15)
    movq    %r14, -0x78(%r15)
    movq    $AT_PHDR, -0x70(%r15)
    movq    %r11, -0x68(%r15)
    movq    $AT_PHENT, -0x60(%r15)
    movq    %r10, -0x58(%r15)
    movq    $AT_PHNUM, -0x50(%r15)
    movq    %r9, -0x48(%r15)
    movq    $AT_BASE, -0x40(%r15)
    movq    $0, -0x38(%r15)
    movq    $AT_NULL, -0x30(%r15)
    movq    $0, -0x28(%r15)

    leaq    -0xB0(%r15), %r12               /* user RSP */

    /* ── Pivot to kernel stack top and build iretq frame ──────────────── */
    movq    %r13, %rsp
    pushq   $SEL_USER_DS                    /* SS */
    pushq   %r12                            /* RSP */
    pushfq                                  /* RFLAGS ... */
    orq     $0x200, (%rsp)                  /* ... with IF set for user mode */
    pushq   $SEL_USER_CS                    /* CS */
    pushq   %r14                            /* RIP */

    /*
     * Zero every GPR before dropping to ring 3.  r12-r15 are cleared too:
     * they are no longer needed once the frame is built, and r13 holds the
     * kernel stack top, which must not be visible to user code.
     */
    xorq    %rax, %rax
    xorq    %rbx, %rbx
    xorq    %rcx, %rcx
    xorq    %rdx, %rdx
    xorq    %rsi, %rsi
    xorq    %rdi, %rdi
    xorq    %rbp, %rbp
    xorq    %r8, %r8
    xorq    %r9, %r9
    xorq    %r10, %r10
    xorq    %r11, %r11
    xorq    %r12, %r12
    xorq    %r13, %r13
    xorq    %r14, %r14
    xorq    %r15, %r15

    iretq
    .size user_task_trampoline, . - user_task_trampoline
+108 -1
View File
@@ -4,12 +4,18 @@
#include "fs/vfs.h"
#include "syscall.h"
#include "sched/scheduler.h"
#include "mm/vmm.h"
#include "mm/pmm.h"
#include "mm/memory.h"
#include "libk/errno.h"
#include "mp/futex.h"
#define MSR_EFER 0xC0000080
#define MSR_STAR 0xC0000081
#define MSR_LSTAR 0xC0000082
#define MSR_SFMASK 0xC0000084
#define MSR_KERNEL_GSBASE 0xC0000102
#define MSR_KERNEL_FSBASE 0xC0000100
#define EFER_SCE (1 << 0)
@@ -76,6 +82,76 @@ uint64_t syscall_handler(uint64_t num,
case SYS_EXIT_GROUP:
sched_exit((int)arg1);
//noreturn
case SYS_MMAP:
{
    /*
     * mmap(addr, len, prot, flags, fd, offset) -- anonymous mappings only.
     *
     * Returns the mapped address, or MAP_FAILED when the request is
     * file-backed, has zero length, falls outside the user half of the
     * address space, or cannot be backed/mapped.
     */
    const uintptr_t user_top = 0x0000800000000000ULL;   /* end of lower half */

    uintptr_t addr = (uintptr_t)arg1;
    size_t len = (size_t)arg2;
    int prot = (int)arg3;
    int flags = (int)arg4;
    int fd = (int)arg5;
    off_t offset = (off_t)arg6;

    (void)fd; (void)offset; // we only support anonymous for now

    /* The upper bound also guarantees the round-up below cannot wrap. */
    if (len == 0 || len > user_top)
        return (uint64_t)MAP_FAILED;

    len = ALIGN_UP(len, PAGE_SIZE);

    if (!(flags & MAP_ANONYMOUS)) {
        return (uint64_t)MAP_FAILED; // todo: file backed later
    }

    struct pagemap *pm = sched_current()->pagemap;
    if (!pm) pm = kernel_pagemap;

    if (!(flags & MAP_FIXED)) {
        addr = find_free_vaddr(pm, len);
    }

    /*
     * Whether the address came from the caller (MAP_FIXED) or from the
     * allocator, it must be page aligned, non-NULL and entirely inside the
     * lower half; the upper-half top-level entries are shared with the
     * kernel, so user pages must never be mapped there.
     */
    if (addr == 0 || (addr % PAGE_SIZE) != 0 ||
        addr >= user_top || len > user_top - addr)
        return (uint64_t)MAP_FAILED;

    uint64_t vmm_flags = PAGE_USER | PAGE_READ;
    if (prot & PROT_WRITE) vmm_flags |= PAGE_WRITE;
    /*
     * PAGE_NO_EXECUTE forbids instruction fetches, so it belongs on
     * mappings that did NOT ask for PROT_EXEC (the test used to be
     * inverted).
     * NOTE(review): assumes the paging code runs with NX enabled, as its
     * having a PAGE_NO_EXECUTE flag suggests -- confirm.
     */
    if (!(prot & PROT_EXEC)) vmm_flags |= PAGE_NO_EXECUTE;

    size_t page_count = len / PAGE_SIZE;

    void *phys = pmm_allocz(page_count);
    if (!phys) {
        return (uint64_t)MAP_FAILED;
    }

    // Map them
    for (size_t i = 0; i < page_count; i++) {
        uint64_t va = addr + i * PAGE_SIZE;
        uint64_t pa = (uint64_t)phys + i * PAGE_SIZE;

        if (!vmm_map_page(pm, va, pa, vmm_flags, Size4KiB)) {
            /* Undo the pages mapped so far so no PTE keeps pointing at
             * memory that is about to be returned to the allocator. */
            while (i-- > 0)
                vmm_unmap_page(pm, addr + i * PAGE_SIZE, false);
            pmm_free(phys, page_count);
            return (uint64_t)MAP_FAILED;
        }
    }

    return addr;
}
case SYS_MUNMAP:
{
    /*
     * munmap(addr, len): drop the page mappings covering [addr, addr+len).
     *
     * Returns 0 on success (and for a zero length or NULL address, as
     * before), or -EINVAL when the range is unaligned or not entirely
     * inside the user half.  Without that check a user task could unmap
     * pages through the kernel-half entries shared by every pagemap.
     */
    const uintptr_t user_top = 0x0000800000000000ULL;   /* end of lower half */

    uintptr_t addr = (uintptr_t)arg1;
    size_t len = (size_t)arg2;

    if (len == 0 || addr == 0)
        return 0;

    if ((addr % PAGE_SIZE) != 0 || addr >= user_top || len > user_top - addr)
        return (uint64_t)-EINVAL;

    /* addr is aligned and len fits below user_top, so this cannot wrap. */
    len = ALIGN_UP(len, PAGE_SIZE);

    struct pagemap *pm = sched_current()->pagemap;
    if (!pm)
        pm = kernel_pagemap;

    for (size_t i = 0; i < len; i += PAGE_SIZE) {
        vmm_unmap_page(pm, addr + i, false);
        // TODO: also free the physical page (will need page refcounting or virt_to_phys + pmm_free)
    }

    return 0;
}
case SYS_SCHED_YIELD:
sched_yield();
@@ -159,8 +235,39 @@ uint64_t syscall_handler(uint64_t num,
return (uint64_t)task_set_scheduler(t, policy, rt_prio);
}
case SYS_TCB_SET:
{
    /*
     * Install `arg1` as the calling task's thread control block (FS base).
     * Returns 0, or -EINVAL for a NULL pointer.
     */
    void *pointer = (void*)arg1;
    if (pointer == NULL) {
        return (uint64_t)-EINVAL;
    }

    /*
     * Record the value in the task as well as in the CPU: schedule()
     * reloads FS from task->tls_fs_base when it picks a user task, so a
     * base that lives only in the register would be lost on the next
     * context switch.
     */
    task_t *self = sched_current();
    if (self)
        self->tls_fs_base = (uint64_t)pointer;

    set_fs_base((uint64_t)pointer);
    return 0;
}
case SYS_FUTEX:
{
    /* futex(uaddr, op, val): forward to the wait/wake primitive chosen by op. */
    int *word = (int*)arg1;
    int operation = (int)arg2;
    int value = (int)arg3;

    if (operation == FUTEX_WAIT)
        return (uint64_t)futex_wait(word, value);

    if (operation == FUTEX_WAKE)
        return (uint64_t)futex_wake(word, value);

    /* any other operation is unsupported */
    return -EINVAL;
}
default:
return (uint64_t)-1;
{
printf("Unknown syscall: %lu\n", num);
return (uint64_t)-ENOSYS;
}
}
}
+35 -11
View File
@@ -1,22 +1,46 @@
#pragma once
#define SYS_READ 0
#define SYS_WRITE 1
#define SYS_OPEN 2
#define SYS_CLOSE 3
#define SYS_SCHED_YIELD 24
#define SYS_GETPID 39
#define SYS_GETPPID 110
#define SYS_NICE 34
#define SYS_KILL 62
#define SYS_READ 0
#define SYS_WRITE 1
#define SYS_OPEN 2
#define SYS_CLOSE 3
#define SYS_MMAP 9
#define SYS_MUNMAP 11
#define SYS_BRK 12
#define SYS_SIGACTION 13 /* rt_sigaction on Linux */
#define SYS_SIGPROCMASK 14 /* rt_sigprocmask on Linux */
#define SYS_SCHED_YIELD 24
#define SYS_GETPID 39
#define SYS_NICE 34
#define SYS_FORK 57
#define SYS_EXECVE 59
#define SYS_EXIT 60
#define SYS_EXIT_GROUP 231
#define SYS_KILL 62
#define SYS_GETPPID 110
#define SYS_SCHED_GETSCHEDULER 138
#define SYS_SCHED_SETSCHEDULER 139
#define SYS_FUTEX 202
#define SYS_EXIT_GROUP 231
#define SYS_TCB_SET 300
typedef int64_t off_t;
// Memory protection flags (Linux compatible)
#define PROT_NONE 0x0
#define PROT_READ 0x1
#define PROT_WRITE 0x2
#define PROT_EXEC 0x4
// mmap flags
#define MAP_PRIVATE 0x02
#define MAP_SHARED 0x01
#define MAP_ANONYMOUS 0x20
#define MAP_FIXED 0x10
#define MAP_FAILED ((void*)-1)
void syscall_init(void);