sched: add POSIX signal support
We have added POSIX signals to KirkOS. This is very much experimental. Alongside that, we now have full PCI support, and we have imported the sbase coreutils; I'm not sure that all of them work (likely not), but a good few should be okay. Signed-off-by: kaguya <kaguya3311@national.shitposting.agency>
This commit is contained in:
@@ -319,6 +319,12 @@ void isr_handle(registers_t *r) {
|
||||
|
||||
if (r->isrNumber < 256 && event_handlers[r->isrNumber] != NULL) {
|
||||
event_handlers[r->isrNumber](r);
|
||||
/* Run pending signals on return-to-userspace. Skipped on ring-0
|
||||
* returns; signal_check_and_dispatch guards on it internally too. */
|
||||
extern bool signal_check_and_dispatch(registers_t *);
|
||||
if (r->cs & 0x3) {
|
||||
signal_check_and_dispatch(r);
|
||||
}
|
||||
} else {
|
||||
if (r->isrNumber < 32) {
|
||||
if (r->cs & 0x3) {
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "mm/vmm.h"
|
||||
#include "sched/sched.h"
|
||||
#include "sched/syscall.h"
|
||||
#include "sched/signal.h"
|
||||
#include "arch/x86_64/boot/isr.h"
|
||||
#include "arch/x86_64/sys/prcb.h"
|
||||
|
||||
@@ -23,16 +24,29 @@ void syscall_handler(registers_t *reg) {
|
||||
.args3 = reg->r10,
|
||||
.args4 = reg->r8,
|
||||
.args5 = reg->r9,
|
||||
.ret = reg->rax};
|
||||
.ret = reg->rax,
|
||||
.reg_ptr = reg};
|
||||
|
||||
syscall_handle(&args);
|
||||
|
||||
int64_t ret = (int64_t)args.ret;
|
||||
if (ret < 0) {
|
||||
if (args.syscall_nr == 15 /* SYS_sigreturn */) {
|
||||
/* sigreturn already mutated *reg directly with the saved user
|
||||
* state. Do NOT overwrite reg->rax here. */
|
||||
} else if (ret < 0) {
|
||||
ret = -((int)errno);
|
||||
reg->rax = ret;
|
||||
} else
|
||||
} else {
|
||||
reg->rax = args.ret;
|
||||
}
|
||||
|
||||
/* Pending-signal dispatch on return to userspace. signal_check_and_dispatch
|
||||
* is a no-op when returning to ring 0 (we never check r->cs here because
|
||||
* the syscall instruction is only legal from CPL=3). It mutates `reg` in
|
||||
* place — for the sysret path, that means writing reg->rcx (used as new
|
||||
* rip) and reg->r11 (used as new rflags), and updating the per-CPU
|
||||
* user_stack (used as new rsp). signal_check_and_dispatch handles that. */
|
||||
signal_check_and_dispatch(reg);
|
||||
}
|
||||
|
||||
void syscall_install_handler(void) {
|
||||
@@ -54,14 +68,29 @@ bool syscall_helper_copy_to_user(uintptr_t user_addr, void *buffer,
|
||||
struct process *proc = sched_get_running_thread()->mother_proc;
|
||||
struct pagemap *target_pagemap = proc->process_pagemap;
|
||||
|
||||
uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr);
|
||||
/* vmm_virt_to_kernel resolves ONE page at a time via the direct map.
|
||||
* Two consecutive user virtual pages map to physical frames that are
|
||||
* almost never contiguous in the kernel direct map, so we must walk
|
||||
* a page at a time and re-translate at each page boundary. */
|
||||
uint8_t *src = (uint8_t *)buffer;
|
||||
while (count > 0) {
|
||||
uint64_t page_off = user_addr & (PAGE_SIZE - 1);
|
||||
uint64_t chunk = PAGE_SIZE - page_off;
|
||||
if (chunk > count) chunk = count;
|
||||
|
||||
if (!kernel_addr) {
|
||||
errno = EFAULT;
|
||||
return false;
|
||||
uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr);
|
||||
if (!kernel_addr) {
|
||||
vmm_switch_pagemap(target_pagemap);
|
||||
errno = EFAULT;
|
||||
return false;
|
||||
}
|
||||
memcpy((void *)kernel_addr, src, chunk);
|
||||
|
||||
user_addr += chunk;
|
||||
src += chunk;
|
||||
count -= chunk;
|
||||
}
|
||||
|
||||
memcpy((void *)kernel_addr, buffer, count);
|
||||
vmm_switch_pagemap(target_pagemap);
|
||||
return true;
|
||||
}
|
||||
@@ -73,13 +102,26 @@ bool syscall_helper_copy_from_user(uintptr_t user_addr, void *buffer,
|
||||
struct process *proc = sched_get_running_thread()->mother_proc;
|
||||
struct pagemap *target_pagemap = proc->process_pagemap;
|
||||
|
||||
uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr);
|
||||
/* Same per-page walk as copy_to_user — see comment there. */
|
||||
uint8_t *dst = (uint8_t *)buffer;
|
||||
while (count > 0) {
|
||||
uint64_t page_off = user_addr & (PAGE_SIZE - 1);
|
||||
uint64_t chunk = PAGE_SIZE - page_off;
|
||||
if (chunk > count) chunk = count;
|
||||
|
||||
if (!kernel_addr) {
|
||||
errno = EFAULT;
|
||||
return false;
|
||||
uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr);
|
||||
if (!kernel_addr) {
|
||||
vmm_switch_pagemap(target_pagemap);
|
||||
errno = EFAULT;
|
||||
return false;
|
||||
}
|
||||
memcpy(dst, (void *)kernel_addr, chunk);
|
||||
|
||||
user_addr += chunk;
|
||||
dst += chunk;
|
||||
count -= chunk;
|
||||
}
|
||||
memcpy(buffer, (void *)kernel_addr, count);
|
||||
|
||||
vmm_switch_pagemap(target_pagemap);
|
||||
return true;
|
||||
}
|
||||
@@ -107,6 +107,15 @@ void breakpoint_handler(registers_t *reg) {
|
||||
|
||||
pause_other_cpus();
|
||||
|
||||
|
||||
/* Mask the UART RX IRQ for the duration of the debugger. We are in
|
||||
* an INT3 context with IRQs disabled, but the moment we re-enable
|
||||
* them at exit (or any callee re-enables them) the deferred serial
|
||||
* IRQ would fire and drain pending bytes into the input ring —
|
||||
* stealing the keystrokes the debugger expects to read via the
|
||||
* polled serial_getchar() loop below. */
|
||||
serial_disable_rx_irq();
|
||||
|
||||
kprintffos(0, "=========== Start of dumps =========\n");
|
||||
kprintffos(0, "Breakpoint hit on CPU%u\n",
|
||||
prcb_return_current_cpu()->cpu_number);
|
||||
@@ -149,5 +158,10 @@ void breakpoint_handler(registers_t *reg) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Re-arm the UART RX IRQ before letting the system resume normal
|
||||
* operation; also drains anything that arrived while we polled. */
|
||||
serial_enable_rx_irq();
|
||||
|
||||
|
||||
unpause_other_cpus();
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "arch/x86_64/sys/timer.h"
|
||||
#include "madt.h"
|
||||
#include "libk/debug.h"
|
||||
#include "arch/x86_64/bus/pci.h"
|
||||
|
||||
void acpi_init() {
|
||||
|
||||
@@ -52,6 +53,7 @@ void acpi_init() {
|
||||
|
||||
|
||||
timer_init();
|
||||
pci_init();
|
||||
madt_init();
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "sched/syscall.h"
|
||||
#include "sched/sched.h"
|
||||
#include "libk/errno.h"
|
||||
#include "libk/debug.h"
|
||||
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
@@ -31,6 +32,7 @@ void syscall_prctl(struct syscall_arguments *args) {
|
||||
case ARCH_SET_FS: {
|
||||
sched_get_running_thread()->fs_base = value;
|
||||
set_fs_base(sched_get_running_thread()->fs_base);
|
||||
args->ret = 0;
|
||||
break;
|
||||
}
|
||||
case ARCH_GET_FS:
|
||||
|
||||
@@ -2,8 +2,32 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "arch/x86_64/cpu/io.h"
|
||||
#include "arch/x86_64/sys/apic.h"
|
||||
#include "arch/x86_64/boot/isr.h"
|
||||
#include "drivers/input/input.h"
|
||||
#include "libk/debug.h"
|
||||
|
||||
/*
 * COM1 receive interrupt handler.
 *
 * Moves every byte the UART currently has buffered into the shared input
 * ring, then reads the interrupt-identification register and signals
 * end-of-interrupt to the APIC. The register frame is unused.
 */
static void serial_irq_handler(registers_t *r) {
    (void)r;

    /* Keep pulling while the line-status register (COM1 + 5) reports
     * "data ready" in bit 0. One interrupt may cover several queued
     * bytes, so a single read per IRQ is not enough. */
    for (;;) {
        uint8_t line_status = x86_64_inb(COM1 + 5);
        if (!(line_status & 0x01)) {
            break;
        }
        uint8_t received = x86_64_inb(COM1);
        input_push_byte(received);
    }

    /* Reading the IIR (COM1 + 2) acknowledges the interrupt source on
     * UARTs that need it; the value itself is not used. */
    (void)x86_64_inb(COM1 + 2);

    apic_eoi();
}
|
||||
|
||||
|
||||
void serial_init(void) {
|
||||
kprintf("Enabling Serial COM1.\n");
|
||||
x86_64_outb(COM1 + 1, 0x1);
|
||||
x86_64_outb(COM1 + 3, 0x80);
|
||||
x86_64_outb(COM1, 0x1);
|
||||
@@ -11,6 +35,19 @@ void serial_init(void) {
|
||||
x86_64_outb(COM1 + 3, 0x3);
|
||||
x86_64_outb(COM1 + 2, 0xC7);
|
||||
x86_64_outb(COM1 + 4, 0xB);
|
||||
|
||||
/* Enable the receive-data-available interrupt (IER bit 0). */
|
||||
x86_64_outb(COM1 + 1, 0x01);
|
||||
|
||||
/* Drain any byte the BIOS / QEMU may have left in the RBR before we
|
||||
* start listening, otherwise IIR may report a stale interrupt. */
|
||||
while (x86_64_inb(COM1 + 5) & 0x01) {
|
||||
(void)x86_64_inb(COM1);
|
||||
}
|
||||
|
||||
/* Route IRQ 4 → vector 36 in the IOAPIC and install our handler. */
|
||||
isr_register_handler(36, serial_irq_handler);
|
||||
ioapic_redirect_irq(4, 36);
|
||||
}
|
||||
|
||||
static inline bool is_transmit_empty(void) {
|
||||
@@ -62,3 +99,24 @@ char serial_getchar(void) {
|
||||
}
|
||||
return last_c;
|
||||
}
|
||||
|
||||
void serial_disable_rx_irq(void) {
|
||||
/* Clear IER bit 0 (Received Data Available Interrupt). The UART
|
||||
* stops asserting IRQ 4; we go back to polling-only mode. Used by
|
||||
* the breakpoint debugger so its serial_getchar() polling loop
|
||||
* actually sees the bytes the user types, instead of having them
|
||||
* stolen by serial_irq_handler() on the next sti. */
|
||||
uint8_t ier = x86_64_inb(COM1 + 1);
|
||||
x86_64_outb(COM1 + 1, ier & ~0x01);
|
||||
}
|
||||
|
||||
void serial_enable_rx_irq(void) {
|
||||
uint8_t ier = x86_64_inb(COM1 + 1);
|
||||
x86_64_outb(COM1 + 1, ier | 0x01);
|
||||
/* Drain anything that piled up while we were polling so the next
|
||||
* IRQ assertion corresponds to a fresh byte. */
|
||||
while (x86_64_inb(COM1 + 5) & 0x01) {
|
||||
(void)x86_64_inb(COM1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,4 +7,16 @@ void serial_putchar(char ch);
|
||||
void serial_puts(char *str);
|
||||
char serial_get_byte(void);
|
||||
char serial_getchar(void);
|
||||
int serial_received(void);
|
||||
int serial_received(void);
|
||||
|
||||
|
||||
/* Mask the UART receive-data-available IRQ (IER bit 0). Used by the
|
||||
* breakpoint debugger so bytes typed while it polls serial_getchar() are
|
||||
* not silently consumed by serial_irq_handler() and pushed into the input
|
||||
* ring instead of being delivered to the debugger. */
|
||||
void serial_disable_rx_irq(void);
|
||||
|
||||
/* Re-arm the UART receive-data-available IRQ and drain anything that
|
||||
* accumulated in the RBR while the IRQ was masked. Mirror of
|
||||
* serial_disable_rx_irq(). */
|
||||
void serial_enable_rx_irq(void);
|
||||
@@ -123,6 +123,6 @@ void fbdev_init(void) {
|
||||
framebuff_res->stat.st_rdev = resource_create_dev_id();
|
||||
framebuff_res->stat.st_mode = 0666 | S_IFCHR;
|
||||
|
||||
devtmpfs_add_device(framebuff_res, "fbdev");
|
||||
devtmpfs_add_device(framebuff_res, "fb0");
|
||||
vfs_symlink(vfs_root, "/dev/fbdev", "/dev/fb0");
|
||||
}
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
#include "input.h"
|
||||
#include "libk/event.h"
|
||||
#include "libk/errno.h"
|
||||
#include "mp/spinlock.h"
|
||||
#include "arch/x86_64/asm/asm.h"
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#define INPUT_RING_SIZE 1024
|
||||
|
||||
static uint8_t input_ring[INPUT_RING_SIZE];
|
||||
static size_t input_head; /* write index (IRQ producer) */
|
||||
static size_t input_tail; /* read index (thread consumer)*/
|
||||
static size_t input_used; /* number of valid bytes */
|
||||
static spinlock_t input_lock = {0};
|
||||
static struct event input_event = {0};
|
||||
|
||||
void input_init(void) {
|
||||
input_head = 0;
|
||||
input_tail = 0;
|
||||
input_used = 0;
|
||||
/* spinlock_t and struct event are both zero-initialized at file scope,
|
||||
which matches their idle states (lock=0/no listeners/no pending). */
|
||||
}
|
||||
|
||||
void input_push_byte(uint8_t b) {
|
||||
/* Producer side: called from IRQ context with IF already cleared by the
|
||||
CPU on interrupt entry. Hold the ring lock only across the ring
|
||||
mutation; release it before calling event_trigger (which takes its
|
||||
own spinlock and toggles interrupts internally). */
|
||||
spinlock_acquire_or_wait(&input_lock);
|
||||
|
||||
if (input_used < INPUT_RING_SIZE) {
|
||||
input_ring[input_head] = b;
|
||||
input_head = (input_head + 1) % INPUT_RING_SIZE;
|
||||
input_used++;
|
||||
spinlock_drop(&input_lock);
|
||||
/* Drop the byte's worth of work on the wake path. drop=false so a
|
||||
wake delivered while no thread is parked still counts as pending,
|
||||
matching the await/trigger contract. */
|
||||
event_trigger(&input_event, false);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Overflow: silently drop. */
|
||||
spinlock_drop(&input_lock);
|
||||
}
|
||||
|
||||
bool input_has_data(void) {
|
||||
/* Cheap racy peek; size_t reads are word-sized and atomic on x86_64. */
|
||||
return input_used > 0;
|
||||
}
|
||||
|
||||
/* Internal: copy out up to `count` bytes from the ring into `buf`, handling
|
||||
wrap-around. Caller MUST hold input_lock and MUST have verified used > 0.
|
||||
Returns the number of bytes copied. */
|
||||
static size_t input_drain_locked(uint8_t *buf, size_t count) {
|
||||
size_t n = count < input_used ? count : input_used;
|
||||
|
||||
/* First chunk: from tail to end of ring (or n, whichever is smaller). */
|
||||
size_t first = INPUT_RING_SIZE - input_tail;
|
||||
if (first > n) {
|
||||
first = n;
|
||||
}
|
||||
for (size_t i = 0; i < first; i++) {
|
||||
buf[i] = input_ring[input_tail + i];
|
||||
}
|
||||
|
||||
/* Second chunk (only if we wrapped): from start of ring. */
|
||||
size_t second = n - first;
|
||||
for (size_t i = 0; i < second; i++) {
|
||||
buf[first + i] = input_ring[i];
|
||||
}
|
||||
|
||||
input_tail = (input_tail + n) % INPUT_RING_SIZE;
|
||||
input_used -= n;
|
||||
return n;
|
||||
}
|
||||
|
||||
ssize_t input_read(uint8_t *buf, size_t count) {
|
||||
if (count == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct event *events_arr[1] = { &input_event };
|
||||
|
||||
for (;;) {
|
||||
/* Disable IRQs on this CPU before taking the lock so that the
|
||||
PS/2 or serial IRQ handler — which also acquires input_lock
|
||||
from input_push_byte() — cannot be delivered to this CPU while
|
||||
we hold the lock and deadlock against us. */
|
||||
bool old = int_toggle(false);
|
||||
spinlock_acquire_or_wait(&input_lock);
|
||||
if (input_used > 0) {
|
||||
size_t n = input_drain_locked(buf, count);
|
||||
spinlock_drop(&input_lock);
|
||||
int_toggle(old);
|
||||
return (ssize_t)n;
|
||||
}
|
||||
spinlock_drop(&input_lock);
|
||||
int_toggle(old);
|
||||
|
||||
/* Sleep until a producer triggers input_event. event_await handles
|
||||
the wake/pending race internally, so a byte that arrives between
|
||||
our drop above and the await call is not lost (event_trigger
|
||||
bumped pending while no listeners were attached). It also
|
||||
manages its own IRQ-disable while parking on the event. */
|
||||
(void)event_await(events_arr, 1, true);
|
||||
/* Loop back and re-acquire the lock; another reader may have raced
|
||||
us and consumed the bytes, in which case we sleep again. */
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t input_try_read(uint8_t *buf, size_t count) {
|
||||
if (count == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Same IRQ-disable wrapper as input_read — see comment there. */
|
||||
bool old = int_toggle(false);
|
||||
spinlock_acquire_or_wait(&input_lock);
|
||||
if (input_used == 0) {
|
||||
spinlock_drop(&input_lock);
|
||||
int_toggle(old);
|
||||
errno = EAGAIN;
|
||||
return -1;
|
||||
}
|
||||
size_t n = input_drain_locked(buf, count);
|
||||
spinlock_drop(&input_lock);
|
||||
int_toggle(old);
|
||||
return (ssize_t)n;
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include "libk/types.h"
|
||||
|
||||
/* Initialize the shared input ring buffer. Call once during boot before any
|
||||
IRQ handler can push bytes or any reader can pull them. */
|
||||
void input_init(void);
|
||||
|
||||
/* IRQ-safe: called from the keyboard/serial IRQ handlers. Pushes one byte
|
||||
into the ring. Drops on overflow (just discards -- the user lost a
|
||||
keystroke, no big deal). Wakes any blocked readers via event_trigger. */
|
||||
void input_push_byte(uint8_t b);
|
||||
|
||||
/* Non-blocking peek: returns true if at least one byte is in the ring. The
|
||||
answer is inherently racy; callers must not depend on monotonicity. */
|
||||
bool input_has_data(void);
|
||||
|
||||
/* Blocking read of up to `count` bytes into `buf`. Returns the number actually
|
||||
read. Sleeps on the input event while the ring is empty (event_await).
|
||||
Always returns at least 1 byte when it returns (no spurious 0-returns) --
|
||||
unless `count == 0`, in which case returns 0 immediately. */
|
||||
ssize_t input_read(uint8_t *buf, size_t count);
|
||||
|
||||
/* Non-blocking variant: returns -1 with errno=EAGAIN if no data. Used by
|
||||
O_NONBLOCK readers. */
|
||||
ssize_t input_try_read(uint8_t *buf, size_t count);
|
||||
@@ -0,0 +1,380 @@
|
||||
#include "kbd.h"
|
||||
#include "ps2.h"
|
||||
|
||||
#include "arch/x86_64/cpu/io.h"
|
||||
#include "arch/x86_64/cpu/reg.h"
|
||||
#include "arch/x86_64/sys/apic.h"
|
||||
#include "arch/x86_64/boot/isr.h"
|
||||
#include "arch/x86_64/asm/asm.h"
|
||||
|
||||
#include "libk/resource.h"
|
||||
#include "libk/types.h"
|
||||
#include "libk/errno.h"
|
||||
#include "libk/debug.h"
|
||||
|
||||
#include "mp/spinlock.h"
|
||||
#include "fs/devtmpfs.h"
|
||||
|
||||
#include "drivers/input/input.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*
|
||||
* IRQ wiring constants. The IDT vector for the keyboard is 49 (chosen by
|
||||
* KirkOS convention; mouse would be 60). The I/O APIC entry for ISA IRQ 1
|
||||
* is pointed at that vector below.
|
||||
*/
|
||||
#define KBD_IRQ 1
|
||||
#define KBD_VECTOR 49
|
||||
|
||||
/* ════════════════════════════════════════════════════════════════════════
|
||||
* Scancode-set-1 → ASCII tables (US-QWERTY)
|
||||
*
|
||||
* We enabled the controller's set-2 → set-1 translation in ps2_init(), so
|
||||
* everything we read from port 0x60 is already in set-1. These tables map
|
||||
* a make-code (high bit clear) directly to its printable byte; entries
|
||||
* that have no printable form — modifiers, function keys, numeric-pad
|
||||
* non-printables — are 0.
|
||||
*
|
||||
* The shifted table is the standard "hold Shift" mapping. CapsLock only
|
||||
* affects letters and is composed with Shift via XOR (see translate()).
|
||||
*
|
||||
* Cross-checked against https://wiki.osdev.org/PS/2_Keyboard#Scan_Code_Set_1
|
||||
* ════════════════════════════════════════════════════════════════════════ */
|
||||
|
||||
/*
 * Scancode set 1 make-code -> byte, no Shift held (US QWERTY).
 *
 * Indexed by the make-code (0x00..0x7f). A 0 entry means the key has no
 * printable byte (modifiers, function keys, lock keys). Entries 0x54..0x7f
 * are not listed and are therefore zero-initialized by the compiler.
 */
static const char kbd_us_unshift[128] = {
    /* 0x00: none Esc 1 2 3 4 5 6          */ 0,    0x1b, '1',  '2',  '3',  '4',  '5',  '6',
    /* 0x08: 7 8 9 0 - = Backspace Tab     */ '7',  '8',  '9',  '0',  '-',  '=',  0x7f, '\t',
    /* 0x10: q w e r t y u i               */ 'q',  'w',  'e',  'r',  't',  'y',  'u',  'i',
    /* 0x18: o p [ ] Enter LCtrl a s       */ 'o',  'p',  '[',  ']',  '\n', 0,    'a',  's',
    /* 0x20: d f g h j k l ;               */ 'd',  'f',  'g',  'h',  'j',  'k',  'l',  ';',
    /* 0x28: ' ` LShift \ z x c v          */ '\'', '`',  0,    '\\', 'z',  'x',  'c',  'v',
    /* 0x30: b n m , . / RShift keypad-*   */ 'b',  'n',  'm',  ',',  '.',  '/',  0,    '*',
    /* 0x38: LAlt Space Caps F1..F5        */ 0,    ' ',  0,    0,    0,    0,    0,    0,
    /* 0x40: F6..F10 Num Scroll keypad-7   */ 0,    0,    0,    0,    0,    0,    0,    '7',
    /* 0x48: keypad 8 9 - 4 5 6 + 1        */ '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1',
    /* 0x50: keypad 2 3 0 .                */ '2',  '3',  '0',  '.',
};
|
||||
|
||||
/*
 * Scancode set 1 make-code -> byte with Shift held (US QWERTY).
 *
 * Same indexing and conventions as the unshifted table: 0 means "no
 * printable byte", and entries 0x54..0x7f are left to the compiler's
 * implicit zero-initialization. Keypad keys map to the same bytes as in
 * the unshifted table.
 */
static const char kbd_us_shift[128] = {
    /* 0x00: none Esc ! @ # $ % ^          */ 0,    0x1b, '!',  '@',  '#',  '$',  '%',  '^',
    /* 0x08: & * ( ) _ + Backspace Tab     */ '&',  '*',  '(',  ')',  '_',  '+',  0x7f, '\t',
    /* 0x10: Q W E R T Y U I               */ 'Q',  'W',  'E',  'R',  'T',  'Y',  'U',  'I',
    /* 0x18: O P { } Enter LCtrl A S       */ 'O',  'P',  '{',  '}',  '\n', 0,    'A',  'S',
    /* 0x20: D F G H J K L :               */ 'D',  'F',  'G',  'H',  'J',  'K',  'L',  ':',
    /* 0x28: " ~ LShift | Z X C V          */ '"',  '~',  0,    '|',  'Z',  'X',  'C',  'V',
    /* 0x30: B N M < > ? RShift keypad-*   */ 'B',  'N',  'M',  '<',  '>',  '?',  0,    '*',
    /* 0x38: LAlt Space Caps F1..F5        */ 0,    ' ',  0,    0,    0,    0,    0,    0,
    /* 0x40: F6..F10 Num Scroll keypad-7   */ 0,    0,    0,    0,    0,    0,    0,    '7',
    /* 0x48: keypad 8 9 - 4 5 6 + 1        */ '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1',
    /* 0x50: keypad 2 3 0 .                */ '2',  '3',  '0',  '.',
};
|
||||
|
||||
/* ════════════════════════════════════════════════════════════════════════
|
||||
* Modifier state. These are touched only from the IRQ handler, which is
|
||||
* never re-entered on the same CPU, so plain globals are fine — no lock
|
||||
* needed for the state itself. The /dev/keyboard scratch buffer below is
|
||||
* shared with userspace readers and DOES need a lock.
|
||||
* ════════════════════════════════════════════════════════════════════════ */
|
||||
|
||||
static bool kbd_shift = false;
|
||||
static bool kbd_ctrl = false;
|
||||
static bool kbd_alt = false;
|
||||
static bool kbd_caps = false;
|
||||
static bool kbd_extended = false; /* set after a 0xE0 prefix byte */
|
||||
|
||||
/* ════════════════════════════════════════════════════════════════════════
|
||||
* /dev/keyboard — raw scancode device.
|
||||
*
|
||||
* This is a tiny in-memory ring of the last few raw bytes the IRQ saw.
|
||||
* Userspace can open /dev/keyboard and read raw set-1 scancodes — handy
|
||||
* for input-method daemons that want to see modifier release events, key
|
||||
* repeat rates, etc. The translated cooked stream still flows through the
|
||||
* shared input ring (input_push_byte) — this device is a secondary path.
|
||||
*
|
||||
* On each IRQ we drop the raw byte into the ring. If the ring is full the
|
||||
* oldest byte is overwritten — losing scancodes is preferable to stalling
|
||||
* the IRQ handler.
|
||||
* ════════════════════════════════════════════════════════════════════════ */
|
||||
|
||||
#define KBD_RAW_RING_SZ 256
|
||||
|
||||
struct kbd_device {
|
||||
struct resource res;
|
||||
spinlock_t ring_lock;
|
||||
uint8_t ring[KBD_RAW_RING_SZ];
|
||||
size_t head; /* next write index */
|
||||
size_t tail; /* next read index */
|
||||
size_t count;
|
||||
};
|
||||
|
||||
static struct kbd_device *kbd_dev = NULL;
|
||||
|
||||
static void kbd_raw_push(uint8_t b) {
|
||||
/*
|
||||
* Called from IRQ context. We acquire the lock via try-acquire so the
|
||||
* handler can never spin on a userspace reader, but a clean trylock
|
||||
* helper isn't in spinlock.h — so we just take the lock and trust
|
||||
* that userspace reads are short. The lock is only ever held for the
|
||||
* duration of a memcpy / index update, never across a sleep.
|
||||
*/
|
||||
spinlock_acquire_or_wait(&kbd_dev->ring_lock);
|
||||
kbd_dev->ring[kbd_dev->head] = b;
|
||||
kbd_dev->head = (kbd_dev->head + 1) % KBD_RAW_RING_SZ;
|
||||
if (kbd_dev->count == KBD_RAW_RING_SZ) {
|
||||
/* Overwrote oldest — advance tail. */
|
||||
kbd_dev->tail = (kbd_dev->tail + 1) % KBD_RAW_RING_SZ;
|
||||
} else {
|
||||
kbd_dev->count++;
|
||||
}
|
||||
spinlock_drop(&kbd_dev->ring_lock);
|
||||
}
|
||||
|
||||
/*
 * read() implementation for /dev/keyboard.
 *
 * Copies up to `count` raw scancode bytes, oldest first, from the device
 * ring into `buf`. It never blocks: if the ring is empty it returns 0.
 * `description` and `offset` are ignored.
 *
 * Returns the number of bytes copied, or -1 with errno = EFAULT when
 * `buf` is NULL.
 */
static ssize_t kbd_dev_read(struct resource *this,
                            struct f_description *description,
                            void *buf, off_t offset, size_t count) {
    (void)offset;
    (void)description;

    if (buf == NULL) {
        errno = EFAULT;
        return -1;
    }
    if (count == 0) {
        return 0;
    }

    uint8_t *dst = (uint8_t *)buf;
    size_t done = 0;

    /* kbd_raw_push() takes ring_lock from the keyboard IRQ handler. With
     * interrupts enabled, that IRQ could arrive on this CPU while we
     * hold the lock and spin on it forever, so interrupts are turned off
     * for the duration of the critical section. */
    bool saved_if = int_toggle(false);
    spinlock_acquire_or_wait(&this->lock);
    spinlock_acquire_or_wait(&kbd_dev->ring_lock);

    for (; done < count && kbd_dev->count > 0; done++) {
        dst[done] = kbd_dev->ring[kbd_dev->tail];
        kbd_dev->tail = (kbd_dev->tail + 1) % KBD_RAW_RING_SZ;
        kbd_dev->count--;
    }

    spinlock_drop(&kbd_dev->ring_lock);
    spinlock_drop(&this->lock);
    int_toggle(saved_if);

    return (ssize_t)done;
}
|
||||
|
||||
/*
 * write() implementation for /dev/keyboard.
 *
 * Writing is not supported. Every call is rejected: errno is set to
 * EINVAL and -1 is returned, whatever the arguments are.
 */
static ssize_t kbd_dev_write(struct resource *this,
                             struct f_description *description,
                             const void *buf, off_t offset, size_t count) {
    (void)count;
    (void)buf;
    (void)offset;
    (void)description;
    (void)this;

    errno = EINVAL;
    return -1;
}
|
||||
|
||||
/* ════════════════════════════════════════════════════════════════════════
|
||||
* Translation helpers
|
||||
* ════════════════════════════════════════════════════════════════════════ */
|
||||
|
||||
static void push_ascii(char c) {
|
||||
/*
|
||||
* Apply Ctrl-letter collapse here so that ^C / ^Z / ^D show up in the
|
||||
* TTY's line-discipline layer with the canonical control values. Only
|
||||
* letters are affected — Ctrl-1, Ctrl-., etc. pass through unchanged.
|
||||
*/
|
||||
if (kbd_ctrl && ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) {
|
||||
c = (char)(c & 0x1f);
|
||||
}
|
||||
input_push_byte((uint8_t)c);
|
||||
}
|
||||
|
||||
/*
 * Push every byte of the NUL-terminated string `s` into the shared input
 * ring, in order. The terminator itself is not pushed.
 */
static void push_seq(const char *s) {
    for (const char *p = s; *p != '\0'; p++) {
        input_push_byte((uint8_t)*p);
    }
}
|
||||
|
||||
/*
 * Translate an extended (0xE0-prefixed) make-code into an ANSI escape
 * sequence and push it into the input ring. Codes without an entry below
 * produce no output.
 */
static void handle_extended_make(uint8_t sc) {
    const char *seq = NULL;

    switch (sc) {
    case 0x48: seq = "\x1b[A";  break;   /* Up     */
    case 0x50: seq = "\x1b[B";  break;   /* Down   */
    case 0x4d: seq = "\x1b[C";  break;   /* Right  */
    case 0x4b: seq = "\x1b[D";  break;   /* Left   */
    case 0x47: seq = "\x1b[H";  break;   /* Home   */
    case 0x4f: seq = "\x1b[F";  break;   /* End    */
    case 0x53: seq = "\x1b[3~"; break;   /* Delete */
    default:
        break;
    }

    if (seq != NULL) {
        push_seq(seq);
    }
}
|
||||
|
||||
/* ════════════════════════════════════════════════════════════════════════
|
||||
* IRQ handler — runs on every keyboard byte.
|
||||
*
|
||||
* Flow:
|
||||
* - read raw byte from 0x60
|
||||
* - stash it in the /dev/keyboard ring (raw passthrough)
|
||||
* - if it's a 0xE0 prefix, set the extended flag and bail
|
||||
* - if it's an extended scancode, dispatch to handle_extended_make()
|
||||
* (only on make; releases are ignored)
|
||||
* - otherwise, branch on modifier vs. printable
|
||||
* - finally, EOI the local APIC
|
||||
* ════════════════════════════════════════════════════════════════════════ */
|
||||
static void kbd_irq_handler(registers_t *r) {
|
||||
(void)r;
|
||||
|
||||
uint8_t sc = x86_64_inb(PS2_DATA_PORT);
|
||||
|
||||
/* Mirror the byte into /dev/keyboard for raw consumers. */
|
||||
if (kbd_dev) {
|
||||
kbd_raw_push(sc);
|
||||
}
|
||||
|
||||
/* 0xE0 prefix → next byte is part of an extended sequence. */
|
||||
if (sc == 0xe0) {
|
||||
kbd_extended = true;
|
||||
apic_eoi();
|
||||
return;
|
||||
}
|
||||
|
||||
/* ── Modifier release events come in as scancode | 0x80 ──────────── */
|
||||
if (!kbd_extended) {
|
||||
switch (sc) {
|
||||
case 0x9d: kbd_ctrl = false; apic_eoi(); return; /* LCtrl ↑ */
|
||||
case 0xaa: kbd_shift = false; apic_eoi(); return; /* LShift ↑*/
|
||||
case 0xb6: kbd_shift = false; apic_eoi(); return; /* RShift ↑*/
|
||||
case 0xb8: kbd_alt = false; apic_eoi(); return; /* LAlt ↑ */
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Drop the rest of the break-codes for non-modifier keys. */
|
||||
if (sc & 0x80) {
|
||||
kbd_extended = false;
|
||||
apic_eoi();
|
||||
return;
|
||||
}
|
||||
|
||||
/* ── Extended make codes (arrows, Home/End, Delete, …) ───────────── */
|
||||
if (kbd_extended) {
|
||||
kbd_extended = false;
|
||||
handle_extended_make(sc);
|
||||
apic_eoi();
|
||||
return;
|
||||
}
|
||||
|
||||
/* ── Plain make codes ────────────────────────────────────────────── */
|
||||
switch (sc) {
|
||||
case 0x01: /* Esc */
|
||||
input_push_byte(0x1b);
|
||||
apic_eoi();
|
||||
return;
|
||||
case 0x0e: /* Backspace → DEL (matches VERASE default) */
|
||||
input_push_byte(0x7f);
|
||||
apic_eoi();
|
||||
return;
|
||||
case 0x0f: /* Tab */
|
||||
input_push_byte('\t');
|
||||
apic_eoi();
|
||||
return;
|
||||
case 0x1c: /* Enter */
|
||||
input_push_byte('\n');
|
||||
apic_eoi();
|
||||
return;
|
||||
case 0x39: /* Space */
|
||||
input_push_byte(' ');
|
||||
apic_eoi();
|
||||
return;
|
||||
|
||||
/* Modifier presses — record state, push nothing. */
|
||||
case 0x1d: kbd_ctrl = true; apic_eoi(); return; /* LCtrl ↓ */
|
||||
case 0x2a: kbd_shift = true; apic_eoi(); return; /* LShift ↓ */
|
||||
case 0x36: kbd_shift = true; apic_eoi(); return; /* RShift ↓ */
|
||||
case 0x38: kbd_alt = true; apic_eoi(); return; /* LAlt ↓ */
|
||||
case 0x3a: /* CapsLock toggle */
|
||||
kbd_caps = !kbd_caps;
|
||||
apic_eoi();
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* ── Printable key → look up in the right table ──────────────────── */
|
||||
if (sc < 128) {
|
||||
char c = 0;
|
||||
char unshift = kbd_us_unshift[sc];
|
||||
bool is_letter = (unshift >= 'a' && unshift <= 'z');
|
||||
|
||||
if (is_letter) {
|
||||
/* Letters: Shift XOR CapsLock decides case. */
|
||||
bool upper = kbd_shift ^ kbd_caps;
|
||||
c = upper ? kbd_us_shift[sc] : unshift;
|
||||
} else {
|
||||
/* Non-letters: only Shift matters; CapsLock is ignored. */
|
||||
c = kbd_shift ? kbd_us_shift[sc] : unshift;
|
||||
}
|
||||
|
||||
if (c != 0) {
|
||||
push_ascii(c);
|
||||
}
|
||||
}
|
||||
|
||||
apic_eoi();
|
||||
}
|
||||
|
||||
/* ════════════════════════════════════════════════════════════════════════
|
||||
* Public init — wire up the device node and the IRQ.
|
||||
* ════════════════════════════════════════════════════════════════════════ */
|
||||
void kbd_init(void) {
|
||||
kbd_dev = resource_create(sizeof(struct kbd_device));
|
||||
if (!kbd_dev) {
|
||||
kprintf("[kbd] resource_create failed — no /dev/keyboard\n");
|
||||
/* Even without the raw device we still want IRQ-driven input,
|
||||
* so don't bail; fall through to install the handler. */
|
||||
} else {
|
||||
spinlock_init(kbd_dev->ring_lock);
|
||||
kbd_dev->head = 0;
|
||||
kbd_dev->tail = 0;
|
||||
kbd_dev->count = 0;
|
||||
|
||||
kbd_dev->res.stat.st_size = 0;
|
||||
kbd_dev->res.stat.st_blocks = 0;
|
||||
kbd_dev->res.stat.st_blksize = 4096;
|
||||
kbd_dev->res.stat.st_rdev = resource_create_dev_id();
|
||||
kbd_dev->res.stat.st_mode = 0644 | S_IFCHR;
|
||||
|
||||
kbd_dev->res.read = kbd_dev_read;
|
||||
kbd_dev->res.write = kbd_dev_write;
|
||||
|
||||
devtmpfs_add_device((struct resource *)kbd_dev, "keyboard");
|
||||
}
|
||||
|
||||
isr_register_handler(KBD_VECTOR, kbd_irq_handler);
|
||||
ioapic_redirect_irq(KBD_IRQ, KBD_VECTOR);
|
||||
|
||||
kprintf("[kbd] PS/2 keyboard ready (IRQ %d → vector %d)\n",
|
||||
KBD_IRQ, KBD_VECTOR);
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*
|
||||
* PS/2 keyboard driver.
|
||||
*
|
||||
* kbd_init() is called by ps2_init() after the controller has been
|
||||
* configured. It:
|
||||
* - allocates and registers /dev/keyboard (a raw-scancode char device)
|
||||
* - installs the IRQ-1 → vector-49 handler
|
||||
* - asks the I/O APIC to route IRQ 1 to vector 49
|
||||
*
|
||||
* Once installed, every keyboard byte from port 0x60 is translated to
|
||||
* ASCII (or an ANSI escape sequence for arrows / Home / End / Delete) and
|
||||
* pushed into the shared input ring via input_push_byte().
|
||||
*/
|
||||
void kbd_init(void);
|
||||
@@ -0,0 +1,111 @@
|
||||
#include "ps2.h"
|
||||
#include "kbd.h"
|
||||
#include "arch/x86_64/cpu/io.h"
|
||||
#include "libk/debug.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
|
||||
* ── Low-level helpers ─────────────────────────────────────────────────────
|
||||
*
|
||||
* The PS/2 controller is glacially slow by modern standards but it is also
|
||||
* sometimes wired up to a virtualised platform that will happily return
|
||||
* stale status forever. We bound the spins with a generous attempt counter
|
||||
* so that a wedged controller doesn't hang the kernel during boot.
|
||||
*/
|
||||
|
||||
#define PS2_SPIN_LIMIT 100000
|
||||
|
||||
uint8_t ps2_read(void) {
|
||||
for (int i = 0; i < PS2_SPIN_LIMIT; i++) {
|
||||
if (x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_OBF) {
|
||||
return x86_64_inb(PS2_DATA_PORT);
|
||||
}
|
||||
__asm__ volatile ("pause");
|
||||
}
|
||||
/* Fall through and read anyway — we are already in trouble. The
|
||||
* controller never raised OBF in PS2_SPIN_LIMIT iterations, which
|
||||
* is the signature of a wedged PS/2 controller. Log once per call
|
||||
* site so a boot-time hang has a breadcrumb in the serial log. */
|
||||
kprintf("[ps2] WARN ps2_read: OBF never set after %d spins — controller wedged?\n",
|
||||
PS2_SPIN_LIMIT);
|
||||
return x86_64_inb(PS2_DATA_PORT);
|
||||
}
|
||||
|
||||
void ps2_write(uint16_t port, uint8_t v) {
|
||||
for (int i = 0; i < PS2_SPIN_LIMIT; i++) {
|
||||
if (!(x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_IBF)) {
|
||||
x86_64_outb(port, v);
|
||||
return;
|
||||
}
|
||||
__asm__ volatile ("pause");
|
||||
}
|
||||
/* Write anyway as last-ditch; the controller is hosed otherwise.
|
||||
* IBF stayed set for the full spin window — same diagnostic story
|
||||
* as ps2_read above. */
|
||||
kprintf("[ps2] WARN ps2_write: IBF stuck after %d spins, port=%x — controller wedged?\n",
|
||||
PS2_SPIN_LIMIT, (unsigned)port);
|
||||
x86_64_outb(port, v);
|
||||
}
|
||||
|
||||
uint8_t ps2_read_config(void) {
|
||||
ps2_write(PS2_CMD_PORT, PS2_CMD_READ_CONFIG);
|
||||
return ps2_read();
|
||||
}
|
||||
|
||||
void ps2_write_config(uint8_t v) {
|
||||
ps2_write(PS2_CMD_PORT, PS2_CMD_WRITE_CONFIG);
|
||||
ps2_write(PS2_DATA_PORT, v);
|
||||
}
|
||||
|
||||
/*
|
||||
* ── Controller bring-up ───────────────────────────────────────────────────
|
||||
*
|
||||
* Sequence is the classic OSDev recipe:
|
||||
* 1. Disable both PS/2 ports so nothing interrupts us mid-init.
|
||||
* 2. Drain any byte the controller may have buffered before we started
|
||||
* paying attention (BIOS/QEMU often leaves one behind).
|
||||
* 3. Read the config byte and:
|
||||
* - enable port 1 interrupt (bit 0)
|
||||
* - enable scancode set-2 → set-1 translation (bit 6)
|
||||
* - clear port 1 disable (bit 4)
|
||||
* Leave port 2 disabled (mouse is intentionally not handled here).
|
||||
* 4. Write the config back.
|
||||
* 5. Re-enable port 1.
|
||||
* 6. Drain again in case the controller queued bytes between
|
||||
* reconfiguration and re-enable.
|
||||
* 7. Install the IRQ handler and register /dev/keyboard.
|
||||
*/
|
||||
void ps2_init(void) {
|
||||
/* 1. Disable both ports. */
|
||||
ps2_write(PS2_CMD_PORT, PS2_CMD_DISABLE_PORT1);
|
||||
ps2_write(PS2_CMD_PORT, PS2_CMD_DISABLE_PORT2);
|
||||
|
||||
/* 2. Drain leftover output buffer. */
|
||||
while (x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_OBF) {
|
||||
(void)x86_64_inb(PS2_DATA_PORT);
|
||||
}
|
||||
|
||||
/* 3. Patch config byte. */
|
||||
uint8_t cfg = ps2_read_config();
|
||||
cfg |= PS2_CFG_PORT1_INT; /* enable port 1 IRQ */
|
||||
cfg |= PS2_CFG_TRANSLATION; /* set-2 → set-1 translation */
|
||||
cfg &= ~PS2_CFG_PORT1_DISABLE; /* be sure port 1 is on */
|
||||
|
||||
/* 4. Push it back. */
|
||||
ps2_write_config(cfg);
|
||||
|
||||
/* 5. Re-enable port 1. (Port 2 stays disabled — no mouse this round.) */
|
||||
ps2_write(PS2_CMD_PORT, PS2_CMD_ENABLE_PORT1);
|
||||
|
||||
/* 6. Drain anything that snuck in during reconfig. */
|
||||
while (x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_OBF) {
|
||||
(void)x86_64_inb(PS2_DATA_PORT);
|
||||
}
|
||||
|
||||
kprintf("[ps2] controller initialised (port1 IRQ + xlate, port2 off)\n");
|
||||
|
||||
/* 7. Hand off to the keyboard driver. */
|
||||
kbd_init();
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
/*
|
||||
* Low-level PS/2 controller interface for KirkOS.
|
||||
*
|
||||
* The PS/2 controller lives behind I/O ports:
|
||||
* 0x60 — data port (read scancode, write data byte to keyboard/mouse)
|
||||
* 0x64 — command/status port (read status byte, write controller cmd)
|
||||
*
|
||||
* Status register bits:
|
||||
* bit 0 (OBF) — output buffer full, data ready to be read from 0x60
|
||||
* bit 1 (IBF) — input buffer full, controller not ready to accept a write
|
||||
*/
|
||||
|
||||
#define PS2_DATA_PORT 0x60
|
||||
#define PS2_STATUS_PORT 0x64
|
||||
#define PS2_CMD_PORT 0x64
|
||||
|
||||
#define PS2_STATUS_OBF (1u << 0)
|
||||
#define PS2_STATUS_IBF (1u << 1)
|
||||
|
||||
/* Controller commands (sent to 0x64). */
|
||||
#define PS2_CMD_READ_CONFIG 0x20
|
||||
#define PS2_CMD_WRITE_CONFIG 0x60
|
||||
#define PS2_CMD_DISABLE_PORT2 0xA7
|
||||
#define PS2_CMD_ENABLE_PORT2 0xA8
|
||||
#define PS2_CMD_DISABLE_PORT1 0xAD
|
||||
#define PS2_CMD_ENABLE_PORT1 0xAE
|
||||
|
||||
/* Controller config byte bits. */
|
||||
#define PS2_CFG_PORT1_INT (1u << 0)
|
||||
#define PS2_CFG_PORT2_INT (1u << 1)
|
||||
#define PS2_CFG_PORT1_DISABLE (1u << 4)
|
||||
#define PS2_CFG_PORT2_DISABLE (1u << 5)
|
||||
#define PS2_CFG_TRANSLATION (1u << 6)
|
||||
|
||||
/*
|
||||
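* ps2_read — wait until the controller has a byte for us, then return it.
* Spins on status-bit-0 (OBF) for a bounded number of iterations. If OBF
* is never raised, it logs a warning and returns whatever the data port
* holds, so the byte may be stale.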
|
||||
*/
|
||||
uint8_t ps2_read(void);
|
||||
|
||||
/*
|
||||
* ps2_write — wait until the controller is ready to accept a byte
* (status-bit-1 IBF clear), then write `v` to `port`.
* The wait is bounded: if IBF stays set, a warning is logged and the
* byte is written anyway.
* `port` is either 0x60 (data) or 0x64 (command).
|
||||
*/
|
||||
void ps2_write(uint16_t port, uint8_t v);
|
||||
|
||||
/*
|
||||
* ps2_read_config / ps2_write_config — read or replace the controller
|
||||
* configuration byte (a.k.a. "command byte").
|
||||
*/
|
||||
uint8_t ps2_read_config(void);
|
||||
void ps2_write_config(uint8_t v);
|
||||
|
||||
/*
|
||||
* ps2_init — full controller bring-up:
|
||||
* 1. disable both ports
|
||||
* 2. clear the output buffer
|
||||
* 3. enable port 1 IRQ + scancode set-1 translation
|
||||
* 4. re-enable port 1
|
||||
* 5. hand off to kbd_init() to install the IRQ handler and
|
||||
* register /dev/keyboard.
|
||||
* Called once from main.c.
|
||||
*/
|
||||
void ps2_init(void);
|
||||
+729
-149
File diff suppressed because it is too large
Load Diff
@@ -1,3 +1,45 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include "termios.h"
|
||||
#include "mp/spinlock.h"
|
||||
#include "libk/resource.h"
|
||||
#include "libk/event.h"
|
||||
|
||||
#define LINE_BUF_SZ 4096
|
||||
|
||||
struct console {
|
||||
struct resource res;
|
||||
struct termios term;
|
||||
size_t width, height;
|
||||
bool decckm;
|
||||
|
||||
/* ── job control ─────────────────────────────────────────────────── */
|
||||
int64_t fg_pgrp; /* foreground process group; 0 = none */
|
||||
int64_t session; /* owning session; 0 = none */
|
||||
|
||||
/* ── line-discipline assembly buffer ─────────────────────────────── */
|
||||
char line_buf[LINE_BUF_SZ];
|
||||
size_t line_len; /* bytes currently in line_buf */
|
||||
size_t line_read_off; /* drain offset (for partial reads) */
|
||||
bool line_ready; /* true when a delimiter has landed */
|
||||
bool lnext_pending; /* VLNEXT: next byte taken literally */
|
||||
|
||||
/* ── concurrent-reader serialization ─────────────────────────────── */
|
||||
/* Separate from res.lock so we can hold it across the blocking
|
||||
* input_read() without holding res.lock. Serializes the line-assembly
|
||||
* loop in console_read across multiple concurrent readers (e.g. parent
|
||||
* + child after fork) so their bytes can't interleave on line_buf.
|
||||
*
|
||||
* Sleep-mutex serializing the line-assembly loop across concurrent
|
||||
* readers. We hold this across the blocking input_read() — so it MUST
|
||||
* be a sleep-mutex, not a spinlock (a spinlock held while the holder
|
||||
* sleeps in event_await would deadlock the second reader). Built on
|
||||
* top of the existing event primitive. */
|
||||
spinlock_t read_mutex_lock; /* short-held; protects the two fields below */
|
||||
bool read_mutex_held;
|
||||
struct event read_mutex_released;
|
||||
};
|
||||
|
||||
void console_init(void);
|
||||
|
||||
@@ -479,7 +479,7 @@ void syscall_openpty(struct syscall_arguments *args) {
|
||||
ps->pty = p;
|
||||
ps->res.read = pty_slave_read;
|
||||
ps->res.write = pty_slave_write;
|
||||
pm->res.unref = pty_slave_unref;
|
||||
ps->res.unref = pty_slave_unref;
|
||||
ps->res.ioctl = pty_ioctl;
|
||||
ps->res.stat.st_size = 0;
|
||||
ps->res.stat.st_blocks = 0;
|
||||
|
||||
+3
-1
@@ -159,7 +159,7 @@ void _entry(void) {
|
||||
framebuffer_init(&fb);
|
||||
|
||||
print_now = true;
|
||||
serial_init();
|
||||
|
||||
|
||||
struct limine_file *kernel_file =
|
||||
limine_kernel_file_request.response->kernel_file;
|
||||
@@ -196,6 +196,8 @@ void _entry(void) {
|
||||
|
||||
apic_init();
|
||||
|
||||
serial_init();
|
||||
|
||||
mp_init(mp_request.response);
|
||||
|
||||
// The NSA has also forced hardware manufacturers to backdoor their 'Random
|
||||
|
||||
+22
-1
@@ -563,6 +563,18 @@ void syscall_openat(struct syscall_arguments *args) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Dynamic-resolver node (e.g. /dev/tty resolves to the calling
|
||||
* process's controlling-terminal resource). The resolver may return
|
||||
* NULL with errno set (ENXIO when the caller has no ctty). */
|
||||
struct resource *target_res = node->resource;
|
||||
if (node->resolve_open != NULL) {
|
||||
target_res = node->resolve_open(node);
|
||||
if (target_res == NULL) {
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
struct f_descriptor *fd = fd_create_from_resource(node->resource, flags);
|
||||
if (fd == NULL) {
|
||||
args->ret = -1;
|
||||
@@ -650,7 +662,16 @@ void syscall_getcwd(struct syscall_arguments *args) {
|
||||
size_t len = args->args1;
|
||||
|
||||
char path_buffer[PATH_MAX] = {0};
|
||||
if (vfs_pathname(proc->cwd, path_buffer, PATH_MAX) >= len) {
|
||||
size_t n = vfs_pathname(proc->cwd, path_buffer, PATH_MAX);
|
||||
|
||||
if (n == 0) {
|
||||
// proc->cwd is the VFS root — render as "/".
|
||||
path_buffer[0] = '/';
|
||||
path_buffer[1] = '\0';
|
||||
n = 1;
|
||||
}
|
||||
|
||||
if (n + 1 > len) {
|
||||
errno = ERANGE;
|
||||
args->ret = -1;
|
||||
return;
|
||||
|
||||
@@ -24,6 +24,14 @@ struct vfs_node {
|
||||
HASHMAP_TYPE(struct vfs_node *) children;
|
||||
char *symlink_target;
|
||||
bool populated;
|
||||
|
||||
|
||||
/* Per-process resolver. If non-NULL, syscall_openat will call this
|
||||
* instead of returning node->resource directly. Used by /dev/tty to
|
||||
* resolve to the calling process's controlling-terminal resource.
|
||||
* The function returns NULL with errno set if it can't satisfy the
|
||||
* open (e.g. ENXIO for "no controlling tty"). */
|
||||
struct resource *(*resolve_open)(struct vfs_node *self);
|
||||
};
|
||||
|
||||
typedef struct vfs_node *(*fs_mount_t)(struct vfs_node *, const char *,
|
||||
|
||||
+24
-1
@@ -2,6 +2,7 @@
|
||||
#include "fs/vfs.h"
|
||||
#include "sched/sched_types.h"
|
||||
#include "sched/sched.h"
|
||||
#include "sched/signal.h"
|
||||
#include "fs/tmpfs.h"
|
||||
#include "fs/devtmpfs.h"
|
||||
#include "libk/random.h"
|
||||
@@ -9,6 +10,8 @@
|
||||
#include "fs/partition.h"
|
||||
#include "drivers/fb/fb.h"
|
||||
#include "drivers/tty/console.h"
|
||||
#include "drivers/input/input.h"
|
||||
#include "drivers/ps2/ps2.h"
|
||||
#include "arch/x86_64/sys/timer.h"
|
||||
#include "libk/kargs.h"
|
||||
#include "fs/ramdisk.h"
|
||||
@@ -82,6 +85,26 @@ void kernel_main(void *args) {
|
||||
syscall_register_handler(0x10f, syscall_ppoll);
|
||||
syscall_register_handler(0x54, syscall_rmdir);
|
||||
|
||||
|
||||
/* ── POSIX signal syscalls ──────────────────────────────────────── */
|
||||
syscall_register_handler(13, syscall_sigaction); /* rt_sigaction */
|
||||
syscall_register_handler(14, syscall_sigprocmask); /* rt_sigprocmask */
|
||||
syscall_register_handler(15, syscall_sigreturn); /* rt_sigreturn */
|
||||
syscall_register_handler(34, syscall_pause);
|
||||
syscall_register_handler(127, syscall_sigpending);
|
||||
syscall_register_handler(130, syscall_sigsuspend);
|
||||
|
||||
/* ── POSIX session / process-group syscalls ─────────────────────── */
|
||||
syscall_register_handler(109, syscall_setpgid);
|
||||
syscall_register_handler(111, syscall_getpgrp);
|
||||
syscall_register_handler(112, syscall_setsid);
|
||||
syscall_register_handler(121, syscall_getpgid);
|
||||
syscall_register_handler(124, syscall_getsid);
|
||||
|
||||
/* ── Shared input ring buffer + PS/2 keyboard ───────────────────── */
|
||||
input_init();
|
||||
ps2_init();
|
||||
|
||||
kprintf("Halting for 5 seconds...");
|
||||
timer_sleep(5000);
|
||||
|
||||
@@ -96,7 +119,7 @@ void kernel_main(void *args) {
|
||||
NULL,
|
||||
};
|
||||
|
||||
char *init_path = "/bin/oksh";
|
||||
char *init_path = "/bin/sh";
|
||||
if (kernel_arguments.kernel_args & KERNEL_ARGS_INIT_PATH_GIVEN) {
|
||||
init_path = kernel_arguments.init_binary_path;
|
||||
}
|
||||
|
||||
+1
-1
@@ -71,7 +71,7 @@ void vmm_init(struct limine_memmap_entry **memmap, size_t memmap_entries) {
|
||||
vmm_map_page(kernel_pagemap, p + MEM_PHYS_OFFSET, p, 0b11, Size2MiB);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < (memmap_entries - 1); i++) {
|
||||
for (size_t i = 0; i < (memmap_entries); i++) {
|
||||
uint64_t base = memmap[i]->base;
|
||||
uint64_t length = memmap[i]->length;
|
||||
uint64_t top = base + length;
|
||||
|
||||
@@ -56,6 +56,7 @@ static void smp_cpu_init(struct limine_smp_info *smp_info) {
|
||||
uint64_t cr4 = 0;
|
||||
cr4 = read_cr("4");
|
||||
cr4 |= (3 << 9);
|
||||
cr4 |= (1 << 16); // FSGSBASE
|
||||
write_cr("4", cr4);
|
||||
|
||||
// Enable syscall in EFER
|
||||
|
||||
+142
-28
@@ -17,7 +17,9 @@
|
||||
#include "futex.h"
|
||||
#include "libk/string.h"
|
||||
#include "main.h"
|
||||
#include "sched/syscall.h"
|
||||
#include "syscall.h"
|
||||
#include "signal.h"
|
||||
#include "drivers/tty/console.h"
|
||||
|
||||
|
||||
|
||||
@@ -39,11 +41,13 @@ int64_t pid = 0;
|
||||
spinlock_t thread_lock = {0};
|
||||
spinlock_t process_lock = {0};
|
||||
|
||||
extern struct console *console_device; // for init's ctty
|
||||
|
||||
struct resource *std_console_device = NULL;
|
||||
|
||||
struct utsname system_uname = {
|
||||
.sysname = "KirkOS",
|
||||
.nodename = "localhost",
|
||||
.nodename = "kirk",
|
||||
.release = "0.0.0",
|
||||
.version = "Built on " __DATE__ " " __TIME__,
|
||||
#if defined(__x86_64__)
|
||||
@@ -103,7 +107,7 @@ static struct thread *sched_tid_to_thread(int64_t t) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct process *sched_pid_to_process(int64_t p) {
|
||||
struct process *sched_pid_to_process(int64_t p) {
|
||||
struct process *this = process_list;
|
||||
while (this) {
|
||||
if (this->pid == p)
|
||||
@@ -180,18 +184,13 @@ void sched_remove_process_from_list(struct process **proc_list,
|
||||
}
|
||||
}
|
||||
|
||||
void syscall_kill(struct syscall_arguments *args) {
|
||||
struct process *proc = sched_pid_to_process((int64_t)args->args0);
|
||||
args->ret = 0;
|
||||
if (!proc)
|
||||
args->ret = -1;
|
||||
else
|
||||
process_kill(proc, false);
|
||||
}
|
||||
|
||||
void syscall_exit(struct syscall_arguments *args) {
|
||||
sched_get_running_thread()->mother_proc->status = (uint8_t)args->args0;
|
||||
process_kill(sched_get_running_thread()->mother_proc, false);
|
||||
struct process *proc = sched_get_running_thread()->mother_proc;
|
||||
/* POSIX wait status encoding for a normal exit: (code & 0xff) << 8. */
|
||||
int code = (int)(args->args0 & 0xff);
|
||||
proc->wait_status = (code & 0xff) << 8;
|
||||
proc->wait_status_valid = true;
|
||||
process_kill(proc, false);
|
||||
}
|
||||
|
||||
void syscall_getpid(struct syscall_arguments *args) {
|
||||
@@ -247,12 +246,16 @@ void syscall_waitpid(struct syscall_arguments *args) {
|
||||
spinlock_acquire_or_wait(&waiter_proc->lock);
|
||||
|
||||
if (!waiter_proc->child_processes.length) {
|
||||
spinlock_drop(&waiter_proc->lock);
|
||||
errno = ECHILD;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
if (pid_to_wait_on < -1 || pid_to_wait_on == 0) {
|
||||
spinlock_drop(&waiter_proc->lock);
|
||||
errno = EINVAL;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -279,6 +282,7 @@ void syscall_waitpid(struct syscall_arguments *args) {
|
||||
}
|
||||
|
||||
if (waitee_process == NULL) {
|
||||
spinlock_drop(&waiter_proc->lock);
|
||||
errno = ECHILD;
|
||||
kfree(events);
|
||||
args->ret = -1;
|
||||
@@ -310,7 +314,12 @@ void syscall_waitpid(struct syscall_arguments *args) {
|
||||
waitee_process = waiter_proc->child_processes.data[which];
|
||||
}
|
||||
|
||||
*status = waitee_process->status;
|
||||
|
||||
/* W* status was encoded by signal.c / syscall_exit. status pointer is
|
||||
* optional per POSIX — only write it if userspace provided one. */
|
||||
if (status != NULL) {
|
||||
*status = waitee_process->wait_status;
|
||||
}
|
||||
args->ret = waitee_process->pid;
|
||||
|
||||
vec_remove(&waiter_proc->child_processes, waitee_process);
|
||||
@@ -411,6 +420,8 @@ void process_create(char *name, uint8_t state, uint64_t runtime,
|
||||
proc->cwd = vfs_root;
|
||||
proc->stack_top = VIRTUAL_STACK_ADDR;
|
||||
|
||||
signal_init_process(proc);
|
||||
|
||||
if (parent_process) {
|
||||
proc->parent_process = parent_process;
|
||||
if (proc->parent_process->cwd) {
|
||||
@@ -418,12 +429,22 @@ void process_create(char *name, uint8_t state, uint64_t runtime,
|
||||
}
|
||||
proc->umask = parent_process->umask;
|
||||
proc->mmap_anon_base = parent_process->mmap_anon_base;
|
||||
/* Inherit session and process group from parent. ctty is also
|
||||
* inherited (the child shares the parent's controlling terminal
|
||||
* until it calls setsid()). */
|
||||
proc->pgid = parent_process->pgid;
|
||||
proc->sid = parent_process->sid;
|
||||
proc->ctty = parent_process->ctty;
|
||||
signal_inherit_on_fork(parent_process, proc);
|
||||
spinlock_acquire_or_wait(&parent_process->lock);
|
||||
vec_push(&parent_process->child_processes, proc);
|
||||
spinlock_drop(&parent_process->lock);
|
||||
} else {
|
||||
proc->umask = S_IWGRP | S_IWOTH;
|
||||
proc->mmap_anon_base = MMAP_ANON_BASE;
|
||||
proc->pgid = 0;
|
||||
proc->sid = 0;
|
||||
proc->ctty = NULL;
|
||||
}
|
||||
proc->next = NULL;
|
||||
|
||||
@@ -432,6 +453,10 @@ void process_create(char *name, uint8_t state, uint64_t runtime,
|
||||
|
||||
spinlock_acquire_or_wait(&process_lock);
|
||||
proc->pid = pid++;
|
||||
/* New processes that didn't inherit from a parent become their own
|
||||
* session+pgroup leader. */
|
||||
if (proc->pgid == 0) proc->pgid = proc->pid;
|
||||
if (proc->sid == 0) proc->sid = proc->pid;
|
||||
sched_add_process_to_list(&process_list, proc);
|
||||
spinlock_drop(&process_lock);
|
||||
|
||||
@@ -453,6 +478,12 @@ bool process_run_init(char *path, char **argv, char **envp,
|
||||
proc->cwd = vfs_root;
|
||||
proc->stack_top = VIRTUAL_STACK_ADDR;
|
||||
|
||||
signal_init_process(proc);
|
||||
/* init is its own session + process-group leader (pid 1). */
|
||||
proc->pgid = 0; /* filled in below from proc->pid */
|
||||
proc->sid = 0;
|
||||
proc->ctty = &console_device->res;
|
||||
|
||||
if (parent_process) {
|
||||
proc->parent_process = parent_process;
|
||||
if (proc->parent_process->cwd) {
|
||||
@@ -504,6 +535,8 @@ bool process_run_init(char *path, char **argv, char **envp,
|
||||
|
||||
spinlock_acquire_or_wait(&process_lock);
|
||||
proc->pid = pid++;
|
||||
if (proc->pgid == 0) proc->pgid = proc->pid;
|
||||
if (proc->sid == 0) proc->sid = proc->pid;
|
||||
sched_add_process_to_list(&process_list, proc);
|
||||
spinlock_drop(&process_lock);
|
||||
|
||||
@@ -542,6 +575,15 @@ int64_t process_fork(struct process *proc, struct thread *thrd) {
|
||||
fproc->parent_process = proc;
|
||||
fproc->next = NULL;
|
||||
|
||||
/* POSIX: fork inherits pgid, sid, controlling terminal, signal
|
||||
* dispositions, and the calling thread's signal mask. Pending
|
||||
* signals are NOT inherited. */
|
||||
fproc->pgid = proc->pgid;
|
||||
fproc->sid = proc->sid;
|
||||
fproc->ctty = proc->ctty;
|
||||
signal_init_process(fproc);
|
||||
signal_inherit_on_fork(proc, fproc);
|
||||
|
||||
vec_init(&fproc->child_processes);
|
||||
vec_init(&fproc->process_threads);
|
||||
|
||||
@@ -653,6 +695,11 @@ bool process_execve(char *path, char **argv, char **envp) {
|
||||
|
||||
strncpy(proc->name, path, 256);
|
||||
|
||||
/* POSIX execve(2): handlers reset to SIG_DFL unless they were SIG_IGN.
|
||||
* sa_mask cleared. Pending signals preserved. The surviving thread's
|
||||
* signal mask is PRESERVED across exec — POSIX requires this. */
|
||||
signal_reset_on_exec(proc);
|
||||
|
||||
for (int i = 0; i < proc->process_threads.length; i++) {
|
||||
if (proc->process_threads.data[i] != thread) {
|
||||
proc->process_threads.data[i]->state = THREAD_KILLED;
|
||||
@@ -677,12 +724,41 @@ bool process_execve(char *path, char **argv, char **envp) {
|
||||
}
|
||||
|
||||
void process_kill(struct process *proc, bool crash) {
|
||||
if (proc->pid < 2) {
|
||||
panic("Attempted to kill init!\n");
|
||||
/* The kernel itself is pid 0; killing it is always wrong. */
|
||||
if (proc->pid == 0) {
|
||||
panic("Attempted to kill kernel_tasks (pid 0)!\n");
|
||||
}
|
||||
|
||||
/* When init (pid 1) dies, the userspace is gone. Rather than panic
|
||||
* with a stack trace (which historically obscured the real cause),
|
||||
* print a final diagnostic and halt cleanly. This is what Linux does
|
||||
* for the same condition. */
|
||||
if (proc->pid == 1) {
|
||||
kprintf("\n*** init (pid 1) exiting with wait_status=0x%x ***\n",
|
||||
proc->wait_status);
|
||||
kprintf("*** system has no init — halting ***\n");
|
||||
/* Fall through and run normal cleanup; the idle loop in
|
||||
* kernel_main will keep the CPU parked. */
|
||||
}
|
||||
|
||||
bool are_we_killing_ourselves = false;
|
||||
|
||||
/* If this kill is the result of a fatal signal (crash=true), we
|
||||
* already have wait_status_valid; otherwise the caller (syscall_exit)
|
||||
* set it. If neither happened (raw signal default action), encode
|
||||
* "killed by SIGKILL" as a safe fallback. */
|
||||
if (!proc->wait_status_valid) {
|
||||
proc->wait_status = (KSIGKILL & 0x7f); /* encode as killed-by-SIGKILL */
|
||||
proc->wait_status_valid = true;
|
||||
}
|
||||
|
||||
/* TODO: when killing a foreign process, FD-close hooks run in the
|
||||
* caller's pagemap. Current KirkOS resources (devtmpfs char devices,
|
||||
* pipes, etc.) don't touch user memory in their unref handlers, so
|
||||
* this is safe — but future fs drivers that flush user pages on
|
||||
* close will fault. Either switch to victim pagemap around the
|
||||
* close loop, or defer FD reaping to the death_event-driven cleanup
|
||||
* in sched_get_next_thread. */
|
||||
for (int i = 0; i < MAX_FDS; i++) {
|
||||
if (proc->fds[i] == NULL) {
|
||||
continue;
|
||||
@@ -696,33 +772,64 @@ void process_kill(struct process *proc, bool crash) {
|
||||
vmm_switch_pagemap(kernel_pagemap);
|
||||
}
|
||||
|
||||
/* Hold proc->lock while iterating process_threads: signal_force_default
|
||||
* (STOP), signal_send_to_process (wake walk), thread_create, and
|
||||
* thread_fork all mutate this vec under proc->lock from other CPUs.
|
||||
* If are_we_killing_ourselves is true the running thread is still in
|
||||
* this vec — that's fine: we just mark it KILLED here, the actual
|
||||
* deselect happens when we sched_yield below. */
|
||||
spinlock_acquire_or_wait(&proc->lock);
|
||||
for (int i = 0; i < proc->process_threads.length; i++) {
|
||||
if (proc->process_threads.data[i]->state == THREAD_NORMAL) {
|
||||
sched_trigger_yield(proc->process_threads.data[i]->running_on_cpu);
|
||||
}
|
||||
proc->process_threads.data[i]->state = THREAD_KILLED;
|
||||
}
|
||||
spinlock_drop(&proc->lock);
|
||||
|
||||
spinlock_acquire_or_wait(&init_proc->lock);
|
||||
for (int i = 0; i < proc->child_processes.length; i++) {
|
||||
struct process *child_proc = proc->child_processes.data[i];
|
||||
child_proc->parent_process = init_proc;
|
||||
vec_push(&init_proc->child_processes, child_proc);
|
||||
/* Reparent children to init (pid 1), if init still exists. Hold
|
||||
* proc->lock while reading proc->child_processes: a concurrent
|
||||
* process_fork from one of our children would mutate the list. */
|
||||
if (init_proc && init_proc->state != PROCESS_KILLED) {
|
||||
spinlock_acquire_or_wait(&proc->lock);
|
||||
spinlock_acquire_or_wait(&init_proc->lock);
|
||||
for (int i = 0; i < proc->child_processes.length; i++) {
|
||||
struct process *child_proc = proc->child_processes.data[i];
|
||||
child_proc->parent_process = init_proc;
|
||||
vec_push(&init_proc->child_processes, child_proc);
|
||||
}
|
||||
spinlock_drop(&init_proc->lock);
|
||||
spinlock_drop(&proc->lock);
|
||||
}
|
||||
spinlock_drop(&init_proc->lock);
|
||||
|
||||
spinlock_acquire_or_wait(&proc->lock);
|
||||
vec_deinit(&proc->child_processes);
|
||||
vec_deinit(&proc->process_threads);
|
||||
spinlock_drop(&proc->lock);
|
||||
|
||||
/* SIGCHLD to the parent before death_event wakes any waitpid().
|
||||
* Skip when the parent is kernel_proc (pid 0): the kernel "task" is
|
||||
* not a userspace process and has no signal handling state. */
|
||||
if (proc->parent_process &&
|
||||
proc->parent_process->state != PROCESS_KILLED &&
|
||||
proc->parent_process->pid > 0) {
|
||||
signal_send_to_process(proc->parent_process, KSIGCHLD);
|
||||
}
|
||||
|
||||
event_trigger(&proc->death_event, false);
|
||||
proc->state = PROCESS_KILLED;
|
||||
|
||||
if (!are_we_killing_ourselves) {
|
||||
process_destroy_context(proc);
|
||||
}
|
||||
/* Defer pagemap destruction to the reaper (sched_get_next_thread)
|
||||
* to avoid tearing down CR3 while other CPUs may still be using it.
|
||||
* The reaper sees `clean_up` and runs process_destroy_context once
|
||||
* all threads have been deselected. */
|
||||
proc->clean_up = true;
|
||||
|
||||
if (are_we_killing_ourselves || crash) {
|
||||
proc->clean_up = true;
|
||||
/* Only yield if WE are the victim. In a cross-process kill (A kills B,
|
||||
* crash=true from signal_force_default) we run in A's context — A is
|
||||
* alive and must keep its quantum. Other CPUs currently running B's
|
||||
* threads were already kicked by sched_trigger_yield above. */
|
||||
if (are_we_killing_ourselves) {
|
||||
sched_yield(false);
|
||||
}
|
||||
}
|
||||
@@ -791,6 +898,13 @@ void thread_fork(struct thread *pthrd, struct process *fproc) {
|
||||
|
||||
thread_fork_context(pthrd, thrd);
|
||||
|
||||
|
||||
/* POSIX: child thread inherits parent thread's signal mask.
|
||||
* Pending-signal state is per-process and handled in process_fork. */
|
||||
thrd->sig_blocked = pthrd->sig_blocked;
|
||||
thrd->saved_sig_blocked = pthrd->saved_sig_blocked;
|
||||
thrd->in_sigsuspend = false; /* child doesn't inherit sigsuspend state */
|
||||
|
||||
thrd->last_scheduled = 0;
|
||||
|
||||
spinlock_acquire_or_wait(&thread_lock);
|
||||
|
||||
@@ -71,6 +71,15 @@ void process_wait_on_another_process(struct process *waiter,
|
||||
|
||||
void process_wait_on_processes(struct process *waiter, process_vec_t *waitees);
|
||||
|
||||
/* PID lookup. Returns NULL if no process has this pid. */
|
||||
struct process *sched_pid_to_process(int64_t p);
|
||||
|
||||
/* Session and process-group syscalls (defined in sched/session.c). */
|
||||
void syscall_setsid(struct syscall_arguments *args);
|
||||
void syscall_setpgid(struct syscall_arguments *args);
|
||||
void syscall_getpgid(struct syscall_arguments *args);
|
||||
void syscall_getsid(struct syscall_arguments *args);
|
||||
void syscall_getpgrp(struct syscall_arguments *args);
|
||||
|
||||
static inline struct thread *sched_get_running_thread(void) {
|
||||
struct thread *ret;
|
||||
|
||||
+36
-2
@@ -6,19 +6,22 @@
|
||||
#include "libk/event.h"
|
||||
#include "arch/x86_64/cpu/reg.h"
|
||||
#include "fs/elf.h"
|
||||
#include "signal.h"
|
||||
|
||||
|
||||
enum thread_states {
|
||||
THREAD_NORMAL = 0,
|
||||
THREAD_READY_TO_RUN,
|
||||
THREAD_KILLED,
|
||||
THREAD_WAITING_FOR_EVENT
|
||||
THREAD_WAITING_FOR_EVENT,
|
||||
THREAD_STOPPED /* SIGSTOP / SIGTSTP / SIGTTIN / SIGTTOU */
|
||||
};
|
||||
|
||||
enum process_states {
|
||||
PROCESS_NORMAL = 0,
|
||||
PROCESS_READY_TO_RUN,
|
||||
PROCESS_KILLED
|
||||
PROCESS_KILLED,
|
||||
PROCESS_STOPPED /* all threads stopped via job-control signal */
|
||||
};
|
||||
|
||||
struct process;
|
||||
@@ -47,11 +50,22 @@ struct thread {
|
||||
uint64_t fs_base;
|
||||
int64_t running_on_cpu;
|
||||
struct thread *next;
|
||||
|
||||
|
||||
/* ── signal state ────────────────────────────────────────────────── */
|
||||
k_sigset_t sig_blocked; /* per-thread blocked-signal mask */
|
||||
k_sigset_t saved_sig_blocked; /* for sigsuspend() */
|
||||
bool in_sigsuspend; /* sigsuspend semantics flag */
|
||||
};
|
||||
|
||||
typedef vec_t(struct thread *) thread_vec_t;
|
||||
typedef vec_t(struct process *) process_vec_t;
|
||||
|
||||
|
||||
/* Forward-decl: ctty is a pointer-back to a resource (devtmpfs node payload).
|
||||
* We don't include libk/resource.h here to keep the include graph shallow. */
|
||||
struct resource;
|
||||
|
||||
struct process {
|
||||
int64_t pid;
|
||||
enum process_states state;
|
||||
@@ -69,10 +83,30 @@ struct process {
|
||||
process_vec_t child_processes;
|
||||
struct auxval auxv;
|
||||
struct event death_event;
|
||||
|
||||
|
||||
/* ── wait() encoding ─────────────────────────────────────────────── */
|
||||
/* POSIX wait status: encoded with W_EXITCODE / W_TERMSIG / W_STOPCODE
|
||||
* (see signal.c). Kept wide enough for the encoded form (was uint8_t,
|
||||
* which couldn't represent a signal kill or a stop). */
|
||||
int wait_status;
|
||||
bool wait_status_valid; /* false until process exits / stops */
|
||||
|
||||
uint8_t status;
|
||||
bool clean_up;
|
||||
char name[256];
|
||||
struct process *next;
|
||||
|
||||
|
||||
/* ── session / process-group ─────────────────────────────────────── */
|
||||
int64_t pgid; /* process-group id (== own pid for leader) */
|
||||
int64_t sid; /* session id (== own pid for session leader) */
|
||||
struct resource *ctty; /* controlling terminal, or NULL */
|
||||
|
||||
/* ── signal disposition ──────────────────────────────────────────── */
|
||||
struct k_sigaction sig_handlers[NSIG];
|
||||
k_sigset_t sig_pending; /* process-level pending set */
|
||||
spinlock_t sig_lock; /* protects handlers + pending */
|
||||
};
|
||||
|
||||
#define CPU_STACK_SIZE (64 * 1024)
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* POSIX session & process-group syscalls for KirkOS.
|
||||
*
|
||||
* The data lives on struct process (see sched_types.h):
|
||||
*
|
||||
* - pid: process id, immutable after creation
|
||||
* - pgid: process-group id (defaults to pid; can be changed via
|
||||
* setpgid; all processes in a pgrp share the same value)
|
||||
* - sid: session id (defaults to pid for session-leaders; set by
|
||||
* setsid; survives fork+exec)
|
||||
* - ctty: controlling-terminal resource pointer (set on a session
|
||||
* leader's first open of a tty, or via TIOCSCTTY)
|
||||
*
|
||||
* Relationships and rules (from POSIX / SUSv4):
|
||||
*
|
||||
* - Every process is in exactly one process group.
|
||||
* - Every process group is in exactly one session.
|
||||
* - The session leader has pid == sid and pgid == sid.
|
||||
* - setsid() makes the caller a new session+pgroup leader, with no
|
||||
* controlling terminal. Fails (EPERM) if the caller is already a
|
||||
* pgroup leader.
|
||||
* - setpgid(pid, pgid) moves `pid` into pgrp `pgid`. The target must
|
||||
* be in the same session as the caller; pgid must equal an existing
|
||||
* pgid in that session OR be the target's own pid (creating a new
|
||||
* pgrp). Cannot change pgid of a session leader.
|
||||
* - getpgid / getsid / getpgrp are obvious accessors.
|
||||
*/
|
||||
|
||||
#include "sched.h"
|
||||
#include "sched_types.h"
|
||||
#include "syscall.h"
|
||||
#include "libk/errno.h"
|
||||
#include "libk/debug.h"
|
||||
#include "mp/spinlock.h"
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
/* Helper: pid==0 means "the calling process". */
|
||||
static struct process *resolve_pid(int64_t pid) {
|
||||
struct process *self = sched_get_running_thread()->mother_proc;
|
||||
if (pid == 0) {
|
||||
return self;
|
||||
}
|
||||
return sched_pid_to_process(pid);
|
||||
}
|
||||
|
||||
|
||||
/* ── setsid() ───────────────────────────────────────────────────────────── */
|
||||
void syscall_setsid(struct syscall_arguments *args) {
|
||||
struct process *proc = sched_get_running_thread()->mother_proc;
|
||||
|
||||
/* POSIX: fails if the caller is already a pgroup leader. The session
|
||||
* leader is by definition a pgroup leader (its own pgid is its pid),
|
||||
* so a process that has previously called setsid() can't call it
|
||||
* again. */
|
||||
if (proc->pgid == proc->pid) {
|
||||
errno = EPERM;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
proc->sid = proc->pid;
|
||||
proc->pgid = proc->pid;
|
||||
proc->ctty = NULL; /* new session starts with no ctty */
|
||||
|
||||
args->ret = proc->sid;
|
||||
}
|
||||
|
||||
|
||||
/* ── setpgid(pid, pgid) ─────────────────────────────────────────────────── */
|
||||
void syscall_setpgid(struct syscall_arguments *args) {
|
||||
int64_t pid = (int64_t)args->args0;
|
||||
int64_t pgid = (int64_t)args->args1;
|
||||
|
||||
struct process *self = sched_get_running_thread()->mother_proc;
|
||||
struct process *target = resolve_pid(pid);
|
||||
|
||||
if (target == NULL) {
|
||||
errno = ESRCH;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* setpgid(0, ...) and setpgid(self, ...) — implicit conversion. */
|
||||
/* pgid==0 means: use the target's pid as the new pgid. */
|
||||
if (pgid == 0) {
|
||||
pgid = target->pid;
|
||||
}
|
||||
if (pgid < 0) {
|
||||
errno = EINVAL;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* The target must be in the same session as the caller. */
|
||||
if (target->sid != self->sid) {
|
||||
errno = EPERM;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Cannot change the pgid of a session leader. */
|
||||
if (target->sid == target->pid) {
|
||||
errno = EPERM;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* If pgid != target->pid (i.e. joining an EXISTING group), verify the
|
||||
* group exists in the same session. */
|
||||
if (pgid != target->pid) {
|
||||
bool found = false;
|
||||
for (struct process *p = process_list; p != NULL; p = p->next) {
|
||||
if (p->pgid == pgid && p->sid == self->sid) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
errno = EPERM;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
target->pgid = pgid;
|
||||
args->ret = 0;
|
||||
}
|
||||
|
||||
|
||||
/* ── getpgid(pid) ───────────────────────────────────────────────────────── */
|
||||
void syscall_getpgid(struct syscall_arguments *args) {
|
||||
int64_t pid = (int64_t)args->args0;
|
||||
struct process *target = resolve_pid(pid);
|
||||
if (target == NULL) {
|
||||
errno = ESRCH;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
args->ret = target->pgid;
|
||||
}
|
||||
|
||||
|
||||
/* ── getsid(pid) ────────────────────────────────────────────────────────── */
|
||||
void syscall_getsid(struct syscall_arguments *args) {
|
||||
int64_t pid = (int64_t)args->args0;
|
||||
struct process *target = resolve_pid(pid);
|
||||
if (target == NULL) {
|
||||
errno = ESRCH;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
args->ret = target->sid;
|
||||
}
|
||||
|
||||
|
||||
/* ── getpgrp() — POSIX shortcut for getpgid(0) ──────────────────────────── */
|
||||
void syscall_getpgrp(struct syscall_arguments *args) {
|
||||
args->ret = sched_get_running_thread()->mother_proc->pgid;
|
||||
}
|
||||
@@ -0,0 +1,894 @@
|
||||
/*
|
||||
* KirkOS POSIX signal implementation.
|
||||
*
|
||||
* Design notes
|
||||
* ────────────
|
||||
*
|
||||
* - Per-process disposition table (handlers, flags, sa_mask) stored on
|
||||
* struct process. Process-level pending mask. Per-thread blocked mask
|
||||
* (so that one thread can mask a signal while another thread handles
|
||||
* it — POSIX semantics).
|
||||
*
|
||||
* - Delivery happens at userspace-return time: every IRET back to ring 3
|
||||
* (timer tick, IRQ return) and every SYSRET (syscall return) calls
|
||||
* signal_check_and_dispatch(). The hook lives in
|
||||
* arch/x86_64/cpu/syscall_handle.c and arch/x86_64/boot/isr.c.
|
||||
*
|
||||
* - Sigframe layout (lives on the user stack):
|
||||
*
|
||||
* rsp (just after kernel iret/sysret enters the handler)
|
||||
* ┌─────────────────────────────────────────────┐
|
||||
* │ uint64_t restorer_addr ← popped by handler │ [+0]
|
||||
* │ `ret` │
|
||||
* ├─────────────────────────────────────────────┤
|
||||
* │ uint64_t magic = K_SIGFRAME_MAGIC │ [+8]
|
||||
* │ k_sigset_t saved_blocked_mask │ [+16]
|
||||
* │ registers_t saved_regs │ [+144]
|
||||
* │ int signo │
|
||||
* └─────────────────────────────────────────────┘
|
||||
*
|
||||
* When the handler `ret`s, it pops restorer_addr — that's
|
||||
* sa_restorer, which mlibc populates as a trampoline that does
|
||||
* `syscall(SYS_sigreturn)`. The kernel's syscall_sigreturn handler
|
||||
* reads the sigframe at (current_user_rsp - 8), validates magic,
|
||||
* restores regs + mask, and returns. The asm pop/sysret picks up the
|
||||
* restored values from the kernel-stack registers_t.
|
||||
*
|
||||
* - We do NOT support SA_SIGINFO three-argument handlers, sigaltstack,
|
||||
* or RT signals. SA_RESTART is accepted but unimplemented (the
|
||||
* syscall is not restarted — userspace gets the -EINTR return).
|
||||
* SA_NODEFER and SA_RESETHAND are honored.
|
||||
*/
|
||||
|
||||
#include "signal.h"
|
||||
#include "sched.h"
|
||||
#include "sched_types.h"
|
||||
#include "syscall.h"
|
||||
#include "libk/debug.h"
|
||||
#include "libk/errno.h"
|
||||
#include "libk/string.h"
|
||||
#include "mp/spinlock.h"
|
||||
#include "mm/vmm.h"
|
||||
#include "arch/x86_64/cpu/reg.h"
|
||||
#include "arch/x86_64/sys/prcb.h"
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include "arch/x86_64/asm/asm.h"
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* Default actions table.
|
||||
* Indexed by signal number; valid range 1..NSIG-1. Index 0 unused.
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
static const enum sig_default sig_defaults[NSIG] = {
|
||||
[KSIGHUP] = SIG_ACT_TERM,
|
||||
[KSIGINT] = SIG_ACT_TERM,
|
||||
[KSIGQUIT] = SIG_ACT_CORE,
|
||||
[KSIGILL] = SIG_ACT_CORE,
|
||||
[KSIGTRAP] = SIG_ACT_CORE,
|
||||
[KSIGABRT] = SIG_ACT_CORE,
|
||||
[KSIGBUS] = SIG_ACT_CORE,
|
||||
[KSIGFPE] = SIG_ACT_CORE,
|
||||
[KSIGKILL] = SIG_ACT_TERM,
|
||||
[KSIGUSR1] = SIG_ACT_TERM,
|
||||
[KSIGSEGV] = SIG_ACT_CORE,
|
||||
[KSIGUSR2] = SIG_ACT_TERM,
|
||||
[KSIGPIPE] = SIG_ACT_TERM,
|
||||
[KSIGALRM] = SIG_ACT_TERM,
|
||||
[KSIGTERM] = SIG_ACT_TERM,
|
||||
[KSIGSTKFLT] = SIG_ACT_TERM,
|
||||
[KSIGCHLD] = SIG_ACT_IGN,
|
||||
[KSIGCONT] = SIG_ACT_CONT,
|
||||
[KSIGSTOP] = SIG_ACT_STOP,
|
||||
[KSIGTSTP] = SIG_ACT_STOP,
|
||||
[KSIGTTIN] = SIG_ACT_STOP,
|
||||
[KSIGTTOU] = SIG_ACT_STOP,
|
||||
[KSIGURG] = SIG_ACT_IGN,
|
||||
[KSIGXCPU] = SIG_ACT_CORE,
|
||||
[KSIGXFSZ] = SIG_ACT_CORE,
|
||||
[KSIGVTALRM] = SIG_ACT_TERM,
|
||||
[KSIGPROF] = SIG_ACT_TERM,
|
||||
[KSIGWINCH] = SIG_ACT_IGN,
|
||||
[KSIGIO] = SIG_ACT_TERM,
|
||||
[KSIGPWR] = SIG_ACT_TERM,
|
||||
[KSIGSYS] = SIG_ACT_CORE,
|
||||
};
|
||||
|
||||
enum sig_default sig_default_action(int sig) {
|
||||
if (sig < 1 || sig >= NSIG) {
|
||||
return SIG_ACT_IGN;
|
||||
}
|
||||
/* We only know defaults for the standard POSIX signals 1..31.
|
||||
* Anything in the RT range (32..64) is unsupported; treat as
|
||||
* ignore so a bogus syscall can't accidentally terminate the
|
||||
* caller. */
|
||||
if (sig > 31) {
|
||||
return SIG_ACT_IGN;
|
||||
}
|
||||
return sig_defaults[sig];
|
||||
}
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* Signal-state initialization
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
|
||||
void signal_init_process(struct process *p) {
|
||||
for (int i = 0; i < NSIG; i++) {
|
||||
p->sig_handlers[i].sa_handler = KSIG_DFL;
|
||||
p->sig_handlers[i].sa_flags = 0;
|
||||
p->sig_handlers[i].sa_restorer = NULL;
|
||||
k_sigemptyset(&p->sig_handlers[i].sa_mask);
|
||||
}
|
||||
k_sigemptyset(&p->sig_pending);
|
||||
spinlock_init(p->sig_lock);
|
||||
}
|
||||
|
||||
void signal_reset_on_exec(struct process *p) {
|
||||
/* POSIX execve: handlers reset to SIG_DFL unless they were SIG_IGN
|
||||
* (those stay SIG_IGN). sig_pending preserved. sa_mask cleared. */
|
||||
spinlock_acquire_or_wait(&p->sig_lock);
|
||||
for (int i = 1; i < NSIG; i++) {
|
||||
if (p->sig_handlers[i].sa_handler == KSIG_IGN) {
|
||||
continue;
|
||||
}
|
||||
p->sig_handlers[i].sa_handler = KSIG_DFL;
|
||||
p->sig_handlers[i].sa_flags = 0;
|
||||
p->sig_handlers[i].sa_restorer = NULL;
|
||||
k_sigemptyset(&p->sig_handlers[i].sa_mask);
|
||||
}
|
||||
spinlock_drop(&p->sig_lock);
|
||||
}
|
||||
|
||||
void signal_inherit_on_fork(struct process *parent, struct process *child) {
|
||||
/* Fork copies handler dispositions and sig mask of the calling thread.
|
||||
* Pending signals are NOT inherited (POSIX). */
|
||||
spinlock_acquire_or_wait(&parent->sig_lock);
|
||||
for (int i = 0; i < NSIG; i++) {
|
||||
child->sig_handlers[i] = parent->sig_handlers[i];
|
||||
}
|
||||
spinlock_drop(&parent->sig_lock);
|
||||
k_sigemptyset(&child->sig_pending);
|
||||
spinlock_init(child->sig_lock);
|
||||
}
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* Wait-status encoding
|
||||
*
|
||||
* POSIX <-> Linux/mlibc convention:
|
||||
* exited(code) → (code & 0xff) << 8
|
||||
* killed(sig) → (sig & 0x7f)
|
||||
* stopped(sig) → (sig << 8) | 0x7f
|
||||
* continued → 0xffff
|
||||
*
|
||||
* mlibc's wait macros test the result and pull out the field; we just
|
||||
* have to lay down these encoded ints.
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
/* Normal exit: low 8 bits of the exit code, stored in bits 8..15. */
static inline int wait_encode_exit(int code) {
    const int exit_byte = code & 0xff;
    return exit_byte << 8;
}

/* Killed by a signal: the signal number in the low 7 bits. */
static inline int wait_encode_kill(int sig) {
    const int term_sig = sig & 0x7f;
    return term_sig;
}

/* Stopped by a signal: 0x7f marker in the low byte, signal number above. */
static inline int wait_encode_stop(int sig) {
    const int shifted = sig << 8;
    return shifted | 0x7f;
}

/* Continued: fixed marker value. */
static inline int wait_encode_cont(void) {
    return 0xffff;
}
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* Send-a-signal: queues the signal and (where possible) wakes the
|
||||
* receiver.
|
||||
*
|
||||
* We make a single design choice up front to keep the model tractable:
|
||||
* pending signals live on the *process* (not per-thread). When delivered,
|
||||
* the kernel picks the currently-running thread of that process to run
|
||||
* the handler. This matches POSIX "process-directed" signal semantics.
|
||||
*
|
||||
* The thread-private piece is the blocked-signal mask only.
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
|
||||
/* Deliver default action immediately if no handler is registered. Called
|
||||
* from signal_send_to_process for SIGKILL/SIGSTOP/SIGCONT specifically,
|
||||
* because those can't be blocked or handled — they take effect synchronously
|
||||
* even on threads that aren't returning to userspace.
|
||||
*
|
||||
* Caller must NOT hold p->sig_lock. */
|
||||
static void signal_force_default(struct process *p, int sig) {
|
||||
switch (sig_default_action(sig)) {
|
||||
case SIG_ACT_STOP: {
|
||||
/* Mark all threads stopped. Threads parked in WAITING_FOR_EVENT
|
||||
* are already off-CPU; flipping their state is enough — the
|
||||
* scheduler will skip them once they're STOPPED. For threads
|
||||
* running on a CPU, kick that CPU so it re-checks state on
|
||||
* return from the IPI. */
|
||||
spinlock_acquire_or_wait(&p->lock);
|
||||
for (int i = 0; i < p->process_threads.length; i++) {
|
||||
struct thread *t = p->process_threads.data[i];
|
||||
t->state = THREAD_STOPPED;
|
||||
if (t->running_on_cpu >= 0) {
|
||||
sched_trigger_yield(t->running_on_cpu);
|
||||
}
|
||||
}
|
||||
p->state = PROCESS_STOPPED;
|
||||
p->wait_status = wait_encode_stop(sig);
|
||||
p->wait_status_valid = true;
|
||||
spinlock_drop(&p->lock);
|
||||
if (p->parent_process) {
|
||||
signal_send_to_process(p->parent_process, KSIGCHLD);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SIG_ACT_CONT: {
|
||||
spinlock_acquire_or_wait(&p->lock);
|
||||
for (int i = 0; i < p->process_threads.length; i++) {
|
||||
if (p->process_threads.data[i]->state == THREAD_STOPPED) {
|
||||
p->process_threads.data[i]->state = THREAD_READY_TO_RUN;
|
||||
}
|
||||
}
|
||||
p->state = PROCESS_NORMAL;
|
||||
p->wait_status = wait_encode_cont();
|
||||
p->wait_status_valid = true;
|
||||
spinlock_drop(&p->lock);
|
||||
if (p->parent_process) {
|
||||
signal_send_to_process(p->parent_process, KSIGCHLD);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SIG_ACT_TERM:
|
||||
case SIG_ACT_CORE: {
|
||||
/* Terminate the process now. Don't wait for a thread to return
|
||||
* to userspace — that path may never run if all threads are
|
||||
* parked in WAITING_FOR_EVENT. */
|
||||
p->wait_status = wait_encode_kill(sig);
|
||||
p->wait_status_valid = true;
|
||||
process_kill(p, true);
|
||||
break;
|
||||
}
|
||||
case SIG_ACT_IGN:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int signal_send_to_process(struct process *p, int sig) {
|
||||
if (!p || sig < 1 || sig >= NSIG) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
spinlock_acquire_or_wait(&p->lock);
|
||||
bool dead = (p->state == PROCESS_KILLED);
|
||||
spinlock_drop(&p->lock);
|
||||
if (dead) {
|
||||
return -ESRCH;
|
||||
}
|
||||
|
||||
/* sig 0 is "check existence". The caller already validated `p` exists
|
||||
* by reaching this function; nothing more to do. */
|
||||
if (sig == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
spinlock_acquire_or_wait(&p->sig_lock);
|
||||
k_sigaddset(&p->sig_pending, sig);
|
||||
spinlock_drop(&p->sig_lock);
|
||||
|
||||
/* SIGKILL and SIGSTOP are not maskable. Apply default action now. */
|
||||
if (sig == KSIGKILL) {
|
||||
spinlock_acquire_or_wait(&p->sig_lock);
|
||||
k_sigdelset(&p->sig_pending, sig);
|
||||
spinlock_drop(&p->sig_lock);
|
||||
signal_force_default(p, sig);
|
||||
return 0;
|
||||
}
|
||||
if (sig == KSIGSTOP) {
|
||||
spinlock_acquire_or_wait(&p->sig_lock);
|
||||
k_sigdelset(&p->sig_pending, sig);
|
||||
spinlock_drop(&p->sig_lock);
|
||||
signal_force_default(p, sig);
|
||||
return 0;
|
||||
}
|
||||
if (sig == KSIGCONT) {
|
||||
signal_force_default(p, sig);
|
||||
/* keep pending so handler (if any) also runs */
|
||||
}
|
||||
|
||||
/* Wake one waiting thread (if any) so it can reach the signal-check
|
||||
* on the way back to userspace. We pick the first WAITING_FOR_EVENT
|
||||
* thread and bump it to READY_TO_RUN. */
|
||||
spinlock_acquire_or_wait(&p->lock);
|
||||
for (int i = 0; i < p->process_threads.length; i++) {
|
||||
struct thread *t = p->process_threads.data[i];
|
||||
if (t->state == THREAD_WAITING_FOR_EVENT) {
|
||||
t->state = THREAD_READY_TO_RUN;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spinlock_drop(&p->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int signal_send_to_pgrp(int64_t pgid, int sig) {
|
||||
extern struct process *process_list;
|
||||
int sent = 0;
|
||||
|
||||
for (struct process *p = process_list; p != NULL; p = p->next) {
|
||||
if (p->pgid == pgid && p->state != PROCESS_KILLED) {
|
||||
if (signal_send_to_process(p, sig) == 0) {
|
||||
sent++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return sent > 0 ? 0 : -ESRCH;
|
||||
}
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* Signal dispatch on userspace return
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
|
||||
#define K_SIGFRAME_MAGIC 0xC0DEFACEDEADBEEFULL
|
||||
|
||||
struct k_sigframe {
|
||||
uint64_t restorer_addr; /* popped by handler's `ret` */
|
||||
uint64_t magic;
|
||||
k_sigset_t saved_blocked; /* sig_blocked at signal-delivery time */
|
||||
registers_t saved_regs; /* full pre-signal user state */
|
||||
int signo;
|
||||
int _pad;
|
||||
} __attribute__((aligned(16)));
|
||||
|
||||
/* Pick the lowest-numbered pending unblocked signal of the current thread's
|
||||
* process. Returns 0 if none. Caller holds proc->sig_lock. */
|
||||
static int pick_pending_signal(struct process *proc, struct thread *thrd) {
|
||||
for (int s = 1; s < NSIG; s++) {
|
||||
if (!k_sigismember(&proc->sig_pending, s)) continue;
|
||||
if (k_sigismember(&thrd->sig_blocked, s)) continue;
|
||||
return s;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Write a sigframe to the user stack at user_rsp - sizeof(sigframe).
|
||||
* Returns the new rsp (i.e. the address of the sigframe), or 0 on
|
||||
* translation failure.
|
||||
*
|
||||
* `saved_mask` is the blocked-signal mask to record in the sigframe so
|
||||
* sigreturn can restore it. This is normally thrd->sig_blocked, but for a
|
||||
* handler delivered inside sigsuspend it must be the pre-sigsuspend mask
|
||||
* (thrd->saved_sig_blocked) so the temporary suspend mask doesn't leak
|
||||
* past the handler. */
|
||||
static uint64_t build_sigframe(struct process *proc, struct thread *thrd,
|
||||
registers_t *reg, int sig,
|
||||
void (*restorer)(void),
|
||||
k_sigset_t saved_mask) {
|
||||
uint64_t cur_rsp = reg->rsp;
|
||||
|
||||
/* 16-byte align the frame, leave 8 bytes for the "fake call return"
|
||||
* — when the handler is entered via sysret/iret with rsp at the
|
||||
* sigframe address, the first qword on the stack is restorer_addr.
|
||||
* The handler's `ret` pops it. Subsequent stack frames within the
|
||||
* handler stay 16-aligned because we placed sigframe on a 16-aligned
|
||||
* address.
|
||||
*
|
||||
* Also reserve a 128-byte red zone per the System V ABI — the user
|
||||
* code before the signal might have data below rsp it expects to
|
||||
* survive. */
|
||||
uint64_t new_rsp = cur_rsp - 128; /* red zone */
|
||||
new_rsp -= sizeof(struct k_sigframe);
|
||||
new_rsp &= ~(uint64_t)15; /* align */
|
||||
/* SysV x86_64 ABI: at function entry, (rsp + 8) must be 16-byte
|
||||
* aligned (the caller's CALL placed an 8-byte return address).
|
||||
* Our restorer_addr at sigframe[0] plays the role of that return
|
||||
* address, so we want rsp aligned to (16k + 8). After aligning
|
||||
* down to 16, subtract 8 to land on the correct boundary. */
|
||||
new_rsp -= 8;
|
||||
|
||||
struct k_sigframe frame;
|
||||
frame.restorer_addr = (uint64_t)restorer;
|
||||
frame.magic = K_SIGFRAME_MAGIC;
|
||||
frame.saved_blocked = saved_mask;
|
||||
frame.saved_regs = *reg;
|
||||
frame.signo = sig;
|
||||
frame._pad = 0;
|
||||
|
||||
(void)thrd; /* mask now passed in directly */
|
||||
|
||||
/* Translate user address to kernel and write. Failure path: just kill
|
||||
* the process — the user can't deliver a signal anyway. */
|
||||
if (!syscall_helper_copy_to_user(new_rsp, &frame, sizeof(frame))) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
(void)proc; /* unused in this minimal impl */
|
||||
return new_rsp;
|
||||
}
|
||||
|
||||
/* Deliver at most one pending, unblocked signal to the current thread just
 * before it returns to ring 3.
 *
 * reg is the saved user register block that iret/sysret will consume; when
 * a user handler is to run, it is rewritten so that execution resumes in
 * the handler on a freshly built sigframe.
 *
 * Returns false when nothing was delivered (no deliverable signal, or the
 * signal was ignored); true when the thread was stopped, the process was
 * killed, or reg was redirected into a handler. */
bool signal_check_and_dispatch(registers_t *reg) {
    /* Only deliver on return to userspace. cs low 2 bits == 3 ⇒ ring 3. */
    if ((reg->cs & 0x3) != 0x3) {
        return false;
    }

    struct thread *thrd = sched_get_running_thread();
    if (!thrd) return false;
    struct process *proc = thrd->mother_proc;
    if (!proc || proc->state == PROCESS_KILLED) return false;

    /* Claim one signal and snapshot its disposition under the lock. */
    spinlock_acquire_or_wait(&proc->sig_lock);
    int sig = pick_pending_signal(proc, thrd);
    if (sig == 0) {
        spinlock_drop(&proc->sig_lock);
        return false;
    }

    struct k_sigaction act = proc->sig_handlers[sig];
    k_sigdelset(&proc->sig_pending, sig);
    spinlock_drop(&proc->sig_lock);

    /* If we're returning from a sigsuspend, any path that "consumes" the
     * pending signal must restore the pre-suspend mask before handing
     * control back to userspace. The handler path does this via the
     * sigframe; the ignore paths must do it inline. */
#define KSIG_DISPATCH_RESTORE_SUSPEND() do {                   \
        if (thrd->in_sigsuspend) {                             \
            thrd->sig_blocked = thrd->saved_sig_blocked;       \
            thrd->in_sigsuspend = false;                       \
        }                                                      \
    } while (0)

    /* SIG_IGN: drop it. */
    if (act.sa_handler == KSIG_IGN) {
        KSIG_DISPATCH_RESTORE_SUSPEND();
        return false;
    }

    /* SIG_DFL: apply default action. */
    if (act.sa_handler == KSIG_DFL) {
        enum sig_default da = sig_default_action(sig);
        switch (da) {
        case SIG_ACT_IGN:
            KSIG_DISPATCH_RESTORE_SUSPEND();
            return false;
        case SIG_ACT_STOP: {
            /* Stop ourselves: mark the thread STOPPED and yield. The
             * thread is not scheduled again until SIGCONT delivery in
             * signal_send_to_process flips its state back. */
            KSIG_DISPATCH_RESTORE_SUSPEND();
            thrd->state = THREAD_STOPPED;
            proc->state = PROCESS_STOPPED;
            proc->wait_status = wait_encode_stop(sig);
            proc->wait_status_valid = true;
            if (proc->parent_process) {
                signal_send_to_process(proc->parent_process, KSIGCHLD);
            }
            sched_yield(false);
            return true;
        }
        case SIG_ACT_CONT:
            /* No-op when explicitly received; the wakeup happens in
             * signal_send_to_process. */
            KSIG_DISPATCH_RESTORE_SUSPEND();
            return false;
        case SIG_ACT_TERM:
        case SIG_ACT_CORE:
            /* Terminate the process. Encode and exit. */
            proc->wait_status = wait_encode_kill(sig);
            proc->wait_status_valid = true;
            /* process_kill is in sched.c — it will not return for the
             * current thread. */
            extern void process_kill(struct process *, bool);
            process_kill(proc, true);
            return true;
        }
    }

    /* User-installed handler. Build sigframe, mutate reg. */
    if (!(act.sa_flags & KSA_RESTORER) || act.sa_restorer == NULL) {
        /* Without a restorer we can't return from the handler. Treat as
         * if no handler installed — terminate. */
        kprintf("[signal] sig %d for pid %ld has no SA_RESTORER; killing\n",
                sig, proc->pid);
        proc->wait_status = wait_encode_kill(sig);
        proc->wait_status_valid = true;
        extern void process_kill(struct process *, bool);
        process_kill(proc, true);
        return true;
    }

    /* Inside a sigsuspend the user-visible mask to restore is the
     * pre-sigsuspend mask, not the temporary one currently installed.
     * sigreturn restores from the sigframe, so stash the right value. */
    k_sigset_t mask_to_save = thrd->in_sigsuspend ? thrd->saved_sig_blocked
                                                  : thrd->sig_blocked;
    if (thrd->in_sigsuspend) {
        thrd->in_sigsuspend = false;
    }

    uint64_t new_rsp = build_sigframe(proc, thrd, reg, sig, act.sa_restorer,
                                      mask_to_save);
    if (new_rsp == 0) {
        /* The user stack is unusable, so the handler cannot run. Kill the
         * process and report it as killed by SIGSEGV; previously this path
         * left no wait status for the parent to collect, unlike every
         * other kill path in this function. */
        kprintf("[signal] failed to build sigframe for sig %d, pid %ld\n",
                sig, proc->pid);
        proc->wait_status = wait_encode_kill(KSIGSEGV);
        proc->wait_status_valid = true;
        extern void process_kill(struct process *, bool);
        process_kill(proc, true);
        return true;
    }

    /* Merge the handler's sa_mask + (this signal unless SA_NODEFER) into
     * sig_blocked for the duration of the handler. sigreturn restores. */
    k_sigorset(&thrd->sig_blocked, &thrd->sig_blocked, &act.sa_mask);
    if (!(act.sa_flags & KSA_NODEFER)) {
        k_sigaddset(&thrd->sig_blocked, sig);
    }

    /* If SA_RESETHAND, reset handler to SIG_DFL after one invocation. */
    if (act.sa_flags & KSA_RESETHAND) {
        spinlock_acquire_or_wait(&proc->sig_lock);
        proc->sig_handlers[sig].sa_handler = KSIG_DFL;
        proc->sig_handlers[sig].sa_flags = 0;
        k_sigemptyset(&proc->sig_handlers[sig].sa_mask);
        spinlock_drop(&proc->sig_lock);
    }

    /* Interrupts must be disabled to call prcb_return_current_cpu().
     * NOTE(review): the sti() below re-enables interrupts unconditionally;
     * confirm that is intended when this runs from the ISR return path. */
    cli();

    /* Redirect to handler. Argument convention: rdi = signo, rsi/rdx = 0
     * (siginfo / ucontext are not populated — SA_SIGINFO is unsupported). */
    uint64_t handler_addr = (uint64_t)act.sa_handler;

    /* iret path uses reg->rip / reg->rsp / reg->rflags directly.
     * sysret path uses reg->rcx (rip) / reg->r11 (rflags) and the per-CPU
     * user_stack field for rsp. Update all of them so both paths agree. */
    reg->rip = handler_addr;
    reg->rcx = handler_addr;
    reg->rsp = new_rsp;
    prcb_return_current_cpu()->user_stack = new_rsp;
    reg->r11 = reg->rflags;
    reg->rdi = (uint64_t)sig;
    reg->rsi = 0;
    reg->rdx = 0;
    sti();
    return true;
}
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* sigreturn — invoked by the user's sa_restorer trampoline after the
|
||||
* handler returns. Reads the sigframe (which sits at user rsp - 8 — the
|
||||
* `restorer_addr` was popped by the handler's `ret` so user rsp now
|
||||
* points at `magic`) and restores the original register state and mask.
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
void syscall_sigreturn(struct syscall_arguments *args) {
|
||||
struct thread *thrd = sched_get_running_thread();
|
||||
registers_t *reg = (registers_t *)args->reg_ptr;
|
||||
if (!reg) {
|
||||
/* No reg pointer (shouldn't happen) — return -EINVAL. */
|
||||
args->ret = -1;
|
||||
errno = EINVAL;
|
||||
return;
|
||||
}
|
||||
|
||||
/* At the moment of sigreturn, the user's rsp points at the byte just
|
||||
* past `restorer_addr` (which the handler popped). So sigframe lives
|
||||
* at rsp - 8 (the magic field is the first thing after restorer_addr,
|
||||
* and restorer_addr is sizeof(uint64_t)). */
|
||||
uint64_t frame_addr = reg->rsp - 8;
|
||||
|
||||
struct k_sigframe frame;
|
||||
if (!syscall_helper_copy_from_user(frame_addr, &frame, sizeof(frame))) {
|
||||
args->ret = -1;
|
||||
errno = EFAULT;
|
||||
return;
|
||||
}
|
||||
if (frame.magic != K_SIGFRAME_MAGIC) {
|
||||
kprintf("[signal] sigreturn: bad magic (pid %ld)\n",
|
||||
thrd->mother_proc->pid);
|
||||
args->ret = -1;
|
||||
errno = EINVAL;
|
||||
return;
|
||||
}
|
||||
|
||||
/* SECURITY: the sigframe lives on the user stack and is user-writable.
|
||||
* A malicious userspace can overwrite saved_regs between the handler's
|
||||
* first instruction and the sigreturn call, putting arbitrary values in
|
||||
* cs/ss/rflags to elevate privilege via iret/sysret. Without this clamp:
|
||||
* - cs = 0x08 (kernel CS) → privilege escalation to ring 0
|
||||
* - ss = 0x10 (kernel SS) → bypass user stack
|
||||
* - rflags IOPL=3 / VM=1 / IF=0 → unauthorized I/O privilege, v86, or
|
||||
* interrupt disable in user mode
|
||||
* This is the classic Linux CVE class (see CVE-2014-9322 for a variant).
|
||||
* Force the privileged fields to safe ring-3 values before they reach the
|
||||
* kernel-stack registers_t block that iret/sysret will consume.
|
||||
*
|
||||
* rflags bit assignments (user-allowed vs forced):
|
||||
* bit 0 CF, bit 2 PF, bit 4 AF, bit 6 ZF, bit 7 SF,
|
||||
* bit 8 TF (single-step), bit 10 DF, bit 11 OF, bit 18 AC, bit 21 ID
|
||||
* → user can set: mask 0x244D5
|
||||
* bit 1 reserved-1 (always 1) + bit 9 IF (must be 1 for userspace)
|
||||
* → forced on: 0x202
|
||||
* bits 12-13 IOPL, bit 14 NT, bit 15 reserved, bit 16 RF, bit 17 VM,
|
||||
* bits 19-20 VIF/VIP, bits 22-63 reserved
|
||||
* → forced off by mask
|
||||
*/
|
||||
cli(); // Interrupts must be disabled to call return current cpu;
|
||||
frame.saved_regs.cs = 0x23; /* user CS (matches amd_syscall.asm) */
|
||||
frame.saved_regs.ss = 0x1b; /* user SS (matches amd_syscall.asm) */
|
||||
{
|
||||
const uint64_t RFLAGS_USER_MASK = 0x000244D5ULL;
|
||||
const uint64_t RFLAGS_FORCED = 0x00000202ULL; /* bit 1 + IF */
|
||||
frame.saved_regs.rflags =
|
||||
(frame.saved_regs.rflags & RFLAGS_USER_MASK) | RFLAGS_FORCED;
|
||||
}
|
||||
|
||||
|
||||
/* Restore registers and blocked mask. */
|
||||
*reg = frame.saved_regs;
|
||||
prcb_return_current_cpu()->user_stack = reg->rsp;
|
||||
thrd->sig_blocked = frame.saved_blocked;
|
||||
thrd->in_sigsuspend = false;
|
||||
sti();
|
||||
|
||||
/* args->ret is whatever was in rax — for sysret, that's the value
|
||||
* that becomes the user's rax. The saved_regs.rax is the value the
|
||||
* user had before the signal, which is what we want. Don't overwrite
|
||||
* it via args->ret. To prevent syscall_handler's `reg->rax = args.ret`
|
||||
* line from clobbering, set args->ret to the saved rax. */
|
||||
args->ret = frame.saved_regs.rax;
|
||||
}
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* sigaction / sigprocmask / sigpending
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
|
||||
void syscall_sigaction(struct syscall_arguments *args) {
|
||||
int signo = (int)args->args0;
|
||||
uintptr_t user_act = args->args1; /* const struct sigaction * */
|
||||
uintptr_t user_oact = args->args2; /* struct sigaction * */
|
||||
|
||||
if (signo < 1 || signo >= NSIG || signo == KSIGKILL || signo == KSIGSTOP) {
|
||||
errno = EINVAL;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
struct process *proc = sched_get_running_thread()->mother_proc;
|
||||
spinlock_acquire_or_wait(&proc->sig_lock);
|
||||
|
||||
if (user_oact != 0) {
|
||||
struct k_sigaction oa = proc->sig_handlers[signo];
|
||||
spinlock_drop(&proc->sig_lock);
|
||||
if (!syscall_helper_copy_to_user(user_oact, &oa, sizeof(oa))) {
|
||||
errno = EFAULT;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
spinlock_acquire_or_wait(&proc->sig_lock);
|
||||
}
|
||||
|
||||
if (user_act != 0) {
|
||||
struct k_sigaction na;
|
||||
spinlock_drop(&proc->sig_lock);
|
||||
if (!syscall_helper_copy_from_user(user_act, &na, sizeof(na))) {
|
||||
errno = EFAULT;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
spinlock_acquire_or_wait(&proc->sig_lock);
|
||||
/* Don't allow SIGKILL/SIGSTOP to ever have a non-default handler —
|
||||
* but we already rejected those above. */
|
||||
proc->sig_handlers[signo] = na;
|
||||
}
|
||||
|
||||
spinlock_drop(&proc->sig_lock);
|
||||
args->ret = 0;
|
||||
}
|
||||
|
||||
void syscall_sigprocmask(struct syscall_arguments *args) {
|
||||
int how = (int)args->args0;
|
||||
uintptr_t user_set = args->args1;
|
||||
uintptr_t user_oldset = args->args2;
|
||||
|
||||
struct thread *thrd = sched_get_running_thread();
|
||||
|
||||
if (user_oldset != 0) {
|
||||
if (!syscall_helper_copy_to_user(user_oldset, &thrd->sig_blocked,
|
||||
sizeof(k_sigset_t))) {
|
||||
errno = EFAULT;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (user_set != 0) {
|
||||
k_sigset_t s;
|
||||
if (!syscall_helper_copy_from_user(user_set, &s, sizeof(s))) {
|
||||
errno = EFAULT;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (how) {
|
||||
case KSIG_BLOCK:
|
||||
k_sigorset(&thrd->sig_blocked, &thrd->sig_blocked, &s);
|
||||
break;
|
||||
case KSIG_UNBLOCK:
|
||||
for (int i = 0; i < 16; i++) {
|
||||
thrd->sig_blocked.bits[i] &= ~s.bits[i];
|
||||
}
|
||||
break;
|
||||
case KSIG_SETMASK:
|
||||
thrd->sig_blocked = s;
|
||||
break;
|
||||
default:
|
||||
errno = EINVAL;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* SIGKILL and SIGSTOP cannot be blocked. */
|
||||
k_sigdelset(&thrd->sig_blocked, KSIGKILL);
|
||||
k_sigdelset(&thrd->sig_blocked, KSIGSTOP);
|
||||
}
|
||||
|
||||
args->ret = 0;
|
||||
}
|
||||
|
||||
void syscall_sigpending(struct syscall_arguments *args) {
|
||||
uintptr_t user_set = args->args0;
|
||||
struct process *proc = sched_get_running_thread()->mother_proc;
|
||||
|
||||
spinlock_acquire_or_wait(&proc->sig_lock);
|
||||
k_sigset_t snap = proc->sig_pending;
|
||||
spinlock_drop(&proc->sig_lock);
|
||||
|
||||
if (!syscall_helper_copy_to_user(user_set, &snap, sizeof(snap))) {
|
||||
errno = EFAULT;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
args->ret = 0;
|
||||
}
|
||||
|
||||
void syscall_sigsuspend(struct syscall_arguments *args) {
|
||||
uintptr_t user_mask = args->args0;
|
||||
k_sigset_t new_mask;
|
||||
|
||||
if (!syscall_helper_copy_from_user(user_mask, &new_mask, sizeof(new_mask))) {
|
||||
errno = EFAULT;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
struct thread *thrd = sched_get_running_thread();
|
||||
thrd->saved_sig_blocked = thrd->sig_blocked;
|
||||
thrd->sig_blocked = new_mask;
|
||||
k_sigdelset(&thrd->sig_blocked, KSIGKILL);
|
||||
k_sigdelset(&thrd->sig_blocked, KSIGSTOP);
|
||||
thrd->in_sigsuspend = true;
|
||||
|
||||
/* Wait until a signal arrives. We loop on sched_yield until
|
||||
* signal_check_and_dispatch fires. The dispatch will restore the old
|
||||
* mask (via the sigframe path) only after running the handler — for
|
||||
* sigsuspend, the man page says the mask is restored "on return". The
|
||||
* sigframe path already handles that. */
|
||||
for (;;) {
|
||||
struct process *proc = thrd->mother_proc;
|
||||
spinlock_acquire_or_wait(&proc->sig_lock);
|
||||
int sig = pick_pending_signal(proc, thrd);
|
||||
spinlock_drop(&proc->sig_lock);
|
||||
if (sig != 0) {
|
||||
break;
|
||||
}
|
||||
sched_yield(true);
|
||||
}
|
||||
|
||||
/* Do NOT restore thrd->sig_blocked here. signal_check_and_dispatch
|
||||
* runs after this syscall returns and needs to see in_sigsuspend so
|
||||
* it can stash the *pre-sigsuspend* mask in the sigframe (so
|
||||
* sigreturn restores the correct mask). The dispatch code clears
|
||||
* in_sigsuspend once it has consumed the saved mask, and on the
|
||||
* SIG_IGN / default-ignore paths it restores thrd->sig_blocked
|
||||
* from saved_sig_blocked inline via KSIG_DISPATCH_RESTORE_SUSPEND().
|
||||
* Either way, by the time control reaches userspace the suspend mask
|
||||
* is gone — matching sigsuspend(2) semantics of "until the signal is
|
||||
* delivered". */
|
||||
errno = EINTR;
|
||||
args->ret = -1;
|
||||
}
|
||||
|
||||
void syscall_pause(struct syscall_arguments *args) {
|
||||
struct thread *thrd = sched_get_running_thread();
|
||||
for (;;) {
|
||||
struct process *proc = thrd->mother_proc;
|
||||
spinlock_acquire_or_wait(&proc->sig_lock);
|
||||
int sig = pick_pending_signal(proc, thrd);
|
||||
spinlock_drop(&proc->sig_lock);
|
||||
if (sig != 0) {
|
||||
break;
|
||||
}
|
||||
sched_yield(true);
|
||||
}
|
||||
errno = EINTR;
|
||||
args->ret = -1;
|
||||
}
|
||||
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────────────
|
||||
* kill — replaces the old stub in sched.c. Now actually honors the
|
||||
* signal number.
|
||||
* ─────────────────────────────────────────────────────────────────────── */
|
||||
|
||||
void syscall_kill(struct syscall_arguments *args) {
|
||||
int64_t pid = (int64_t)args->args0;
|
||||
int sig = (int)args->args1;
|
||||
|
||||
if (sig < 0 || sig >= NSIG) {
|
||||
errno = EINVAL;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
extern struct process *sched_pid_to_process(int64_t);
|
||||
|
||||
if (pid > 0) {
|
||||
struct process *p = sched_pid_to_process(pid);
|
||||
if (!p) {
|
||||
errno = ESRCH;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
int r = signal_send_to_process(p, sig);
|
||||
if (r < 0) {
|
||||
errno = -r;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
args->ret = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (pid == 0) {
|
||||
/* Send to every process in the caller's process group. */
|
||||
struct process *self = sched_get_running_thread()->mother_proc;
|
||||
int r = signal_send_to_pgrp(self->pgid, sig);
|
||||
if (r < 0) {
|
||||
errno = -r;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
args->ret = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (pid == -1) {
|
||||
/* Send to every process the caller has permission to signal —
|
||||
* which, in single-user kirkos, is every process. We skip pid 1
|
||||
* (init) so we don't accidentally kill it. */
|
||||
extern struct process *process_list;
|
||||
int sent = 0;
|
||||
for (struct process *p = process_list; p != NULL; p = p->next) {
|
||||
if (p->pid > 1 && p->state != PROCESS_KILLED) {
|
||||
if (signal_send_to_process(p, sig) == 0) {
|
||||
sent++;
|
||||
}
|
||||
}
|
||||
}
|
||||
args->ret = sent > 0 ? 0 : -1;
|
||||
if (sent == 0) errno = ESRCH;
|
||||
return;
|
||||
}
|
||||
|
||||
/* pid < -1: send to process group |pid|. */
|
||||
int r = signal_send_to_pgrp(-pid, sig);
|
||||
if (r < 0) {
|
||||
errno = -r;
|
||||
args->ret = -1;
|
||||
return;
|
||||
}
|
||||
args->ret = 0;
|
||||
}
|
||||
@@ -0,0 +1,173 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include "arch/x86_64/cpu/reg.h"
|
||||
|
||||
/*
|
||||
* KirkOS in-kernel signal model.
|
||||
*
|
||||
* ABI-compatible with mlibc's abi-bits/signal.h on the userspace side:
|
||||
* - Signal numbers (1..31) match Linux/glibc/mlibc.
|
||||
* - sigset_t is the same 1024-bit array of uint64_t.
|
||||
* - struct sigaction has the same field layout and offsets.
|
||||
* - SA_* flags use the same bit values.
|
||||
*
|
||||
* What this file does NOT cover (intentionally out of scope for the first
|
||||
* pass): real-time signals (SIGRTMIN..SIGRTMAX), sigqueue, sigaltstack,
|
||||
* SA_SIGINFO three-argument handlers (the kernel always calls the handler
|
||||
* with a single int argument — userspace handlers that requested
|
||||
* SA_SIGINFO get undefined values for the siginfo_t* and ucontext_t*
|
||||
* pointers).
|
||||
*/
|
||||
|
||||
/* One past the highest signal number: the k_sig*set() helpers accept
 * 1 <= sig < NSIG. */
#define NSIG        65

/* ── Standard signal numbers ────────────────────────────────────────────── */
#define KSIGHUP      1
#define KSIGINT      2
#define KSIGQUIT     3
#define KSIGILL      4
#define KSIGTRAP     5
#define KSIGABRT     6
#define KSIGBUS      7
#define KSIGFPE      8
#define KSIGKILL     9  /* sigaction rejects it; always stripped from masks */
#define KSIGUSR1    10
#define KSIGSEGV    11
#define KSIGUSR2    12
#define KSIGPIPE    13
#define KSIGALRM    14
#define KSIGTERM    15
#define KSIGSTKFLT  16
#define KSIGCHLD    17
#define KSIGCONT    18
#define KSIGSTOP    19  /* sigaction rejects it; always stripped from masks */
#define KSIGTSTP    20
#define KSIGTTIN    21
#define KSIGTTOU    22
#define KSIGURG     23
#define KSIGXCPU    24
#define KSIGXFSZ    25
#define KSIGVTALRM  26
#define KSIGPROF    27
#define KSIGWINCH   28
#define KSIGIO      29
#define KSIGPOLL    29  /* same number as KSIGIO */
#define KSIGPWR     30
#define KSIGSYS     31
|
||||
|
||||
/* ── sigaction flags ────────────────────────────────────────────────────── */
|
||||
#define KSA_NOCLDSTOP  0x00000001
#define KSA_NOCLDWAIT  0x00000002
#define KSA_SIGINFO    0x00000004  /* accepted, but handlers still get one int arg */
#define KSA_RESTORER   0x04000000  /* sa_restorer is required when this is set */
#define KSA_ONSTACK    0x08000000
#define KSA_RESTART    0x10000000
#define KSA_NODEFER    0x40000000
#define KSA_RESETHAND  0x80000000

/* ── SIG_DFL / SIG_IGN sentinel pointers ────────────────────────────────── */
/* Stored in k_sigaction.sa_handler in place of a real function pointer. */
#define KSIG_DFL  ((void *)0)
#define KSIG_IGN  ((void *)1)

/* ── sigprocmask `how` values ───────────────────────────────────────────── */
#define KSIG_BLOCK    0  /* OR the given set into the blocked mask */
#define KSIG_UNBLOCK  1  /* clear the given set from the blocked mask */
#define KSIG_SETMASK  2  /* replace the blocked mask with the given set */
|
||||
|
||||
/* ── sigset_t mirroring mlibc layout (16 × uint64_t = 1024 bits) ────────── */
|
||||
typedef struct {
|
||||
uint64_t bits[16];
|
||||
} k_sigset_t;
|
||||
|
||||
static inline void k_sigemptyset(k_sigset_t *s) {
|
||||
for (int i = 0; i < 16; i++) s->bits[i] = 0;
|
||||
}
|
||||
|
||||
static inline void k_sigfillset(k_sigset_t *s) {
|
||||
for (int i = 0; i < 16; i++) s->bits[i] = ~(uint64_t)0;
|
||||
}
|
||||
|
||||
static inline void k_sigaddset(k_sigset_t *s, int sig) {
|
||||
if (sig < 1 || sig >= NSIG) return;
|
||||
s->bits[(sig - 1) / 64] |= (uint64_t)1 << ((sig - 1) % 64);
|
||||
}
|
||||
|
||||
static inline void k_sigdelset(k_sigset_t *s, int sig) {
|
||||
if (sig < 1 || sig >= NSIG) return;
|
||||
s->bits[(sig - 1) / 64] &= ~((uint64_t)1 << ((sig - 1) % 64));
|
||||
}
|
||||
|
||||
static inline bool k_sigismember(const k_sigset_t *s, int sig) {
|
||||
if (sig < 1 || sig >= NSIG) return false;
|
||||
return (s->bits[(sig - 1) / 64] >> ((sig - 1) % 64)) & 1;
|
||||
}
|
||||
|
||||
static inline bool k_sigisempty(const k_sigset_t *s) {
|
||||
for (int i = 0; i < 16; i++) if (s->bits[i]) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void k_sigorset(k_sigset_t *dst,
|
||||
const k_sigset_t *a, const k_sigset_t *b) {
|
||||
for (int i = 0; i < 16; i++) dst->bits[i] = a->bits[i] | b->bits[i];
|
||||
}
|
||||
|
||||
/* ── Kernel-side sigaction (mirrors userspace layout) ───────────────────── */
|
||||
struct k_sigaction {
|
||||
void *sa_handler; /* SIG_DFL / SIG_IGN / function ptr */
|
||||
uint64_t sa_flags;
|
||||
void (*sa_restorer)(void); /* required if KSA_RESTORER set */
|
||||
k_sigset_t sa_mask;
|
||||
};
|
||||
|
||||
/* ── Default action for an unhandled signal ─────────────────────────────── */
|
||||
/* What happens to a signal whose disposition is left at the default. */
enum sig_default {
    SIG_ACT_TERM = 0, /* terminate the process */
    SIG_ACT_CORE = 1, /* terminate as well; no core file is written */
    SIG_ACT_IGN  = 2, /* drop the signal */
    SIG_ACT_STOP = 3, /* stop the process */
    SIG_ACT_CONT = 4, /* resume a stopped process */
};

/* Look up the default action for signal number `sig`. */
enum sig_default sig_default_action(int sig);
|
||||
|
||||
/* ── Forward decls ──────────────────────────────────────────────────────── */
|
||||
struct thread;
|
||||
struct process;
|
||||
struct syscall_arguments;
|
||||
|
||||
/* ── Per-process signal init / reset ────────────────────────────────────── */
|
||||
void signal_init_process(struct process *p);
|
||||
void signal_reset_on_exec(struct process *p);
|
||||
void signal_inherit_on_fork(struct process *parent, struct process *child);
|
||||
|
||||
/* ── Send-a-signal entry points ─────────────────────────────────────────── */
|
||||
|
||||
/* Queue a signal on a specific process. Returns 0 on success, -ESRCH if
|
||||
* the process is killed/cleaned up. Handles default action delivery
|
||||
* (SIGKILL/SIGSTOP/SIGCONT) inline. */
|
||||
int signal_send_to_process(struct process *p, int sig);
|
||||
|
||||
/* Queue a signal on every process whose pgid matches. Returns 0 if at
|
||||
* least one process received it, -ESRCH otherwise. */
|
||||
int signal_send_to_pgrp(int64_t pgid, int sig);
|
||||
|
||||
/* ── Userspace-return-path hook ─────────────────────────────────────────── */
|
||||
|
||||
/* Called from the syscall return path and from the IRQ return path,
|
||||
* guarded by (reg->cs & 3) == 3 (returning to user mode). If a deliverable
|
||||
* signal is pending, this builds a sigframe on the user stack and rewrites
|
||||
* `reg` so the iret/sysret enters the handler. Returns true if it dispatched. */
|
||||
bool signal_check_and_dispatch(registers_t *reg);
|
||||
|
||||
/* ── Signal-related syscalls (registered from main.c) ───────────────────── */
|
||||
void syscall_sigaction(struct syscall_arguments *args);
|
||||
void syscall_sigprocmask(struct syscall_arguments *args);
|
||||
void syscall_sigpending(struct syscall_arguments *args);
|
||||
void syscall_sigsuspend(struct syscall_arguments *args);
|
||||
void syscall_pause(struct syscall_arguments *args);
|
||||
void syscall_kill(struct syscall_arguments *args); /* replaces sched.c version */
|
||||
void syscall_sigreturn(struct syscall_arguments *args);
|
||||
@@ -12,6 +12,11 @@ struct syscall_arguments {
|
||||
uint64_t args4;
|
||||
uint64_t args5;
|
||||
uint64_t ret;
|
||||
/* Pointer to the kernel-stack registers_t for this syscall — set by
|
||||
* the arch-side syscall_handler. Lets handlers that need to munge
|
||||
* userspace register state (e.g. sigreturn) reach into it.
|
||||
* NULL if the syscall was dispatched via an alternate path. */
|
||||
void *reg_ptr;
|
||||
};
|
||||
|
||||
/* Signature shared by the syscall implementations: the arguments arrive in
 * *args and the result is handed back through args->ret. */
typedef void (*syscall_handler_t)(struct syscall_arguments *args);
|
||||
|
||||
Reference in New Issue
Block a user