diff --git a/ext2_root/bin/basename b/ext2_root/bin/basename new file mode 100755 index 0000000..31bdc73 Binary files /dev/null and b/ext2_root/bin/basename differ diff --git a/ext2_root/bin/bc b/ext2_root/bin/bc new file mode 100755 index 0000000..1e757cf Binary files /dev/null and b/ext2_root/bin/bc differ diff --git a/ext2_root/bin/cal b/ext2_root/bin/cal new file mode 100755 index 0000000..cca6aa2 Binary files /dev/null and b/ext2_root/bin/cal differ diff --git a/ext2_root/bin/cat b/ext2_root/bin/cat new file mode 100755 index 0000000..343e18c Binary files /dev/null and b/ext2_root/bin/cat differ diff --git a/ext2_root/bin/chgrp b/ext2_root/bin/chgrp new file mode 100755 index 0000000..e16c2c4 Binary files /dev/null and b/ext2_root/bin/chgrp differ diff --git a/ext2_root/bin/chmod b/ext2_root/bin/chmod new file mode 100755 index 0000000..3a9b9b5 Binary files /dev/null and b/ext2_root/bin/chmod differ diff --git a/ext2_root/bin/chown b/ext2_root/bin/chown new file mode 100755 index 0000000..a2889c3 Binary files /dev/null and b/ext2_root/bin/chown differ diff --git a/ext2_root/bin/chroot b/ext2_root/bin/chroot new file mode 100755 index 0000000..821ee7d Binary files /dev/null and b/ext2_root/bin/chroot differ diff --git a/ext2_root/bin/cksum b/ext2_root/bin/cksum new file mode 100755 index 0000000..61de0ae Binary files /dev/null and b/ext2_root/bin/cksum differ diff --git a/ext2_root/bin/cmp b/ext2_root/bin/cmp new file mode 100755 index 0000000..21278a9 Binary files /dev/null and b/ext2_root/bin/cmp differ diff --git a/ext2_root/bin/comm b/ext2_root/bin/comm new file mode 100755 index 0000000..5c37247 Binary files /dev/null and b/ext2_root/bin/comm differ diff --git a/ext2_root/bin/cp b/ext2_root/bin/cp new file mode 100755 index 0000000..6d549e5 Binary files /dev/null and b/ext2_root/bin/cp differ diff --git a/ext2_root/bin/cron b/ext2_root/bin/cron new file mode 100755 index 0000000..d2cb618 Binary files /dev/null and b/ext2_root/bin/cron differ diff --git 
a/ext2_root/bin/cut b/ext2_root/bin/cut new file mode 100755 index 0000000..40afb2e Binary files /dev/null and b/ext2_root/bin/cut differ diff --git a/ext2_root/bin/date b/ext2_root/bin/date new file mode 100755 index 0000000..638f0b0 Binary files /dev/null and b/ext2_root/bin/date differ diff --git a/ext2_root/bin/dc b/ext2_root/bin/dc new file mode 100755 index 0000000..b2aea44 Binary files /dev/null and b/ext2_root/bin/dc differ diff --git a/ext2_root/bin/dd b/ext2_root/bin/dd new file mode 100755 index 0000000..218c1a0 Binary files /dev/null and b/ext2_root/bin/dd differ diff --git a/ext2_root/bin/dirname b/ext2_root/bin/dirname new file mode 100755 index 0000000..f0ea782 Binary files /dev/null and b/ext2_root/bin/dirname differ diff --git a/ext2_root/bin/du b/ext2_root/bin/du new file mode 100755 index 0000000..422d4eb Binary files /dev/null and b/ext2_root/bin/du differ diff --git a/ext2_root/bin/echo b/ext2_root/bin/echo new file mode 100755 index 0000000..09b6d87 Binary files /dev/null and b/ext2_root/bin/echo differ diff --git a/ext2_root/bin/ed b/ext2_root/bin/ed new file mode 100755 index 0000000..4ad1414 Binary files /dev/null and b/ext2_root/bin/ed differ diff --git a/ext2_root/bin/env b/ext2_root/bin/env new file mode 100755 index 0000000..1fa4561 Binary files /dev/null and b/ext2_root/bin/env differ diff --git a/ext2_root/bin/expand b/ext2_root/bin/expand new file mode 100755 index 0000000..bd70cd3 Binary files /dev/null and b/ext2_root/bin/expand differ diff --git a/ext2_root/bin/expr b/ext2_root/bin/expr new file mode 100755 index 0000000..27a6dd3 Binary files /dev/null and b/ext2_root/bin/expr differ diff --git a/ext2_root/bin/false b/ext2_root/bin/false new file mode 100755 index 0000000..a4fc062 Binary files /dev/null and b/ext2_root/bin/false differ diff --git a/ext2_root/bin/find b/ext2_root/bin/find new file mode 100755 index 0000000..fb25320 Binary files /dev/null and b/ext2_root/bin/find differ diff --git a/ext2_root/bin/flock 
b/ext2_root/bin/flock new file mode 100755 index 0000000..9997b4b Binary files /dev/null and b/ext2_root/bin/flock differ diff --git a/ext2_root/bin/fold b/ext2_root/bin/fold new file mode 100755 index 0000000..95671c5 Binary files /dev/null and b/ext2_root/bin/fold differ diff --git a/ext2_root/bin/getconf b/ext2_root/bin/getconf new file mode 100755 index 0000000..62e00fa Binary files /dev/null and b/ext2_root/bin/getconf differ diff --git a/ext2_root/bin/grep b/ext2_root/bin/grep new file mode 100755 index 0000000..9e7d296 Binary files /dev/null and b/ext2_root/bin/grep differ diff --git a/ext2_root/bin/head b/ext2_root/bin/head new file mode 100755 index 0000000..56b0b0e Binary files /dev/null and b/ext2_root/bin/head differ diff --git a/ext2_root/bin/hostname b/ext2_root/bin/hostname new file mode 100755 index 0000000..45423cc Binary files /dev/null and b/ext2_root/bin/hostname differ diff --git a/ext2_root/bin/join b/ext2_root/bin/join new file mode 100755 index 0000000..87b45c1 Binary files /dev/null and b/ext2_root/bin/join differ diff --git a/ext2_root/bin/kill b/ext2_root/bin/kill new file mode 100755 index 0000000..98f7332 Binary files /dev/null and b/ext2_root/bin/kill differ diff --git a/ext2_root/bin/link b/ext2_root/bin/link new file mode 100755 index 0000000..0793e2a Binary files /dev/null and b/ext2_root/bin/link differ diff --git a/ext2_root/bin/ln b/ext2_root/bin/ln new file mode 100755 index 0000000..488e429 Binary files /dev/null and b/ext2_root/bin/ln differ diff --git a/ext2_root/bin/logger b/ext2_root/bin/logger new file mode 100755 index 0000000..8dc42fd Binary files /dev/null and b/ext2_root/bin/logger differ diff --git a/ext2_root/bin/logname b/ext2_root/bin/logname new file mode 100755 index 0000000..df0e093 Binary files /dev/null and b/ext2_root/bin/logname differ diff --git a/ext2_root/bin/ls b/ext2_root/bin/ls index 0b28261..c024873 100755 Binary files a/ext2_root/bin/ls and b/ext2_root/bin/ls differ diff --git a/ext2_root/bin/md5sum 
b/ext2_root/bin/md5sum new file mode 100755 index 0000000..7c81303 Binary files /dev/null and b/ext2_root/bin/md5sum differ diff --git a/ext2_root/bin/mkdir b/ext2_root/bin/mkdir new file mode 100755 index 0000000..255bb83 Binary files /dev/null and b/ext2_root/bin/mkdir differ diff --git a/ext2_root/bin/mkfifo b/ext2_root/bin/mkfifo new file mode 100755 index 0000000..f72d7af Binary files /dev/null and b/ext2_root/bin/mkfifo differ diff --git a/ext2_root/bin/mknod b/ext2_root/bin/mknod new file mode 100755 index 0000000..34c5c8e Binary files /dev/null and b/ext2_root/bin/mknod differ diff --git a/ext2_root/bin/mktemp b/ext2_root/bin/mktemp new file mode 100755 index 0000000..e9debe8 Binary files /dev/null and b/ext2_root/bin/mktemp differ diff --git a/ext2_root/bin/mv b/ext2_root/bin/mv new file mode 100755 index 0000000..01acebe Binary files /dev/null and b/ext2_root/bin/mv differ diff --git a/ext2_root/bin/nice b/ext2_root/bin/nice new file mode 100755 index 0000000..d4dc069 Binary files /dev/null and b/ext2_root/bin/nice differ diff --git a/ext2_root/bin/nl b/ext2_root/bin/nl new file mode 100755 index 0000000..9e3bd9c Binary files /dev/null and b/ext2_root/bin/nl differ diff --git a/ext2_root/bin/nohup b/ext2_root/bin/nohup new file mode 100755 index 0000000..f60f3f4 Binary files /dev/null and b/ext2_root/bin/nohup differ diff --git a/ext2_root/bin/od b/ext2_root/bin/od new file mode 100755 index 0000000..1948e56 Binary files /dev/null and b/ext2_root/bin/od differ diff --git a/ext2_root/bin/oksh b/ext2_root/bin/oksh index 73f7738..2d344a5 100755 Binary files a/ext2_root/bin/oksh and b/ext2_root/bin/oksh differ diff --git a/ext2_root/bin/paste b/ext2_root/bin/paste new file mode 100755 index 0000000..9751a9f Binary files /dev/null and b/ext2_root/bin/paste differ diff --git a/ext2_root/bin/pathchk b/ext2_root/bin/pathchk new file mode 100755 index 0000000..845714f Binary files /dev/null and b/ext2_root/bin/pathchk differ diff --git a/ext2_root/bin/printenv 
b/ext2_root/bin/printenv new file mode 100755 index 0000000..f87d77c Binary files /dev/null and b/ext2_root/bin/printenv differ diff --git a/ext2_root/bin/printf b/ext2_root/bin/printf new file mode 100755 index 0000000..08d22c2 Binary files /dev/null and b/ext2_root/bin/printf differ diff --git a/ext2_root/bin/pwd b/ext2_root/bin/pwd index a23cfdb..2b54cad 100755 Binary files a/ext2_root/bin/pwd and b/ext2_root/bin/pwd differ diff --git a/ext2_root/bin/readlink b/ext2_root/bin/readlink new file mode 100755 index 0000000..440c85f Binary files /dev/null and b/ext2_root/bin/readlink differ diff --git a/ext2_root/bin/renice b/ext2_root/bin/renice new file mode 100755 index 0000000..72b1cea Binary files /dev/null and b/ext2_root/bin/renice differ diff --git a/ext2_root/bin/rev b/ext2_root/bin/rev new file mode 100755 index 0000000..92a39a5 Binary files /dev/null and b/ext2_root/bin/rev differ diff --git a/ext2_root/bin/rm b/ext2_root/bin/rm new file mode 100755 index 0000000..0e9b166 Binary files /dev/null and b/ext2_root/bin/rm differ diff --git a/ext2_root/bin/rmdir b/ext2_root/bin/rmdir new file mode 100755 index 0000000..ea0e3ea Binary files /dev/null and b/ext2_root/bin/rmdir differ diff --git a/ext2_root/bin/sed b/ext2_root/bin/sed new file mode 100755 index 0000000..15d10c9 Binary files /dev/null and b/ext2_root/bin/sed differ diff --git a/ext2_root/bin/seq b/ext2_root/bin/seq new file mode 100755 index 0000000..0068ae8 Binary files /dev/null and b/ext2_root/bin/seq differ diff --git a/ext2_root/bin/setsid b/ext2_root/bin/setsid new file mode 100755 index 0000000..fc41b99 Binary files /dev/null and b/ext2_root/bin/setsid differ diff --git a/ext2_root/bin/sh b/ext2_root/bin/sh new file mode 120000 index 0000000..8c7d3b6 --- /dev/null +++ b/ext2_root/bin/sh @@ -0,0 +1 @@ +oksh \ No newline at end of file diff --git a/ext2_root/bin/sha1sum b/ext2_root/bin/sha1sum new file mode 100755 index 0000000..704312c Binary files /dev/null and b/ext2_root/bin/sha1sum differ 
diff --git a/ext2_root/bin/sha224sum b/ext2_root/bin/sha224sum new file mode 100755 index 0000000..98b18ae Binary files /dev/null and b/ext2_root/bin/sha224sum differ diff --git a/ext2_root/bin/sha256sum b/ext2_root/bin/sha256sum new file mode 100755 index 0000000..46f2538 Binary files /dev/null and b/ext2_root/bin/sha256sum differ diff --git a/ext2_root/bin/sha384sum b/ext2_root/bin/sha384sum new file mode 100755 index 0000000..1645276 Binary files /dev/null and b/ext2_root/bin/sha384sum differ diff --git a/ext2_root/bin/sha512-224sum b/ext2_root/bin/sha512-224sum new file mode 100755 index 0000000..bbe7b11 Binary files /dev/null and b/ext2_root/bin/sha512-224sum differ diff --git a/ext2_root/bin/sha512-256sum b/ext2_root/bin/sha512-256sum new file mode 100755 index 0000000..540c9a6 Binary files /dev/null and b/ext2_root/bin/sha512-256sum differ diff --git a/ext2_root/bin/sha512sum b/ext2_root/bin/sha512sum new file mode 100755 index 0000000..897f86b Binary files /dev/null and b/ext2_root/bin/sha512sum differ diff --git a/ext2_root/bin/sleep b/ext2_root/bin/sleep new file mode 100755 index 0000000..258af13 Binary files /dev/null and b/ext2_root/bin/sleep differ diff --git a/ext2_root/bin/sort b/ext2_root/bin/sort new file mode 100755 index 0000000..e77fe99 Binary files /dev/null and b/ext2_root/bin/sort differ diff --git a/ext2_root/bin/split b/ext2_root/bin/split new file mode 100755 index 0000000..990828f Binary files /dev/null and b/ext2_root/bin/split differ diff --git a/ext2_root/bin/sponge b/ext2_root/bin/sponge new file mode 100755 index 0000000..8fbe14a Binary files /dev/null and b/ext2_root/bin/sponge differ diff --git a/ext2_root/bin/strings b/ext2_root/bin/strings new file mode 100755 index 0000000..a989707 Binary files /dev/null and b/ext2_root/bin/strings differ diff --git a/ext2_root/bin/sync b/ext2_root/bin/sync new file mode 100755 index 0000000..535d360 Binary files /dev/null and b/ext2_root/bin/sync differ diff --git a/ext2_root/bin/tail 
b/ext2_root/bin/tail new file mode 100755 index 0000000..d4366cb Binary files /dev/null and b/ext2_root/bin/tail differ diff --git a/ext2_root/bin/tar b/ext2_root/bin/tar new file mode 100755 index 0000000..96a7bc5 Binary files /dev/null and b/ext2_root/bin/tar differ diff --git a/ext2_root/bin/tee b/ext2_root/bin/tee new file mode 100755 index 0000000..77335f8 Binary files /dev/null and b/ext2_root/bin/tee differ diff --git a/ext2_root/bin/test b/ext2_root/bin/test new file mode 100755 index 0000000..63732b8 Binary files /dev/null and b/ext2_root/bin/test differ diff --git a/ext2_root/bin/tftp b/ext2_root/bin/tftp new file mode 100755 index 0000000..25c050c Binary files /dev/null and b/ext2_root/bin/tftp differ diff --git a/ext2_root/bin/time b/ext2_root/bin/time new file mode 100755 index 0000000..ecdf89d Binary files /dev/null and b/ext2_root/bin/time differ diff --git a/ext2_root/bin/touch b/ext2_root/bin/touch new file mode 100755 index 0000000..0051985 Binary files /dev/null and b/ext2_root/bin/touch differ diff --git a/ext2_root/bin/tr b/ext2_root/bin/tr new file mode 100755 index 0000000..fe100d3 Binary files /dev/null and b/ext2_root/bin/tr differ diff --git a/ext2_root/bin/true b/ext2_root/bin/true new file mode 100755 index 0000000..86ec0fb Binary files /dev/null and b/ext2_root/bin/true differ diff --git a/ext2_root/bin/tsort b/ext2_root/bin/tsort new file mode 100755 index 0000000..98d2eb8 Binary files /dev/null and b/ext2_root/bin/tsort differ diff --git a/ext2_root/bin/tty b/ext2_root/bin/tty new file mode 100755 index 0000000..1bb4e25 Binary files /dev/null and b/ext2_root/bin/tty differ diff --git a/ext2_root/bin/uname b/ext2_root/bin/uname new file mode 100755 index 0000000..002058b Binary files /dev/null and b/ext2_root/bin/uname differ diff --git a/ext2_root/bin/unexpand b/ext2_root/bin/unexpand new file mode 100755 index 0000000..543510f Binary files /dev/null and b/ext2_root/bin/unexpand differ diff --git a/ext2_root/bin/uniq 
b/ext2_root/bin/uniq new file mode 100755 index 0000000..8a05c2d Binary files /dev/null and b/ext2_root/bin/uniq differ diff --git a/ext2_root/bin/unlink b/ext2_root/bin/unlink new file mode 100755 index 0000000..3485971 Binary files /dev/null and b/ext2_root/bin/unlink differ diff --git a/ext2_root/bin/uudecode b/ext2_root/bin/uudecode new file mode 100755 index 0000000..4f45606 Binary files /dev/null and b/ext2_root/bin/uudecode differ diff --git a/ext2_root/bin/uuencode b/ext2_root/bin/uuencode new file mode 100755 index 0000000..59291ce Binary files /dev/null and b/ext2_root/bin/uuencode differ diff --git a/ext2_root/bin/wc b/ext2_root/bin/wc new file mode 100755 index 0000000..222f94e Binary files /dev/null and b/ext2_root/bin/wc differ diff --git a/ext2_root/bin/which b/ext2_root/bin/which new file mode 100755 index 0000000..05af20d Binary files /dev/null and b/ext2_root/bin/which differ diff --git a/ext2_root/bin/whoami b/ext2_root/bin/whoami new file mode 100755 index 0000000..fa4de0e Binary files /dev/null and b/ext2_root/bin/whoami differ diff --git a/ext2_root/bin/xargs b/ext2_root/bin/xargs new file mode 100755 index 0000000..08850d1 Binary files /dev/null and b/ext2_root/bin/xargs differ diff --git a/ext2_root/bin/xinstall b/ext2_root/bin/xinstall new file mode 100755 index 0000000..962d78a Binary files /dev/null and b/ext2_root/bin/xinstall differ diff --git a/ext2_root/bin/yes b/ext2_root/bin/yes new file mode 100755 index 0000000..5228058 Binary files /dev/null and b/ext2_root/bin/yes differ diff --git a/ext2_root/usr/lib/libc.a b/ext2_root/usr/lib/libc.a index 348db67..defaf67 100644 Binary files a/ext2_root/usr/lib/libc.a and b/ext2_root/usr/lib/libc.a differ diff --git a/ramdisk.tar b/ramdisk.tar index bb66efe..365674e 100644 Binary files a/ramdisk.tar and b/ramdisk.tar differ diff --git a/src/arch/x86_64/boot/isr.c b/src/arch/x86_64/boot/isr.c index 7c09b2d..6241913 100644 --- a/src/arch/x86_64/boot/isr.c +++ b/src/arch/x86_64/boot/isr.c @@ 
-319,6 +319,12 @@ void isr_handle(registers_t *r) { if (r->isrNumber < 256 && event_handlers[r->isrNumber] != NULL) { event_handlers[r->isrNumber](r); + /* Run pending signals on return-to-userspace. Skipped on ring-0 + * returns; signal_check_and_dispatch guards on it internally too. */ + extern bool signal_check_and_dispatch(registers_t *); + if (r->cs & 0x3) { + signal_check_and_dispatch(r); + } } else { if (r->isrNumber < 32) { if (r->cs & 0x3) { diff --git a/src/arch/x86_64/cpu/syscall_handle.c b/src/arch/x86_64/cpu/syscall_handle.c index e45c60a..3bb3618 100644 --- a/src/arch/x86_64/cpu/syscall_handle.c +++ b/src/arch/x86_64/cpu/syscall_handle.c @@ -4,6 +4,7 @@ #include "mm/vmm.h" #include "sched/sched.h" #include "sched/syscall.h" +#include "sched/signal.h" #include "arch/x86_64/boot/isr.h" #include "arch/x86_64/sys/prcb.h" @@ -23,16 +24,29 @@ void syscall_handler(registers_t *reg) { .args3 = reg->r10, .args4 = reg->r8, .args5 = reg->r9, - .ret = reg->rax}; + .ret = reg->rax, + .reg_ptr = reg}; syscall_handle(&args); int64_t ret = (int64_t)args.ret; - if (ret < 0) { + if (args.syscall_nr == 15 /* SYS_sigreturn */) { + /* sigreturn already mutated *reg directly with the saved user + * state. Do NOT overwrite reg->rax here. */ + } else if (ret < 0) { ret = -((int)errno); reg->rax = ret; - } else + } else { reg->rax = args.ret; + } + + /* Pending-signal dispatch on return to userspace. signal_check_and_dispatch + * is a no-op when returning to ring 0 (reg->cs is not checked here: only + * CPL=3 code is expected to execute syscall, though the CPU itself does not enforce that). It mutates `reg` in + * place — for the sysret path, that means writing reg->rcx (used as new + * rip) and reg->r11 (used as new rflags), and updating the per-CPU + * user_stack (used as new rsp). signal_check_and_dispatch handles that. 
*/ + signal_check_and_dispatch(reg); } void syscall_install_handler(void) { @@ -54,14 +68,29 @@ bool syscall_helper_copy_to_user(uintptr_t user_addr, void *buffer, struct process *proc = sched_get_running_thread()->mother_proc; struct pagemap *target_pagemap = proc->process_pagemap; - uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr); + /* vmm_virt_to_kernel resolves ONE page at a time via the direct map. + * Two consecutive user virtual pages map to physical frames that are + * almost never contiguous in the kernel direct map, so we must walk + * a page at a time and re-translate at each page boundary. */ + uint8_t *src = (uint8_t *)buffer; + while (count > 0) { + uint64_t page_off = user_addr & (PAGE_SIZE - 1); + uint64_t chunk = PAGE_SIZE - page_off; + if (chunk > count) chunk = count; - if (!kernel_addr) { - errno = EFAULT; - return false; + uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr); + if (!kernel_addr) { + vmm_switch_pagemap(target_pagemap); + errno = EFAULT; + return false; + } + memcpy((void *)kernel_addr, src, chunk); + + user_addr += chunk; + src += chunk; + count -= chunk; } - memcpy((void *)kernel_addr, buffer, count); vmm_switch_pagemap(target_pagemap); return true; } @@ -73,13 +102,26 @@ bool syscall_helper_copy_from_user(uintptr_t user_addr, void *buffer, struct process *proc = sched_get_running_thread()->mother_proc; struct pagemap *target_pagemap = proc->process_pagemap; - uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr); + /* Same per-page walk as copy_to_user — see comment there. 
*/ + uint8_t *dst = (uint8_t *)buffer; + while (count > 0) { + uint64_t page_off = user_addr & (PAGE_SIZE - 1); + uint64_t chunk = PAGE_SIZE - page_off; + if (chunk > count) chunk = count; - if (!kernel_addr) { - errno = EFAULT; - return false; + uint64_t kernel_addr = vmm_virt_to_kernel(target_pagemap, user_addr); + if (!kernel_addr) { + vmm_switch_pagemap(target_pagemap); + errno = EFAULT; + return false; + } + memcpy(dst, (void *)kernel_addr, chunk); + + user_addr += chunk; + dst += chunk; + count -= chunk; } - memcpy(buffer, (void *)kernel_addr, count); + vmm_switch_pagemap(target_pagemap); return true; } \ No newline at end of file diff --git a/src/arch/x86_64/debug/breakpoint.c b/src/arch/x86_64/debug/breakpoint.c index fd2a33b..cb02c36 100644 --- a/src/arch/x86_64/debug/breakpoint.c +++ b/src/arch/x86_64/debug/breakpoint.c @@ -107,6 +107,15 @@ void breakpoint_handler(registers_t *reg) { pause_other_cpus(); + + /* Mask the UART RX IRQ for the duration of the debugger. We are in + * an INT3 context with IRQs disabled, but the moment we re-enable + * them at exit (or any callee re-enables them) the deferred serial + * IRQ would fire and drain pending bytes into the input ring — + * stealing the keystrokes the debugger expects to read via the + * polled serial_getchar() loop below. */ + serial_disable_rx_irq(); + kprintffos(0, "=========== Start of dumps =========\n"); kprintffos(0, "Breakpoint hit on CPU%u\n", prcb_return_current_cpu()->cpu_number); @@ -149,5 +158,10 @@ void breakpoint_handler(registers_t *reg) { } } + /* Re-arm the UART RX IRQ before letting the system resume normal + * operation; also drains anything that arrived while we polled. 
*/ + serial_enable_rx_irq(); + + unpause_other_cpus(); } diff --git a/src/arch/x86_64/fw/acpi.c b/src/arch/x86_64/fw/acpi.c index 8193691..e702613 100644 --- a/src/arch/x86_64/fw/acpi.c +++ b/src/arch/x86_64/fw/acpi.c @@ -6,6 +6,7 @@ #include "arch/x86_64/sys/timer.h" #include "madt.h" #include "libk/debug.h" +#include "arch/x86_64/bus/pci.h" void acpi_init() { @@ -52,6 +53,7 @@ void acpi_init() { timer_init(); + pci_init(); madt_init(); } diff --git a/src/arch/x86_64/sched/prctl.c b/src/arch/x86_64/sched/prctl.c index 54836cb..27639b1 100644 --- a/src/arch/x86_64/sched/prctl.c +++ b/src/arch/x86_64/sched/prctl.c @@ -10,6 +10,7 @@ #include "sched/syscall.h" #include "sched/sched.h" #include "libk/errno.h" +#include "libk/debug.h" #define ARCH_SET_GS 0x1001 #define ARCH_SET_FS 0x1002 @@ -31,6 +32,7 @@ void syscall_prctl(struct syscall_arguments *args) { case ARCH_SET_FS: { sched_get_running_thread()->fs_base = value; set_fs_base(sched_get_running_thread()->fs_base); + args->ret = 0; break; } case ARCH_GET_FS: diff --git a/src/arch/x86_64/serial/serial.c b/src/arch/x86_64/serial/serial.c index 7cb5c4b..f54fb4d 100644 --- a/src/arch/x86_64/serial/serial.c +++ b/src/arch/x86_64/serial/serial.c @@ -2,8 +2,32 @@ #include #include #include "arch/x86_64/cpu/io.h" +#include "arch/x86_64/sys/apic.h" +#include "arch/x86_64/boot/isr.h" +#include "drivers/input/input.h" +#include "libk/debug.h" + +static void serial_irq_handler(registers_t *r) { + (void)r; + + /* Drain everything currently buffered. A UART FIFO may hold up to + * ~16 bytes if 16550A; better to loop than rely on getting one IRQ + * per byte. */ + while (x86_64_inb(COM1 + 5) & 0x01) { + uint8_t b = x86_64_inb(COM1); + input_push_byte(b); + } + + /* Read IIR to acknowledge any pending interrupt source. Some + * legacy UARTs require this. 
*/ + (void)x86_64_inb(COM1 + 2); + + apic_eoi(); +} + void serial_init(void) { + kprintf("Enabling Serial COM1.\n"); x86_64_outb(COM1 + 1, 0x1); x86_64_outb(COM1 + 3, 0x80); x86_64_outb(COM1, 0x1); @@ -11,6 +35,19 @@ void serial_init(void) { x86_64_outb(COM1 + 3, 0x3); x86_64_outb(COM1 + 2, 0xC7); x86_64_outb(COM1 + 4, 0xB); + + /* Enable the receive-data-available interrupt (IER bit 0). */ + x86_64_outb(COM1 + 1, 0x01); + + /* Drain any byte the BIOS / QEMU may have left in the RBR before we + * start listening, otherwise IIR may report a stale interrupt. */ + while (x86_64_inb(COM1 + 5) & 0x01) { + (void)x86_64_inb(COM1); + } + + /* Route IRQ 4 → vector 36 in the IOAPIC and install our handler. */ + isr_register_handler(36, serial_irq_handler); + ioapic_redirect_irq(4, 36); } static inline bool is_transmit_empty(void) { @@ -62,3 +99,24 @@ char serial_getchar(void) { } return last_c; } + +void serial_disable_rx_irq(void) { + /* Clear IER bit 0 (Received Data Available Interrupt). The UART + * stops asserting IRQ 4; we go back to polling-only mode. Used by + * the breakpoint debugger so its serial_getchar() polling loop + * actually sees the bytes the user types, instead of having them + * stolen by serial_irq_handler() on the next sti. */ + uint8_t ier = x86_64_inb(COM1 + 1); + x86_64_outb(COM1 + 1, ier & ~0x01); +} + +void serial_enable_rx_irq(void) { + uint8_t ier = x86_64_inb(COM1 + 1); + x86_64_outb(COM1 + 1, ier | 0x01); + /* Drain anything that piled up while we were polling so the next + * IRQ assertion corresponds to a fresh byte. 
*/ + while (x86_64_inb(COM1 + 5) & 0x01) { + (void)x86_64_inb(COM1); + } +} + diff --git a/src/arch/x86_64/serial/serial.h b/src/arch/x86_64/serial/serial.h index b5bf513..f091844 100644 --- a/src/arch/x86_64/serial/serial.h +++ b/src/arch/x86_64/serial/serial.h @@ -7,4 +7,16 @@ void serial_putchar(char ch); void serial_puts(char *str); char serial_get_byte(void); char serial_getchar(void); -int serial_received(void); \ No newline at end of file +int serial_received(void); + + +/* Mask the UART receive-data-available IRQ (IER bit 0). Used by the + * breakpoint debugger so bytes typed while it polls serial_getchar() are + * not silently consumed by serial_irq_handler() and pushed into the input + * ring instead of being delivered to the debugger. */ +void serial_disable_rx_irq(void); + +/* Re-arm the UART receive-data-available IRQ and drain anything that + * accumulated in the RBR while the IRQ was masked. Mirror of + * serial_disable_rx_irq(). */ +void serial_enable_rx_irq(void); \ No newline at end of file diff --git a/src/drivers/fb/fbdev.c b/src/drivers/fb/fbdev.c index ac2c53f..b1b65e0 100644 --- a/src/drivers/fb/fbdev.c +++ b/src/drivers/fb/fbdev.c @@ -123,6 +123,6 @@ void fbdev_init(void) { framebuff_res->stat.st_rdev = resource_create_dev_id(); framebuff_res->stat.st_mode = 0666 | S_IFCHR; - devtmpfs_add_device(framebuff_res, "fbdev"); + devtmpfs_add_device(framebuff_res, "fb0"); vfs_symlink(vfs_root, "/dev/fbdev", "/dev/fb0"); } diff --git a/src/drivers/input/input.c b/src/drivers/input/input.c new file mode 100644 index 0000000..7424477 --- /dev/null +++ b/src/drivers/input/input.c @@ -0,0 +1,133 @@ +#include "input.h" +#include "libk/event.h" +#include "libk/errno.h" +#include "mp/spinlock.h" +#include "arch/x86_64/asm/asm.h" +#include +#include +#include + +#define INPUT_RING_SIZE 1024 + +static uint8_t input_ring[INPUT_RING_SIZE]; +static size_t input_head; /* write index (IRQ producer) */ +static size_t input_tail; /* read index (thread consumer)*/ 
+static size_t input_used; /* number of valid bytes */ +static spinlock_t input_lock = {0}; +static struct event input_event = {0}; + +void input_init(void) { + input_head = 0; + input_tail = 0; + input_used = 0; + /* spinlock_t and struct event are both zero-initialized at file scope, + which matches their idle states (lock=0/no listeners/no pending). */ +} + +void input_push_byte(uint8_t b) { + /* Producer side: called from IRQ context with IF already cleared by the + CPU on interrupt entry. Hold the ring lock only across the ring + mutation; release it before calling event_trigger (which takes its + own spinlock and toggles interrupts internally). */ + spinlock_acquire_or_wait(&input_lock); + + if (input_used < INPUT_RING_SIZE) { + input_ring[input_head] = b; + input_head = (input_head + 1) % INPUT_RING_SIZE; + input_used++; + spinlock_drop(&input_lock); + /* Wake any blocked reader now that the byte is queued. drop=false so a + wake delivered while no thread is parked still counts as pending, + matching the await/trigger contract. */ + event_trigger(&input_event, false); + return; + } + + /* Overflow: silently drop. */ + spinlock_drop(&input_lock); +} + +bool input_has_data(void) { + /* Cheap racy peek; size_t reads are word-sized and atomic on x86_64. */ + return input_used > 0; +} + +/* Internal: copy out up to `count` bytes from the ring into `buf`, handling + wrap-around. Caller MUST hold input_lock and MUST have verified used > 0. + Returns the number of bytes copied. */ +static size_t input_drain_locked(uint8_t *buf, size_t count) { + size_t n = count < input_used ? count : input_used; + + /* First chunk: from tail to end of ring (or n, whichever is smaller). */ + size_t first = INPUT_RING_SIZE - input_tail; + if (first > n) { + first = n; + } + for (size_t i = 0; i < first; i++) { + buf[i] = input_ring[input_tail + i]; + } + + /* Second chunk (only if we wrapped): from start of ring. 
*/ + size_t second = n - first; + for (size_t i = 0; i < second; i++) { + buf[first + i] = input_ring[i]; + } + + input_tail = (input_tail + n) % INPUT_RING_SIZE; + input_used -= n; + return n; +} + +ssize_t input_read(uint8_t *buf, size_t count) { + if (count == 0) { + return 0; + } + + struct event *events_arr[1] = { &input_event }; + + for (;;) { + /* Disable IRQs on this CPU before taking the lock so that the + PS/2 or serial IRQ handler — which also acquires input_lock + from input_push_byte() — cannot be delivered to this CPU while + we hold the lock and deadlock against us. */ + bool old = int_toggle(false); + spinlock_acquire_or_wait(&input_lock); + if (input_used > 0) { + size_t n = input_drain_locked(buf, count); + spinlock_drop(&input_lock); + int_toggle(old); + return (ssize_t)n; + } + spinlock_drop(&input_lock); + int_toggle(old); + + /* Sleep until a producer triggers input_event. event_await handles + the wake/pending race internally, so a byte that arrives between + our drop above and the await call is not lost (event_trigger + bumped pending while no listeners were attached). It also + manages its own IRQ-disable while parking on the event. */ + (void)event_await(events_arr, 1, true); + /* Loop back and re-acquire the lock; another reader may have raced + us and consumed the bytes, in which case we sleep again. */ + } +} + +ssize_t input_try_read(uint8_t *buf, size_t count) { + if (count == 0) { + return 0; + } + + /* Same IRQ-disable wrapper as input_read — see comment there. 
*/ + bool old = int_toggle(false); + spinlock_acquire_or_wait(&input_lock); + if (input_used == 0) { + spinlock_drop(&input_lock); + int_toggle(old); + errno = EAGAIN; + return -1; + } + size_t n = input_drain_locked(buf, count); + spinlock_drop(&input_lock); + int_toggle(old); + return (ssize_t)n; +} diff --git a/src/drivers/input/input.h b/src/drivers/input/input.h new file mode 100644 index 0000000..f193e7b --- /dev/null +++ b/src/drivers/input/input.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include +#include "libk/types.h" + +/* Initialize the shared input ring buffer. Call once during boot before any + IRQ handler can push bytes or any reader can pull them. */ +void input_init(void); + +/* IRQ-safe: called from the keyboard/serial IRQ handlers. Pushes one byte + into the ring. Drops on overflow (just discards -- the user lost a + keystroke, no big deal). Wakes any blocked readers via event_trigger. */ +void input_push_byte(uint8_t b); + +/* Non-blocking peek: returns true if at least one byte is in the ring. The + answer is inherently racy; callers must not depend on monotonicity. */ +bool input_has_data(void); + +/* Blocking read of up to `count` bytes into `buf`. Returns the number actually + read. Sleeps on the input event while the ring is empty (event_await). + Always returns at least 1 byte when it returns (no spurious 0-returns) -- + unless `count == 0`, in which case returns 0 immediately. */ +ssize_t input_read(uint8_t *buf, size_t count); + +/* Non-blocking variant: returns -1 with errno=EAGAIN if no data. Used by + O_NONBLOCK readers. 
*/ +ssize_t input_try_read(uint8_t *buf, size_t count); diff --git a/src/drivers/ps2/kbd.c b/src/drivers/ps2/kbd.c new file mode 100644 index 0000000..1f814bc --- /dev/null +++ b/src/drivers/ps2/kbd.c @@ -0,0 +1,380 @@ +#include "kbd.h" +#include "ps2.h" + +#include "arch/x86_64/cpu/io.h" +#include "arch/x86_64/cpu/reg.h" +#include "arch/x86_64/sys/apic.h" +#include "arch/x86_64/boot/isr.h" +#include "arch/x86_64/asm/asm.h" + +#include "libk/resource.h" +#include "libk/types.h" +#include "libk/errno.h" +#include "libk/debug.h" + +#include "mp/spinlock.h" +#include "fs/devtmpfs.h" + +#include "drivers/input/input.h" + +#include +#include +#include + +/* + * IRQ wiring constants. The IDT vector for the keyboard is 49 (chosen by + * KirkOS convention; mouse would be 60). The I/O APIC entry for ISA IRQ 1 + * is pointed at that vector below. + */ +#define KBD_IRQ 1 +#define KBD_VECTOR 49 + +/* ════════════════════════════════════════════════════════════════════════ + * Scancode-set-1 → ASCII tables (US-QWERTY) + * + * We enabled the controller's set-2 → set-1 translation in ps2_init(), so + * everything we read from port 0x60 is already in set-1. These tables map + * a make-code (high bit clear) directly to its printable byte; entries + * that have no printable form — modifiers, function keys, numeric-pad + * non-printables — are 0. + * + * The shifted table is the standard "hold Shift" mapping. CapsLock only + * affects letters and is composed with Shift via XOR (see translate()). 
+ * + * Cross-checked against https://wiki.osdev.org/PS/2_Keyboard#Scan_Code_Set_1 + * ════════════════════════════════════════════════════════════════════════ */ + +static const char kbd_us_unshift[128] = { + /* 0x00 */ 0, 0x1b, '1', '2', '3', '4', '5', '6', + /* 0x08 */ '7', '8', '9', '0', '-', '=', 0x7f, '\t', + /* 0x10 */ 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', + /* 0x18 */ 'o', 'p', '[', ']', '\n', 0, 'a', 's', + /* 0x20 */ 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', + /* 0x28 */ '\'', '`', 0, '\\', 'z', 'x', 'c', 'v', + /* 0x30 */ 'b', 'n', 'm', ',', '.', '/', 0, '*', + /* 0x38 */ 0, ' ', 0, 0, 0, 0, 0, 0, + /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, '7', + /* 0x48 */ '8', '9', '-', '4', '5', '6', '+', '1', + /* 0x50 */ '2', '3', '0', '.', 0, 0, 0, 0, + /* 0x58 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x68 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x78 */ 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static const char kbd_us_shift[128] = { + /* 0x00 */ 0, 0x1b, '!', '@', '#', '$', '%', '^', + /* 0x08 */ '&', '*', '(', ')', '_', '+', 0x7f, '\t', + /* 0x10 */ 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', + /* 0x18 */ 'O', 'P', '{', '}', '\n', 0, 'A', 'S', + /* 0x20 */ 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', + /* 0x28 */ '"', '~', 0, '|', 'Z', 'X', 'C', 'V', + /* 0x30 */ 'B', 'N', 'M', '<', '>', '?', 0, '*', + /* 0x38 */ 0, ' ', 0, 0, 0, 0, 0, 0, + /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, '7', + /* 0x48 */ '8', '9', '-', '4', '5', '6', '+', '1', + /* 0x50 */ '2', '3', '0', '.', 0, 0, 0, 0, + /* 0x58 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x68 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x78 */ 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* ════════════════════════════════════════════════════════════════════════ + * Modifier state. These are touched only from the IRQ handler, which is + * never re-entered on the same CPU, so plain globals are fine — no lock + * needed for the state itself. 
The /dev/keyboard scratch buffer below is + * shared with userspace readers and DOES need a lock. + * ════════════════════════════════════════════════════════════════════════ */ + +static bool kbd_shift = false; +static bool kbd_ctrl = false; +static bool kbd_alt = false; +static bool kbd_caps = false; +static bool kbd_extended = false; /* set after a 0xE0 prefix byte */ + +/* ════════════════════════════════════════════════════════════════════════ + * /dev/keyboard — raw scancode device. + * + * This is a tiny in-memory ring of the last few raw bytes the IRQ saw. + * Userspace can open /dev/keyboard and read raw set-1 scancodes — handy + * for input-method daemons that want to see modifier release events, key + * repeat rates, etc. The translated cooked stream still flows through the + * shared input ring (input_push_byte) — this device is a secondary path. + * + * On each IRQ we store the raw byte in the ring. If the ring is full the + * oldest byte is overwritten — losing scancodes is preferable to stalling + * the IRQ handler. + * ════════════════════════════════════════════════════════════════════════ */ + +#define KBD_RAW_RING_SZ 256 + +struct kbd_device { + struct resource res; + spinlock_t ring_lock; + uint8_t ring[KBD_RAW_RING_SZ]; + size_t head; /* next write index */ + size_t tail; /* next read index */ + size_t count; /* bytes currently buffered, 0..KBD_RAW_RING_SZ */ +}; + +static struct kbd_device *kbd_dev = NULL; + +static void kbd_raw_push(uint8_t b) { + /* + * Called from IRQ context. Ideally we would try-acquire the lock so the + * handler can never spin on a userspace reader, but a clean trylock + * helper isn't in spinlock.h — so we just take the lock and trust + * that userspace reads are short. The lock is only ever held for the + * duration of a memcpy / index update, never across a sleep. 
+ */ + spinlock_acquire_or_wait(&kbd_dev->ring_lock); + kbd_dev->ring[kbd_dev->head] = b; + kbd_dev->head = (kbd_dev->head + 1) % KBD_RAW_RING_SZ; + if (kbd_dev->count == KBD_RAW_RING_SZ) { + /* Overwrote oldest — advance tail. */ + kbd_dev->tail = (kbd_dev->tail + 1) % KBD_RAW_RING_SZ; + } else { + kbd_dev->count++; + } + spinlock_drop(&kbd_dev->ring_lock); +} + +static ssize_t kbd_dev_read(struct resource *this, + struct f_description *description, + void *buf, off_t offset, size_t count) { + (void)description; + (void)offset; + + if (!buf) { + errno = EFAULT; + return -1; + } + if (count == 0) { + return 0; + } + + uint8_t *out = (uint8_t *)buf; + size_t copied = 0; + + /* Disable IRQs on this CPU before taking ring_lock — kbd_raw_push() + * acquires the same lock from IRQ context (vector 49). If the IRQ + * fired while this thread held ring_lock with IF=1, the handler + * would spin forever and the deadlock-detector would panic. */ + bool old = int_toggle(false); + spinlock_acquire_or_wait(&this->lock); + spinlock_acquire_or_wait(&kbd_dev->ring_lock); + + while (copied < count && kbd_dev->count > 0) { + out[copied++] = kbd_dev->ring[kbd_dev->tail]; + kbd_dev->tail = (kbd_dev->tail + 1) % KBD_RAW_RING_SZ; + kbd_dev->count--; + } + + spinlock_drop(&kbd_dev->ring_lock); + spinlock_drop(&this->lock); + int_toggle(old); + + return (ssize_t)copied; /* 0 when the ring was empty — this read never blocks */ +} + +static ssize_t kbd_dev_write(struct resource *this, + struct f_description *description, + const void *buf, off_t offset, size_t count) { + (void)this; + (void)description; + (void)offset; + (void)buf; + (void)count; + /* Writes to /dev/keyboard are rejected with EINVAL (no LED ioctls yet). 
*/ + errno = EINVAL; + return -1; +} + +/* ════════════════════════════════════════════════════════════════════════ + * Translation helpers + * ════════════════════════════════════════════════════════════════════════ */ + +static void push_ascii(char c) { + /* + * Apply Ctrl-letter collapse here so that ^C / ^Z / ^D show up in the + * TTY's line-discipline layer with the canonical control values. Only + * letters are affected — Ctrl-1, Ctrl-., etc. pass through unchanged. + */ + if (kbd_ctrl && ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) { + c = (char)(c & 0x1f); + } + input_push_byte((uint8_t)c); +} + +static void push_seq(const char *s) { + while (*s) { + input_push_byte((uint8_t)*s++); + } +} + +/* Map an extended (0xE0-prefixed) make-code to an ANSI escape sequence. */ +static void handle_extended_make(uint8_t sc) { + switch (sc) { + case 0x48: push_seq("\x1b[A"); break; /* Up */ + case 0x50: push_seq("\x1b[B"); break; /* Down */ + case 0x4d: push_seq("\x1b[C"); break; /* Right */ + case 0x4b: push_seq("\x1b[D"); break; /* Left */ + case 0x47: push_seq("\x1b[H"); break; /* Home */ + case 0x4f: push_seq("\x1b[F"); break; /* End */ + case 0x53: push_seq("\x1b[3~"); break; /* Delete */ + default: + /* Unknown extended key — drop silently. */ + break; + } +} + +/* ════════════════════════════════════════════════════════════════════════ + * IRQ handler — runs on every keyboard byte. + * + * Flow: + * - read raw byte from 0x60 + * - stash it in the /dev/keyboard ring (raw passthrough) + * - if it's a 0xE0 prefix, set the extended flag and bail + * - if it's an extended scancode, dispatch to handle_extended_make() + * (only on make; releases are ignored) + * - otherwise, branch on modifier vs. 
printable + * - finally, EOI the local APIC + * ════════════════════════════════════════════════════════════════════════ */ +static void kbd_irq_handler(registers_t *r) { + (void)r; + + uint8_t sc = x86_64_inb(PS2_DATA_PORT); + + /* Mirror the byte into /dev/keyboard for raw consumers. */ + if (kbd_dev) { + kbd_raw_push(sc); + } + + /* 0xE0 prefix → next byte is part of an extended sequence. */ + if (sc == 0xe0) { + kbd_extended = true; + apic_eoi(); + return; + } + + /* ── Modifier release events come in as scancode | 0x80 ──────────── */ + if (!kbd_extended) { + switch (sc) { + case 0x9d: kbd_ctrl = false; apic_eoi(); return; /* LCtrl ↑ */ + case 0xaa: kbd_shift = false; apic_eoi(); return; /* LShift ↑*/ + case 0xb6: kbd_shift = false; apic_eoi(); return; /* RShift ↑*/ + case 0xb8: kbd_alt = false; apic_eoi(); return; /* LAlt ↑ */ + default: break; + } + } + + /* Drop the rest of the break-codes for non-modifier keys. */ + if (sc & 0x80) { + kbd_extended = false; + apic_eoi(); + return; + } + + /* ── Extended make codes (arrows, Home/End, Delete, …) ───────────── */ + if (kbd_extended) { + kbd_extended = false; + handle_extended_make(sc); + apic_eoi(); + return; + } + + /* ── Plain make codes ────────────────────────────────────────────── */ + switch (sc) { + case 0x01: /* Esc */ + input_push_byte(0x1b); + apic_eoi(); + return; + case 0x0e: /* Backspace → DEL (matches VERASE default) */ + input_push_byte(0x7f); + apic_eoi(); + return; + case 0x0f: /* Tab */ + input_push_byte('\t'); + apic_eoi(); + return; + case 0x1c: /* Enter */ + input_push_byte('\n'); + apic_eoi(); + return; + case 0x39: /* Space */ + input_push_byte(' '); + apic_eoi(); + return; + + /* Modifier presses — record state, push nothing. 
*/ + case 0x1d: kbd_ctrl = true; apic_eoi(); return; /* LCtrl ↓ */ + case 0x2a: kbd_shift = true; apic_eoi(); return; /* LShift ↓ */ + case 0x36: kbd_shift = true; apic_eoi(); return; /* RShift ↓ */ + case 0x38: kbd_alt = true; apic_eoi(); return; /* LAlt ↓ */ + case 0x3a: /* CapsLock toggle */ + kbd_caps = !kbd_caps; + apic_eoi(); + return; + default: + break; + } + + /* ── Printable key → look up in the right table ──────────────────── */ + if (sc < 128) { + char c = 0; + char unshift = kbd_us_unshift[sc]; + bool is_letter = (unshift >= 'a' && unshift <= 'z'); + + if (is_letter) { + /* Letters: Shift XOR CapsLock decides case. */ + bool upper = kbd_shift ^ kbd_caps; + c = upper ? kbd_us_shift[sc] : unshift; + } else { + /* Non-letters: only Shift matters; CapsLock is ignored. */ + c = kbd_shift ? kbd_us_shift[sc] : unshift; + } + + if (c != 0) { + push_ascii(c); + } + } + + apic_eoi(); +} + +/* ════════════════════════════════════════════════════════════════════════ + * Public init — wire up the device node and the IRQ. + * ════════════════════════════════════════════════════════════════════════ */ +void kbd_init(void) { + kbd_dev = resource_create(sizeof(struct kbd_device)); + if (!kbd_dev) { + kprintf("[kbd] resource_create failed — no /dev/keyboard\n"); + /* Even without the raw device we still want IRQ-driven input, + * so don't bail; fall through to install the handler. 
*/ + } else { + spinlock_init(kbd_dev->ring_lock); + kbd_dev->head = 0; + kbd_dev->tail = 0; + kbd_dev->count = 0; + + kbd_dev->res.stat.st_size = 0; + kbd_dev->res.stat.st_blocks = 0; + kbd_dev->res.stat.st_blksize = 4096; + kbd_dev->res.stat.st_rdev = resource_create_dev_id(); + kbd_dev->res.stat.st_mode = 0644 | S_IFCHR; + + kbd_dev->res.read = kbd_dev_read; + kbd_dev->res.write = kbd_dev_write; + + devtmpfs_add_device((struct resource *)kbd_dev, "keyboard"); + } + + isr_register_handler(KBD_VECTOR, kbd_irq_handler); + ioapic_redirect_irq(KBD_IRQ, KBD_VECTOR); + + kprintf("[kbd] PS/2 keyboard ready (IRQ %d → vector %d)\n", + KBD_IRQ, KBD_VECTOR); +} diff --git a/src/drivers/ps2/kbd.h b/src/drivers/ps2/kbd.h new file mode 100644 index 0000000..2d71d30 --- /dev/null +++ b/src/drivers/ps2/kbd.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include +#include + +/* + * PS/2 keyboard driver. + * + * kbd_init() is called by ps2_init() after the controller has been + * configured. It: + * - allocates and registers /dev/keyboard (a raw-scancode char device) + * - installs the IRQ-1 → vector-49 handler + * - asks the I/O APIC to route IRQ 1 to vector 49 + * + * Once installed, every keyboard byte from port 0x60 is translated to + * ASCII (or an ANSI escape sequence for arrows / Home / End / Delete) and + * pushed into the shared input ring via input_push_byte(). + */ +void kbd_init(void); diff --git a/src/drivers/ps2/ps2.c b/src/drivers/ps2/ps2.c new file mode 100644 index 0000000..f64f3b4 --- /dev/null +++ b/src/drivers/ps2/ps2.c @@ -0,0 +1,111 @@ +#include "ps2.h" +#include "kbd.h" +#include "arch/x86_64/cpu/io.h" +#include "libk/debug.h" + +#include +#include + +/* + * ── Low-level helpers ───────────────────────────────────────────────────── + * + * The PS/2 controller is glacially slow by modern standards but it is also + * sometimes wired up to a virtualised platform that will happily return + * stale status forever. 
We bound the spins with a generous attempt counter + * so that a wedged controller doesn't hang the kernel during boot. + */ + +#define PS2_SPIN_LIMIT 100000 + +uint8_t ps2_read(void) { + for (int i = 0; i < PS2_SPIN_LIMIT; i++) { + if (x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_OBF) { + return x86_64_inb(PS2_DATA_PORT); + } + __asm__ volatile ("pause"); + } + /* Fall through and read anyway — we are already in trouble. The + * controller never raised OBF in PS2_SPIN_LIMIT iterations, which + * is the signature of a wedged PS/2 controller. Log on every such + * timeout so a boot-time hang leaves a breadcrumb; the byte read below may be stale. */ + kprintf("[ps2] WARN ps2_read: OBF never set after %d spins — controller wedged?\n", + PS2_SPIN_LIMIT); + return x86_64_inb(PS2_DATA_PORT); +} + +void ps2_write(uint16_t port, uint8_t v) { + for (int i = 0; i < PS2_SPIN_LIMIT; i++) { + if (!(x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_IBF)) { + x86_64_outb(port, v); + return; + } + __asm__ volatile ("pause"); + } + /* Write anyway as last-ditch; the controller is hosed otherwise. + * IBF stayed set for the full spin window — same diagnostic story + * as ps2_read above. */ + kprintf("[ps2] WARN ps2_write: IBF stuck after %d spins, port=%x — controller wedged?\n", + PS2_SPIN_LIMIT, (unsigned)port); + x86_64_outb(port, v); +} + +uint8_t ps2_read_config(void) { /* issue READ_CONFIG, then return the byte ps2_read() yields */ + ps2_write(PS2_CMD_PORT, PS2_CMD_READ_CONFIG); + return ps2_read(); +} + +void ps2_write_config(uint8_t v) { /* issue WRITE_CONFIG, then send `v` through the data port */ + ps2_write(PS2_CMD_PORT, PS2_CMD_WRITE_CONFIG); + ps2_write(PS2_DATA_PORT, v); +} + +/* + * ── Controller bring-up ─────────────────────────────────────────────────── + * + * Sequence is the classic OSDev recipe: + * 1. Disable both PS/2 ports so nothing interrupts us mid-init. + * 2. Drain any byte the controller may have buffered before we started + * paying attention (BIOS/QEMU often leaves one behind). + * 3. 
Read the config byte and: + * - enable port 1 interrupt (bit 0) + * - enable scancode set-2 → set-1 translation (bit 6) + * - clear port 1 disable (bit 4) + * Leave port 2 disabled (mouse is intentionally not handled here). + * 4. Write the config back. + * 5. Re-enable port 1. + * 6. Drain again in case the controller queued bytes between + * reconfiguration and re-enable. + * 7. Install the IRQ handler and register /dev/keyboard. + */ +void ps2_init(void) { + /* 1. Disable both ports. */ + ps2_write(PS2_CMD_PORT, PS2_CMD_DISABLE_PORT1); + ps2_write(PS2_CMD_PORT, PS2_CMD_DISABLE_PORT2); + + /* 2. Drain leftover output buffer. */ + while (x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_OBF) { + (void)x86_64_inb(PS2_DATA_PORT); + } + + /* 3. Patch config byte. */ + uint8_t cfg = ps2_read_config(); + cfg |= PS2_CFG_PORT1_INT; /* enable port 1 IRQ */ + cfg |= PS2_CFG_TRANSLATION; /* set-2 → set-1 translation */ + cfg &= ~PS2_CFG_PORT1_DISABLE; /* be sure port 1 is on */ + + /* 4. Push it back. */ + ps2_write_config(cfg); + + /* 5. Re-enable port 1. (Port 2 stays disabled — no mouse this round.) */ + ps2_write(PS2_CMD_PORT, PS2_CMD_ENABLE_PORT1); + + /* 6. Drain anything that snuck in during reconfig. */ + while (x86_64_inb(PS2_STATUS_PORT) & PS2_STATUS_OBF) { + (void)x86_64_inb(PS2_DATA_PORT); + } + + kprintf("[ps2] controller initialised (port1 IRQ + xlate, port2 off)\n"); + + /* 7. Hand off to the keyboard driver. */ + kbd_init(); +} diff --git a/src/drivers/ps2/ps2.h b/src/drivers/ps2/ps2.h new file mode 100644 index 0000000..61dc52c --- /dev/null +++ b/src/drivers/ps2/ps2.h @@ -0,0 +1,70 @@ +#pragma once +#include +#include +#include + +/* + * Low-level PS/2 controller interface for KirkOS. 
+ * + * The PS/2 controller lives behind I/O ports: + * 0x60 — data port (read scancode, write data byte to keyboard/mouse) + * 0x64 — command/status port (read status byte, write controller cmd) + * + * Status register bits: + * bit 0 (OBF) — output buffer full, data ready to be read from 0x60 + * bit 1 (IBF) — input buffer full, controller not ready to accept a write + */ + +#define PS2_DATA_PORT 0x60 +#define PS2_STATUS_PORT 0x64 +#define PS2_CMD_PORT 0x64 + +#define PS2_STATUS_OBF (1u << 0) +#define PS2_STATUS_IBF (1u << 1) + +/* Controller commands (sent to 0x64). */ +#define PS2_CMD_READ_CONFIG 0x20 +#define PS2_CMD_WRITE_CONFIG 0x60 +#define PS2_CMD_DISABLE_PORT2 0xA7 +#define PS2_CMD_ENABLE_PORT2 0xA8 +#define PS2_CMD_DISABLE_PORT1 0xAD +#define PS2_CMD_ENABLE_PORT1 0xAE + +/* Controller config byte bits. */ +#define PS2_CFG_PORT1_INT (1u << 0) +#define PS2_CFG_PORT2_INT (1u << 1) +#define PS2_CFG_PORT1_DISABLE (1u << 4) +#define PS2_CFG_PORT2_DISABLE (1u << 5) +#define PS2_CFG_TRANSLATION (1u << 6) + +/* + * ps2_read — wait until the controller has a byte for us, then return it. + * Spins on status-bit-0 (OBF). + */ +uint8_t ps2_read(void); + +/* + * ps2_write — wait until the controller is ready to accept a byte + * (status-bit-1 IBF clear), then write `v` to `port`. + * `port` is either 0x60 (data) or 0x64 (command). + */ +void ps2_write(uint16_t port, uint8_t v); + +/* + * ps2_read_config / ps2_write_config — read or replace the controller + * configuration byte (a.k.a. "command byte"). + */ +uint8_t ps2_read_config(void); +void ps2_write_config(uint8_t v); + +/* + * ps2_init — full controller bring-up: + * 1. disable both ports + * 2. clear the output buffer + * 3. enable port 1 IRQ + scancode set-1 translation + * 4. re-enable port 1 + * 5. hand off to kbd_init() to install the IRQ handler and + * register /dev/keyboard. + * Called once from main.c. 
+ */ +void ps2_init(void); diff --git a/src/drivers/tty/console.c b/src/drivers/tty/console.c index 4657269..bbb3bd1 100644 --- a/src/drivers/tty/console.c +++ b/src/drivers/tty/console.c @@ -1,3 +1,35 @@ +/* + * KirkOS system console driver. + * + * Provides /dev/console (an ordinary char device backed by the framebuffer + * + serial port for output, and the shared input ring for input) and + * /dev/tty (a dynamic-resolver node that maps to the calling process's + * controlling terminal). + * + * Implements full POSIX line discipline: + * - canonical-mode line editing (VERASE, VKILL, VWERASE, VLNEXT, + * VREPRINT, VEOF, VEOL, VEOL2) + * - input mode flags (ICRNL, INLCR, IGNCR, ISTRIP) + * - output mode flags (ONLCR via console_write) + * - local-mode flags (ICANON, ECHO, ECHOE, ECHOK, ECHOCTL, ECHONL, + * ISIG, TOSTOP) + * - signal generation on VINTR/VQUIT/VSUSP (when ISIG set) + * - job control (TIOCGPGRP/TIOCSPGRP, controlling terminal via TIOCSCTTY, + * SIGTTIN on bg-read, SIGTTOU on bg-write with TOSTOP) + * + * Threading model: one global line-assembly buffer per console_device. + * Concurrent readers serialize on a per-console sleep-mutex + * (read_mutex_*) built on top of the event primitive — NOT a spinlock, + * because the assembly loop holds it across the blocking input_read() + * call (which sleeps via event_await). A spinlock held across a sleep + * would deadlock the second reader on UP and peg a CPU on SMP. + * Short critical sections on line_buf still use console_device->res.lock. + * + * Input comes from the shared input ring (drivers/input/input.c), which + * is fed by IRQ-driven sources: PS/2 keyboard (IRQ 1) and UART RX + * (IRQ 4). 
+ */ + #include "libk/debug.h" #include "console.h" #include "termios.h" @@ -11,202 +43,702 @@ #include #include "arch/x86_64/serial/serial.h" #include "sched/sched.h" +#include "fs/vfs.h" +#include "fs/devtmpfs.h" +#include "drivers/input/input.h" +#include "arch/x86_64/asm/asm.h" -struct console { - struct resource res; - struct termios term; - size_t width, height; - bool decckm; -}; +/* ════════════════════════════════════════════════════════════════════════ + * struct console + * ════════════════════════════════════════════════════════════════════════ */ struct console *console_device = NULL; + + + +/* ════════════════════════════════════════════════════════════════════════ + * Output: framebuffer + serial mirror + * ════════════════════════════════════════════════════════════════════════ */ + static void console_emit(char c) { - framebuffer_putchar(c); - serial_putchar(c); + framebuffer_putchar(c); + serial_putchar(c); } +static void console_emit_n(const char *s, size_t n) { + for (size_t i = 0; i < n; i++) { + framebuffer_putchar(s[i]); + serial_putchar(s[i]); + } +} + +/* ════════════════════════════════════════════════════════════════════════ + * Job-control helpers + * ════════════════════════════════════════════════════════════════════════ */ + +/* Returns true if the calling process is in the foreground process group + * of this console (or there is no foreground pgrp at all, in which case + * we allow the read/write to proceed unimpeded — this is what Linux + * does for orphaned sessions). */ +static bool calling_proc_is_foreground(struct console *con) { + if (con->fg_pgrp == 0) return true; + struct process *p = sched_get_running_thread()->mother_proc; + return p->pgid == con->fg_pgrp; +} + +/* If the calling process is in a *background* process group and the + * signal `sig` is neither blocked nor ignored, send `sig` to the caller's + * pgrp and return -EINTR so the syscall fails. 
If the signal IS blocked + * or ignored, we return 0 and the read/write proceeds as if foreground + * (POSIX SIGTTIN exception: blocked/ignored SIGTTIN means "no harm in + * letting bg read"; same for SIGTTOU). */ +static ssize_t do_jobctl_check(struct console *con, int sig) { + if (calling_proc_is_foreground(con)) return 0; + struct process *p = sched_get_running_thread()->mother_proc; + struct thread *t = sched_get_running_thread(); + + /* Per POSIX: if SIGTTIN/SIGTTOU is blocked or its handler is SIG_IGN, + * the read/write succeeds. */ + if (k_sigismember(&t->sig_blocked, sig)) return 0; + if (p->sig_handlers[sig].sa_handler == KSIG_IGN) return 0; + + /* Don't STOP the session leader or processes in an orphaned pgrp — + * Linux returns -EIO in that case. We don't track orphan-ness yet, + * so the simpler rule: if proc == session leader, allow through. */ + if (p->pid == p->sid) return 0; + + signal_send_to_pgrp(p->pgid, sig); + errno = EINTR; + return -1; +} + + +/* ════════════════════════════════════════════════════════════════════════ + * console_write — with TOSTOP job-control check + * ════════════════════════════════════════════════════════════════════════ */ + static ssize_t console_write(struct resource *this, - struct f_description *description, const void *buf, - off_t offset, size_t count) { + struct f_description *description, + const void *buf, off_t offset, size_t count) { (void)description; (void)offset; + struct console *con = (struct console *)this; if (!buf) { errno = EFAULT; return -1; } + /* TOSTOP: background writes get SIGTTOU. Most shells leave TOSTOP + * off, so this rarely fires. 
*/ + if (con->term.c_lflag & TOSTOP) { + ssize_t jc = do_jobctl_check(con, KSIGTTOU); + if (jc < 0) return -1; + } + spinlock_acquire_or_wait(&this->lock); - - char *r = (char *)buf; + const char *r = (const char *)buf; for (size_t i = 0; i < count; i++) { console_emit(r[i]); } - spinlock_drop(&this->lock); - return count; + return (ssize_t)count; } +/* ════════════════════════════════════════════════════════════════════════ + * Line discipline + * ════════════════════════════════════════════════════════════════════════ */ + +/* Visibly erase one character from the screen (^H ' ' ^H). */ +static void erase_one(void) { + console_emit('\b'); + console_emit(' '); + console_emit('\b'); +} + +/* For ECHOCTL: render a control character as ^X. */ +static void echo_control(char c) { + console_emit('^'); + console_emit((char)(c + 0x40)); +} + +/* Reprint the buffer (VREPRINT support). */ +static void reprint_line(struct console *con) { + console_emit('\n'); + for (size_t i = 0; i < con->line_len; i++) { + char c = con->line_buf[i]; + if ((unsigned char)c < 0x20 && c != '\t' && + (con->term.c_lflag & ECHOCTL)) { + echo_control(c); + } else { + console_emit(c); + } + } +} + +/* Erase the entire current line (VKILL). With ECHOKE, do it character + * by character; otherwise just push a newline. */ +static void kill_line(struct console *con) { + if (con->term.c_lflag & ECHOKE) { + while (con->line_len > 0) { + char c = con->line_buf[--con->line_len]; + if ((unsigned char)c < 0x20 && c != '\t' && + (con->term.c_lflag & ECHOCTL)) { + erase_one(); + erase_one(); + } else { + erase_one(); + } + } + } else if (con->term.c_lflag & ECHOK) { + console_emit('\n'); + con->line_len = 0; + } else { + con->line_len = 0; + } +} + +/* Erase one word (VWERASE). */ +static void werase(struct console *con) { + /* Skip trailing whitespace. 
*/ + while (con->line_len > 0 && + (con->line_buf[con->line_len - 1] == ' ' || + con->line_buf[con->line_len - 1] == '\t')) { + con->line_len--; + if (con->term.c_lflag & ECHO) erase_one(); + } + /* Then erase non-whitespace until we hit whitespace or BOL. */ + while (con->line_len > 0 && + con->line_buf[con->line_len - 1] != ' ' && + con->line_buf[con->line_len - 1] != '\t') { + con->line_len--; + if (con->term.c_lflag & ECHO) erase_one(); + } +} + +/* Process one input byte through the line discipline. Returns true if + * a delimiter (newline / VEOF / VEOL) has been seen and the caller can + * deliver the line. */ +static bool ld_input_byte(struct console *con, uint8_t b) { + tcflag_t iflag = con->term.c_iflag; + tcflag_t lflag = con->term.c_lflag; + cc_t *cc = con->term.c_cc; + bool canon = (lflag & ICANON) != 0; + bool echo = (lflag & ECHO) != 0; + bool echoe = (lflag & ECHOE) != 0; + + /* Input mode flags. */ + if (iflag & ISTRIP) b &= 0x7f; + if ((iflag & IGNCR) && b == '\r') return false; + if ((iflag & ICRNL) && b == '\r') b = '\n'; + else if ((iflag & INLCR) && b == '\n') b = '\r'; + + /* VLNEXT one-byte lookahead: the previous byte was VLNEXT, so this + * byte is taken literally with no signal/edit interpretation. Buffer + * it (subject only to ECHOCTL formatting for display). */ + if (con->lnext_pending) { + con->lnext_pending = false; + if (con->line_len < LINE_BUF_SZ) { + con->line_buf[con->line_len++] = (char)b; + } + if (echo) { + if ((unsigned char)b < 0x20 && b != '\t' && (lflag & ECHOCTL)) { + echo_control((char)b); + } else { + console_emit((char)b); + } + } + return false; + } + + /* ISIG: signal characters fire SIGINT/SIGQUIT/SIGTSTP. Buffered + * input is discarded; we DON'T deliver a line. POSIX: ISIG is + * independent of ICANON — signals fire in raw mode too. 
*/ + if (lflag & ISIG) { + int sig = 0; + if (b == cc[VINTR] && cc[VINTR] != 0) sig = KSIGINT; + else if (b == cc[VQUIT] && cc[VQUIT] != 0) sig = KSIGQUIT; + else if (b == cc[VSUSP] && cc[VSUSP] != 0) sig = KSIGTSTP; + + if (sig) { + if (!(lflag & NOFLSH)) { + con->line_len = 0; + } + if (echo && (unsigned char)b < 0x20 && (lflag & ECHOCTL)) { + echo_control((char)b); + } + if (con->fg_pgrp != 0) { + signal_send_to_pgrp(con->fg_pgrp, sig); + } + return false; /* signal-char does not end a line */ + } + } + + if (canon) { + /* VERASE: backspace one character. POSIX: only cc[VERASE] + * triggers erase — no hardcoded BS fallback (user can rebind + * VERASE via tcsetattr). cc[VERASE] == 0 means disabled, so NUL never erases. */ + if (b == cc[VERASE] && cc[VERASE] != 0) { + if (con->line_len > 0) { + char ch = con->line_buf[--con->line_len]; + if (echo && echoe) { + if ((unsigned char)ch < 0x20 && ch != '\t' && + (lflag & ECHOCTL)) { + erase_one(); + erase_one(); + } else { + erase_one(); + } + } + } + return false; + } + + /* VKILL: erase entire line. */ + if (b == cc[VKILL] && cc[VKILL] != 0) { + kill_line(con); + return false; + } + + /* VWERASE: erase one word. */ + if ((lflag & IEXTEN) && b == cc[VWERASE] && cc[VWERASE] != 0) { + werase(con); + return false; + } + + /* VREPRINT: redraw the line. */ + if ((lflag & IEXTEN) && b == cc[VREPRINT] && cc[VREPRINT] != 0) { + reprint_line(con); + return false; + } + + /* VLNEXT: take the next character literally (no special interp). + * Latch a flag; the next ld_input_byte call sees lnext_pending + * at the very top and buffers the byte verbatim. */ + if ((lflag & IEXTEN) && b == cc[VLNEXT] && cc[VLNEXT] != 0) { + con->lnext_pending = true; + return false; + } + + /* VEOF: end of file. Deliver whatever's buffered as a complete + * line — even if empty (giving the reader a 0-byte read = EOF). */ + if (b == cc[VEOF] && cc[VEOF] != 0) { + con->line_ready = true; + return true; + } + + /* VEOL / VEOL2: alternate line terminators. 
*/ + if ((b == cc[VEOL] && cc[VEOL] != 0) || + (b == cc[VEOL2] && cc[VEOL2] != 0)) { + if (echo) console_emit((char)b); + if (con->line_len < LINE_BUF_SZ) { + con->line_buf[con->line_len++] = (char)b; + } + con->line_ready = true; + return true; + } + + /* Newline: standard line terminator. */ + if (b == '\n') { + if (echo || (lflag & ECHONL)) console_emit('\n'); + if (con->line_len < LINE_BUF_SZ) { + con->line_buf[con->line_len++] = '\n'; + } + con->line_ready = true; + return true; + } + + /* Regular printable / control character. */ + if (con->line_len < LINE_BUF_SZ) { + con->line_buf[con->line_len++] = (char)b; + } + if (echo) { + if ((unsigned char)b < 0x20 && b != '\t' && (lflag & ECHOCTL)) { + echo_control((char)b); + } else { + console_emit((char)b); + } + } + return false; + } + + /* Non-canonical (raw) mode: every byte is its own "line". */ + if (con->line_len < LINE_BUF_SZ) { + con->line_buf[con->line_len++] = (char)b; + } + if (echo) console_emit((char)b); + con->line_ready = true; + return true; +} + + +/* ════════════════════════════════════════════════════════════════════════ + * Sleep-mutex for concurrent-reader serialization + * + * console_read holds this across the blocking input_read() call, which + * sleeps via event_await. A real spinlock here would deadlock the second + * reader on UP (and burn a CPU + risk the deadlock-detector panic on + * SMP). The mutex is built directly on the event primitive: a short + * spinlock protects a held-bit, and waiters sleep on `read_mutex_released`. + * ════════════════════════════════════════════════════════════════════════ */ + +static void console_read_mutex_acquire(struct console *con) { + /* Acquire pattern: try-claim under a short spinlock; if held, + * sleep on the released event and retry. event_await handles its + * own IRQ-disable/re-enable internally. 
*/ + struct event *evs[1] = { &con->read_mutex_released }; + for (;;) { + bool old = int_toggle(false); + spinlock_acquire_or_wait(&con->read_mutex_lock); + if (!con->read_mutex_held) { + con->read_mutex_held = true; + spinlock_drop(&con->read_mutex_lock); + int_toggle(old); + return; + } + spinlock_drop(&con->read_mutex_lock); + int_toggle(old); + /* Sleep until a releaser triggers the event. */ + (void)event_await(evs, 1, true); + } +} + +static void console_read_mutex_release(struct console *con) { + bool old = int_toggle(false); + spinlock_acquire_or_wait(&con->read_mutex_lock); + con->read_mutex_held = false; + spinlock_drop(&con->read_mutex_lock); + int_toggle(old); + /* drop=false so a release that fires between an acquirer's "is held?" + * spinlock-drop and its event_await call still counts. The acquirer + * loops back, re-checks read_mutex_held under the short spinlock, + * and (if free) claims it without sleeping. A stale pending count + * just costs one extra retry iteration — never a lost wakeup. */ + (void)event_trigger(&con->read_mutex_released, false); +} + + +/* ════════════════════════════════════════════════════════════════════════ + * console_read — POSIX-compliant + * ════════════════════════════════════════════════════════════════════════ */ + +/* NOTE on userspace pointer safety: `buf` here is a userspace pointer + * from the caller's read(2). We deref it directly (writing the assembled + * line out). The caller's pagemap is active at syscall time, so a kernel + * address would either fault (if not mapped writable in user mode) or + * touch a user-mapped page — we deliberately do NOT funnel through + * syscall_helper_copy_to_user here because that helper switches pagemaps + * and would require buffering the entire line in a kernel scratch + * area. Mirror behavior of pty_master_read in pty.c. 
*/ static ssize_t console_read(struct resource *this, - struct f_description *description, void *buf, - off_t offset, size_t count) { - (void)this; - (void)description; + struct f_description *description, + void *buf, off_t offset, size_t count) { (void)offset; + struct console *con = (struct console *)this; + + /* Job-control: background readers get SIGTTIN. */ + if (do_jobctl_check(con, KSIGTTIN) < 0) return -1; if (!buf) { errno = EFAULT; return -1; } - - if (count == 0) { - return 0; - } + if (count == 0) return 0; char *out = (char *)buf; - size_t pos = 0; + bool nonblock = description && (description->flags & O_NONBLOCK); - tcflag_t lflag = console_device->term.c_lflag; - tcflag_t iflag = console_device->term.c_iflag; - bool canon = (lflag & ICANON) != 0; - bool echo = (lflag & ECHO) != 0; - bool echoe = (lflag & ECHOE) != 0; + /* Serialize the entire line-assembly loop across concurrent readers. + * Without this, parent + child after fork can each consume half of + * each other's input bytes and interleave them in line_buf. We hold + * the read mutex for the duration of the loop but only briefly take + * this->lock around line_buf mutations, so the blocking input_read() + * below does NOT hold this->lock (and other ops like ioctl can + * still proceed). MUST be the sleep-mutex, not a spinlock — we sleep + * with it held inside input_read(). 
*/ + console_read_mutex_acquire(con); - cc_t verase = console_device->term.c_cc[VERASE]; - if (verase == 0) { - verase = 0x7f; - } - cc_t veof = console_device->term.c_cc[VEOF]; + for (;;) { + spinlock_acquire_or_wait(&this->lock); - while (pos < count) { - while (!serial_received()) { - sched_yield(true); - } - char c = serial_get_byte(); - - // Input mode flags - if ((iflag & ISTRIP) != 0) { - c = (char)((unsigned char)c & 0x7f); - } - if ((iflag & IGNCR) != 0 && c == '\r') { - continue; - } - if ((iflag & ICRNL) != 0 && c == '\r') { - c = '\n'; - } else if ((iflag & INLCR) != 0 && c == '\n') { - c = '\r'; - } - - if (canon) { - // Backspace / VERASE handling - if (c == (char)verase || c == 0x08) { - if (pos > 0) { - pos--; - if (echo && echoe) { - console_emit('\b'); - console_emit(' '); - console_emit('\b'); - } - } - continue; + /* If a line is already assembled (or in raw mode there are bytes + * we haven't drained yet), copy out. */ + if (con->line_ready || + ((con->term.c_lflag & ICANON) == 0 && con->line_len > 0)) { + size_t available = con->line_len - con->line_read_off; + size_t to_copy = available < count ? 
available : count; + for (size_t i = 0; i < to_copy; i++) { + out[i] = con->line_buf[con->line_read_off + i]; } - - // EOF: return what we have so far (possibly 0) - if (veof != 0 && c == (char)veof) { - break; + con->line_read_off += to_copy; + if (con->line_read_off >= con->line_len) { + con->line_len = 0; + con->line_read_off = 0; + con->line_ready = false; } + spinlock_drop(&this->lock); + console_read_mutex_release(con); + return (ssize_t)to_copy; + } - out[pos++] = c; + spinlock_drop(&this->lock); - if (echo) { - if (c == '\n') { - console_emit('\n'); - } else if ((unsigned char)c < 0x20 && c != '\t' && - (lflag & ECHOCTL) != 0) { - console_emit('^'); - console_emit((char)(c + 0x40)); - } else { - console_emit(c); - } - } else if (c == '\n' && (lflag & ECHONL) != 0) { - console_emit('\n'); - } - - if (c == '\n') { - break; + /* Drain bytes from the shared input ring into the line + * discipline. input_read blocks until at least one byte arrives; + * input_try_read returns immediately. */ + uint8_t in_byte; + ssize_t got; + if (nonblock) { + got = input_try_read(&in_byte, 1); + if (got < 0) { + console_read_mutex_release(con); + return -1; /* errno = EAGAIN set by input_try_read */ } } else { - // Non-canonical (raw) mode: return as soon as we have any data. - out[pos++] = c; - if (echo) { - console_emit(c); + got = input_read(&in_byte, 1); + if (got <= 0) { + /* Shouldn't happen — input_read either blocks or returns + * at least 1. If we get here, treat as EINTR. */ + errno = EINTR; + console_read_mutex_release(con); + return -1; } - break; } - } - return (ssize_t)pos; + spinlock_acquire_or_wait(&this->lock); + ld_input_byte(con, in_byte); + spinlock_drop(&this->lock); + /* Loop and re-check whether a full line is now available. 
*/ + } } + +/* ════════════════════════════════════════════════════════════════════════ + * Ioctl: termios, winsize, job-control, controlling-terminal + * ════════════════════════════════════════════════════════════════════════ */ + int console_ioctl(struct resource *this, struct f_description *description, uint64_t request, uint64_t arg) { (void)description; + struct console *con = (struct console *)this; + + /* For terminal-state-modifying ioctls from a background process, + * deliver SIGTTOU and fail. POSIX requires this regardless of + * TOSTOP (TOSTOP only gates write() output). Done BEFORE taking + * this->lock because do_jobctl_check may queue a signal and we + * don't want to hold the resource lock across signal-delivery + * paths. */ + switch (request) { + case TCSETS: + case TCSETSW: + case TCSETSF: + case TIOCSPGRP: + case TIOCSCTTY: + case TIOCNOTTY: + case TIOCSWINSZ: + if (do_jobctl_check(con, KSIGTTOU) < 0) return -1; + break; + default: + break; + } + + struct process *self = sched_get_running_thread()->mother_proc; spinlock_acquire_or_wait(&this->lock); int ret = 0; switch (request) { case TCGETS: { - struct termios *t = (void *)arg; - if (t) - *t = console_device->term; + if (arg == 0) { + errno = EFAULT; + ret = -1; + break; + } + if (!syscall_helper_copy_to_user(arg, &con->term, + sizeof(con->term))) { + errno = EFAULT; + ret = -1; + } break; } case TCSETS: case TCSETSW: case TCSETSF: { - struct termios *t = (void *)arg; - if (t) - console_device->term = *t; + if (arg == 0) { + errno = EFAULT; + ret = -1; + break; + } + struct termios tmp; + if (!syscall_helper_copy_from_user(arg, &tmp, sizeof(tmp))) { + errno = EFAULT; + ret = -1; + break; + } + con->term = tmp; + /* TCSETSF: also flush pending input. (TCSETSW would also + * wait for output to drain — we have no output queue, so + * a no-op there.) 
*/ + if (request == TCSETSF) { + con->line_len = 0; + con->line_read_off = 0; + con->line_ready = false; + con->lnext_pending = false; + } break; } case TIOCGWINSZ: { - struct winsize *w = (void *)arg; - if (w) { - w->ws_row = framebuff.ctx->rows; - w->ws_col = framebuff.ctx->cols; - w->ws_xpixel = framebuff.width; - w->ws_ypixel = framebuff.height; - } else { - errno = EINVAL; + if (arg == 0) { + errno = EFAULT; + ret = -1; + break; + } + struct winsize w; + w.ws_row = framebuff.ctx->rows; + w.ws_col = framebuff.ctx->cols; + w.ws_xpixel = framebuff.width; + w.ws_ypixel = framebuff.height; + if (!syscall_helper_copy_to_user(arg, &w, sizeof(w))) { + errno = EFAULT; ret = -1; } break; } - case TIOCSWINSZ: - // Accept but ignore: the console window size is fixed by the fb. + case TIOCSWINSZ: { + /* Window size is owned by flanterm; we still validate the + * userspace pointer to catch buggy/malicious callers. */ + if (arg == 0) { + errno = EFAULT; + ret = -1; + break; + } + struct winsize w; + if (!syscall_helper_copy_from_user(arg, &w, sizeof(w))) { + errno = EFAULT; + ret = -1; + break; + } + /* Accept-and-ignore the values themselves. */ break; - case TIOCSCTTY: - // Becoming the controlling tty is a no-op; there's only one console - // and any process opening it is implicitly attached. + } + + case TIOCSCTTY: { + /* Becoming the controlling tty: + * - caller must be a session leader (pid == sid) + * - this tty must not already belong to another session + * (or arg == 1 forces a steal — we don't honor that yet) + * - set caller->ctty = us, this->session = caller->sid, + * this->fg_pgrp = caller->pgid */ + if (self->pid != self->sid) { + errno = EPERM; + ret = -1; + break; + } + if (con->session != 0 && con->session != self->sid) { + errno = EPERM; + ret = -1; + break; + } + con->session = self->sid; + con->fg_pgrp = self->pgid; + self->ctty = (struct resource *)con; break; - case TIOCGPGRP: + } + + case TIOCNOTTY: { + /* Disassociate from controlling tty. 
Only the session leader's + * action propagates to all session members. */ + if (self->ctty == (struct resource *)con) { + self->ctty = NULL; + } + if (self->pid == self->sid && con->session == self->sid) { + con->session = 0; + con->fg_pgrp = 0; + /* Real Unix sends SIGHUP + SIGCONT to every process in the + * session here. We skip the iteration for brevity. We must change this later */ + } + break; + } + + case TIOCGPGRP: { + if (arg == 0) { + errno = EFAULT; + ret = -1; + break; + } + int n = (int)con->fg_pgrp; + if (!syscall_helper_copy_to_user(arg, &n, sizeof(n))) { + errno = EFAULT; + ret = -1; + } + break; + } + + case TIOCSPGRP: { + if (arg == 0) { + errno = EFAULT; + ret = -1; + break; + } + int new_pgrp; + if (!syscall_helper_copy_from_user(arg, &new_pgrp, + sizeof(new_pgrp))) { + errno = EFAULT; + ret = -1; + break; + } + if (new_pgrp <= 0) { + errno = EINVAL; + ret = -1; + break; + } + /* Caller must have this as controlling tty and be in the + * same session. */ + if (self->ctty != (struct resource *)con) { + errno = ENOTTY; + ret = -1; + break; + } + if (con->session != self->sid) { + errno = ENOTTY; + ret = -1; + break; + } + /* Verify the new pgrp belongs to this session. */ + extern struct process *process_list; + bool found = false; + for (struct process *p = process_list; p; p = p->next) { + if (p->pgid == new_pgrp && p->sid == self->sid) { + found = true; + break; + } + } + if (!found) { + errno = EPERM; + ret = -1; + break; + } + con->fg_pgrp = new_pgrp; + break; + } + case TIOCGSID: { - int *n = (int *)arg; - if (n) { - *n = sched_get_running_thread()->mother_proc->pid; - } else { - errno = EINVAL; + if (arg == 0) { + errno = EFAULT; + ret = -1; + break; + } + int n = (int)con->session; + if (!syscall_helper_copy_to_user(arg, &n, sizeof(n))) { + errno = EFAULT; ret = -1; } break; } - case TIOCSPGRP: - // Accept but ignore: no foreground-pgrp tracking yet. 
- break; + default: - errno = EINVAL; + errno = ENOTTY; ret = -1; break; } @@ -215,71 +747,119 @@ int console_ioctl(struct resource *this, struct f_description *description, return ret; } + static void dec_private(uint64_t esc_val_count, uint32_t *esc_values, uint64_t final) { (void)esc_val_count; - switch (esc_values[0]) { case 1: switch (final) { - case 'h': - console_device->decckm = true; - break; - case 'l': - console_device->decckm = false; - break; - default: - break; + case 'h': console_device->decckm = true; break; + case 'l': console_device->decckm = false; break; + default: break; } } } -static void term_callback(struct flanterm_context *term, uint64_t t, uint64_t a, - uint64_t b, uint64_t c) { - (void)term; +static void term_callback(struct flanterm_context *term, uint64_t t, + uint64_t a, uint64_t b, uint64_t c) { + (void)term; switch (t) { case 10: dec_private(a, (void *)b, c); } } +/* ════════════════════════════════════════════════════════════════════════ + * /dev/tty resolver — runs on every open() of /dev/tty. + * + * Returns the calling process's controlling-terminal resource. If the + * process has no ctty, returns NULL with errno=ENXIO per POSIX. 
+ * ════════════════════════════════════════════════════════════════════════ */ + +static struct resource *console_tty_resolver(struct vfs_node *self) { + (void)self; + struct process *p = sched_get_running_thread()->mother_proc; + if (p->ctty == NULL) { + errno = ENXIO; + return NULL; + } + return p->ctty; +} + + +/* ════════════════════════════════════════════════════════════════════════ + * Init + * ════════════════════════════════════════════════════════════════════════ */ + void console_init(void) { console_device = resource_create(sizeof(struct console)); - console_device->res.stat.st_size = 0; - console_device->res.stat.st_blocks = 0; + console_device->res.stat.st_size = 0; + console_device->res.stat.st_blocks = 0; console_device->res.stat.st_blksize = 4096; - console_device->res.stat.st_rdev = resource_create_dev_id(); - console_device->res.stat.st_mode = 0644 | S_IFCHR; + console_device->res.stat.st_rdev = resource_create_dev_id(); + console_device->res.stat.st_mode = 0644 | S_IFCHR; - console_device->width = framebuff.width / 8; + console_device->width = framebuff.width / 8; console_device->height = framebuff.height / 16; - console_device->term.c_iflag = BRKINT | IGNPAR | ICRNL | IXON | IMAXBEL; + console_device->term.c_iflag = + BRKINT | IGNPAR | ICRNL | IXON | IMAXBEL; console_device->term.c_oflag = OPOST | ONLCR; console_device->term.c_cflag = CS8 | CREAD; console_device->term.c_lflag = - ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE; - console_device->term.c_cc[VINTR] = CTRL('C'); - console_device->term.c_cc[VEOF] = CTRL('D'); - console_device->term.c_cc[VSUSP] = CTRL('Z'); + ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN; + + console_device->term.c_cc[VINTR] = CTRL('C'); + console_device->term.c_cc[VQUIT] = CTRL('\\'); + console_device->term.c_cc[VERASE] = 0x7f; /* DEL */ + console_device->term.c_cc[VKILL] = CTRL('U'); + console_device->term.c_cc[VEOF] = CTRL('D'); + console_device->term.c_cc[VSTART] = CTRL('Q'); + 
console_device->term.c_cc[VSTOP] = CTRL('S'); + console_device->term.c_cc[VSUSP] = CTRL('Z'); + console_device->term.c_cc[VREPRINT] = CTRL('R'); + console_device->term.c_cc[VWERASE] = CTRL('W'); + console_device->term.c_cc[VLNEXT] = CTRL('V'); console_device->term.ibaud = 38400; console_device->term.obaud = 38400; + console_device->fg_pgrp = 0; + console_device->session = 0; + console_device->line_len = 0; + console_device->line_read_off = 0; + console_device->line_ready = false; + console_device->lnext_pending = false; + spinlock_init(console_device->read_mutex_lock); + console_device->read_mutex_held = false; + /* read_mutex_released is zero-init via .bss (resource_create zeros). */ + console_device->res.status |= POLLOUT; - - console_device->res.read = console_read; - console_device->res.write = console_write; - console_device->res.ioctl = console_ioctl; - + console_device->res.read = console_read; + console_device->res.write = console_write; + console_device->res.ioctl = console_ioctl; console_device->decckm = false; devtmpfs_add_device((struct resource *)console_device, "console"); - devtmpfs_add_device((struct resource *)console_device, "tty"); + + /* /dev/tty: dynamic resolver to whatever the calling process's + * proc->ctty points at. We register a "stub" resource here so that + * stat(2) on the path succeeds; the resolver kicks in only when + * the node is actually opened. 
*/ + struct resource *tty_stub = resource_create(sizeof(struct resource)); + tty_stub->stat.st_mode = 0666 | S_IFCHR; + tty_stub->stat.st_rdev = resource_create_dev_id(); + devtmpfs_add_device(tty_stub, "tty"); + struct vfs_node *tty_node = vfs_get_node(vfs_root, "/dev/tty", false); + if (tty_node) { + tty_node->resolve_open = console_tty_resolver; + } kprintffos(false, "Bye bye!\n"); framebuffer_clear(0x00eee8d5, 0); framebuff.ctx->callback = term_callback; } + diff --git a/src/drivers/tty/console.h b/src/drivers/tty/console.h index 454cc30..b887761 100644 --- a/src/drivers/tty/console.h +++ b/src/drivers/tty/console.h @@ -1,3 +1,45 @@ #pragma once +#include +#include +#include +#include "termios.h" +#include "mp/spinlock.h" +#include "libk/resource.h" +#include "libk/event.h" + +#define LINE_BUF_SZ 4096 + +struct console { + struct resource res; + struct termios term; + size_t width, height; + bool decckm; + + /* ── job control ─────────────────────────────────────────────────── */ + int64_t fg_pgrp; /* foreground process group; 0 = none */ + int64_t session; /* owning session; 0 = none */ + + /* ── line-discipline assembly buffer ─────────────────────────────── */ + char line_buf[LINE_BUF_SZ]; + size_t line_len; /* bytes currently in line_buf */ + size_t line_read_off; /* drain offset (for partial reads) */ + bool line_ready; /* true when a delimiter has landed */ + bool lnext_pending; /* VLNEXT: next byte taken literally */ + + /* ── concurrent-reader serialization ─────────────────────────────── */ + /* Separate from res.lock so we can hold it across the blocking + * input_read() without holding res.lock. Serializes the line-assembly + * loop in console_read across multiple concurrent readers (e.g. parent + * + child after fork) so their bytes can't interleave on line_buf. + * + * Sleep-mutex serializing the line-assembly loop across concurrent + * readers. 
We hold this across the blocking input_read() — so it MUST + * be a sleep-mutex, not a spinlock (a spinlock held while the holder + * sleeps in event_await would deadlock the second reader). Built on + * top of the existing event primitive. */ + spinlock_t read_mutex_lock; /* short-held; protects the two fields below */ + bool read_mutex_held; + struct event read_mutex_released; +}; void console_init(void); diff --git a/src/drivers/tty/pty.c b/src/drivers/tty/pty.c index ef06e5d..0d6f9a6 100644 --- a/src/drivers/tty/pty.c +++ b/src/drivers/tty/pty.c @@ -479,7 +479,7 @@ void syscall_openpty(struct syscall_arguments *args) { ps->pty = p; ps->res.read = pty_slave_read; ps->res.write = pty_slave_write; - pm->res.unref = pty_slave_unref; + ps->res.unref = pty_slave_unref; ps->res.ioctl = pty_ioctl; ps->res.stat.st_size = 0; ps->res.stat.st_blocks = 0; diff --git a/src/entry.c b/src/entry.c index df4010f..49f91b9 100644 --- a/src/entry.c +++ b/src/entry.c @@ -159,7 +159,7 @@ void _entry(void) { framebuffer_init(&fb); print_now = true; - serial_init(); + struct limine_file *kernel_file = limine_kernel_file_request.response->kernel_file; @@ -196,6 +196,8 @@ void _entry(void) { apic_init(); + serial_init(); + mp_init(mp_request.response); // The NSA has also forced hardware manufacturers to backdoor their 'Random diff --git a/src/fs/vfs.c b/src/fs/vfs.c index c253cef..3cf9797 100644 --- a/src/fs/vfs.c +++ b/src/fs/vfs.c @@ -563,6 +563,18 @@ void syscall_openat(struct syscall_arguments *args) { return; } + /* Dynamic-resolver node (e.g. /dev/tty resolves to the calling + * process's controlling-terminal resource). The resolver may return + * NULL with errno set (ENXIO when the caller has no ctty). 
*/ + struct resource *target_res = node->resource; + if (node->resolve_open != NULL) { + target_res = node->resolve_open(node); + if (target_res == NULL) { + args->ret = -1; + return; + } + } + struct f_descriptor *fd = fd_create_from_resource(node->resource, flags); if (fd == NULL) { args->ret = -1; @@ -650,7 +662,16 @@ void syscall_getcwd(struct syscall_arguments *args) { size_t len = args->args1; char path_buffer[PATH_MAX] = {0}; - if (vfs_pathname(proc->cwd, path_buffer, PATH_MAX) >= len) { + size_t n = vfs_pathname(proc->cwd, path_buffer, PATH_MAX); + + if (n == 0) { + // proc->cwd is the VFS root — render as "/". + path_buffer[0] = '/'; + path_buffer[1] = '\0'; + n = 1; + } + + if (n + 1 > len) { errno = ERANGE; args->ret = -1; return; diff --git a/src/fs/vfs.h b/src/fs/vfs.h index 3607f91..a8b358f 100644 --- a/src/fs/vfs.h +++ b/src/fs/vfs.h @@ -24,6 +24,14 @@ struct vfs_node { HASHMAP_TYPE(struct vfs_node *) children; char *symlink_target; bool populated; + + + /* Per-process resolver. If non-NULL, syscall_openat will call this + * instead of returning node->resource directly. Used by /dev/tty to + * resolve to the calling process's controlling-terminal resource. + * The function returns NULL with errno set if it can't satisfy the + * open (e.g. ENXIO for "no controlling tty"). 
*/ + struct resource *(*resolve_open)(struct vfs_node *self); }; typedef struct vfs_node *(*fs_mount_t)(struct vfs_node *, const char *, diff --git a/src/main.c b/src/main.c index f5c61a6..6120d6a 100644 --- a/src/main.c +++ b/src/main.c @@ -2,6 +2,7 @@ #include "fs/vfs.h" #include "sched/sched_types.h" #include "sched/sched.h" +#include "sched/signal.h" #include "fs/tmpfs.h" #include "fs/devtmpfs.h" #include "libk/random.h" @@ -9,6 +10,8 @@ #include "fs/partition.h" #include "drivers/fb/fb.h" #include "drivers/tty/console.h" +#include "drivers/input/input.h" +#include "drivers/ps2/ps2.h" #include "arch/x86_64/sys/timer.h" #include "libk/kargs.h" #include "fs/ramdisk.h" @@ -82,6 +85,26 @@ void kernel_main(void *args) { syscall_register_handler(0x10f, syscall_ppoll); syscall_register_handler(0x54, syscall_rmdir); + + /* ── POSIX signal syscalls ──────────────────────────────────────── */ + syscall_register_handler(13, syscall_sigaction); /* rt_sigaction */ + syscall_register_handler(14, syscall_sigprocmask); /* rt_sigprocmask */ + syscall_register_handler(15, syscall_sigreturn); /* rt_sigreturn */ + syscall_register_handler(34, syscall_pause); + syscall_register_handler(127, syscall_sigpending); + syscall_register_handler(130, syscall_sigsuspend); + + /* ── POSIX session / process-group syscalls ─────────────────────── */ + syscall_register_handler(109, syscall_setpgid); + syscall_register_handler(111, syscall_getpgrp); + syscall_register_handler(112, syscall_setsid); + syscall_register_handler(121, syscall_getpgid); + syscall_register_handler(124, syscall_getsid); + + /* ── Shared input ring buffer + PS/2 keyboard ───────────────────── */ + input_init(); + ps2_init(); + kprintf("Halting for 5 seconds..."); timer_sleep(5000); @@ -96,7 +119,7 @@ void kernel_main(void *args) { NULL, }; - char *init_path = "/bin/oksh"; + char *init_path = "/bin/sh"; if (kernel_arguments.kernel_args & KERNEL_ARGS_INIT_PATH_GIVEN) { init_path = kernel_arguments.init_binary_path; } diff 
--git a/src/mm/vmm.c b/src/mm/vmm.c index 6412254..1a9101e 100644 --- a/src/mm/vmm.c +++ b/src/mm/vmm.c @@ -71,7 +71,7 @@ void vmm_init(struct limine_memmap_entry **memmap, size_t memmap_entries) { vmm_map_page(kernel_pagemap, p + MEM_PHYS_OFFSET, p, 0b11, Size2MiB); } - for (size_t i = 0; i < (memmap_entries - 1); i++) { + for (size_t i = 0; i < (memmap_entries); i++) { uint64_t base = memmap[i]->base; uint64_t length = memmap[i]->length; uint64_t top = base + length; diff --git a/src/mp/mp.c b/src/mp/mp.c index 08b3c89..475100f 100644 --- a/src/mp/mp.c +++ b/src/mp/mp.c @@ -56,6 +56,7 @@ static void smp_cpu_init(struct limine_smp_info *smp_info) { uint64_t cr4 = 0; cr4 = read_cr("4"); cr4 |= (3 << 9); + cr4 |= (1 << 16); // FSGSBASE write_cr("4", cr4); // Enable syscall in EFER diff --git a/src/sched/sched.c b/src/sched/sched.c index d5acce6..5bc6560 100644 --- a/src/sched/sched.c +++ b/src/sched/sched.c @@ -17,7 +17,9 @@ #include "futex.h" #include "libk/string.h" #include "main.h" -#include "sched/syscall.h" +#include "syscall.h" +#include "signal.h" +#include "drivers/tty/console.h" @@ -39,11 +41,13 @@ int64_t pid = 0; spinlock_t thread_lock = {0}; spinlock_t process_lock = {0}; +extern struct console *console_device; // for init's ctty + struct resource *std_console_device = NULL; struct utsname system_uname = { .sysname = "KirkOS", - .nodename = "localhost", + .nodename = "kirk", .release = "0.0.0", .version = "Built on " __DATE__ " " __TIME__, #if defined(__x86_64__) @@ -103,7 +107,7 @@ static struct thread *sched_tid_to_thread(int64_t t) { return NULL; } -static struct process *sched_pid_to_process(int64_t p) { +struct process *sched_pid_to_process(int64_t p) { struct process *this = process_list; while (this) { if (this->pid == p) @@ -180,18 +184,13 @@ void sched_remove_process_from_list(struct process **proc_list, } } -void syscall_kill(struct syscall_arguments *args) { - struct process *proc = sched_pid_to_process((int64_t)args->args0); - args->ret = 0; 
- if (!proc) - args->ret = -1; - else - process_kill(proc, false); -} - void syscall_exit(struct syscall_arguments *args) { - sched_get_running_thread()->mother_proc->status = (uint8_t)args->args0; - process_kill(sched_get_running_thread()->mother_proc, false); + struct process *proc = sched_get_running_thread()->mother_proc; + /* POSIX wait status encoding for a normal exit: (code & 0xff) << 8. */ + int code = (int)(args->args0 & 0xff); + proc->wait_status = (code & 0xff) << 8; + proc->wait_status_valid = true; + process_kill(proc, false); } void syscall_getpid(struct syscall_arguments *args) { @@ -247,12 +246,16 @@ void syscall_waitpid(struct syscall_arguments *args) { spinlock_acquire_or_wait(&waiter_proc->lock); if (!waiter_proc->child_processes.length) { + spinlock_drop(&waiter_proc->lock); errno = ECHILD; + args->ret = -1; return; } if (pid_to_wait_on < -1 || pid_to_wait_on == 0) { + spinlock_drop(&waiter_proc->lock); errno = EINVAL; + args->ret = -1; return; } @@ -279,6 +282,7 @@ void syscall_waitpid(struct syscall_arguments *args) { } if (waitee_process == NULL) { + spinlock_drop(&waiter_proc->lock); errno = ECHILD; kfree(events); args->ret = -1; @@ -310,7 +314,12 @@ void syscall_waitpid(struct syscall_arguments *args) { waitee_process = waiter_proc->child_processes.data[which]; } - *status = waitee_process->status; + + /* W* status was encoded by signal.c / syscall_exit. status pointer is + * optional per POSIX — only write it if userspace provided one. 
*/ + if (status != NULL) { + *status = waitee_process->wait_status; + } args->ret = waitee_process->pid; vec_remove(&waiter_proc->child_processes, waitee_process); @@ -411,6 +420,8 @@ void process_create(char *name, uint8_t state, uint64_t runtime, proc->cwd = vfs_root; proc->stack_top = VIRTUAL_STACK_ADDR; + signal_init_process(proc); + if (parent_process) { proc->parent_process = parent_process; if (proc->parent_process->cwd) { @@ -418,12 +429,22 @@ void process_create(char *name, uint8_t state, uint64_t runtime, } proc->umask = parent_process->umask; proc->mmap_anon_base = parent_process->mmap_anon_base; + /* Inherit session and process group from parent. ctty is also + * inherited (the child shares the parent's controlling terminal + * until it calls setsid()). */ + proc->pgid = parent_process->pgid; + proc->sid = parent_process->sid; + proc->ctty = parent_process->ctty; + signal_inherit_on_fork(parent_process, proc); spinlock_acquire_or_wait(&parent_process->lock); vec_push(&parent_process->child_processes, proc); spinlock_drop(&parent_process->lock); } else { proc->umask = S_IWGRP | S_IWOTH; proc->mmap_anon_base = MMAP_ANON_BASE; + proc->pgid = 0; + proc->sid = 0; + proc->ctty = NULL; } proc->next = NULL; @@ -432,6 +453,10 @@ void process_create(char *name, uint8_t state, uint64_t runtime, spinlock_acquire_or_wait(&process_lock); proc->pid = pid++; + /* New processes that didn't inherit from a parent become their own + * session+pgroup leader. */ + if (proc->pgid == 0) proc->pgid = proc->pid; + if (proc->sid == 0) proc->sid = proc->pid; sched_add_process_to_list(&process_list, proc); spinlock_drop(&process_lock); @@ -453,6 +478,12 @@ bool process_run_init(char *path, char **argv, char **envp, proc->cwd = vfs_root; proc->stack_top = VIRTUAL_STACK_ADDR; + signal_init_process(proc); + /* init is its own session + process-group leader (pid 1). 
*/ + proc->pgid = 0; /* filled in below from proc->pid */ + proc->sid = 0; + proc->ctty = &console_device->res; + if (parent_process) { proc->parent_process = parent_process; if (proc->parent_process->cwd) { @@ -504,6 +535,8 @@ bool process_run_init(char *path, char **argv, char **envp, spinlock_acquire_or_wait(&process_lock); proc->pid = pid++; + if (proc->pgid == 0) proc->pgid = proc->pid; + if (proc->sid == 0) proc->sid = proc->pid; sched_add_process_to_list(&process_list, proc); spinlock_drop(&process_lock); @@ -542,6 +575,15 @@ int64_t process_fork(struct process *proc, struct thread *thrd) { fproc->parent_process = proc; fproc->next = NULL; + /* POSIX: fork inherits pgid, sid, controlling terminal, signal + * dispositions, and the calling thread's signal mask. Pending + * signals are NOT inherited. */ + fproc->pgid = proc->pgid; + fproc->sid = proc->sid; + fproc->ctty = proc->ctty; + signal_init_process(fproc); + signal_inherit_on_fork(proc, fproc); + vec_init(&fproc->child_processes); vec_init(&fproc->process_threads); @@ -653,6 +695,11 @@ bool process_execve(char *path, char **argv, char **envp) { strncpy(proc->name, path, 256); + /* POSIX execve(2): handlers reset to SIG_DFL unless they were SIG_IGN. + * sa_mask cleared. Pending signals preserved. The surviving thread's + * signal mask is PRESERVED across exec — POSIX requires this. */ + signal_reset_on_exec(proc); + for (int i = 0; i < proc->process_threads.length; i++) { if (proc->process_threads.data[i] != thread) { proc->process_threads.data[i]->state = THREAD_KILLED; @@ -677,12 +724,41 @@ bool process_execve(char *path, char **argv, char **envp) { } void process_kill(struct process *proc, bool crash) { - if (proc->pid < 2) { - panic("Attempted to kill init!\n"); + /* The kernel itself is pid 0; killing it is always wrong. */ + if (proc->pid == 0) { + panic("Attempted to kill kernel_tasks (pid 0)!\n"); + } + + /* When init (pid 1) dies, the userspace is gone. 
Rather than panic + * with a stack trace (which historically obscured the real cause), + * print a final diagnostic and halt cleanly. This is what Linux does + * for the same condition. */ + if (proc->pid == 1) { + kprintf("\n*** init (pid 1) exiting with wait_status=0x%x ***\n", + proc->wait_status); + kprintf("*** system has no init — halting ***\n"); + /* Fall through and run normal cleanup; the idle loop in + * kernel_main will keep the CPU parked. */ } bool are_we_killing_ourselves = false; + /* If this kill is the result of a fatal signal (crash=true), we + * already have wait_status_valid; otherwise the caller (syscall_exit) + * set it. If neither happened (raw signal default action), encode + * "killed by SIGKILL" as a safe fallback. */ + if (!proc->wait_status_valid) { + proc->wait_status = (KSIGKILL & 0x7f); /* encode as killed-by-SIGKILL */ + proc->wait_status_valid = true; + } + + /* TODO: when killing a foreign process, FD-close hooks run in the + * caller's pagemap. Current KirkOS resources (devtmpfs char devices, + * pipes, etc.) don't touch user memory in their unref handlers, so + * this is safe — but future fs drivers that flush user pages on + * close will fault. Either switch to victim pagemap around the + * close loop, or defer FD reaping to the death_event-driven cleanup + * in sched_get_next_thread. */ for (int i = 0; i < MAX_FDS; i++) { if (proc->fds[i] == NULL) { continue; @@ -696,33 +772,64 @@ void process_kill(struct process *proc, bool crash) { vmm_switch_pagemap(kernel_pagemap); } + /* Hold proc->lock while iterating process_threads: signal_force_default + * (STOP), signal_send_to_process (wake walk), thread_create, and + * thread_fork all mutate this vec under proc->lock from other CPUs. + * If are_we_killing_ourselves is true the running thread is still in + * this vec — that's fine: we just mark it KILLED here, the actual + * deselect happens when we sched_yield below. 
*/ + spinlock_acquire_or_wait(&proc->lock); for (int i = 0; i < proc->process_threads.length; i++) { if (proc->process_threads.data[i]->state == THREAD_NORMAL) { sched_trigger_yield(proc->process_threads.data[i]->running_on_cpu); } proc->process_threads.data[i]->state = THREAD_KILLED; } + spinlock_drop(&proc->lock); - spinlock_acquire_or_wait(&init_proc->lock); - for (int i = 0; i < proc->child_processes.length; i++) { - struct process *child_proc = proc->child_processes.data[i]; - child_proc->parent_process = init_proc; - vec_push(&init_proc->child_processes, child_proc); + /* Reparent children to init (pid 1), if init still exists. Hold + * proc->lock while reading proc->child_processes: a concurrent + * process_fork from one of our children would mutate the list. */ + if (init_proc && init_proc->state != PROCESS_KILLED) { + spinlock_acquire_or_wait(&proc->lock); + spinlock_acquire_or_wait(&init_proc->lock); + for (int i = 0; i < proc->child_processes.length; i++) { + struct process *child_proc = proc->child_processes.data[i]; + child_proc->parent_process = init_proc; + vec_push(&init_proc->child_processes, child_proc); + } + spinlock_drop(&init_proc->lock); + spinlock_drop(&proc->lock); } - spinlock_drop(&init_proc->lock); + spinlock_acquire_or_wait(&proc->lock); vec_deinit(&proc->child_processes); vec_deinit(&proc->process_threads); + spinlock_drop(&proc->lock); + + /* SIGCHLD to the parent before death_event wakes any waitpid(). + * Skip when the parent is kernel_proc (pid 0): the kernel "task" is + * not a userspace process and has no signal handling state. 
*/ + if (proc->parent_process && + proc->parent_process->state != PROCESS_KILLED && + proc->parent_process->pid > 0) { + signal_send_to_process(proc->parent_process, KSIGCHLD); + } event_trigger(&proc->death_event, false); proc->state = PROCESS_KILLED; - if (!are_we_killing_ourselves) { - process_destroy_context(proc); - } + /* Defer pagemap destruction to the reaper (sched_get_next_thread) + * to avoid tearing down CR3 while other CPUs may still be using it. + * The reaper sees `clean_up` and runs process_destroy_context once + * all threads have been deselected. */ + proc->clean_up = true; - if (are_we_killing_ourselves || crash) { - proc->clean_up = true; + /* Only yield if WE are the victim. In a cross-process kill (A kills B, + * crash=true from signal_force_default) we run in A's context — A is + * alive and must keep its quantum. Other CPUs currently running B's + * threads were already kicked by sched_trigger_yield above. */ + if (are_we_killing_ourselves) { sched_yield(false); } } @@ -791,6 +898,13 @@ void thread_fork(struct thread *pthrd, struct process *fproc) { thread_fork_context(pthrd, thrd); + + /* POSIX: child thread inherits parent thread's signal mask. + * Pending-signal state is per-process and handled in process_fork. */ + thrd->sig_blocked = pthrd->sig_blocked; + thrd->saved_sig_blocked = pthrd->saved_sig_blocked; + thrd->in_sigsuspend = false; /* child doesn't inherit sigsuspend state */ + thrd->last_scheduled = 0; spinlock_acquire_or_wait(&thread_lock); diff --git a/src/sched/sched.h b/src/sched/sched.h index 5fed7c8..dc082fc 100644 --- a/src/sched/sched.h +++ b/src/sched/sched.h @@ -71,6 +71,15 @@ void process_wait_on_another_process(struct process *waiter, void process_wait_on_processes(struct process *waiter, process_vec_t *waitees); +/* PID lookup. Returns NULL if no process has this pid. */ +struct process *sched_pid_to_process(int64_t p); + +/* Session and process-group syscalls (defined in sched/session.c). 
*/ +void syscall_setsid(struct syscall_arguments *args); +void syscall_setpgid(struct syscall_arguments *args); +void syscall_getpgid(struct syscall_arguments *args); +void syscall_getsid(struct syscall_arguments *args); +void syscall_getpgrp(struct syscall_arguments *args); static inline struct thread *sched_get_running_thread(void) { struct thread *ret; diff --git a/src/sched/sched_types.h b/src/sched/sched_types.h index 328c6a5..8cb4b76 100644 --- a/src/sched/sched_types.h +++ b/src/sched/sched_types.h @@ -6,19 +6,22 @@ #include "libk/event.h" #include "arch/x86_64/cpu/reg.h" #include "fs/elf.h" +#include "signal.h" enum thread_states { THREAD_NORMAL = 0, THREAD_READY_TO_RUN, THREAD_KILLED, - THREAD_WAITING_FOR_EVENT + THREAD_WAITING_FOR_EVENT, + THREAD_STOPPED /* SIGSTOP / SIGTSTP / SIGTTIN / SIGTTOU */ }; enum process_states { PROCESS_NORMAL = 0, PROCESS_READY_TO_RUN, - PROCESS_KILLED + PROCESS_KILLED, + PROCESS_STOPPED /* all threads stopped via job-control signal */ }; struct process; @@ -47,11 +50,22 @@ struct thread { uint64_t fs_base; int64_t running_on_cpu; struct thread *next; + + + /* ── signal state ────────────────────────────────────────────────── */ + k_sigset_t sig_blocked; /* per-thread blocked-signal mask */ + k_sigset_t saved_sig_blocked; /* for sigsuspend() */ + bool in_sigsuspend; /* sigsuspend semantics flag */ }; typedef vec_t(struct thread *) thread_vec_t; typedef vec_t(struct process *) process_vec_t; + +/* Forward-decl: ctty is a pointer-back to a resource (devtmpfs node payload). + * We don't include libk/resource.h here to keep the include graph shallow. */ +struct resource; + struct process { int64_t pid; enum process_states state; @@ -69,10 +83,30 @@ struct process { process_vec_t child_processes; struct auxval auxv; struct event death_event; + + + /* ── wait() encoding ─────────────────────────────────────────────── */ + /* POSIX wait status: encoded with W_EXITCODE / W_TERMSIG / W_STOPCODE + * (see signal.c). 
Kept wide enough for the encoded form (was uint8_t, + * which couldn't represent a signal kill or a stop). */ + int wait_status; + bool wait_status_valid; /* false until process exits / stops */ + uint8_t status; bool clean_up; char name[256]; struct process *next; + + + /* ── session / process-group ─────────────────────────────────────── */ + int64_t pgid; /* process-group id (== own pid for leader) */ + int64_t sid; /* session id (== own pid for session leader) */ + struct resource *ctty; /* controlling terminal, or NULL */ + + /* ── signal disposition ──────────────────────────────────────────── */ + struct k_sigaction sig_handlers[NSIG]; + k_sigset_t sig_pending; /* process-level pending set */ + spinlock_t sig_lock; /* protects handlers + pending */ }; #define CPU_STACK_SIZE (64 * 1024) diff --git a/src/sched/session.c b/src/sched/session.c new file mode 100644 index 0000000..b6648fb --- /dev/null +++ b/src/sched/session.c @@ -0,0 +1,161 @@ +/* + * POSIX session & process-group syscalls for KirkOS. + * + * The data lives on struct process (see sched_types.h): + * + * - pid: process id, immutable after creation + * - pgid: process-group id (defaults to pid; can be changed via + * setpgid; all processes in a pgrp share the same value) + * - sid: session id (defaults to pid for session-leaders; set by + * setsid; survives fork+exec) + * - ctty: controlling-terminal resource pointer (set on a session + * leader's first open of a tty, or via TIOCSCTTY) + * + * Relationships and rules (from POSIX / SUSv4): + * + * - Every process is in exactly one process group. + * - Every process group is in exactly one session. + * - The session leader has pid == sid and pgid == sid. + * - setsid() makes the caller a new session+pgroup leader, with no + * controlling terminal. Fails (EPERM) if the caller is already a + * pgroup leader. + * - setpgid(pid, pgid) moves `pid` into pgrp `pgid`. 
The target must + * be in the same session as the caller; pgid must equal an existing + * pgid in that session OR be the target's own pid (creating a new + * pgrp). Cannot change pgid of a session leader. + * - getpgid / getsid / getpgrp are obvious accessors. + */ + +#include "sched.h" +#include "sched_types.h" +#include "syscall.h" +#include "libk/errno.h" +#include "libk/debug.h" +#include "mp/spinlock.h" +#include +#include + + +/* Helper: pid==0 means "the calling process". */ +static struct process *resolve_pid(int64_t pid) { + struct process *self = sched_get_running_thread()->mother_proc; + if (pid == 0) { + return self; + } + return sched_pid_to_process(pid); +} + + +/* ── setsid() ───────────────────────────────────────────────────────────── */ +void syscall_setsid(struct syscall_arguments *args) { + struct process *proc = sched_get_running_thread()->mother_proc; + + /* POSIX: fails if the caller is already a pgroup leader. The session + * leader is by definition a pgroup leader (its own pgid is its pid), + * so a process that has previously called setsid() can't call it + * again. */ + if (proc->pgid == proc->pid) { + errno = EPERM; + args->ret = -1; + return; + } + + proc->sid = proc->pid; + proc->pgid = proc->pid; + proc->ctty = NULL; /* new session starts with no ctty */ + + args->ret = proc->sid; +} + + +/* ── setpgid(pid, pgid) ─────────────────────────────────────────────────── */ +void syscall_setpgid(struct syscall_arguments *args) { + int64_t pid = (int64_t)args->args0; + int64_t pgid = (int64_t)args->args1; + + struct process *self = sched_get_running_thread()->mother_proc; + struct process *target = resolve_pid(pid); + + if (target == NULL) { + errno = ESRCH; + args->ret = -1; + return; + } + + /* setpgid(0, ...) and setpgid(self, ...) — implicit conversion. */ + /* pgid==0 means: use the target's pid as the new pgid. 
*/ + if (pgid == 0) { + pgid = target->pid; + } + if (pgid < 0) { + errno = EINVAL; + args->ret = -1; + return; + } + + /* The target must be in the same session as the caller. */ + if (target->sid != self->sid) { + errno = EPERM; + args->ret = -1; + return; + } + + /* Cannot change the pgid of a session leader. */ + if (target->sid == target->pid) { + errno = EPERM; + args->ret = -1; + return; + } + + /* If pgid != target->pid (i.e. joining an EXISTING group), verify the + * group exists in the same session. */ + if (pgid != target->pid) { + bool found = false; + for (struct process *p = process_list; p != NULL; p = p->next) { + if (p->pgid == pgid && p->sid == self->sid) { + found = true; + break; + } + } + if (!found) { + errno = EPERM; + args->ret = -1; + return; + } + } + + target->pgid = pgid; + args->ret = 0; +} + + +/* ── getpgid(pid) ───────────────────────────────────────────────────────── */ +void syscall_getpgid(struct syscall_arguments *args) { + int64_t pid = (int64_t)args->args0; + struct process *target = resolve_pid(pid); + if (target == NULL) { + errno = ESRCH; + args->ret = -1; + return; + } + args->ret = target->pgid; +} + + +/* ── getsid(pid) ────────────────────────────────────────────────────────── */ +void syscall_getsid(struct syscall_arguments *args) { + int64_t pid = (int64_t)args->args0; + struct process *target = resolve_pid(pid); + if (target == NULL) { + errno = ESRCH; + args->ret = -1; + return; + } + args->ret = target->sid; +} + + +/* ── getpgrp() — POSIX shortcut for getpgid(0) ──────────────────────────── */ +void syscall_getpgrp(struct syscall_arguments *args) { + args->ret = sched_get_running_thread()->mother_proc->pgid; +} diff --git a/src/sched/signal.c b/src/sched/signal.c new file mode 100644 index 0000000..8987d8a --- /dev/null +++ b/src/sched/signal.c @@ -0,0 +1,894 @@ +/* + * KirkOS POSIX signal implementation. 
+ * + * Design notes + * ──────────── + * + * - Per-process disposition table (handlers, flags, sa_mask) stored on + * struct process. Process-level pending mask. Per-thread blocked mask + * (so that one thread can mask a signal while another thread handles + * it — POSIX semantics). + * + * - Delivery happens at userspace-return time: every IRET back to ring 3 + * (timer tick, IRQ return) and every SYSRET (syscall return) calls + * signal_check_and_dispatch(). The hook lives in + * arch/x86_64/cpu/syscall_handle.c and arch/x86_64/boot/isr.c. + * + * - Sigframe layout (lives on the user stack): + * + * rsp (just after kernel iret/sysret enters the handler) + * ┌─────────────────────────────────────────────┐ + * │ uint64_t restorer_addr ← popped by handler │ [+0] + * │ `ret` │ + * ├─────────────────────────────────────────────┤ + * │ uint64_t magic = K_SIGFRAME_MAGIC │ [+8] + * │ k_sigset_t saved_blocked_mask │ [+16] + * │ registers_t saved_regs │ [+144] + * │ int signo │ + * └─────────────────────────────────────────────┘ + * + * When the handler `ret`s, it pops restorer_addr — that's + * sa_restorer, which mlibc populates as a trampoline that does + * `syscall(SYS_sigreturn)`. The kernel's syscall_sigreturn handler + * reads the sigframe at (current_user_rsp - 8), validates magic, + * restores regs + mask, and returns. The asm pop/sysret picks up the + * restored values from the kernel-stack registers_t. + * + * - We do NOT support SA_SIGINFO three-argument handlers, sigaltstack, + * or RT signals. SA_RESTART is accepted but unimplemented (the + * syscall is not restarted — userspace gets the -EINTR return). + * SA_NODEFER and SA_RESETHAND are honored. 
+ */ + +#include "signal.h" +#include "sched.h" +#include "sched_types.h" +#include "syscall.h" +#include "libk/debug.h" +#include "libk/errno.h" +#include "libk/string.h" +#include "mp/spinlock.h" +#include "mm/vmm.h" +#include "arch/x86_64/cpu/reg.h" +#include "arch/x86_64/sys/prcb.h" +#include +#include +#include +#include "arch/x86_64/asm/asm.h" + + +/* ─────────────────────────────────────────────────────────────────────── + * Default actions table. + * Indexed by signal number; valid range 1..NSIG-1. Index 0 unused. + * ─────────────────────────────────────────────────────────────────────── */ +static const enum sig_default sig_defaults[NSIG] = { + [KSIGHUP] = SIG_ACT_TERM, + [KSIGINT] = SIG_ACT_TERM, + [KSIGQUIT] = SIG_ACT_CORE, + [KSIGILL] = SIG_ACT_CORE, + [KSIGTRAP] = SIG_ACT_CORE, + [KSIGABRT] = SIG_ACT_CORE, + [KSIGBUS] = SIG_ACT_CORE, + [KSIGFPE] = SIG_ACT_CORE, + [KSIGKILL] = SIG_ACT_TERM, + [KSIGUSR1] = SIG_ACT_TERM, + [KSIGSEGV] = SIG_ACT_CORE, + [KSIGUSR2] = SIG_ACT_TERM, + [KSIGPIPE] = SIG_ACT_TERM, + [KSIGALRM] = SIG_ACT_TERM, + [KSIGTERM] = SIG_ACT_TERM, + [KSIGSTKFLT] = SIG_ACT_TERM, + [KSIGCHLD] = SIG_ACT_IGN, + [KSIGCONT] = SIG_ACT_CONT, + [KSIGSTOP] = SIG_ACT_STOP, + [KSIGTSTP] = SIG_ACT_STOP, + [KSIGTTIN] = SIG_ACT_STOP, + [KSIGTTOU] = SIG_ACT_STOP, + [KSIGURG] = SIG_ACT_IGN, + [KSIGXCPU] = SIG_ACT_CORE, + [KSIGXFSZ] = SIG_ACT_CORE, + [KSIGVTALRM] = SIG_ACT_TERM, + [KSIGPROF] = SIG_ACT_TERM, + [KSIGWINCH] = SIG_ACT_IGN, + [KSIGIO] = SIG_ACT_TERM, + [KSIGPWR] = SIG_ACT_TERM, + [KSIGSYS] = SIG_ACT_CORE, +}; + +enum sig_default sig_default_action(int sig) { + if (sig < 1 || sig >= NSIG) { + return SIG_ACT_IGN; + } + /* We only know defaults for the standard POSIX signals 1..31. + * Anything in the RT range (32..64) is unsupported; treat as + * ignore so a bogus syscall can't accidentally terminate the + * caller. 
*/ + if (sig > 31) { + return SIG_ACT_IGN; + } + return sig_defaults[sig]; +} + + +/* ─────────────────────────────────────────────────────────────────────── + * Signal-state initialization + * ─────────────────────────────────────────────────────────────────────── */ + +void signal_init_process(struct process *p) { + for (int i = 0; i < NSIG; i++) { + p->sig_handlers[i].sa_handler = KSIG_DFL; + p->sig_handlers[i].sa_flags = 0; + p->sig_handlers[i].sa_restorer = NULL; + k_sigemptyset(&p->sig_handlers[i].sa_mask); + } + k_sigemptyset(&p->sig_pending); + spinlock_init(p->sig_lock); +} + +void signal_reset_on_exec(struct process *p) { + /* POSIX execve: handlers reset to SIG_DFL unless they were SIG_IGN + * (those stay SIG_IGN). sig_pending preserved. sa_mask cleared. */ + spinlock_acquire_or_wait(&p->sig_lock); + for (int i = 1; i < NSIG; i++) { + if (p->sig_handlers[i].sa_handler == KSIG_IGN) { + continue; + } + p->sig_handlers[i].sa_handler = KSIG_DFL; + p->sig_handlers[i].sa_flags = 0; + p->sig_handlers[i].sa_restorer = NULL; + k_sigemptyset(&p->sig_handlers[i].sa_mask); + } + spinlock_drop(&p->sig_lock); +} + +void signal_inherit_on_fork(struct process *parent, struct process *child) { + /* Fork copies handler dispositions and sig mask of the calling thread. + * Pending signals are NOT inherited (POSIX). */ + spinlock_acquire_or_wait(&parent->sig_lock); + for (int i = 0; i < NSIG; i++) { + child->sig_handlers[i] = parent->sig_handlers[i]; + } + spinlock_drop(&parent->sig_lock); + k_sigemptyset(&child->sig_pending); + spinlock_init(child->sig_lock); +} + + +/* ─────────────────────────────────────────────────────────────────────── + * Wait-status encoding + * + * POSIX <-> Linux/mlibc convention: + * exited(code) → (code & 0xff) << 8 + * killed(sig) → (sig & 0x7f) + * stopped(sig) → (sig << 8) | 0x7f + * continued → 0xffff + * + * mlibc's wait macros test the result and pull out the field; we just + * have to lay down these encoded ints. 
+ * ─────────────────────────────────────────────────────────────────────── */ +static inline int wait_encode_exit(int code) { return (code & 0xff) << 8; } +static inline int wait_encode_kill(int sig) { return sig & 0x7f; } +static inline int wait_encode_stop(int sig) { return (sig << 8) | 0x7f; } +static inline int wait_encode_cont(void) { return 0xffff; } + + +/* ─────────────────────────────────────────────────────────────────────── + * Send-a-signal: queues the signal and (where possible) wakes the + * receiver. + * + * We make a single design choice up front to keep the model tractable: + * pending signals live on the *process* (not per-thread). When delivered, + * the kernel picks the currently-running thread of that process to run + * the handler. This matches POSIX "process-directed" signal semantics. + * + * The thread-private piece is the blocked-signal mask only. + * ─────────────────────────────────────────────────────────────────────── */ + +/* Deliver default action immediately if no handler is registered. Called + * from signal_send_to_process for SIGKILL/SIGSTOP/SIGCONT specifically, + * because those can't be blocked or handled — they take effect synchronously + * even on threads that aren't returning to userspace. + * + * Caller must NOT hold p->sig_lock. */ +static void signal_force_default(struct process *p, int sig) { + switch (sig_default_action(sig)) { + case SIG_ACT_STOP: { + /* Mark all threads stopped. Threads parked in WAITING_FOR_EVENT + * are already off-CPU; flipping their state is enough — the + * scheduler will skip them once they're STOPPED. For threads + * running on a CPU, kick that CPU so it re-checks state on + * return from the IPI. 
*/ + spinlock_acquire_or_wait(&p->lock); + for (int i = 0; i < p->process_threads.length; i++) { + struct thread *t = p->process_threads.data[i]; + t->state = THREAD_STOPPED; + if (t->running_on_cpu >= 0) { + sched_trigger_yield(t->running_on_cpu); + } + } + p->state = PROCESS_STOPPED; + p->wait_status = wait_encode_stop(sig); + p->wait_status_valid = true; + spinlock_drop(&p->lock); + if (p->parent_process) { + signal_send_to_process(p->parent_process, KSIGCHLD); + } + break; + } + case SIG_ACT_CONT: { + spinlock_acquire_or_wait(&p->lock); + for (int i = 0; i < p->process_threads.length; i++) { + if (p->process_threads.data[i]->state == THREAD_STOPPED) { + p->process_threads.data[i]->state = THREAD_READY_TO_RUN; + } + } + p->state = PROCESS_NORMAL; + p->wait_status = wait_encode_cont(); + p->wait_status_valid = true; + spinlock_drop(&p->lock); + if (p->parent_process) { + signal_send_to_process(p->parent_process, KSIGCHLD); + } + break; + } + case SIG_ACT_TERM: + case SIG_ACT_CORE: { + /* Terminate the process now. Don't wait for a thread to return + * to userspace — that path may never run if all threads are + * parked in WAITING_FOR_EVENT. */ + p->wait_status = wait_encode_kill(sig); + p->wait_status_valid = true; + process_kill(p, true); + break; + } + case SIG_ACT_IGN: + break; + } +} + +int signal_send_to_process(struct process *p, int sig) { + if (!p || sig < 1 || sig >= NSIG) { + return -EINVAL; + } + + spinlock_acquire_or_wait(&p->lock); + bool dead = (p->state == PROCESS_KILLED); + spinlock_drop(&p->lock); + if (dead) { + return -ESRCH; + } + + /* sig 0 is "check existence". The caller already validated `p` exists + * by reaching this function; nothing more to do. */ + if (sig == 0) { + return 0; + } + + spinlock_acquire_or_wait(&p->sig_lock); + k_sigaddset(&p->sig_pending, sig); + spinlock_drop(&p->sig_lock); + + /* SIGKILL and SIGSTOP are not maskable. Apply default action now. 
*/ + if (sig == KSIGKILL) { + spinlock_acquire_or_wait(&p->sig_lock); + k_sigdelset(&p->sig_pending, sig); + spinlock_drop(&p->sig_lock); + signal_force_default(p, sig); + return 0; + } + if (sig == KSIGSTOP) { + spinlock_acquire_or_wait(&p->sig_lock); + k_sigdelset(&p->sig_pending, sig); + spinlock_drop(&p->sig_lock); + signal_force_default(p, sig); + return 0; + } + if (sig == KSIGCONT) { + signal_force_default(p, sig); + /* keep pending so handler (if any) also runs */ + } + + /* Wake one waiting thread (if any) so it can reach the signal-check + * on the way back to userspace. We pick the first WAITING_FOR_EVENT + * thread and bump it to READY_TO_RUN. */ + spinlock_acquire_or_wait(&p->lock); + for (int i = 0; i < p->process_threads.length; i++) { + struct thread *t = p->process_threads.data[i]; + if (t->state == THREAD_WAITING_FOR_EVENT) { + t->state = THREAD_READY_TO_RUN; + break; + } + } + spinlock_drop(&p->lock); + + return 0; +} + +int signal_send_to_pgrp(int64_t pgid, int sig) { + extern struct process *process_list; + int sent = 0; + + for (struct process *p = process_list; p != NULL; p = p->next) { + if (p->pgid == pgid && p->state != PROCESS_KILLED) { + if (signal_send_to_process(p, sig) == 0) { + sent++; + } + } + } + return sent > 0 ? 0 : -ESRCH; +} + + +/* ─────────────────────────────────────────────────────────────────────── + * Signal dispatch on userspace return + * ─────────────────────────────────────────────────────────────────────── */ + +#define K_SIGFRAME_MAGIC 0xC0DEFACEDEADBEEFULL + +struct k_sigframe { + uint64_t restorer_addr; /* popped by handler's `ret` */ + uint64_t magic; + k_sigset_t saved_blocked; /* sig_blocked at signal-delivery time */ + registers_t saved_regs; /* full pre-signal user state */ + int signo; + int _pad; +} __attribute__((aligned(16))); + +/* Pick the lowest-numbered pending unblocked signal of the current thread's + * process. Returns 0 if none. Caller holds proc->sig_lock. 
*/ +static int pick_pending_signal(struct process *proc, struct thread *thrd) { + for (int s = 1; s < NSIG; s++) { + if (!k_sigismember(&proc->sig_pending, s)) continue; + if (k_sigismember(&thrd->sig_blocked, s)) continue; + return s; + } + return 0; +} + +/* Write a sigframe to the user stack at user_rsp - sizeof(sigframe). + * Returns the new rsp (i.e. the address of the sigframe), or 0 on + * translation failure. + * + * `saved_mask` is the blocked-signal mask to record in the sigframe so + * sigreturn can restore it. This is normally thrd->sig_blocked, but for a + * handler delivered inside sigsuspend it must be the pre-sigsuspend mask + * (thrd->saved_sig_blocked) so the temporary suspend mask doesn't leak + * past the handler. */ +static uint64_t build_sigframe(struct process *proc, struct thread *thrd, + registers_t *reg, int sig, + void (*restorer)(void), + k_sigset_t saved_mask) { + uint64_t cur_rsp = reg->rsp; + + /* 16-byte align the frame, leave 8 bytes for the "fake call return" + * — when the handler is entered via sysret/iret with rsp at the + * sigframe address, the first qword on the stack is restorer_addr. + * The handler's `ret` pops it. Subsequent stack frames within the + * handler stay 16-aligned because we placed sigframe on a 16-aligned + * address. + * + * Also reserve a 128-byte red zone per the System V ABI — the user + * code before the signal might have data below rsp it expects to + * survive. */ + uint64_t new_rsp = cur_rsp - 128; /* red zone */ + new_rsp -= sizeof(struct k_sigframe); + new_rsp &= ~(uint64_t)15; /* align */ + /* SysV x86_64 ABI: at function entry, (rsp + 8) must be 16-byte + * aligned (the caller's CALL placed an 8-byte return address). + * Our restorer_addr at sigframe[0] plays the role of that return + * address, so we want rsp aligned to (16k + 8). After aligning + * down to 16, subtract 8 to land on the correct boundary. 
*/ + new_rsp -= 8; + + struct k_sigframe frame; + frame.restorer_addr = (uint64_t)restorer; + frame.magic = K_SIGFRAME_MAGIC; + frame.saved_blocked = saved_mask; + frame.saved_regs = *reg; + frame.signo = sig; + frame._pad = 0; + + (void)thrd; /* mask now passed in directly */ + + /* Translate user address to kernel and write. Failure path: just kill + * the process — the user can't deliver a signal anyway. */ + if (!syscall_helper_copy_to_user(new_rsp, &frame, sizeof(frame))) { + return 0; + } + + (void)proc; /* unused in this minimal impl */ + return new_rsp; +} + +bool signal_check_and_dispatch(registers_t *reg) { + /* Only deliver on return to userspace. cs low 2 bits == 3 ⇒ ring 3. */ + if ((reg->cs & 0x3) != 0x3) { + return false; + } + + struct thread *thrd = sched_get_running_thread(); + if (!thrd) return false; + struct process *proc = thrd->mother_proc; + if (!proc || proc->state == PROCESS_KILLED) return false; + + spinlock_acquire_or_wait(&proc->sig_lock); + int sig = pick_pending_signal(proc, thrd); + if (sig == 0) { + spinlock_drop(&proc->sig_lock); + return false; + } + + struct k_sigaction act = proc->sig_handlers[sig]; + k_sigdelset(&proc->sig_pending, sig); + spinlock_drop(&proc->sig_lock); + + /* If we're returning from a sigsuspend, any path that "consumes" the + * pending signal must restore the pre-suspend mask before handing + * control back to userspace. The handler path does this via the + * sigframe; the ignore paths must do it inline. */ + #define KSIG_DISPATCH_RESTORE_SUSPEND() do { \ + if (thrd->in_sigsuspend) { \ + thrd->sig_blocked = thrd->saved_sig_blocked; \ + thrd->in_sigsuspend = false; \ + } \ + } while (0) + + /* SIG_IGN: drop it. */ + if (act.sa_handler == KSIG_IGN) { + KSIG_DISPATCH_RESTORE_SUSPEND(); + return false; + } + + /* SIG_DFL: apply default action. 
*/ + if (act.sa_handler == KSIG_DFL) { + enum sig_default da = sig_default_action(sig); + switch (da) { + case SIG_ACT_IGN: + KSIG_DISPATCH_RESTORE_SUSPEND(); + return false; + case SIG_ACT_STOP: { + /* Stop ourselves: mark thread STOPPED, yield. We'll + * sit here until SIGCONT comes in. */ + KSIG_DISPATCH_RESTORE_SUSPEND(); + thrd->state = THREAD_STOPPED; + proc->state = PROCESS_STOPPED; + proc->wait_status = wait_encode_stop(sig); + proc->wait_status_valid = true; + if (proc->parent_process) { + signal_send_to_process(proc->parent_process, KSIGCHLD); + } + /* The arch return-to-user path will iret/sysret, and the + * thread won't be scheduled again until state changes + * back to READY/NORMAL via SIGCONT delivery in + * signal_send_to_process. */ + sched_yield(false); + return true; + } + case SIG_ACT_CONT: + /* No-op when explicitly received; the wakeup happens in + * signal_send_to_process. */ + KSIG_DISPATCH_RESTORE_SUSPEND(); + return false; + case SIG_ACT_TERM: + case SIG_ACT_CORE: + /* Terminate the process. Encode and exit. */ + proc->wait_status = wait_encode_kill(sig); + proc->wait_status_valid = true; + /* process_kill is in sched.c — it will not return for the + * current thread. */ + extern void process_kill(struct process *, bool); + process_kill(proc, true); + return true; + } + } + + /* User-installed handler. Build sigframe, mutate reg. */ + if (!(act.sa_flags & KSA_RESTORER) || act.sa_restorer == NULL) { + /* Without a restorer we can't return from the handler. Treat as + * if no handler installed — terminate. 
*/ + kprintf("[signal] sig %d for pid %ld has no SA_RESTORER; killing\n", + sig, proc->pid); + proc->wait_status = wait_encode_kill(sig); + proc->wait_status_valid = true; + extern void process_kill(struct process *, bool); + process_kill(proc, true); + return true; + } + + /* If we got here inside a sigsuspend, the user-visible mask to + * restore is the pre-sigsuspend mask, not the temporary suspend mask + * that's currently in thrd->sig_blocked. sigreturn will restore from + * the sigframe, so stash the right value there. */ + k_sigset_t mask_to_save = thrd->in_sigsuspend ? thrd->saved_sig_blocked + : thrd->sig_blocked; + if (thrd->in_sigsuspend) { + thrd->in_sigsuspend = false; + } + + uint64_t new_rsp = build_sigframe(proc, thrd, reg, sig, act.sa_restorer, + mask_to_save); + if (new_rsp == 0) { + /* Couldn't build frame — SIGSEGV the process. */ + kprintf("[signal] failed to build sigframe for sig %d, pid %ld\n", + sig, proc->pid); + extern void process_kill(struct process *, bool); + process_kill(proc, true); + return true; + } + + /* Merge the handler's sa_mask + (this signal unless SA_NODEFER) into + * sig_blocked for the duration of the handler. sigreturn restores. */ + k_sigorset(&thrd->sig_blocked, &thrd->sig_blocked, &act.sa_mask); + if (!(act.sa_flags & KSA_NODEFER)) { + k_sigaddset(&thrd->sig_blocked, sig); + } + + /* If SA_RESETHAND, reset handler to SIG_DFL after one invocation. */ + if (act.sa_flags & KSA_RESETHAND) { + spinlock_acquire_or_wait(&proc->sig_lock); + proc->sig_handlers[sig].sa_handler = KSIG_DFL; + proc->sig_handlers[sig].sa_flags = 0; + k_sigemptyset(&proc->sig_handlers[sig].sa_mask); + spinlock_drop(&proc->sig_lock); + } + + cli(); // Interrupts must be disabled to call current cpu + + /* Redirect to handler. Argument convention: rdi = signo, rsi/rdx = 0 + * (we don't populate siginfo / ucontext pointers — SA_SIGINFO is not + * supported). The handler will read its arg from rdi. 
*/ + uint64_t handler_addr = (uint64_t)act.sa_handler; + + /* iret path uses reg->rip / reg->rsp / reg->rflags directly. + * sysret path uses reg->rcx (rip) / reg->r11 (rflags) and the per-CPU + * user_stack field for rsp. Update all of them so we're correct on + * both return paths. */ + reg->rip = handler_addr; + reg->rcx = handler_addr; + reg->rsp = new_rsp; + prcb_return_current_cpu()->user_stack = new_rsp; + reg->r11 = reg->rflags; + reg->rdi = (uint64_t)sig; + reg->rsi = 0; + reg->rdx = 0; + sti(); + return true; +} + + +/* ─────────────────────────────────────────────────────────────────────── + * sigreturn — invoked by the user's sa_restorer trampoline after the + * handler returns. Reads the sigframe (which sits at user rsp - 8 — the + * `restorer_addr` was popped by the handler's `ret` so user rsp now + * points at `magic`) and restores the original register state and mask. + * ─────────────────────────────────────────────────────────────────────── */ +void syscall_sigreturn(struct syscall_arguments *args) { + struct thread *thrd = sched_get_running_thread(); + registers_t *reg = (registers_t *)args->reg_ptr; + if (!reg) { + /* No reg pointer (shouldn't happen) — return -EINVAL. */ + args->ret = -1; + errno = EINVAL; + return; + } + + /* At the moment of sigreturn, the user's rsp points at the byte just + * past `restorer_addr` (which the handler popped). So sigframe lives + * at rsp - 8 (the magic field is the first thing after restorer_addr, + * and restorer_addr is sizeof(uint64_t)). */ + uint64_t frame_addr = reg->rsp - 8; + + struct k_sigframe frame; + if (!syscall_helper_copy_from_user(frame_addr, &frame, sizeof(frame))) { + args->ret = -1; + errno = EFAULT; + return; + } + if (frame.magic != K_SIGFRAME_MAGIC) { + kprintf("[signal] sigreturn: bad magic (pid %ld)\n", + thrd->mother_proc->pid); + args->ret = -1; + errno = EINVAL; + return; + } + + /* SECURITY: the sigframe lives on the user stack and is user-writable. 
+ * A malicious userspace can overwrite saved_regs between the handler's + * first instruction and the sigreturn call, putting arbitrary values in + * cs/ss/rflags to elevate privilege via iret/sysret. Without this clamp: + * - cs = 0x08 (kernel CS) → privilege escalation to ring 0 + * - ss = 0x10 (kernel SS) → bypass user stack + * - rflags IOPL=3 / VM=1 / IF=0 → unauthorized I/O privilege, v86, or + * interrupt disable in user mode + * This is the classic Linux CVE class (see CVE-2014-9322 for a variant). + * Force the privileged fields to safe ring-3 values before they reach the + * kernel-stack registers_t block that iret/sysret will consume. + * + * rflags bit assignments (user-allowed vs forced): + * bit 0 CF, bit 2 PF, bit 4 AF, bit 6 ZF, bit 7 SF, + * bit 8 TF (single-step), bit 10 DF, bit 11 OF, bit 18 AC, bit 21 ID + * → user can set: mask 0x244D5 + * bit 1 reserved-1 (always 1) + bit 9 IF (must be 1 for userspace) + * → forced on: 0x202 + * bits 12-13 IOPL, bit 14 NT, bit 15 reserved, bit 16 RF, bit 17 VM, + * bits 19-20 VIF/VIP, bits 22-63 reserved + * → forced off by mask + */ + cli(); // Interrupts must be disabled to call return current cpu; + frame.saved_regs.cs = 0x23; /* user CS (matches amd_syscall.asm) */ + frame.saved_regs.ss = 0x1b; /* user SS (matches amd_syscall.asm) */ + { + const uint64_t RFLAGS_USER_MASK = 0x000244D5ULL; + const uint64_t RFLAGS_FORCED = 0x00000202ULL; /* bit 1 + IF */ + frame.saved_regs.rflags = + (frame.saved_regs.rflags & RFLAGS_USER_MASK) | RFLAGS_FORCED; + } + + + /* Restore registers and blocked mask. */ + *reg = frame.saved_regs; + prcb_return_current_cpu()->user_stack = reg->rsp; + thrd->sig_blocked = frame.saved_blocked; + thrd->in_sigsuspend = false; + sti(); + + /* args->ret is whatever was in rax — for sysret, that's the value + * that becomes the user's rax. The saved_regs.rax is the value the + * user had before the signal, which is what we want. Don't overwrite + * it via args->ret. 
To prevent syscall_handler's `reg->rax = args.ret` + * line from clobbering, set args->ret to the saved rax. */ + args->ret = frame.saved_regs.rax; +} + + +/* ─────────────────────────────────────────────────────────────────────── + * sigaction / sigprocmask / sigpending + * ─────────────────────────────────────────────────────────────────────── */ + +void syscall_sigaction(struct syscall_arguments *args) { + int signo = (int)args->args0; + uintptr_t user_act = args->args1; /* const struct sigaction * */ + uintptr_t user_oact = args->args2; /* struct sigaction * */ + + if (signo < 1 || signo >= NSIG || signo == KSIGKILL || signo == KSIGSTOP) { + errno = EINVAL; + args->ret = -1; + return; + } + + struct process *proc = sched_get_running_thread()->mother_proc; + spinlock_acquire_or_wait(&proc->sig_lock); + + if (user_oact != 0) { + struct k_sigaction oa = proc->sig_handlers[signo]; + spinlock_drop(&proc->sig_lock); + if (!syscall_helper_copy_to_user(user_oact, &oa, sizeof(oa))) { + errno = EFAULT; + args->ret = -1; + return; + } + spinlock_acquire_or_wait(&proc->sig_lock); + } + + if (user_act != 0) { + struct k_sigaction na; + spinlock_drop(&proc->sig_lock); + if (!syscall_helper_copy_from_user(user_act, &na, sizeof(na))) { + errno = EFAULT; + args->ret = -1; + return; + } + spinlock_acquire_or_wait(&proc->sig_lock); + /* Don't allow SIGKILL/SIGSTOP to ever have a non-default handler — + * but we already rejected those above. 
*/ + proc->sig_handlers[signo] = na; + } + + spinlock_drop(&proc->sig_lock); + args->ret = 0; +} + +void syscall_sigprocmask(struct syscall_arguments *args) { + int how = (int)args->args0; + uintptr_t user_set = args->args1; + uintptr_t user_oldset = args->args2; + + struct thread *thrd = sched_get_running_thread(); + + if (user_oldset != 0) { + if (!syscall_helper_copy_to_user(user_oldset, &thrd->sig_blocked, + sizeof(k_sigset_t))) { + errno = EFAULT; + args->ret = -1; + return; + } + } + + if (user_set != 0) { + k_sigset_t s; + if (!syscall_helper_copy_from_user(user_set, &s, sizeof(s))) { + errno = EFAULT; + args->ret = -1; + return; + } + + switch (how) { + case KSIG_BLOCK: + k_sigorset(&thrd->sig_blocked, &thrd->sig_blocked, &s); + break; + case KSIG_UNBLOCK: + for (int i = 0; i < 16; i++) { + thrd->sig_blocked.bits[i] &= ~s.bits[i]; + } + break; + case KSIG_SETMASK: + thrd->sig_blocked = s; + break; + default: + errno = EINVAL; + args->ret = -1; + return; + } + + /* SIGKILL and SIGSTOP cannot be blocked. 
*/ + k_sigdelset(&thrd->sig_blocked, KSIGKILL); + k_sigdelset(&thrd->sig_blocked, KSIGSTOP); + } + + args->ret = 0; +} + +void syscall_sigpending(struct syscall_arguments *args) { + uintptr_t user_set = args->args0; + struct process *proc = sched_get_running_thread()->mother_proc; + + spinlock_acquire_or_wait(&proc->sig_lock); + k_sigset_t snap = proc->sig_pending; + spinlock_drop(&proc->sig_lock); + + if (!syscall_helper_copy_to_user(user_set, &snap, sizeof(snap))) { + errno = EFAULT; + args->ret = -1; + return; + } + args->ret = 0; +} + +void syscall_sigsuspend(struct syscall_arguments *args) { + uintptr_t user_mask = args->args0; + k_sigset_t new_mask; + + if (!syscall_helper_copy_from_user(user_mask, &new_mask, sizeof(new_mask))) { + errno = EFAULT; + args->ret = -1; + return; + } + + struct thread *thrd = sched_get_running_thread(); + thrd->saved_sig_blocked = thrd->sig_blocked; + thrd->sig_blocked = new_mask; + k_sigdelset(&thrd->sig_blocked, KSIGKILL); + k_sigdelset(&thrd->sig_blocked, KSIGSTOP); + thrd->in_sigsuspend = true; + + /* Wait until a signal arrives. We loop on sched_yield until + * signal_check_and_dispatch fires. The dispatch will restore the old + * mask (via the sigframe path) only after running the handler — for + * sigsuspend, the man page says the mask is restored "on return". The + * sigframe path already handles that. */ + for (;;) { + struct process *proc = thrd->mother_proc; + spinlock_acquire_or_wait(&proc->sig_lock); + int sig = pick_pending_signal(proc, thrd); + spinlock_drop(&proc->sig_lock); + if (sig != 0) { + break; + } + sched_yield(true); + } + + /* Do NOT restore thrd->sig_blocked here. signal_check_and_dispatch + * runs after this syscall returns and needs to see in_sigsuspend so + * it can stash the *pre-sigsuspend* mask in the sigframe (so + * sigreturn restores the correct mask). 
The dispatch code clears + * in_sigsuspend once it has consumed the saved mask, and on the + * SIG_IGN / default-ignore paths it restores thrd->sig_blocked + * from saved_sig_blocked inline via KSIG_DISPATCH_RESTORE_SUSPEND(). + * Either way, by the time control reaches userspace the suspend mask + * is gone — matching sigsuspend(2) semantics of "until the signal is + * delivered". */ + errno = EINTR; + args->ret = -1; +} + +void syscall_pause(struct syscall_arguments *args) { + struct thread *thrd = sched_get_running_thread(); + for (;;) { + struct process *proc = thrd->mother_proc; + spinlock_acquire_or_wait(&proc->sig_lock); + int sig = pick_pending_signal(proc, thrd); + spinlock_drop(&proc->sig_lock); + if (sig != 0) { + break; + } + sched_yield(true); + } + errno = EINTR; + args->ret = -1; +} + + +/* ─────────────────────────────────────────────────────────────────────── + * kill — replaces the old stub in sched.c. Now actually honors the + * signal number. + * ─────────────────────────────────────────────────────────────────────── */ + +void syscall_kill(struct syscall_arguments *args) { + int64_t pid = (int64_t)args->args0; + int sig = (int)args->args1; + + if (sig < 0 || sig >= NSIG) { + errno = EINVAL; + args->ret = -1; + return; + } + + extern struct process *sched_pid_to_process(int64_t); + + if (pid > 0) { + struct process *p = sched_pid_to_process(pid); + if (!p) { + errno = ESRCH; + args->ret = -1; + return; + } + int r = signal_send_to_process(p, sig); + if (r < 0) { + errno = -r; + args->ret = -1; + return; + } + args->ret = 0; + return; + } + + if (pid == 0) { + /* Send to every process in the caller's process group. 
*/ + struct process *self = sched_get_running_thread()->mother_proc; + int r = signal_send_to_pgrp(self->pgid, sig); + if (r < 0) { + errno = -r; + args->ret = -1; + return; + } + args->ret = 0; + return; + } + + if (pid == -1) { + /* Send to every process the caller has permission to signal — + * which, in single-user kirkos, is every process. We skip pid 1 + * (init) so we don't accidentally kill it. */ + extern struct process *process_list; + int sent = 0; + for (struct process *p = process_list; p != NULL; p = p->next) { + if (p->pid > 1 && p->state != PROCESS_KILLED) { + if (signal_send_to_process(p, sig) == 0) { + sent++; + } + } + } + args->ret = sent > 0 ? 0 : -1; + if (sent == 0) errno = ESRCH; + return; + } + + /* pid < -1: send to process group |pid|. */ + int r = signal_send_to_pgrp(-pid, sig); + if (r < 0) { + errno = -r; + args->ret = -1; + return; + } + args->ret = 0; +} diff --git a/src/sched/signal.h b/src/sched/signal.h new file mode 100644 index 0000000..8c217b8 --- /dev/null +++ b/src/sched/signal.h @@ -0,0 +1,173 @@ +#pragma once + +#include +#include +#include +#include "arch/x86_64/cpu/reg.h" + +/* + * KirkOS in-kernel signal model. + * + * ABI-compatible with mlibc's abi-bits/signal.h on the userspace side: + * - Signal numbers (1..31) match Linux/glibc/mlibc. + * - sigset_t is the same 1024-bit array of uint64_t. + * - struct sigaction has the same field layout and offsets. + * - SA_* flags use the same bit values. + * + * What this file does NOT cover (intentionally out of scope for the first + * pass): real-time signals (SIGRTMIN..SIGRTMAX), sigqueue, sigaltstack, + * SA_SIGINFO three-argument handlers (the kernel always calls the handler + * with a single int argument — userspace handlers that requested + * SA_SIGINFO get undefined values for the siginfo_t* and ucontext_t* + * pointers). 
/* One past the highest signal number accepted by the k_sig* helpers. */
#define NSIG 65

/* ── Standard signal numbers ────────────────────────────────────────────── */
#define KSIGHUP     1
#define KSIGINT     2
#define KSIGQUIT    3
#define KSIGILL     4
#define KSIGTRAP    5
#define KSIGABRT    6
#define KSIGBUS     7
#define KSIGFPE     8
#define KSIGKILL    9
#define KSIGUSR1    10
#define KSIGSEGV    11
#define KSIGUSR2    12
#define KSIGPIPE    13
#define KSIGALRM    14
#define KSIGTERM    15
#define KSIGSTKFLT  16
#define KSIGCHLD    17
#define KSIGCONT    18
#define KSIGSTOP    19
#define KSIGTSTP    20
#define KSIGTTIN    21
#define KSIGTTOU    22
#define KSIGURG     23
#define KSIGXCPU    24
#define KSIGXFSZ    25
#define KSIGVTALRM  26
#define KSIGPROF    27
#define KSIGWINCH   28
#define KSIGIO      29
#define KSIGPOLL    29
#define KSIGPWR     30
#define KSIGSYS     31

/* ── sigaction flags ────────────────────────────────────────────────────── */
#define KSA_NOCLDSTOP  0x00000001
#define KSA_NOCLDWAIT  0x00000002
#define KSA_SIGINFO    0x00000004
#define KSA_RESTORER   0x04000000
#define KSA_ONSTACK    0x08000000
#define KSA_RESTART    0x10000000
#define KSA_NODEFER    0x40000000
#define KSA_RESETHAND  0x80000000

/* ── SIG_DFL / SIG_IGN sentinel pointers ────────────────────────────────── */
#define KSIG_DFL ((void *)0)
#define KSIG_IGN ((void *)1)

/* ── sigprocmask `how` values ───────────────────────────────────────────── */
#define KSIG_BLOCK    0
#define KSIG_UNBLOCK  1
#define KSIG_SETMASK  2

/* ── sigset_t mirroring mlibc layout (16 × uint64_t = 1024 bits) ────────── */
#define K_SIGSET_WORDS      16  /* uint64_t words per set */
#define K_SIGSET_WORD_BITS  64  /* signals tracked per word */

typedef struct {
    uint64_t bits[K_SIGSET_WORDS];
} k_sigset_t;

/* True when `sig` is a signal number the helpers below will touch
 * (1..NSIG-1); anything else is silently ignored by them. */
static inline bool k_sig_valid(int sig) {
    return sig >= 1 && sig < NSIG;
}

/* Word index holding signal `sig` (signal 1 lives in bit 0 of word 0). */
static inline int k_sig_word(int sig) {
    return (sig - 1) / K_SIGSET_WORD_BITS;
}

/* Single-bit mask for signal `sig` within its word. */
static inline uint64_t k_sig_bit(int sig) {
    return (uint64_t)1 << ((sig - 1) % K_SIGSET_WORD_BITS);
}

/* Clear every bit of `s`. */
static inline void k_sigemptyset(k_sigset_t *s) {
    for (int i = 0; i < K_SIGSET_WORDS; i++) s->bits[i] = 0;
}

/* Set every bit of `s`, including bits above NSIG. */
static inline void k_sigfillset(k_sigset_t *s) {
    for (int i = 0; i < K_SIGSET_WORDS; i++) s->bits[i] = ~(uint64_t)0;
}

/* Add `sig` to `s`; out-of-range signal numbers are ignored. */
static inline void k_sigaddset(k_sigset_t *s, int sig) {
    if (!k_sig_valid(sig)) return;
    s->bits[k_sig_word(sig)] |= k_sig_bit(sig);
}

/* Remove `sig` from `s`; out-of-range signal numbers are ignored. */
static inline void k_sigdelset(k_sigset_t *s, int sig) {
    if (!k_sig_valid(sig)) return;
    s->bits[k_sig_word(sig)] &= ~k_sig_bit(sig);
}

/* Return whether `sig` is in `s`; out-of-range signal numbers yield false. */
static inline bool k_sigismember(const k_sigset_t *s, int sig) {
    if (!k_sig_valid(sig)) return false;
    return (s->bits[k_sig_word(sig)] & k_sig_bit(sig)) != 0;
}

/* Return true when no bit of `s` is set. */
static inline bool k_sigisempty(const k_sigset_t *s) {
    for (int i = 0; i < K_SIGSET_WORDS; i++) if (s->bits[i]) return false;
    return true;
}

/* dst = a | b, word by word. `dst` may alias `a` or `b`. */
static inline void k_sigorset(k_sigset_t *dst,
                              const k_sigset_t *a, const k_sigset_t *b) {
    for (int i = 0; i < K_SIGSET_WORDS; i++) dst->bits[i] = a->bits[i] | b->bits[i];
}

/* ── Kernel-side sigaction (mirrors userspace layout) ───────────────────── */
struct k_sigaction {
    void       *sa_handler;        /* SIG_DFL / SIG_IGN / function ptr */
    uint64_t    sa_flags;
    void      (*sa_restorer)(void); /* required if KSA_RESTORER set */
    k_sigset_t  sa_mask;
};

/* ── Default action for an unhandled signal ─────────────────────────────── */
enum sig_default {
    SIG_ACT_TERM = 0,   /* terminate process */
    SIG_ACT_CORE,       /* terminate (we don't dump core) */
    SIG_ACT_IGN,        /* ignore */
    SIG_ACT_STOP,       /* stop process */
    SIG_ACT_CONT,       /* resume a stopped process */
};

enum sig_default sig_default_action(int sig);

/* ── Forward decls ──────────────────────────────────────────────────────── */
struct thread;
struct process;
struct syscall_arguments;

/* ── Per-process signal init / reset ────────────────────────────────────── */
void signal_init_process(struct process *p);
void signal_reset_on_exec(struct process *p);
void signal_inherit_on_fork(struct process *parent, struct process *child);

/* ── Send-a-signal entry points ─────────────────────────────────────────── */

/* Queue a signal on a specific process. Returns 0 on success, -ESRCH if
 * the process is killed/cleaned up. Handles default action delivery
 * (SIGKILL/SIGSTOP/SIGCONT) inline. */
int signal_send_to_process(struct process *p, int sig);
Returns 0 if at + * least one process received it, -ESRCH otherwise. */ +int signal_send_to_pgrp(int64_t pgid, int sig); + +/* ── Userspace-return-path hook ─────────────────────────────────────────── */ + +/* Called from the syscall return path and from the IRQ return path, + * guarded by (reg->cs & 3) == 3 (returning to user mode). If a deliverable + * signal is pending, this builds a sigframe on the user stack and rewrites + * `reg` so the iret/sysret enters the handler. Returns true if it dispatched. */ +bool signal_check_and_dispatch(registers_t *reg); + +/* ── Signal-related syscalls (registered from main.c) ───────────────────── */ +void syscall_sigaction(struct syscall_arguments *args); +void syscall_sigprocmask(struct syscall_arguments *args); +void syscall_sigpending(struct syscall_arguments *args); +void syscall_sigsuspend(struct syscall_arguments *args); +void syscall_pause(struct syscall_arguments *args); +void syscall_kill(struct syscall_arguments *args); /* replaces sched.c version */ +void syscall_sigreturn(struct syscall_arguments *args); diff --git a/src/sched/syscall.h b/src/sched/syscall.h index b5b9397..f890923 100644 --- a/src/sched/syscall.h +++ b/src/sched/syscall.h @@ -12,6 +12,11 @@ struct syscall_arguments { uint64_t args4; uint64_t args5; uint64_t ret; + /* Pointer to the kernel-stack registers_t for this syscall — set by + * the arch-side syscall_handler. Lets handlers that need to munge + * userspace register state (e.g. sigreturn) reach into it. + * NULL if the syscall was dispatched via an alternate path. 
*/ + void *reg_ptr; }; typedef void (*syscall_handler_t)(struct syscall_arguments *); diff --git a/user/include/mlibc/sysdeps/kirkos/generic/thread.S b/user/include/mlibc/sysdeps/kirkos/generic/thread.S new file mode 100644 index 0000000..47ab6a9 --- /dev/null +++ b/user/include/mlibc/sysdeps/kirkos/generic/thread.S @@ -0,0 +1,9 @@ +.section .text +.global __mlibc_thread_entry +__mlibc_thread_entry: + pop %rdi + pop %rsi + pop %rdx + call __mlibc_thread_trampoline + +.section .note.GNU-stack,"",%progbits diff --git a/user/include/mlibc/sysdeps/kirkos/generic/thread.cpp b/user/include/mlibc/sysdeps/kirkos/generic/thread.cpp new file mode 100644 index 0000000..0e8a73f --- /dev/null +++ b/user/include/mlibc/sysdeps/kirkos/generic/thread.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include + + +extern "C" void __mlibc_thread_trampoline(void *(*fn)(void *), Tcb *tcb, void *arg) { + if (mlibc::sys_tcb_set(tcb)) { + __ensure(!"failed to set tcb for new thread"); + } + + while (__atomic_load_n(&tcb->tid, __ATOMIC_RELAXED) == 0) { + mlibc::sys_futex_wait(&tcb->tid, 0, nullptr); + } + + tcb->invokeThreadFunc(reinterpret_cast(fn), arg); + + __atomic_store_n(&tcb->didExit, 1, __ATOMIC_RELEASE); + mlibc::sys_futex_wake(&tcb->didExit); + + mlibc::sys_thread_exit(); +} + +#define DEFAULT_STACK (2 * 1024 * 1024) + +namespace mlibc { + int sys_prepare_stack(void **stack, void *entry, void *arg, void *tcb, size_t *stack_size, size_t *guard_size, void **stack_base) { + // TODO guard + + mlibc::infoLogger() << "mlibc: sys_prepare_stack() does not setup a guard!" << frg::endlog; + + *guard_size = 0; + + *stack_size = *stack_size ? 
*stack_size : DEFAULT_STACK; + + if (!*stack) { + *stack_base = mmap(NULL, *stack_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*stack_base == MAP_FAILED) { + return errno; + } + } else { + *stack_base = *stack; + } + + *stack = (void *)((char *)*stack_base + *stack_size); + + void **stack_it = (void **)*stack; + + *--stack_it = arg; + *--stack_it = tcb; + *--stack_it = entry; + + *stack = (void *)stack_it; + + return 0; + } +} + diff --git a/user/include/mlibc/sysdeps/kirkos/include/syscall.h b/user/include/mlibc/sysdeps/kirkos/include/syscall.h index bc44ca6..f310344 100644 --- a/user/include/mlibc/sysdeps/kirkos/include/syscall.h +++ b/user/include/mlibc/sysdeps/kirkos/include/syscall.h @@ -9,7 +9,11 @@ #define SYS_mmap 9 #define SYS_mprotect 10 #define SYS_munmap 11 +#define SYS_sigaction 13 +#define SYS_sigprocmask 14 +#define SYS_sigreturn 15 #define SYS_ioctl 16 +#define SYS_pause 34 #define SYS_nanosleep 35 #define SYS_getpid 39 #define SYS_fork 57 @@ -22,9 +26,15 @@ #define SYS_rmdir 84 #define SYS_readdir 89 #define SYS_puts 103 +#define SYS_setpgid 109 #define SYS_getppid 110 +#define SYS_getpgrp 111 #define SYS_setsid 112 #define SYS_waitpid 114 +#define SYS_getpgid 121 +#define SYS_getsid 124 +#define SYS_sigpending 127 +#define SYS_sigsuspend 130 #define SYS_prctl 157 #define SYS_futex 202 #define SYS_openat 257 diff --git a/user/include/mlibc/sysdeps/kirkos/meson.build b/user/include/mlibc/sysdeps/kirkos/meson.build index a05f9cf..9924c67 100644 --- a/user/include/mlibc/sysdeps/kirkos/meson.build +++ b/user/include/mlibc/sysdeps/kirkos/meson.build @@ -98,5 +98,7 @@ libc_sources += files( 'sysdeps.cpp', 'syscall.cpp', 'dso.c', + 'generic/thread.cpp', + 'generic/thread.S', ) libc_include_dirs += include_directories('include') diff --git a/user/include/mlibc/sysdeps/kirkos/sysdeps.cpp b/user/include/mlibc/sysdeps/kirkos/sysdeps.cpp index ddc6cb3..46adafc 100644 --- a/user/include/mlibc/sysdeps/kirkos/sysdeps.cpp +++ 
b/user/include/mlibc/sysdeps/kirkos/sysdeps.cpp @@ -20,6 +20,15 @@ __builtin_unreachable(); \ }) +extern "C" void __mlibc_sigreturn_trampoline(void); +__asm__ ( + ".global __mlibc_sigreturn_trampoline\n" + "__mlibc_sigreturn_trampoline:\n" + " mov $15, %rax\n" /* SYS_sigreturn */ + " syscall\n" + " ud2\n" /* should never reach here */ +); + namespace mlibc { @@ -122,15 +131,6 @@ int sys_clone(void *tcb, pid_t *pid_out, void *stack) { return 0; } -extern "C" void __mlibc_thread_entry() { - /* Minimal stub so linking succeeds. - If you need real pthread support later, replace this with the - proper mlibc thread startup logic (pop start_routine + arg from stack, - call it, then sys_thread_exit). */ - mlibc::sys_libc_panic(); - __builtin_unreachable(); -} - int sys_tcgetattr(int fd, struct termios *attr) { int ret; if (int r = sys_ioctl(fd, TCGETS, attr, &ret) != 0) { @@ -345,15 +345,24 @@ gid_t sys_getgid() { return 0; } int sys_setgid(gid_t) { return 0; } -int sys_getpgid(pid_t, pid_t *) { return 0; } +int sys_getpgid(pid_t pid, pid_t *out) { + int ret = (int)syscall(SYS_getpgid, pid); + if (ret < 0) return -ret; + *out = (pid_t)ret; + return 0; +} gid_t sys_getegid() { return 0; } -int sys_setpgid(pid_t, pid_t) { return 0; } +int sys_setpgid(pid_t pid, pid_t pgid) { + int ret = (int)syscall(SYS_setpgid, pid, pgid); + if (ret < 0) return -ret; + return 0; +} int sys_kill(pid_t p, int sig) { - (void)sig; - syscall(SYS_kill, p); + int ret = (int)syscall(SYS_kill, p, sig); + if (ret < 0) return -ret; return 0; } @@ -479,23 +488,79 @@ int sys_rmdir(const char *path) { return ret; } -int sys_setsid(pid_t *sid) { - (void)sid; - int ret = syscall(SYS_setsid); - ret = ret < 0 ? 
-ret : ret; - return ret; -} - -int sys_sigprocmask(int, const sigset_t *__restrict, sigset_t *__restrict) { - mlibc::infoLogger() << "mlibc: sys_sigprocmask() is a stub\n" << frg::endlog; +int sys_setsid(pid_t *out) { + int ret = (int)syscall(SYS_setsid); + if (ret < 0) return -ret; + *out = (pid_t)ret; return 0; } -int sys_sigaction(int, const struct sigaction *, struct sigaction *) { - mlibc::infoLogger() << "mlibc: sys_sigaction() is a stub\n" << frg::endlog; +int sys_sigprocmask(int how, const sigset_t *__restrict set, sigset_t *__restrict oldset) { + int ret = (int)syscall(SYS_sigprocmask, how, set, oldset); + if (ret < 0) return -ret; return 0; } +int sys_sigaction(int how, const struct sigaction *act, struct sigaction *oldact) { + struct sigaction kact; + const struct sigaction *act_to_pass = act; + if (act) { + kact = *act; + /* Always set SA_RESTORER + sa_restorer; the kernel requires this + * to know where to return after the handler. If userspace already + * provided one (e.g. for sigaltstack tricks), honor it. 
*/ + if (!(kact.sa_flags & SA_RESTORER) || kact.sa_restorer == nullptr) { + kact.sa_flags |= SA_RESTORER; + kact.sa_restorer = __mlibc_sigreturn_trampoline; + } + act_to_pass = &kact; + } + int ret = (int)syscall(SYS_sigaction, how, act_to_pass, oldact); + if (ret < 0) return -ret; + return 0; +} + +int sys_pause(void) { + int ret = (int)syscall(SYS_pause); + /* pause always returns -EINTR per POSIX */ + if (ret < 0) return -ret; + return -1; +} + +int sys_sigsuspend(const sigset_t *mask) { + int ret = (int)syscall(SYS_sigsuspend, mask); + if (ret < 0) return -ret; + return -1; /* never returns success */ +} + +int sys_sigpending(sigset_t *set) { + int ret = (int)syscall(SYS_sigpending, set); + if (ret < 0) return -ret; + return 0; +} + +int sys_getsid(pid_t pid, pid_t *out) { + int ret = (int)syscall(SYS_getsid, pid); + if (ret < 0) return -ret; + *out = (pid_t)ret; + return 0; +} + +int sys_tcgetpgrp(int fd, pid_t *out) { + int pgrp; + int ret_val; + if (int r = sys_ioctl(fd, TIOCGPGRP, &pgrp, &ret_val); r != 0) + return r; + *out = (pid_t)pgrp; + return 0; +} + +int sys_tcsetpgrp(int fd, pid_t pgrp) { + int ret_val; + int pg = (int)pgrp; + return sys_ioctl(fd, TIOCSPGRP, &pg, &ret_val); +} + int sys_waitpid(pid_t pid, int *status, int flags, struct rusage *ru, pid_t *ret_pid) { if (ru) { mlibc::infoLogger() << "mlibc: struct rusage in sys_waitpid is unsupported\n"