user: implement mlibc as the libc, finally.

It's finally done.. Signed-off-by: kaguya <vpshinomiya@protonmail.com>
2026-05-02 03:31:49 -04:00
parent 2fa39ad85a
commit 9a9b91c940
2387 changed files with 152741 additions and 315 deletions
@@ -0,0 +1,12 @@
+#!/bin/bash
+# USAGE: list the header files and the sysdeps ports in the arrays below, then
+# export `abi` (the directory name under abis/) before running this script.
+#
+# For every (file, port) pair a relative symlink is created:
+#   sysdeps/<port>/include/abi-bits/<file> -> abis/<abi>/<file>
+
+declare -a files=()
+declare -a ports=()
+
+for file in "${files[@]}"; do
+	for port in "${ports[@]}"; do
+		# ${abi:?...} aborts with a message instead of linking to
+		# "abis//<file>" when the variable was never exported; the quotes
+		# keep names containing spaces or glob characters intact.
+		ln -rsiv "abis/${abi:?export abi before running this script}/$file" \
+			"sysdeps/$port/include/abi-bits/$file"
+	done
+done
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Checks that every header below an include/ directory which uses one of the
+# __MLIBC_*_OPTION macros also includes <mlibc-config.h>.
+# Exits 0 when no offending header was found and 1 otherwise.
+
+# Run the last stage of a pipeline in the current shell so that the `while`
+# loop below can update errors_found.
+shopt -s lastpipe
+
+errors_found=0
+
+# IFS= keeps leading/trailing whitespace in file names intact.
+find . -wholename '*include/*.h' -print0 | while IFS= read -rd $'\0' file; do
+	uses=$(grep -c -E "__MLIBC_(ANSI|BSD|POSIX|LINUX|GLIBC)_OPTION" "$file")
+	if [ "$uses" -ne 0 ]; then
+		does_include=$(grep -c "#include <mlibc-config.h>" "$file")
+		if [ "$does_include" -eq 0 ]; then
+			echo "'$file' does not include mlibc-config.h while it does use mlibc option macros"
+			# Arithmetic increment: `+=` on a variable without the integer
+			# attribute appends the character "1" instead of adding one.
+			errors_found=$((errors_found + 1))
+		fi
+	fi
+done
+
+# Exit statuses are truncated to 8 bits, so report a boolean, not the count.
+if [ "$errors_found" -gt 0 ]; then
+	exit 1
+fi
+exit 0
@@ -0,0 +1,18 @@
+#! /bin/sh
+# Downloads the Linux $LINUX_VERSION source tarball, runs `make headers_install`
+# and leaves the resulting header tree in ./linux-headers.
+# ARCH may be preset in the environment; it defaults to `uname -m`.
+
+set -ex
+
+LINUX_MAJOR=v6.x
+LINUX_VERSION=6.16
+: "${ARCH:=$(uname -m)}"
+
+# --fail makes curl exit non-zero on an HTTP error instead of saving the
+# server's error page under the tarball's name.
+curl --fail -Lo "linux-$LINUX_VERSION.tar.xz" "https://cdn.kernel.org/pub/linux/kernel/$LINUX_MAJOR/linux-$LINUX_VERSION.tar.xz"
+tar -xf "linux-$LINUX_VERSION.tar.xz"
+rm "linux-$LINUX_VERSION.tar.xz"
+cd "linux-$LINUX_VERSION"
+make mrproper
+make ARCH="$ARCH" headers_install
+# drop everything below usr/include that is not a *.h file
+find usr/include -type f ! -name '*.h' -delete
+mv usr/include ../linux-headers
+cd ..
+rm -rf "linux-$LINUX_VERSION"
@@ -0,0 +1,15 @@
+[project]
+name = "mlibc"
+use_system_includes = false
+git_repo_url = "https://github.com/managarm/mlibc/"
+git_default_branch = "master"
+
+[ignore]
+paths = [
+	"options/ansi/musl-generic-math/"
+]
+
+[paths]
+input_dir = "@source_root@"
+output_dir = "@build_root@/docs/"
+compile_commands = "@build_root@/compile_commands.json"
@@ -0,0 +1,303 @@
+# additional includes to supply via `-I` arguments
+includes: []
+
+# paths relative to includedir whose files we don't want to parse themselves,
+# but whose content we want to analyze if they're included from another header.
+base_skipped_directories:
+  - bits
+# paths relative to includedir that we want to completely ignore, even for transitive includes
+base_ignored_directories:
+  - finclude
+  - gnu
+
+# same as the two above, but for files
+base_skipped_files: []
+base_ignored_files: []
+
+# record mapping from base (glibc) to mlibc
+map_record_to_struct:
+  - "__SOCKADDR_ARG": "struct sockaddr *restrict"
+  - "__CONST_SOCKADDR_ARG": "const struct sockaddr *"
+
+# struct equivalence mapping from base (glibc) to mlibc
+equivalent_structs:
+  - "struct _IO_FILE": "struct __mlibc_file_base"
+  - "__mbstate_t": "struct __mlibc_mbstate"
+  - "cnd_t": "struct __mlibc_cond"
+  - "mtx_t": "struct __mlibc_mutex"
+  - "struct __locale_struct": "void"
+  - "pthread_mutexattr_t": "struct __mlibc_mutexattr"
+  - "pthread_mutex_t": "struct __mlibc_mutex"
+  - "pthread_condattr_t": "struct __mlibc_condattr"
+  - "pthread_barrierattr_t": "struct __mlibc_barrierattr"
+  - "pthread_rwlockattr_t": "struct __mlibc_rwlockattr"
+  - "pthread_rwlock_t": "struct __mlibc_fair_rwlock"
+  - "union pthread_attr_t": "struct __mlibc_threadattr"
+  - "pthread_barrier_t": "struct __mlibc_barrier"
+  - "pthread_cond_t": "struct __mlibc_cond"
+  - "__sigset_t": "sigset_t"
+  - "struct ucontext_t": "struct __ucontext"
+  - "struct __dirstream": "struct __mlibc_dir_struct"
+  - "sem_t": "struct sem_"
+  - "struct stat64": "struct stat"
+  - "struct msqid64_ds": "struct msqid_ds"
+  - "struct msqid_ds": "struct msqid64_ds"
+  - "struct rlimit64": "struct rlimit"
+  - "glob_t": "struct glob_t"
+  - "stack_t": "struct __stack"
+  - "__useconds_t": "useconds_t"
+
+# files to ignore, relative to includedir
+ignored_files:
+  - math.h
+  - tgmath.h
+  - regexp.h
+  - sys/asm.h
+  - sys/elf.h
+  - sys/vm86.h
+
+ignored_structs:
+  # structs to ignore because we expose the kernel layout
+  - fd_set
+  - semid_ds
+
+  # libc-level constructs that are allowed to differ, and probably do
+  - ifaddrs
+  - lastlog
+  - regmatch_t
+  - sem_t
+  - sigevent
+  - utmp
+  - utmpx
+
+ignored_typedefs:
+  # opaque structs where users can't rely on any particular layout
+  - cnd_t
+  - fpos_t
+  - jmp_buf
+  - mtx_t
+  - pthread_attr_t
+  - pthread_barrier_t
+  - pthread_cond_t
+  - pthread_condattr_t
+  - pthread_mutex_t
+  - pthread_mutexattr_t
+  - pthread_rwlock_t
+  - pthread_rwlockattr_t
+  - sigjmp_buf
+  - wordexp_t
+
+  # glibc skill issue: regoff_t should be at least ptrdiff_t or ssize_t-sized, but glibc uses int
+  - regmatch_t
+
+# macros we ignore for being libc-level constructs or being allowed to diverge
+ignored_macros:
+  - AT_XPIPE
+  - AT_OPENFILES
+  - AT_FS_SERVER
+  - AT_MBUS_SERVER
+  - _Complex_I
+  - FTW_F
+  - FTW_D
+  - FTW_DNR
+  - FTW_DP
+  - FTW_NS
+  - FTW_SL
+  - FTW_SLN
+  - FTW_PHYS
+  - FTW_MOUNT
+  - FTW_DEPTH
+  - FTW_CHDIR
+  - FTW_CONTINUE
+  - WRDE_APPEND
+  - WRDE_DOOFFS
+  - WRDE_NOCMD
+  - WRDE_REUSE
+  - WRDE_SHOWERR
+  - WRDE_UNDEF
+  - WRDE_SUCCESS
+  - WRDE_BADCHAR
+  - WRDE_BADVAL
+  - WRDE_CMDSUB
+  - WRDE_NOSPACE
+  - WRDE_SYNTAX
+  - PF_MAX
+  - SOMAXCONN
+  - IPPROTO_MAX
+  - FOPEN_MAX
+  - FILENAME_MAX
+  - TMP_MAX
+  - LINE_MAX
+  - CHARCLASS_NAME_MAX
+  - RE_DUP_MAX
+  - SIGRTMIN
+  - L_ctermid
+  - L_tmpnam
+  - _IOFBF
+  - _IOLBF
+  - _IONBF
+  - _PATH_DEFPATH
+  - _PATH_STDPATH
+  - _PATH_VARDB
+  - _POSIX_OPEN_MAX
+  - PTHREAD_DESTRUCTOR_ITERATIONS
+  - PTHREAD_STACK_MIN
+  - MINSIGSTKSZ
+  - SIGSTKSZ
+  - _PC_FILESIZEBITS
+  - _PC_SYMLINK_MAX
+  - NL_ARGMAX
+  - MB_LEN_MAX
+  - ABDAY_1
+  - ABDAY_2
+  - ABDAY_3
+  - ABDAY_4
+  - ABDAY_5
+  - ABDAY_6
+  - ABDAY_7
+  - DAY_1
+  - DAY_2
+  - DAY_3
+  - DAY_4
+  - DAY_5
+  - DAY_6
+  - DAY_7
+  - ABMON_1
+  - ABMON_2
+  - ABMON_3
+  - ABMON_4
+  - ABMON_5
+  - ABMON_6
+  - ABMON_7
+  - ABMON_8
+  - ABMON_9
+  - ABMON_10
+  - ABMON_11
+  - ABMON_12
+  - MON_1
+  - MON_2
+  - MON_3
+  - MON_4
+  - MON_5
+  - MON_6
+  - MON_7
+  - MON_8
+  - MON_9
+  - MON_10
+  - MON_11
+  - MON_12
+  - AM_STR
+  - PM_STR
+  - D_T_FMT
+  - D_FMT
+  - T_FMT
+  - T_FMT_AMPM
+  - ERA
+  - ERA_D_FMT
+  - ALT_DIGITS
+  - ERA_D_T_FMT
+  - ERA_T_FMT
+  - CODESET
+  - CRNCYSTR
+  - RADIXCHAR
+  - DECIMAL_POINT
+  - THOUSEP
+  - THOUSANDS_SEP
+  - YESEXPR
+  - NOEXPR
+  - LC_ALL
+  - LC_COLLATE
+  - LC_CTYPE
+  - LC_MONETARY
+  - LC_NUMERIC
+  - LC_TIME
+  - LC_MESSAGES
+  - LC_MEASUREMENT
+  - LC_GLOBAL_LOCALE
+  - LC_CTYPE_MASK
+  - LC_NUMERIC_MASK
+  - LC_TIME_MASK
+  - LC_COLLATE_MASK
+  - LC_MONETARY_MASK
+  - LC_MESSAGES_MASK
+  - LC_MEASUREMENT_MASK
+  - LC_ALL_MASK
+  - AI_PASSIVE
+  - AI_CANONNAME
+  - AI_NUMERICHOST
+  - AI_V4MAPPED
+  - AI_ALL
+  - AI_ADDRCONFIG
+  - AI_NUMERICSERV
+  - NI_NOFQDN
+  - NI_NUMERICHOST
+  - NI_NAMEREQD
+  - NI_NUMERICSCOPE
+  - NI_DGRAM
+  - NI_NUMERICSERV
+  - NI_MAXSERV
+  - NI_IDN
+  - NI_IDN_USE_STD3_ASCII_RULES
+  - NI_MAXHOST
+  - EAI_AGAIN
+  - EAI_BADFLAGS
+  - EAI_FAIL
+  - EAI_FAMILY
+  - EAI_MEMORY
+  - EAI_NONAME
+  - EAI_SERVICE
+  - EAI_SOCKTYPE
+  - EAI_SYSTEM
+  - EAI_OVERFLOW
+  - EAI_NODATA
+  - EAI_ADDRFAMILY
+  - GLOB_APPEND
+  - GLOB_DOOFFS
+  - GLOB_ERR
+  - GLOB_MARK
+  - GLOB_NOCHECK
+  - GLOB_NOESCAPE
+  - GLOB_NOSORT
+  - GLOB_PERIOD
+  - GLOB_TILDE
+  - GLOB_TILDE_CHECK
+  - GLOB_BRACE
+  - GLOB_NOMAGIC
+  - GLOB_ALTDIRFUNC
+  - GLOB_ONLYDIR
+  - GLOB_MAGCHAR
+  - GLOB_ABORTED
+  - GLOB_NOMATCH
+  - GLOB_NOSPACE
+  - GLOB_NOSYS
+  - BUFSIZ
+  - F_LOCK
+  - F_TEST
+  - F_TLOCK
+  - F_ULOCK
+  - O_LARGEFILE
+  - FSETLOCKING_INTERNAL
+  - FSETLOCKING_BYCALLER
+  - FSETLOCKING_QUERY
+  - NSS_BUFLEN_PASSWD
+
+# enum members whose value should not be depended on
+ignored_enum_constants:
+  - thrd_timedout
+  - thrd_busy
+  - thrd_error
+  - thrd_nomem
+
+# structs that would get ignored by default (with leading double underscores) that we
+# don't actually want to get ignored
+forced_structs:
+  - "__mlibc_thread_data"
+  - "__mlibc_threadattr"
+  - "__mlibc_mutex"
+  - "__mlibc_mutexattr"
+  - "__mlibc_cond"
+  - "__mlibc_condattr"
+  - "__ucontext"
+
+forced_typedefs:
+  - "__ucontext"
@@ -0,0 +1,934 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import pathlib
+import re
+import subprocess
+import sys
+import tempfile
+import typing
+from dataclasses import dataclass, field
+
+import clang.cindex
+import colorama
+import yaml
+from clang.cindex import CursorKind, TokenKind, TypeKind
+
+dry_run = True
+errors_emitted = 0
+
+
+def on_ci() -> bool:
+    return "CI" in os.environ
+
+
+def log_err(prefix, msg):
+    global errors_emitted
+
+    if on_ci():
+        print(f"{prefix}: {msg}", file=sys.stderr)
+    else:
+        print(
+            f"{colorama.Fore.RED}{prefix}{colorama.Style.RESET_ALL}: {msg}",
+            file=sys.stderr,
+        )
+    errors_emitted += 1
+
+
+def no_system_includes(cursor, level):
+    """filter out verbose stuff from system include files"""
+    return (level != 1) or (
+        cursor.location.file is not None
+        and not cursor.location.file.name.startswith("/usr/include")
+    )
+
+
+class Type:
+    def __init__(self, t: clang.cindex.Type):
+        self.t = t
+        self.kind = t.kind
+        self.typename = (
+            str(self.t.spelling).removesuffix("restrict").removeprefix("const ")
+        )
+        self.compat_typename = None
+
+        match t.kind:
+            case TypeKind.ELABORATED:
+                replacement = next(
+                    filter(
+                        lambda x: self.typename in x.keys(),
+                        config["map_record_to_struct"],
+                    ),
+                    None,
+                )
+                if replacement is not None:
+                    self.compat_typename = (
+                        replacement[self.typename]
+                        .removesuffix("restrict")
+                        .removeprefix("const ")
+                    )
+            case TypeKind.RECORD:
+                replacement = next(
+                    filter(
+                        lambda x: self.typename in x.keys(),
+                        config["map_record_to_struct"],
+                    ),
+                    None,
+                )
+                if replacement is not None:
+                    self.kind = TypeKind.POINTER
+                    self.compat_typename = (
+                        replacement[self.typename]
+                        .removesuffix("restrict")
+                        .removeprefix("const ")
+                    )
+            case TypeKind.POINTER:
+                ptr_type = (
+                    self.t.get_pointee()
+                    .spelling.removesuffix("restrict")
+                    .removeprefix("const ")
+                )
+                replacement = next(
+                    filter(
+                        lambda x: ptr_type in x.keys(), config["equivalent_structs"]
+                    ),
+                    None,
+                )
+                if replacement is not None:
+                    self.compat_typename = (
+                        replacement[ptr_type]
+                        .removesuffix("restrict")
+                        .removeprefix("const ")
+                        + " *"
+                    )
+
+    @property
+    def canonical(self):
+        return Type(self.t.get_canonical())
+
+    @property
+    def pointee_type(self):
+        if self.kind == TypeKind.POINTER:
+            return Type(self.t.get_pointee())
+        if (
+            self.kind == TypeKind.INCOMPLETEARRAY
+            or self.kind == TypeKind.CONSTANTARRAY
+            or self.kind == TypeKind.VARIABLEARRAY
+        ):
+            return Type(self.t.get_array_element_type())
+        if self.kind == TypeKind.ELABORATED:
+            return None
+        log_err(
+            "unhandled pointee resolution", str(self.kind).removeprefix("TypeKind.")
+        )
+        return None
+
+    def __str__(self):
+        return self.typename
+
+    def __eq__(self, other):
+        if self.kind != other.kind:
+            if (
+                self.kind == TypeKind.INCOMPLETEARRAY
+                or other.kind == TypeKind.INCOMPLETEARRAY
+            ):
+                if self.pointee_type is None or other.pointee_type is None:
+                    return False
+                return self.pointee_type == other.pointee_type
+            elif (
+                self.kind == TypeKind.CONSTANTARRAY
+                or other.kind == TypeKind.CONSTANTARRAY
+            ):
+                if self.pointee_type is None or other.pointee_type is None:
+                    return False
+                return self.pointee_type == other.pointee_type
+            elif (
+                self.kind == TypeKind.VARIABLEARRAY
+                or other.kind == TypeKind.VARIABLEARRAY
+            ):
+                if self.pointee_type is None or other.pointee_type is None:
+                    return False
+                return self.pointee_type == other.pointee_type
+            elif self.kind == TypeKind.ELABORATED or other.kind == TypeKind.ELABORATED:
+                return (
+                    self.t.get_size() == other.t.get_size()
+                    and self.t.get_align() == other.t.get_align()
+                )
+            else:
+                return False
+
+        if str(self) == str(other):
+            return True
+        return (
+            self.compat_typename == str(other)
+            or self.compat_typename == other.compat_typename
+        )
+
+    def is_valid(self):
+        return self.t.kind != TypeKind.INVALID
+
+
+@dataclass
+class Function:
+    name: str
+    linkage: clang.cindex.LinkageKind
+    ret_type: clang.cindex.Type
+    location: clang.cindex.SourceLocation
+    arguments: typing.List[Type]
+
+    def __init__(self, c: clang.cindex.Cursor):
+        self.c = c
+        self.name = c.mangled_name
+        self.linkage = c.linkage
+        self.ret_type = c.result_type
+        self.location = c.location
+        self.arguments = list()
+
+        for arg in c.get_arguments():
+            self.arguments.append(Type(arg.type))
+
+
+@dataclass
+class MacroDefinition:
+    name: str
+    location: clang.cindex.SourceLocation
+
+    def __init__(self, c: clang.cindex.Cursor):
+        self.c = c
+        self.name = c.spelling
+        self.location = c.location
+        self.tokens = list(self.c.get_tokens())
+
+    @property
+    def first_token(self):
+        return self.tokens[1] if len(self.tokens) > 1 else None
+
+
+@dataclass
+class EnumDecl:
+    name: str
+    location: clang.cindex.SourceLocation
+
+    def __init__(self, c: clang.cindex.Cursor):
+        self.c = c
+        self.name = c.spelling
+        self.location = c.location
+
+
+@dataclass
+class StructDecl:
+    name: str
+    location: clang.cindex.SourceLocation
+
+    def __init__(self, c: clang.cindex.Cursor):
+        self.c = c
+        self.name = c.spelling
+        self.location = c.location
+        self.alignment = c.type.get_align()
+        self.size = c.type.get_size()
+
+
+@dataclass
+class Typedef:
+    name: str
+    location: clang.cindex.SourceLocation
+
+    def __init__(self, c: clang.cindex.Cursor):
+        self.c = c
+        self.name = c.spelling
+        self.location = c.location
+        self.alignment = c.type.get_align()
+        self.size = c.type.get_size()
+
+@dataclass
+class State:
+    """
+    Represents the parsed state of a set of headers.
+    """
+
+    path: pathlib.Path
+    functions: typing.Dict[str, Function] = field(default_factory=dict)
+    macros: typing.Dict[str, MacroDefinition] = field(default_factory=dict)
+    enums: typing.Dict[str, EnumDecl] = field(default_factory=dict)
+    structs: typing.Dict[str, StructDecl] = field(default_factory=dict)
+    typedefs: typing.Dict[str, StructDecl] = field(default_factory=dict)
+
+    def __init__(self, path: pathlib.Path):
+        self.path = path
+        self.functions = dict()
+        self.macros = dict()
+        self.enums = dict()
+        self.structs = dict()
+        self.typedefs = dict()
+
+
+@dataclass
+class Comparison:
+    config: dict
+
+    def is_ignored(self, typename, ignorelist, name):
+        if (
+            typename == "macros"
+            and (name.startswith("_") or name.startswith("MLIBC_"))
+            and name.endswith("_H")
+        ):
+            return True
+        if name in ignorelist:
+            return True
+        if "forced_" + typename in config and name in config["forced_" + typename]:
+            return False
+        if name.startswith("__"):
+            return True
+        if "ignored_" + typename in config and name in config["ignored_" + typename]:
+            return True
+        return False
+
+    @staticmethod
+    def is_skipped_file(base_dir: pathlib.Path, file: pathlib.Path, config):
+        if Comparison.is_ignored_file(base_dir, file, config):
+            return True
+
+        if base_dir == args.reference:
+            for p in config["base_skipped_directories"]:
+                if str(file).startswith(os.path.join(base_dir, p)):
+                    return True
+            for p in config["base_skipped_files"]:
+                stripped_file = str(file).removeprefix(str(base_dir)).removeprefix("/")
+                if stripped_file == p:
+                    return True
+        return False
+
+    @staticmethod
+    def is_ignored_file(base_dir: pathlib.Path, file: pathlib.Path, config):
+        if not str(file).startswith(str(base_dir)):
+            return True
+
+        if base_dir == args.reference:
+            for p in config["base_ignored_directories"]:
+                if str(file).startswith(os.path.join(base_dir, p)):
+                    return True
+            for p in config["base_ignored_files"]:
+                stripped_file = str(file).removeprefix(str(base_dir)).removeprefix("/")
+                if stripped_file == p:
+                    return True
+        for p in config["ignored_files"]:
+            stripped_file = str(file).removeprefix(str(base_dir)).removeprefix("/")
+            if stripped_file == p:
+                return True
+        for p in config["includes"]:
+            if str(base_dir).startswith(p):
+                return True
+        return False
+
+    def from_cursor(self, base_dir, header, cursor, filter_pred, state: State, level=0):
+        """Record the declarations found at `cursor` in `state`.
+
+        base_dir    -- root of the header tree; used for the ignored-file check
+        header      -- only handed on to the recursive calls
+        cursor      -- the clang cursor to inspect
+        filter_pred -- callable `(cursor, level)`; the cursor is only processed
+                       when it returns a truthy value
+        state       -- the State whose dictionaries are filled in
+        level       -- nesting depth, 0 for the cursor the walk started from
+
+        Only a TRANSLATION_UNIT cursor is descended into by the dispatch below;
+        cursor kinds without a case are reported through `log_err`.
+        """
+        # drop anything that comes from a file the config tells us to ignore
+        if cursor.location.file:
+            f = pathlib.Path(str(cursor.location.file))
+
+            if Comparison.is_ignored_file(base_dir, f, config):
+                return
+
+        if filter_pred(cursor, level):
+            # NOTE(review): with --dump-tree every cursor recurses into all of
+            # its children here, in addition to the TRANSLATION_UNIT recursion
+            # below -- so children of the translation unit are visited twice and
+            # nested cursors reach the `case _` branch. Confirm this is intended.
+            if args.dump_tree:
+                print(f"{"-" * level} {cursor.kind} {cursor.spelling}")
+                for c in cursor.get_children():
+                    self.from_cursor(base_dir, header, c, filter_pred, state, level + 1)
+
+            match cursor.kind:
+                case CursorKind.TRANSLATION_UNIT:
+                    for c in cursor.get_children():
+                        self.from_cursor(
+                            base_dir, header, c, filter_pred, state, level + 1
+                        )
+                case CursorKind.INCLUSION_DIRECTIVE:
+                    pass
+                case CursorKind.FUNCTION_DECL:
+                    # functions whose mangled name starts with "__" are not recorded
+                    if not cursor.mangled_name.startswith("__"):
+                        f = Function(cursor)
+                        state.functions.update({f.name: f})
+                case CursorKind.STATIC_ASSERT | CursorKind.UNEXPOSED_DECL:
+                    pass
+                case CursorKind.ENUM_DECL:
+                    # the enum's children are stored individually, keyed by
+                    # their own spelling, not by the enum's name
+                    if not self.is_ignored("enums", [], cursor.spelling):
+                        for x in cursor.get_children():
+                            state.enums.update({x.spelling: EnumDecl(x)})
+                case CursorKind.MACRO_DEFINITION:
+                    if not self.is_ignored("macros", [], cursor.spelling):
+                        state.macros.update({cursor.spelling: MacroDefinition(cursor)})
+                case CursorKind.STRUCT_DECL:
+                    # only definitions are recorded, not forward declarations
+                    if not self.is_ignored("structs", [], cursor.spelling):
+                        if cursor.is_definition():
+                            state.structs.update({cursor.spelling: StructDecl(cursor)})
+                case CursorKind.UNION_DECL:
+                    # unions share the `structs` dictionary with structs
+                    if not self.is_ignored("unions", [], cursor.spelling):
+                        if cursor.is_definition():
+                            state.structs.update({cursor.spelling: StructDecl(cursor)})
+                case CursorKind.TYPEDEF_DECL:
+                    if not self.is_ignored("typedefs", [], cursor.spelling):
+                        children = list(cursor.get_children())
+                        # typedefs whose cursor has no children are not recorded
+                        if not children:
+                            return
+
+                        state.typedefs.update({cursor.spelling: Typedef(cursor)})
+
+                        # a typedef whose first child references an already
+                        # recorded struct also makes that struct available
+                        # under the typedef's name
+                        if children[0].kind == CursorKind.TYPE_REF:
+                            child_struct_name = children[0].spelling.removeprefix(
+                                "struct "
+                            )
+
+                            if child_struct_name in state.structs:
+                                state.structs.update(
+                                    {cursor.spelling: state.structs[child_struct_name]}
+                                )
+                case CursorKind.MACRO_INSTANTIATION | CursorKind.VAR_DECL:
+                    # don't care (for now)
+                    pass
+                case _:
+                    log_err(
+                        "unhandled cursor type",
+                        f"{cursor.kind} {cursor.spelling} {cursor.displayname} {cursor.location}",
+                    )
+
+
+def cc_name():
+    if args.clang_version:
+        return [f"clang-{args.clang_version}", f"--target={f"{args.arch}-linux-gnu"}"]
+    return ["clang", f"--target={f"{args.arch}-linux-gnu"}"]
+
+
+def cxx_name():
+    # m68k on clang defaults to a small codemodel that doesn't work
+    # and I have not found a way to change it outside of `llc` other
+    if args.arch == "m68k":
+        return ["m68k-linux-gnu-g++"]
+    if args.clang_version:
+        return [f"clang++-{args.clang_version}", f"--target={f"{args.arch}-linux-gnu"}"]
+    return ["clang++", f"--target={f"{args.arch}-linux-gnu"}"]
+
+
+def parse(
+    file: pathlib.Path, resource_dir: pathlib.Path, base_dir: pathlib.Path, state: State
+):
+    index = clang.cindex.Index.create()
+    tu = None
+
+    clang_args = [f"-I{resource_dir}"]
+    clang_args += [f"-I{p}" for p in config["includes"]]
+    clang_args += [f"-I{base_dir}"]
+    clang_args += [f"-I{base_dir / f"{args.arch}-linux-gnu"}"]
+    clang_args += [f"--target={f"{args.arch}-linux-gnu"}"]
+    clang_args += ["-D_GNU_SOURCE", "-D_FILE_OFFSET_BITS=64", "-Wno-macro-redefined"]
+
+    try:
+        tu = index.parse(
+            base_dir / file,
+            args=clang_args,
+            options=clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
+            | clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES,
+        )
+    except Exception as e:
+        log_err("parsing error", f"{file}: {e}")
+        return
+
+    assert tu
+
+    if not Comparison.is_skipped_file(base_dir, base_dir / file, config):
+        if tu.diagnostics:
+            [log_err("compile error", d) for d in tu.diagnostics]
+            print(f"\n{errors_emitted} errors emitted")
+            exit(errors_emitted)
+
+        parser = Comparison(config)
+        if args.verbose:
+            print(f"// {tu.spelling.strip()}")
+        parser.from_cursor(base_dir, file, tu.cursor, no_system_includes, state)
+
+
+def compare_states(a, b):
+    global errors_emitted
+
+    a_symbols = sorted(a.functions.keys())
+    b_symbols = sorted(b.functions.keys())
+    symbols = a_symbols
+    symbols.extend(x for x in b_symbols if x not in symbols)
+    c = Comparison(config)
+
+    if args.function_signatures:
+        lines = []
+
+        for s in symbols:
+            if s not in a.functions or s not in b.functions:
+                continue
+
+            a_func = a.functions[s]
+            b_func = b.functions[s]
+
+            a_ret_type = Type(a_func.ret_type.get_canonical())
+            b_ret_type = Type(b_func.ret_type.get_canonical())
+
+            if (
+                a_ret_type != b_ret_type
+                and a_func.ret_type.spelling != b_func.ret_type.spelling
+            ):
+                lines.append(
+                    f"\t{s}: mismatched return type ({a_ret_type} vs. {b_ret_type})"
+                )
+                errors_emitted += 1
+
+            a_args = a_func.arguments
+            b_args = b_func.arguments
+
+            if len(a_args) != len(b_args):
+                lines.append(
+                    f"\t{s}: argument count mismatch ({len(a_args)} vs. {len(b_args)})"
+                )
+                errors_emitted += 1
+
+            for i, at in enumerate(a_args):
+                bt = b_args[i]
+                if at != bt and at.canonical != bt.canonical:
+                    lines.append(
+                        f"\t{s}: mismatched type for argument at position {(i + 1)} ({at} ({str(at.kind).removeprefix("TypeKind.")}) vs. {bt} ({str(bt.kind).removeprefix("TypeKind.")}))"
+                    )
+                    errors_emitted += 1
+
+        if lines:
+            print()
+            print(f"checking {len(symbols)} functions for signature mismatches:")
+            for line in lines:
+                print(line)
+
+    if args.missing_functions:
+        a_unique_symbols = list(filter(lambda e: e not in b_symbols, a_symbols))
+        b_unique_symbols = list(filter(lambda e: e not in a_symbols, b_symbols))
+
+        if args.verbose and len(a_unique_symbols) > 0:
+            print()
+            print(f"{len(a_unique_symbols)} symbols only defined in {a.path}:")
+            for s in sorted(a_unique_symbols):
+                print(f"{s} defined in {a.functions[s].location}")
+
+        if len(b_unique_symbols) > 0:
+            print()
+            print(f"{len(b_unique_symbols)} symbols only defined in {b.path}:")
+            for s in sorted(b_unique_symbols):
+                print(f"{s} defined in {b.functions[s].location}")
+
+    def loc(s):
+        return f"{s.location.file}:{s.location.line}"
+
+    if args.structs:
+
+        for mapping in config["equivalent_structs"]:
+            (a_name, b_name), = mapping.items()
+            a_name = a_name.removeprefix("struct ")
+            b_name = b_name.removeprefix("struct ")
+            if a_name not in a.structs or b_name not in b.structs:
+                continue
+            if c.is_ignored("structs", [], a_name) or c.is_ignored("structs", [], b_name):
+                continue
+            if (a_name in a.typedefs and c.is_ignored("typedefs", [], a_name)) or (c.is_ignored("typedefs", [], b_name)):
+                continue
+            b.structs[a_name] = b.structs[b_name]
+
+        common_structs = sorted(set(a.structs) & set(b.structs))
+        lines = []
+
+        for name in common_structs:
+            if c.is_ignored("typedefs", [], name):
+                continue;
+
+            sa = a.structs[name]
+            sb = b.structs[name]
+
+            if sa.alignment != sb.alignment:
+                lines.append(
+                    f"\t{name}: alignment {sa.alignment} vs. {sb.alignment} ({loc(sa)}, {loc(sb)})"
+                )
+                errors_emitted += 1
+
+            if sa.size != sb.size:
+                lines.append(
+                    f"\t{name}: size {sa.size} vs. {sb.size} ({loc(sa)}, {loc(sb)})"
+                )
+                errors_emitted += 1
+
+        if lines:
+            print()
+            print(
+                f"checking {len(common_structs)} structs for size/alignment mismatches:"
+            )
+            for line in lines:
+                print(line)
+
+    if args.typedefs:
+        common_typedefs = sorted(set(a.typedefs) & set(b.typedefs))
+        lines = []
+
+        for name in common_typedefs:
+            if (name in a.structs or name in b.structs) and c.is_ignored("structs", [], name):
+                continue;
+
+            ta = a.typedefs[name]
+            tb = b.typedefs[name]
+
+            if ta.alignment != tb.alignment and ta.alignment > 0 and tb.alignment > 0:
+                lines.append(
+                    f"\t{name}: alignment {ta.alignment} vs. {tb.alignment} ({loc(ta)}, {loc(tb)})"
+                )
+                errors_emitted += 1
+
+            if ta.size != tb.size and ta.size > 0 and tb.size > 0:
+                lines.append(
+                    f"\t{name}: size {ta.size} vs. {tb.size} ({loc(ta)}, {loc(tb)})"
+                )
+                errors_emitted += 1
+
+        if lines:
+            print()
+            print(
+                f"checking {len(common_typedefs)} typedefs for size/alignment mismatches:"
+            )
+            for line in lines:
+                print(line)
+
+    if args.macro_definitions:
+        tempdir = tempfile.TemporaryDirectory(prefix="abichecker")
+        td = pathlib.Path(tempdir.name)
+        script_path = pathlib.Path(__file__).resolve().parent
+
+        atp = open(td / "test-a-primary.hpp", "w")
+        btp = open(td / "test-b-primary.hpp", "w")
+
+        print(f'#include "{script_path}/linux-headers.h"', file=atp)
+        print(f'#include "{script_path}/linux-headers.h"', file=btp)
+
+        def filter_preprocessed_file(input, output):
+            include_next_line = False
+
+            with open(output, "w") as o:
+                with open(input, "r") as i:
+                    for line in i:
+                        if line.startswith("const auto __v_"):
+                            o.write(line)
+                            include_next_line = not line.strip().endswith(";")
+                        elif include_next_line:
+                            if not line.strip().startswith("#"):
+                                o.write(line)
+                                include_next_line = not line.strip().endswith(";")
+
+        a_included_files = list()
+        b_included_files = list()
+        tested_macros = list()
+
+        def is_macro_literal(obj):
+            if type(obj) is not MacroDefinition:
+                return False
+            return obj.first_token and obj.first_token.kind == TokenKind.LITERAL
+
+        def is_enum(obj):
+            return type(obj) is EnumDecl
+
+        for name, bm in (b.macros | b.enums).items():
+            if name in (a.macros | a.enums):
+                am = (a.macros | a.enums)[name]
+                header = (
+                    str(am.location.file)
+                    .removeprefix(str(args.reference))
+                    .removeprefix("/")
+                )
+                if header not in a_included_files and not c.is_skipped_file(
+                    args.reference, args.reference / header, config
+                ):
+                    print(f"#include <{header}>", file=atp)
+                    a_included_files.append(header)
+            header = (
+                str(bm.location.file).removeprefix(str(args.mlibc)).removeprefix("/")
+            )
+            if header not in b_included_files:
+                print(f"#include <{header}>", file=btp)
+                b_included_files.append(header)
+            if name in (a.macros | a.enums) and (
+                is_macro_literal(bm)
+                or (is_enum(bm) and not c.is_ignored("enum_constants", [], name))
+            ):
+                print(f"const auto __v_{name} = {name};", file=atp)
+                print(f"const auto __v_{name} = {name};", file=btp)
+                tested_macros.append(name)
+
+        atp.close()
+        btp.close()
+
+        a_preprocess = subprocess.run(
+            cxx_name()
+            + [
+                "-E",
+                "-std=c++23",
+                "-nostdlib",
+                f"-I{args.reference}",
+                "-o",
+                f"{tempdir.name}/test-a-preprocessed.hpp",
+                f"{tempdir.name}/test-a-primary.hpp",
+                "-D_GNU_SOURCE",
+                "-D_FILE_OFFSET_BITS=64",
+                "-D_REGEX_LARGE_OFFSETS"
+                "-Wno-macro-redefined",
+            ],
+            capture_output=True,
+        )
+        if a_preprocess.returncode != 0:
+            print(f"Preprocessing the macro list of {args.reference} failed:")
+            print(f"\tCommand: '{' '.join(a_preprocess.args)}'")
+            print(a_preprocess.stderr.decode("utf-8"))
+        b_preprocess = subprocess.run(
+            cxx_name()
+            + [
+                "-E",
+                "-std=c++23",
+                "-nostdlib",
+                f"-I{args.mlibc}",
+                "-o",
+                f"{tempdir.name}/test-b-preprocessed.hpp",
+                f"{tempdir.name}/test-b-primary.hpp",
+                "-D_GNU_SOURCE",
+                "-D_FILE_OFFSET_BITS=64",
+                "-D_REGEX_LARGE_OFFSETS"
+                "-Wno-macro-redefined",
+            ],
+            capture_output=True,
+        )
+        if b_preprocess.returncode != 0:
+            print(f"Preprocessing the macro list of {args.mlibc} failed:")
+            print(b_preprocess.stderr.decode("utf-8"))
+
+        filter_preprocessed_file(
+            td / "test-a-preprocessed.hpp", td / "test-a-filtered.hpp"
+        )
+        filter_preprocessed_file(
+            td / "test-b-preprocessed.hpp", td / "test-b-filtered.hpp"
+        )
+
+        at = open(td / "test-a.cpp", "w")
+        bt = open(td / "test-b.cpp", "w")
+
+        print(f'#include "{script_path}/linux-headers.h"', file=at)
+        print(f'#include "{script_path}/linux-headers.h"', file=bt)
+
+        for inc in a_included_files:
+            print(f"#include <{inc}>", file=at)
+        for inc in b_included_files:
+            print(f"#include <{inc}>", file=bt)
+        print("", file=at)
+        print("", file=bt)
+
+        print(f'#include "{tempdir.name}/test-a-filtered.hpp"', file=at)
+        print(f'#include "{tempdir.name}/test-b-filtered.hpp"', file=bt)
+        print(f'#include "{script_path}/to_integral.hpp"', file=at)
+        print(f'#include "{script_path}/to_integral.hpp"', file=bt)
+
+        print("int main() {", file=at)
+        print("int main() {", file=bt)
+
+        for name in tested_macros:
+            print(f'\tmacro_print("{name}", __v_{name});', file=at)
+            print(f'\tmacro_print("{name}", __v_{name});', file=bt)
+
+        print("\treturn 0;", file=at)
+        print("\treturn 0;", file=bt)
+        print("}", file=at)
+        print("}", file=bt)
+
+        at.close()
+        bt.close()
+
+        a_compile = subprocess.run(
+            cxx_name()
+            + [
+                "-std=c++23",
+                "-I",
+                f"{args.reference}",
+                "-o",
+                f"{tempdir.name}/test-a",
+                f"{tempdir.name}/test-a.cpp",
+                "-D_GNU_SOURCE",
+                "-D_FILE_OFFSET_BITS=64",
+                "-D_REGEX_LARGE_OFFSETS"
+                "-Wno-macro-redefined",
+            ],
+            capture_output=True,
+        )
+        if a_compile.returncode != 0:
+            log_err("Compiling macro test failed", f"test.cpp for {args.reference}")
+            print(a_compile.stderr.decode("utf-8"))
+            sys.exit(1)
+
+        b_compile = subprocess.run(
+            cxx_name()
+            + [
+                "-std=c++23",
+                "-I",
+                f"{args.mlibc}",
+                "-o",
+                f"{tempdir.name}/test-b",
+                f"{tempdir.name}/test-b.cpp",
+                "-D_GNU_SOURCE",
+                "-D_FILE_OFFSET_BITS=64",
+                "-D_REGEX_LARGE_OFFSETS"
+                "-Wno-macro-redefined",
+            ],
+            capture_output=True,
+        )
+        if b_compile.returncode != 0:
+            log_err("Compiling macro test failed", f"test.cpp for {args.mlibc}")
+            print(b_compile.stderr.decode("utf-8"))
+            sys.exit(1)
+
+        test_a_file = tempfile.NamedTemporaryFile(dir=tempdir.name)
+        test_b_file = tempfile.NamedTemporaryFile(dir=tempdir.name)
+        qemu_cmd = []
+
+        if args.arch != "x86_64":
+            qemu_cmd = [f"qemu-{args.arch}"]
+            if args.ld_lib:
+                qemu_cmd += ["-L", args.ld_lib]
+
+        test_a = subprocess.run(
+            qemu_cmd + [f"{tempdir.name}/test-a"], stdout=test_a_file
+        )
+        if test_a.returncode != 0:
+            log_err("Running macro test failed", f"test for {args.reference}")
+        test_b = subprocess.run(
+            qemu_cmd + [f"{tempdir.name}/test-b"], stdout=test_b_file
+        )
+        if test_b.returncode != 0:
+            log_err("Running macro test failed", f"test for {args.mlibc}")
+
+        color_output = ["--color=always"] if not on_ci() else []
+
+        diff = subprocess.run(
+            ["diff", test_a_file.name, test_b_file.name] + color_output,
+            capture_output=True,
+            text=True,
+        )
+        diff_str = diff.stdout.strip()
+        if diff_str:
+            print()
+            print("diff of macro definitions:")
+            print(diff_str)
+            ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
+            errors_emitted += sum(
+                1
+                for line in diff_str.splitlines()
+                if ansi_escape.sub("", line).startswith("< ")
+            )
+
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument(
+        "-m",
+        dest="missing_functions",
+        action="store_true",
+        help="search for missing functions",
+    )
+    argparser.add_argument(
+        "-M",
+        dest="macro_definitions",
+        action="store_true",
+        help="compare macro definitions",
+    )
+    argparser.add_argument(
+        "-f",
+        dest="function_signatures",
+        action="store_true",
+        help="check function signatures",
+    )
+    argparser.add_argument(
+        "-s", dest="structs", action="store_true", help="check structs"
+    )
+    argparser.add_argument(
+        "-t", dest="typedefs", action="store_true", help="check structs"
+    )
+    argparser.add_argument(
+        "-v", "--verbose", dest="verbose", action="store_true", help="verbose output"
+    )
+    argparser.add_argument(
+        "-T",
+        dest="dump_tree",
+        action="store_true",
+        help="dump tree (for debug, extremely verbose)",
+    )
+    argparser.add_argument(
+        "--config",
+        help="path to the configuration file",
+        dest="config",
+        type=argparse.FileType("r"),
+        required=True,
+    )
+    argparser.add_argument(
+        "--arch", help="target architecture", dest="arch", type=str, default="x86_64"
+    )
+    argparser.add_argument(
+        "--ld-library-path",
+        help="additional LD_LIBRARY_PATH to supply to qemu-user",
+        dest="ld_lib",
+        type=str,
+    )
+    argparser.add_argument(
+        "--clang-version",
+        help="specify which versioned clang to use",
+        dest="clang_version",
+        type=int,
+    )
+    argparser.add_argument(
+        "--exit-with-zero-for-abi-mismatches",
+        help="exit with zero even if ABI mismatches are detected",
+        dest="exit_zero",
+        action="store_true",
+    )
+    argparser.add_argument(
+        "reference", help="path to the references libc's sysroot", type=pathlib.Path
+    )
+    argparser.add_argument(
+        "mlibc", help="mlibc headers to be checked", type=pathlib.Path
+    )
+    argparser.add_argument("file", nargs="?", help="limit scope to this file")
+
+    colorama.just_fix_windows_console()
+
+    args = argparser.parse_args()
+
+    config = yaml.load(args.config, yaml.CSafeLoader)
+    reference_state = State(args.reference)
+    mlibc_state = State(args.mlibc)
+
+    # determine the path to clang's resource dir (like /usr/lib/clang/20/include)
+    resource_dir_result = subprocess.run(
+        cc_name() + ["-print-resource-dir"], capture_output=True
+    )
+    resource_dir = pathlib.Path(resource_dir_result.stdout.decode().strip()) / "include"
+
+    for pair in ((args.reference, reference_state), (args.mlibc, mlibc_state)):
+        (path, state) = pair
+        if not args.file:
+            for header in sorted(path.rglob("*.h")):
+                parse(
+                    pathlib.Path(str(header).removeprefix(str(path)).removeprefix("/")),
+                    resource_dir,
+                    path,
+                    state,
+                )
+        else:
+            parse(pathlib.Path(args.file), resource_dir, path, state)
+
+    compare_states(reference_state, mlibc_state)
+
+    if errors_emitted > 0:
+        print(f"\n{errors_emitted} errors emitted.")
+    else:
+        print("No ABI differences found.")
+
+    if args.exit_zero:
+        exit(0)
+
+    exit(min(errors_emitted, 0xFF))
@@ -0,0 +1,4 @@
+{
+  /* Hide all C++ symbols.  */
+  local: _Z*;
+};
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <fenv.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/poll.h>
+#include <net/ethernet.h>
+#include <sys/eventfd.h>
+#include <sys/ipc.h>
+#include <sys/resource.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/statvfs.h>
+#include <termios.h>
@@ -0,0 +1,267 @@
+// This file is autogenerated!
+// All changes made will be lost (eventually)!
+
+use crate::prelude::*;
+
+use crate::sighandler_t;
+
+pub type blkcnt64_t = i64;
+pub type rlimit64 = crate::rlimit;
+pub type rlim64_t = crate::rlim_t;
+pub type dirent64 = crate::dirent;
+pub type stat64 = crate::stat;
+pub type statfs64 = crate::statfs;
+pub type statvfs64 = crate::statvfs;
+pub type idtype_t = c_uint;
+pub type Ioctl = c_ulong;
+pub type pthread_t = *mut c_void;
+
+pub type __u8 = c_uchar;
+pub type __u16 = c_ushort;
+pub type __s16 = c_short;
+pub type __u32 = c_uint;
+pub type __s32 = c_int;
+pub type __u64 = c_ulonglong;
+pub type __s64 = c_longlong;
+
+pub const RTLD_DEFAULT: *mut c_void = 0i64 as *mut c_void;
+pub const RLIM_INFINITY: crate::rlim_t = !0;
+
+pub type Elf32_Half = u16;
+pub type Elf32_Word = u32;
+pub type Elf32_Off = u32;
+pub type Elf32_Addr = u32;
+
+pub type Elf64_Half = u16;
+pub type Elf64_Word = u32;
+pub type Elf64_Off = u64;
+pub type Elf64_Addr = u64;
+pub type Elf64_Xword = u64;
+
+// ELF program header records for the 32-bit and 64-bit ELF classes.
+// The two layouts are not just width variants of each other: `p_flags`
+// follows `p_type` in the 64-bit record but sits near the end of the
+// 32-bit one.
+s! {
+    pub struct Elf32_Phdr {
+        pub p_type: Elf32_Word,
+        pub p_offset: Elf32_Off,
+        pub p_vaddr: Elf32_Addr,
+        pub p_paddr: Elf32_Addr,
+        pub p_filesz: Elf32_Word,
+        pub p_memsz: Elf32_Word,
+        pub p_flags: Elf32_Word,
+        pub p_align: Elf32_Word,
+    }
+
+    pub struct Elf64_Phdr {
+        pub p_type: Elf64_Word,
+        pub p_flags: Elf64_Word,
+        pub p_offset: Elf64_Off,
+        pub p_vaddr: Elf64_Addr,
+        pub p_paddr: Elf64_Addr,
+        pub p_filesz: Elf64_Xword,
+        pub p_memsz: Elf64_Xword,
+        pub p_align: Elf64_Xword,
+    }
+}
+
+// Record describing one loaded object.  The address, program-header pointer
+// and program-header count fields use the Elf64_* or Elf32_* types depending
+// on `target_pointer_width`; the remaining fields are the same on both.
+s! {
+    pub struct dl_phdr_info {
+        #[cfg(target_pointer_width = "64")]
+        pub dlpi_addr: Elf64_Addr,
+        #[cfg(target_pointer_width = "32")]
+        pub dlpi_addr: Elf32_Addr,
+
+        pub dlpi_name: *const c_char,
+
+        #[cfg(target_pointer_width = "64")]
+        pub dlpi_phdr: *const Elf64_Phdr,
+        #[cfg(target_pointer_width = "32")]
+        pub dlpi_phdr: *const Elf32_Phdr,
+
+        #[cfg(target_pointer_width = "64")]
+        pub dlpi_phnum: Elf64_Half,
+        #[cfg(target_pointer_width = "32")]
+        pub dlpi_phnum: Elf32_Half,
+
+        pub dlpi_adds: c_ulonglong,
+        pub dlpi_subs: c_ulonglong,
+        pub dlpi_tls_modid: size_t,
+        pub dlpi_tls_data: *mut c_void,
+    }
+}
+
+// Returns the control-message header that follows `cmsg` inside the control
+// buffer described by `mhdr`, or a null pointer when there is none.
+//
+// Null is returned when:
+//  - `cmsg_len` of the current header is smaller than a `cmsghdr` itself, or
+//  - the next header, or the next header plus its aligned length, would
+//    extend past `msg_control + msg_controllen`.
+f! {
+    pub fn CMSG_NXTHDR(mhdr: *const msghdr, cmsg: *const cmsghdr) -> *mut cmsghdr {
+        if ((*cmsg).cmsg_len as usize) < mem::size_of::<cmsghdr>() {
+            return 0 as *mut cmsghdr;
+        };
+        // Candidate position: current header advanced by its aligned length.
+        let next = (cmsg as usize + super::CMSG_ALIGN((*cmsg).cmsg_len as usize)) as *mut cmsghdr;
+        // One-past-the-end address of the control buffer.
+        let max = (*mhdr).msg_control as usize + (*mhdr).msg_controllen as usize;
+        if (next.offset(1)) as usize > max ||
+            next as usize + super::CMSG_ALIGN((*next).cmsg_len as usize) > max {
+            0 as *mut cmsghdr
+        } else {
+            next as *mut cmsghdr
+        }
+    }
+}
+
+pub const PTHREAD_MUTEX_INITIALIZER: pthread_mutex_t = pthread_mutex_t {
+    size: [0; 16],
+};
+pub const PTHREAD_COND_INITIALIZER: pthread_cond_t = pthread_cond_t {
+    size: [0; 12],
+};
+pub const PTHREAD_RWLOCK_INITIALIZER: pthread_rwlock_t = pthread_rwlock_t {
+    size: [0; 12],
+};
+
+// Interface request record, declared without the extra trait impls.
+//
+// NOTE(review): every `ifru_*` member is declared here as a separate,
+// sequential struct field.  In the Linux C definition of `struct ifreq`
+// these names are members of a union that follows an interface-name array,
+// so this layout presumably does not match the C struct -- confirm against
+// the mlibc header and the generator configuration before relying on it.
+s_no_extra_traits! {
+    pub struct ifreq {
+        pub ifru_addr: crate::sockaddr,
+        pub ifru_dstaddr: crate::sockaddr,
+        pub ifru_broadaddr: crate::sockaddr,
+        pub ifru_netmask: crate::sockaddr,
+        pub ifru_hwaddr: crate::sockaddr,
+        pub ifru_flags: c_short,
+        pub ifru_ivalue: c_int,
+        pub ifru_mtu: c_int,
+        pub ifru_map: crate::ifmap,
+        pub ifru_slave: [c_char; 16],
+        pub ifru_newname: [c_char; 16],
+        pub ifru_data: *mut c_char,
+    }
+}
+
+// Device-number packing helpers.  `makedev` scatters the major and minor
+// numbers over a `dev_t`; `major` and `minor` gather them back:
+//
+//   dev bits  0..=7   <- minor bits  0..=7
+//   dev bits  8..=19  <- major bits  0..=11
+//   dev bits 20..=43  <- minor bits  8..=31
+//   dev bits 44..=63  <- major bits 12..=31
+safe_f! {
+    pub {const} fn makedev(major: c_uint, minor: c_uint) -> crate::dev_t {
+        let major = major as crate::dev_t;
+        let minor = minor as crate::dev_t;
+        let mut dev = 0;
+        dev |= (major & 0x00000fff) << 8;
+        dev |= (major & 0xfffff000) << 32;
+        dev |= (minor & 0x000000ff) << 0;
+        dev |= (minor & 0xffffff00) << 12;
+        dev
+    }
+
+    // Extracts the major number: low 12 bits from dev bits 8..=19, the rest
+    // from dev bits 44..=63.
+    pub {const} fn major(dev: crate::dev_t) -> c_uint {
+        let mut major = 0;
+        major |= (dev & 0x00000000000fff00) >> 8;
+        major |= (dev & 0xfffff00000000000) >> 32;
+        major as c_uint
+    }
+
+    // Extracts the minor number: low 8 bits from dev bits 0..=7, the rest
+    // from dev bits 20..=43.
+    pub {const} fn minor(dev: crate::dev_t) -> c_uint {
+        let mut minor = 0;
+        minor |= (dev & 0x00000000000000ff) >> 0;
+        minor |= (dev & 0x00000ffffff00000) >> 12;
+        minor as c_uint
+    }
+}
+
+// `strerror_r` is bound to the C symbol `__gnu_strerror_r` and declared as
+// returning `c_int`.
+// NOTE(review): the symbol name suggests the GNU flavour of strerror_r, which
+// in glibc returns `char *` rather than `int` -- confirm that the symbol this
+// links against really has an `int` return type.
+extern "C" {
+    #[link_name = "__gnu_strerror_r"]
+    pub fn strerror_r(errnum: c_int, buf: *mut c_char, buflen: size_t) -> c_int;
+}
+
+// Accessors for signal-specific members of `siginfo_t`.  Each one
+// reinterprets `self` as a local `#[repr(C)]` struct that spells out the
+// layout for one kind of signal and reads a single field from it.  The
+// caller must know that the stored data has that shape, which is why both
+// are `unsafe`.
+impl siginfo_t {
+    // Reads `si_status` through the child-status layout.
+    pub unsafe fn si_status(&self) -> c_int {
+        #[repr(C)]
+        struct siginfo_sigchld {
+            _si_signo: c_int,
+            _si_errno: c_int,
+            _si_code: c_int,
+            si_pid: crate::pid_t,
+            si_uid: crate::uid_t,
+            si_status: c_int,
+            si_utime: crate::clock_t,
+            si_stime: crate::clock_t,
+        }
+        (*(self as *const siginfo_t as *const siginfo_sigchld)).si_status
+    }
+
+    // Reads `si_addr` through the fault layout.
+    pub unsafe fn si_addr(&self) -> *mut c_void {
+        #[repr(C)]
+        struct siginfo_sigfault {
+            _si_signo: c_int,
+            _si_errno: c_int,
+            _si_code: c_int,
+            si_addr: *mut c_void,
+        }
+        (*(self as *const siginfo_t as *const siginfo_sigfault)).si_addr
+    }
+}
+
+// Netlink socket address.  `nl_pad` is the only non-`pub` field, so code
+// outside this module cannot name it.
+s! {
+    pub struct sockaddr_nl {
+        pub nl_family: crate::sa_family_t,
+        nl_pad: c_ushort,
+        pub nl_pid: u32,
+        pub nl_groups: u32,
+    }
+}
+
+// linux/netlink.h
+pub const NLA_ALIGNTO: c_int = 4;
+
+pub const NETLINK_ROUTE: c_int = 0;
+pub const NETLINK_UNUSED: c_int = 1;
+pub const NETLINK_USERSOCK: c_int = 2;
+pub const NETLINK_FIREWALL: c_int = 3;
+pub const NETLINK_SOCK_DIAG: c_int = 4;
+pub const NETLINK_NFLOG: c_int = 5;
+pub const NETLINK_XFRM: c_int = 6;
+pub const NETLINK_SELINUX: c_int = 7;
+pub const NETLINK_ISCSI: c_int = 8;
+pub const NETLINK_AUDIT: c_int = 9;
+pub const NETLINK_FIB_LOOKUP: c_int = 10;
+pub const NETLINK_CONNECTOR: c_int = 11;
+pub const NETLINK_NETFILTER: c_int = 12;
+pub const NETLINK_IP6_FW: c_int = 13;
+pub const NETLINK_DNRTMSG: c_int = 14;
+pub const NETLINK_KOBJECT_UEVENT: c_int = 15;
+pub const NETLINK_GENERIC: c_int = 16;
+pub const NETLINK_SCSITRANSPORT: c_int = 18;
+pub const NETLINK_ECRYPTFS: c_int = 19;
+pub const NETLINK_RDMA: c_int = 20;
+pub const NETLINK_CRYPTO: c_int = 21;
+pub const NETLINK_INET_DIAG: c_int = NETLINK_SOCK_DIAG;
+
+pub const NLM_F_REQUEST: c_int = 1;
+pub const NLM_F_MULTI: c_int = 2;
+pub const NLM_F_ACK: c_int = 4;
+pub const NLM_F_ECHO: c_int = 8;
+pub const NLM_F_DUMP_INTR: c_int = 16;
+pub const NLM_F_DUMP_FILTERED: c_int = 32;
+
+pub const NLM_F_ROOT: c_int = 0x100;
+pub const NLM_F_MATCH: c_int = 0x200;
+pub const NLM_F_ATOMIC: c_int = 0x400;
+pub const NLM_F_DUMP: c_int = NLM_F_ROOT | NLM_F_MATCH;
+
+pub const NLM_F_REPLACE: c_int = 0x100;
+pub const NLM_F_EXCL: c_int = 0x200;
+pub const NLM_F_CREATE: c_int = 0x400;
+pub const NLM_F_APPEND: c_int = 0x800;
+
+pub const NLM_F_NONREC: c_int = 0x100;
+pub const NLM_F_BULK: c_int = 0x200;
+
+pub const NLM_F_CAPPED: c_int = 0x100;
+pub const NLM_F_ACK_TLVS: c_int = 0x200;
+
+pub const NETLINK_ADD_MEMBERSHIP: c_int = 1;
+pub const NETLINK_DROP_MEMBERSHIP: c_int = 2;
+pub const NETLINK_PKTINFO: c_int = 3;
+pub const NETLINK_BROADCAST_ERROR: c_int = 4;
+pub const NETLINK_NO_ENOBUFS: c_int = 5;
+pub const NETLINK_RX_RING: c_int = 6;
+pub const NETLINK_TX_RING: c_int = 7;
+pub const NETLINK_LISTEN_ALL_NSID: c_int = 8;
+pub const NETLINK_LIST_MEMBERSHIPS: c_int = 9;
+pub const NETLINK_CAP_ACK: c_int = 10;
+pub const NETLINK_EXT_ACK: c_int = 11;
+pub const NETLINK_GET_STRICT_CHK: c_int = 12;
+
+pub const NLA_F_NESTED: c_int = 1 << 15;
+pub const NLA_F_NET_BYTEORDER: c_int = 1 << 14;
+pub const NLA_TYPE_MASK: c_int = !(NLA_F_NESTED | NLA_F_NET_BYTEORDER);
@@ -0,0 +1,722 @@
+#!/bin/env python3
+
+# HOW THIS WORKS
+#
+# This script takes mlibc header files and generates bindings to be used with rust's "libc" crate.
+# A configuration file is needed for its proper function; an example is provided alongside this
+# script. Please do note that it is used for managarm, which lives under `unix/linux_like` in the
+# "libc" crate. If your OS does not live under this directory, but e.g. under just `unix` instead,
+# you will need to adapt the configuration to fit your use.
+#
+# HOW TO USE
+#
+# > python rust-libc <path/to/your/installed/mlibc/headers> <cross-gcc> [<single-header.h>]
+#
+# By default, the script parses all header files in the directory supplied, except for when a
+# single header is provided, where it will only parse that.
+
+import argparse
+import io
+import os
+import pathlib
+import string
+import subprocess
+import sys
+from dataclasses import dataclass, field
+
+import clang.cindex
+import colorama
+import yaml
+from clang.cindex import Cursor, CursorKind, TokenKind, TypeKind
+
+dry_run = True
+errors_emitted = 0
+
+
+def log_err(prefix, msg):
+    global errors_emitted
+
+    print(
+        f"{colorama.Fore.RED}{prefix}{colorama.Style.RESET_ALL}: {msg}", file=sys.stderr
+    )
+    errors_emitted += 1
+
+
+def emit(msg):
+    if not dry_run:
+        print(msg)
+
+
+def no_system_includes(cursor, level):
+    """filter out verbose stuff from system include files"""
+    return (level != 1) or (
+        cursor.location.file is not None
+        and not cursor.location.file.name.startswith("/usr/include")
+    )
+
+
+class Type:
+    def __init__(
+        self,
+        c: clang.cindex.Cursor,
+        t: clang.cindex.Type = None,
+        convert_arrays_to_ptrs=False,
+    ):
+        self.cursor = c
+        self.type = t if t else c.type
+        self.convert_arrays_to_ptrs = convert_arrays_to_ptrs
+
+    @property
+    def kind(self):
+        return self.type.kind
+
+    def convert_ptr_type(self, c, ty, is_pointee=False):
+        pointee = ty if is_pointee else ty.get_pointee()
+
+        if pointee.kind == TypeKind.FUNCTIONPROTO:
+            arg_list = []
+            for f in pointee.argument_types():
+                arg_list.append(f"{Type(c, f)}")
+            args = ", ".join(arg_list)
+            ret_type = Type(c, pointee.get_result())
+            if c.semantic_parent.spelling in config["force_raw_function_pointer"]:
+                return f'extern "C" fn({args})' + (
+                    f" -> {ret_type}" if str(ret_type) != "c_void" else ""
+                )
+            else:
+                return f'Option<unsafe extern "C" fn({args})' + (
+                    f" -> {ret_type}>" if str(ret_type) != "c_void" else ">"
+                )
+
+        is_mut = not pointee.spelling.startswith("const")
+        prefix = "*" + ("mut" if is_mut else "const") + " "
+
+        type_iter = pointee
+
+        while type_iter.kind == TypeKind.POINTER:
+            prefix += "*mut "
+            type_iter = type_iter.get_pointee()
+
+        t = type_iter.spelling.removeprefix("const ")
+        tokens = t.split(" ")
+
+        match tokens:
+            case ["char", *_]:
+                return prefix + "c_char"
+            case ["struct", x, *_] if x in config["force_local_type"]:
+                return prefix + x
+            case ["struct", x, *_]:
+                return f"{prefix} crate::{x}"
+            case ["int", *_]:
+                return prefix + "c_int"
+            case ["unsigned", "char", *_]:
+                return prefix + "c_uchar"
+            case ["unsigned", "short", *_]:
+                return prefix + "c_ushort"
+            case ["unsigned", "int", *_]:
+                return prefix + "c_uint"
+            case ["unsigned", "long", *_]:
+                return prefix + "c_ulong"
+            case ["unsigned", *_]:
+                log_err("unhandled unsigned type", f"'{t}'")
+            case ["void", *_]:
+                return prefix + "c_void"
+            case ["double", *_]:
+                return prefix + "c_double"
+            case [*_]:
+                return prefix + "crate::" + t
+
+    def __str__(self):
+        typename = str(self.kind)
+        match self.kind:
+            case TypeKind.VOID:
+                typename = "c_void"
+            case TypeKind.LONG:
+                typename = "c_long"
+            case TypeKind.LONGLONG:
+                typename = "c_longlong"
+            case TypeKind.UINT:
+                typename = "c_uint"
+            case TypeKind.INT:
+                typename = "c_int"
+            case TypeKind.ULONG:
+                typename = "c_ulong"
+            case TypeKind.ULONGLONG:
+                typename = "c_ulonglong"
+            case TypeKind.USHORT:
+                typename = "c_ushort"
+            case TypeKind.SHORT:
+                typename = "c_short"
+            case TypeKind.CHAR_S:
+                typename = "c_char"
+            case TypeKind.UCHAR:
+                typename = "c_uchar"
+            case TypeKind.DOUBLE:
+                typename = "c_double"
+            case TypeKind.LONGDOUBLE:
+                typename = "c_longdouble"
+            case TypeKind.FLOAT:
+                typename = "c_float"
+            case TypeKind.CONSTANTARRAY:
+                if self.convert_arrays_to_ptrs:
+                    typename = self.convert_ptr_type(
+                        self.cursor, self.type.get_array_element_type(), is_pointee=True
+                    )
+                else:
+                    typename = f"[{str(Type(self.cursor, self.type.get_array_element_type()))}; {self.type.element_count}]"
+            case TypeKind.INCOMPLETEARRAY:
+                typename = "*mut " + str(
+                    Type(self.cursor, self.type.get_array_element_type())
+                )
+            case TypeKind.ELABORATED:
+                if self.is_va_list():
+                    typename = "*mut c_char"
+                elif self.cursor.is_anonymous():
+                    typename = "crate::" + Type.cursor_name(self.cursor)
+                elif self.type.get_declaration().displayname in (
+                    "uint8_t",
+                    "__mlibc_uint8",
+                ):
+                    typename = "u8"
+                elif self.type.get_declaration().displayname in (
+                    "int8_t",
+                    "__mlibc_int8",
+                ):
+                    typename = "i8"
+                elif self.type.get_declaration().displayname in (
+                    "uint16_t",
+                    "__mlibc_uint16",
+                ):
+                    typename = "u16"
+                elif self.type.get_declaration().displayname in (
+                    "int16_t",
+                    "__mlibc_int16",
+                ):
+                    typename = "i16"
+                elif self.type.get_declaration().displayname in (
+                    "uint32_t",
+                    "__mlibc_uint32",
+                ):
+                    typename = "u32"
+                elif self.type.get_declaration().displayname in (
+                    "int32_t",
+                    "__mlibc_int32",
+                ):
+                    typename = "i32"
+                elif self.type.get_declaration().displayname in (
+                    "uint64_t",
+                    "__mlibc_uint64",
+                ):
+                    typename = "u64"
+                elif self.type.get_declaration().displayname in (
+                    "int64_t",
+                    "__mlibc_int64",
+                ):
+                    typename = "i64"
+                elif self.type.get_declaration().displayname in (
+                    "intptr_t",
+                    "__mlibc_intptr",
+                ):
+                    typename = "isize"
+                elif self.type.get_declaration().displayname in ("__mlibc_size"):
+                    typename = "usize"
+                else:
+                    typename = "crate::" + str(self.type.get_declaration().displayname)
+            case TypeKind.POINTER:
+                typename = self.convert_ptr_type(self.cursor, self.type)
+            case TypeKind.TYPEDEF:
+                return str(self.type.spelling)
+            case TypeKind.RECORD:
+                return ""
+        return typename
+
+    @property
+    def canonical(self):
+        return str(Type(self.type.get_canonical()))
+
+    def is_valid(self):
+        return self.kind != TypeKind.INVALID
+
+    def is_va_list(self):
+        return (
+            self.kind == TypeKind.ELABORATED
+            and self.type.get_declaration().displayname == "__builtin_va_list"
+        )
+
+    def escape_name(name: str):
+        if name in ("type", "in"):
+            return f"r#{name}"
+        return name
+
+    def cursor_name(c: Cursor):
+        d = c.type.get_declaration()
+        if d and d.is_anonymous():
+            return (
+                f"anon_{pathlib.Path(str(d.location.file)).stem}_line{d.location.line}"
+            )
+        return Type.escape_name(c.displayname)
+
+
+@dataclass
+class State:
+    functions = []
+    macros = []
+    types = []
+    structs = []
+    variables = []
+
+
+@dataclass
+class RustBindingGenerator:
+    config: dict
+    in_function_block = False
+    in_struct_block = False
+    in_union_block = False
+
+    def handle_macro(self, cursor, gen, state):
+        def is_num(s):
+            if s.removeprefix("0o").isnumeric():
+                return True
+            if set(s.removeprefix("0x")).issubset(string.hexdigits):
+                return True
+            return False
+
+        done = False
+        is_negative = False
+
+        assert len(gen) >= 1
+        assert gen[0].kind == TokenKind.IDENTIFIER
+        gen.pop(0)
+
+        if len(gen) >= 1:
+            tokens = []
+            c_type = "c_int"
+            is_unsigned = False
+            i = 0
+            while not done and gen and i < len(gen):
+                c_type = "int"
+                if gen[i].kind == TokenKind.PUNCTUATION and gen[i].spelling in (
+                    "(",
+                    ")",
+                ):
+                    if not (i == 0 or i == (len(gen) - 1)):
+                        tokens.append(gen[i].spelling)
+                    i += 1
+                elif gen[i].kind == TokenKind.PUNCTUATION and gen[i].spelling == "-":
+                    is_unsigned = False
+                    i += 1
+                elif gen[i].kind in (
+                    TokenKind.LITERAL,
+                    TokenKind.IDENTIFIER,
+                    TokenKind.PUNCTUATION,
+                ):
+                    spelling = gen[i].spelling
+                    if spelling.endswith("ULL") and is_num(spelling[:-3]):
+                        spelling = spelling.removesuffix("ULL")
+                        c_type = "longlong"
+                        is_unsigned = True
+                    if spelling.endswith("LL") and is_num(spelling[:-2]):
+                        spelling = spelling.removesuffix("LL")
+                        c_type = "longlong"
+                        is_unsigned = False
+                    if spelling.endswith("UL") and is_num(spelling[:-2]):
+                        spelling = spelling.removesuffix("UL")
+                        c_type = "long"
+                        is_unsigned = True
+                    elif spelling.endswith("L") and is_num(spelling[:-1]):
+                        spelling = spelling.removesuffix("L")
+                        c_type = "long"
+                        is_unsigned = False
+                    elif spelling.endswith("U") and is_num(spelling[:-1]):
+                        spelling = spelling.removesuffix("U")
+                        is_unsigned = True
+
+                    if (
+                        is_num(spelling)
+                        and spelling.startswith("0")
+                        and not spelling.startswith("0x")
+                        and spelling != "0"
+                    ):
+                        spelling = f"0o{spelling[1:]}"
+
+                    tokens.append(spelling)
+                    i += 1
+                else:
+                    log_err(
+                        f"unexpected token in macro '{cursor.displayname}'",
+                        f"{gen[i].kind} {gen[i].spelling} at {gen[0].location}, skipping macro",
+                    )
+                    done = True
+                c_type = "c_" + ("u" if is_unsigned else "") + c_type
+            if not self.is_ignored("macros", state.macros, cursor.displayname):
+                for name in config["force_macro_type"]:
+                    if cursor.displayname in config["force_macro_type"][name]:
+                        c_type = name
+                        break
+                emit(
+                    "pub const {}: {} = {}{};".format(
+                        cursor.displayname,
+                        c_type,
+                        "-" if is_negative else "",
+                        "".join(tokens),
+                    )
+                )
+                state.macros.append(cursor.displayname)
+
+    def indent(self, level=0):
+        if self.in_function_block or self.in_struct_block or self.in_union_block:
+            return "\t" * (level + 1)
+        return ""
+
+    def handle_field_decl(self, cursor, c, inline_defs):
+        """Emit the Rust field line(s) for one member of a struct or union.
+
+        Parameters:
+            cursor: cursor of the enclosing struct/union declaration; its name
+                selects the entry in config["force_struct_member_type"].
+            c: cursor of the field being emitted.
+            inline_defs: list of inline struct/union declaration cursors queued
+                by the caller; the last entry is removed when a forced type
+                override replaces the field whose type it declares.
+
+        An override entry matching the field name must contain either a
+        "type" key (optionally with "rename-to") or a "replace" list of
+        {"name", "type"} members. An entry with neither is reported through
+        log_err and the field is then emitted with its translated type.
+        """
+        tc = Type(c)
+        assert tc.is_valid()
+        name = str(tc)
+        if c.is_anonymous():
+            name = Type.cursor_name(c)
+
+        parent_name = Type.cursor_name(cursor)
+        if parent_name in config["force_struct_member_type"]:
+            info = config["force_struct_member_type"][parent_name]
+            # First override entry for this field, if any (single pass).
+            detail = next((x for x in info if x["name"] == c.displayname), None)
+            if detail is not None:
+                if "type" in detail:
+                    assert "replace" not in detail
+                    name = (
+                        detail["rename-to"] if "rename-to" in detail else c.displayname
+                    )
+                    emit(
+                        self.indent(1)
+                        + f"pub {Type.escape_name(name)}: {detail['type']},"
+                    )
+                    # The field's own type may have been declared inline and
+                    # queued just before it; drop that queued definition.
+                    # The list can be empty when the field uses a type that
+                    # was not declared inline, so guard the [-1] access.
+                    if (
+                        inline_defs
+                        and inline_defs[-1].get_usr()
+                        == c.type.get_declaration().get_usr()
+                    ):
+                        inline_defs.pop()
+                    return
+                elif "replace" in detail:
+                    for member in detail["replace"]:
+                        emit(
+                            self.indent(1)
+                            + "pub {}: {},".format(member["name"], member["type"])
+                        )
+                    return
+                else:
+                    log_err(
+                        "invalid configuration",
+                        f"missing info for override for struct '{c.displayname}'",
+                    )
+        emit(self.indent(1) + f"pub {Type.escape_name(c.displayname)}: {name},")
+
+    def handle_data_structs(self, cursor, state, level=0):
+        """Emit the Rust definition for a struct, union or enum cursor.
+
+        Structs are written inside an `s! { ... }` wrapper and unions inside
+        an `s_no_extra_traits! { ... }` wrapper; a wrapper of the other kind
+        that is still open is closed first. Struct/union declarations found
+        among the members are collected and emitted afterwards through a
+        recursive call with `level + 1`. Every emitted declaration name is
+        appended to `state.structs`.
+
+        Declarations without children are skipped unless their name is listed
+        in config["forced_empty_structs"]; anonymous enums at level 1 are
+        skipped as well.
+        """
+        nested_decls = []
+        members = list(cursor.get_children())
+
+        if not members:
+            if Type.cursor_name(cursor) not in config["forced_empty_structs"]:
+                return
+
+        # Close a wrapper block that does not match the kind being emitted.
+        if self.in_struct_block and cursor.kind != CursorKind.STRUCT_DECL:
+            emit("}")
+            self.in_struct_block = False
+        if self.in_union_block and cursor.kind != CursorKind.UNION_DECL:
+            emit("}")
+            self.in_union_block = False
+
+        decl_kind = cursor.kind
+        if decl_kind == CursorKind.STRUCT_DECL:
+            if not self.in_struct_block:
+                emit("s! {")
+                self.in_struct_block = True
+            if any(CursorKind.PACKED_ATTR == m.kind for m in cursor.get_children()):
+                emit(self.indent() + "#[repr(packed)]")
+            emit(self.indent() + f"pub struct {Type.cursor_name(cursor)} {{")
+            state.structs.append(Type.cursor_name(cursor))
+        elif decl_kind == CursorKind.UNION_DECL:
+            if not self.in_union_block:
+                emit("s_no_extra_traits! {")
+                self.in_union_block = True
+            emit("#[repr(C)]")
+            emit(self.indent() + f"pub union {Type.cursor_name(cursor)} {{")
+            state.structs.append(Type.cursor_name(cursor))
+        elif decl_kind == CursorKind.ENUM_DECL:
+            if cursor.type.get_declaration().is_anonymous() and level == 1:
+                # ignore anonymous enums in the global scope
+                return
+            emit(self.indent() + f"pub enum {Type.cursor_name(cursor)} {{")
+            state.structs.append(Type.cursor_name(cursor))
+        else:
+            log_err("unhandled data struct kind", f"{cursor.kind}")
+
+        if Type.cursor_name(cursor) in config["force_struct_zero_fill"]:
+            # Replace the members by one hidden byte array of the type's size.
+            byte_count = cursor.type.get_size()
+            emit("\t\t#[doc(hidden)]")
+            emit(f"\t\tsize: [u8; {byte_count}],")
+        else:
+            for member in members:
+                member_kind = member.kind
+                if member_kind == CursorKind.FIELD_DECL:
+                    self.handle_field_decl(cursor, member, nested_decls)
+                elif (
+                    member_kind == CursorKind.STRUCT_DECL
+                    or member_kind == CursorKind.UNION_DECL
+                ):
+                    nested_decls.append(member)
+                elif member_kind == CursorKind.ENUM_CONSTANT_DECL:
+                    emit(f"{member.displayname} = {member.enum_value},")
+                elif member_kind == CursorKind.PACKED_ATTR:
+                    continue
+                else:
+                    log_err(f"unhandled {cursor.kind} member", f"kind {member.kind}")
+        emit(self.indent() + "}")
+
+        if cursor.kind == CursorKind.ENUM_DECL:
+            # Enums additionally get hand-written Copy and Clone impls.
+            enum_name = Type.cursor_name(cursor)
+            emit(f"impl Copy for {enum_name} {{}}")
+            emit(f"impl Clone for {enum_name} {{")
+            emit(f"\tfn clone(&self) -> {enum_name} {{")
+            emit("\t\t*self")
+            emit("\t}")
+            emit("}")
+
+        for nested in nested_decls:
+            self.handle_data_structs(nested, state, level + 1)
+
+    def is_ignored(self, typename, ignorelist, name):
+        """Decide whether the entity `name` of category `typename` is skipped.
+
+        Checks are applied in this order:
+          1. macros named `_..._H` are ignored;
+          2. names already present in `ignorelist` are ignored;
+          3. names listed under config["forced_<typename>"] are kept;
+          4. names starting with a double underscore are ignored;
+          5. names listed under config["ignored_<typename>"] are ignored.
+        Anything else is kept.
+
+        Returns True when the entity should be ignored, False otherwise.
+        """
+        guard_like_macro = (
+            typename == "macros" and name.startswith("_") and name.endswith("_H")
+        )
+        if guard_like_macro or name in ignorelist:
+            return True
+
+        # A forced entry wins over the double-underscore rule and the
+        # configured ignore list.
+        if name in config.get("forced_" + typename, ()):
+            return False
+        if name.startswith("__"):
+            return True
+        return name in config.get("ignored_" + typename, ())
+
+    def is_ignored_file(base_dir: pathlib.Path, file: pathlib.Path, config):
+        """Tell whether declarations located in `file` should be skipped.
+
+        Defined without `self`; it is invoked through the class as
+        `RustBindingGenerator.is_ignored_file(...)`.
+
+        A file is ignored when
+          * its path string does not start with the `base_dir` path string,
+          * its path relative to `base_dir` equals an entry of
+            config["ignored_files"], or
+          * the `base_dir` path string starts with an entry of
+            config["includes"].
+
+        Returns True for an ignored file, False otherwise.
+        """
+        root = str(base_dir)
+        target = str(file)
+        if not target.startswith(root):
+            return True
+
+        relative = target.removeprefix(root).removeprefix("/")
+        if any(relative == entry for entry in config["ignored_files"]):
+            return True
+        return any(root.startswith(prefix) for prefix in config["includes"])
+
+    def from_cursor(self, base_dir, header, cursor, filter_pred, level=0):
+        """Emit Rust bindings for `cursor`, recursing into translation units.
+
+        Parameters:
+            base_dir: root directory of the headers; cursors located in files
+                that `is_ignored_file` rejects produce no output.
+            header: the header being processed; only forwarded to recursive
+                calls.
+            cursor: the libclang cursor to translate.
+            filter_pred: callable `(cursor, level) -> bool`; the cursor is
+                only translated when it returns a true value.
+            level: nesting depth; 0 for the translation unit itself, whose
+                children are visited with `level + 1`.
+
+        Consecutive structs, unions and functions share one wrapper block
+        (`s! {`, `s_no_extra_traits! {`, `extern "C" {`). An open wrapper is
+        closed as soon as a cursor of a different kind is handled, and any
+        wrapper still open is closed when the level-0 call finishes.
+        """
+        global state
+
+        if cursor.location.file:
+            f = pathlib.Path(str(cursor.location.file))
+
+            if RustBindingGenerator.is_ignored_file(base_dir, f, config):
+                return
+
+        if filter_pred(cursor, level):
+            t = Type(cursor)
+
+            if self.in_struct_block and cursor.kind != CursorKind.STRUCT_DECL:
+                emit("}")
+                self.in_struct_block = False
+            # The union wrapper must stay open across consecutive unions, so
+            # it is compared against UNION_DECL (not STRUCT_DECL).
+            if self.in_union_block and cursor.kind != CursorKind.UNION_DECL:
+                emit("}")
+                self.in_union_block = False
+            if self.in_function_block and cursor.kind != CursorKind.FUNCTION_DECL:
+                emit("}")
+                self.in_function_block = False
+
+            match cursor.kind:
+                case CursorKind.MACRO_DEFINITION:
+                    if not self.is_ignored("macros", [], cursor.displayname):
+                        gen = [token for token in cursor.get_tokens()]
+                        self.handle_macro(cursor, gen, state)
+                case CursorKind.STRUCT_DECL:
+                    if self.is_ignored("structs", state.structs, cursor.displayname):
+                        return
+
+                    self.handle_data_structs(cursor, state, level)
+                case CursorKind.UNION_DECL:
+                    if self.is_ignored("unions", state.structs, cursor.displayname):
+                        return
+
+                    self.handle_data_structs(cursor, state, level)
+                case CursorKind.ENUM_DECL:
+                    if self.is_ignored("enums", state.structs, cursor.displayname):
+                        return
+
+                    self.handle_data_structs(cursor, state, level)
+                case CursorKind.TYPEDEF_DECL:
+                    if not self.is_ignored("types", state.types, cursor.displayname):
+                        underlying = Type(cursor, cursor.underlying_typedef_type)
+                        # Emit the declaration behind the typedef first.
+                        self.from_cursor(
+                            base_dir,
+                            header,
+                            cursor.underlying_typedef_type.get_declaration(),
+                            filter_pred,
+                            level,
+                        )
+                        # Skip the alias when a struct/union/enum of the same
+                        # name has already been emitted.
+                        if cursor.displayname not in state.structs:
+                            emit(f"pub type {cursor.displayname} = {underlying};")
+                        state.types.append(cursor.displayname)
+                case CursorKind.FUNCTION_DECL:
+                    if self.is_ignored("functions", state.functions, cursor.spelling):
+                        return
+
+                    args = []
+                    for c in cursor.get_arguments():
+                        # Unnamed parameters get a positional placeholder name.
+                        arg_name = c.displayname if c.displayname else f"arg{len(args)}"
+                        tc = Type(c, convert_arrays_to_ptrs=True)
+                        if tc.is_va_list():
+                            arg_name = c.displayname if c.displayname else "arg_list"
+                        assert tc.is_valid()
+                        if str(tc):
+                            args.append(f"{Type.escape_name(arg_name)}: {str(tc)}")
+                    if cursor.type.is_function_variadic():
+                        args.append("...")
+                    arg_str = ", ".join(args)
+                    ret_type = str(Type(cursor, cursor.type.get_result()))
+                    if not self.in_function_block:
+                        emit('extern "C" {')
+                        self.in_function_block = True
+                    # A `c_void` result is written as a function without a
+                    # return type.
+                    emit(
+                        f"\tpub fn {cursor.spelling}({arg_str})"
+                        + (f" -> {ret_type};" if ret_type != "c_void" else ";")
+                    )
+                    state.functions.append(cursor.spelling)
+                case CursorKind.TRANSLATION_UNIT:
+                    for c in cursor.get_children():
+                        self.from_cursor(base_dir, header, c, filter_pred, level + 1)
+                case CursorKind.INCLUSION_DIRECTIVE:
+                    pass
+                case CursorKind.VAR_DECL:
+                    if self.is_ignored(
+                        "var_declarations", state.variables, cursor.spelling
+                    ):
+                        return
+                    else:
+                        log_err(
+                            "unhandled cursor type",
+                            f"VAR_DECL of '{cursor.spelling}'",
+                        )
+                case CursorKind.STATIC_ASSERT | CursorKind.UNEXPOSED_DECL:
+                    pass
+                case CursorKind.MACRO_INSTANTIATION:
+                    # TODO: cross-reference this with constant arrays?
+                    pass
+                case _:
+                    log_err(
+                        "unhandled cursor type",
+                        f"{cursor.kind} {cursor.spelling} {cursor.displayname} {cursor.location}",
+                    )
+
+                    if t.is_valid():
+                        emit(f"type '{t}' canonical '{t.canonical}'")
+
+        # The top-level call closes whatever wrapper block is still open.
+        if level == 0 and self.in_struct_block:
+            emit("}")
+            self.in_struct_block = False
+
+        if level == 0 and self.in_union_block:
+            emit("}")
+            self.in_union_block = False
+
+        if level == 0 and self.in_function_block:
+            emit("}")
+            self.in_function_block = False
+
+
+def parse(file: pathlib.Path, base_dir: pathlib.Path):
+    """Parse one header with libclang and emit the Rust bindings for it.
+
+    Parameters:
+        file: header path relative to `base_dir`.
+        base_dir: root directory of the headers; also passed to clang as an
+            include directory, after the entries of config["includes"].
+
+    A failure to load the translation unit is reported through log_err and
+    the header is skipped. If the header is not an ignored file and clang
+    produced any diagnostics, they are logged and the process exits with
+    status `errors_emitted + 1`.
+    """
+    index = clang.cindex.Index.create()
+    tu = None
+
+    try:
+        tu = index.parse(
+            base_dir / file,
+            args=[f"-I{p}" for p in config["includes"]] + ["-I" + str(base_dir), "-D_GNU_SOURCE"],
+            options=clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
+            | clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES,
+        )
+    except Exception:
+        # Catch Exception rather than everything, so that Ctrl-C and
+        # SystemExit still terminate the run.
+        log_err("parsing error", file)
+        return
+
+    assert tu
+
+    if not RustBindingGenerator.is_ignored_file(base_dir, base_dir / file, config):
+        if tu.diagnostics:
+            for d in tu.diagnostics:
+                log_err("compile error", d)
+            print(f"\n{errors_emitted + 1} errors emitted")
+            exit(errors_emitted + 1)
+
+        parser = RustBindingGenerator(config)
+        emit("")
+        print(f"// {tu.spelling.removeprefix(str(base_dir)).removeprefix('/')}")
+        parser.from_cursor(base_dir, file, tu.cursor, no_system_includes)
+
+
+def gcc_install_path(gcc: str) -> pathlib.Path | None:
+    """Locate the `include` directory inside the install directory of `gcc`.
+
+    Runs `<gcc> -print-search-dirs` and takes the first output line that
+    starts with `install:`; the resolved `<install dir>/include` path is
+    returned.
+
+    Returns None when the command exits with a non-zero status, when the
+    executable cannot be found (a message is printed in both cases), or when
+    the output has no `install:` line.
+    """
+    command = [gcc, '-print-search-dirs']
+    try:
+        completed = subprocess.run(command, capture_output=True, text=True, check=True)
+        for entry in completed.stdout.splitlines():
+            if not entry.startswith('install:'):
+                continue
+            install_dir = entry.removeprefix('install: ').strip()
+            return (pathlib.Path(install_dir) / 'include').resolve()
+    except subprocess.CalledProcessError as e:
+        print(f"Error running {gcc}:", e)
+    except FileNotFoundError:
+        print(f"{gcc} not found")
+    return None
+
+
+if __name__ == "__main__":
+    # Command line: [-n] <path> <gcc> [file]
+    #   path  root directory of the headers to translate
+    #   gcc   compiler executable queried for its include directory
+    #   file  optional single header (relative to path); without it every
+    #         *.h file below path is processed
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument("-n", dest="dry_run", action="store_true")
+    argparser.add_argument("path")
+    argparser.add_argument("gcc")
+    argparser.add_argument("file", nargs="?")
+
+    args = argparser.parse_args()
+
+    # Module-level flag read elsewhere in this file.
+    dry_run = args.dry_run
+
+    colorama.just_fix_windows_console()
+
+    # The configuration lives next to this script.
+    with io.open(os.path.join(os.path.dirname(__file__), "rust-libc-config.yml"), "r") as f:
+        config = yaml.load(f, yaml.CSafeLoader)
+
+    path = pathlib.Path(args.path)
+
+    gcc_include_path = gcc_install_path(args.gcc)
+    if not gcc_include_path:
+        print("could not determine gcc's include directory")
+        exit(1)
+
+    # The compiler's include directory goes first in the include list.
+    gcc_include_path = os.path.relpath(pathlib.Path(gcc_include_path), pathlib.Path.cwd())
+    if "includes" not in config:
+        config["includes"] = list()
+    config["includes"].insert(0, gcc_include_path)
+
+    # The generated output starts with a fixed Rust prelude.
+    with io.open(os.path.join(os.path.dirname(__file__), "rust-libc-header.rs"), "r") as f:
+        emit(f.read())
+
+    state = State()
+
+    if not args.file:
+        for header in sorted(path.rglob("*.h")):
+            parse(str(header).removeprefix(str(path)).removeprefix("/"), path)
+    else:
+        parse(pathlib.Path(args.file), path)
+
+    if errors_emitted > 0:
+        print(f"\n{errors_emitted} errors emitted")
+
+    # Exit statuses are truncated to 8 bits by the OS; clamp so that a
+    # multiple of 256 errors can never be reported as success.
+    exit(min(errors_emitted, 255))
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <stdio.h>
+
+// Generic fallback: prints "<name> = <val>" followed by a newline using the
+// "%ld" conversion. Used for every type E without an explicit specialization.
+// NOTE(review): "%ld" expects a long; presumably callers only reach this
+// overload with long-compatible values — confirm, since other types would not
+// match the conversion.
+template<typename E>
+constexpr void macro_print(const char *name, E val) {
+	printf("%s = %ld\n", name, val);
+}
+
+// Specialization for signed long long: prints the value with "%lld".
+template<>
+constexpr void macro_print<signed long long>(const char *name, signed long long val) {
+	printf("%s = %lld\n", name, val);
+}
+
+// Specialization for signed long: prints the value with "%ld".
+template<>
+constexpr void macro_print<signed long>(const char *name, signed long val) {
+	printf("%s = %ld\n", name, val);
+}
+
+// Specialization for signed int: prints the value with "%d".
+template<>
+constexpr void macro_print<signed int>(const char *name, signed int val) {
+	printf("%s = %d\n", name, val);
+}
+
+// Specialization for signed short: prints the value with "%hd".
+template<>
+constexpr void macro_print<signed short>(const char *name, signed short val) {
+	printf("%s = %hd\n", name, val);
+}
+
+// Specialization for signed char: prints the numeric value with "%hhd".
+template<>
+constexpr void macro_print<signed char>(const char *name, signed char val) {
+	printf("%s = %hhd\n", name, val);
+}
+
+// Specialization for unsigned long long: prints the value with "%llu".
+template<>
+constexpr void macro_print<unsigned long long>(const char *name, unsigned long long val) {
+	printf("%s = %llu\n", name, val);
+}
+
+// Specialization for unsigned long: prints the value with "%lu".
+template<>
+constexpr void macro_print<unsigned long>(const char *name, unsigned long val) {
+	printf("%s = %lu\n", name, val);
+}
+
+// Specialization for unsigned int: prints the value with "%u".
+template<>
+constexpr void macro_print<unsigned int>(const char *name, unsigned int val) {
+	printf("%s = %u\n", name, val);
+}
+
+// Specialization for unsigned short: prints the value with "%hu".
+template<>
+constexpr void macro_print<unsigned short>(const char *name, unsigned short val) {
+	printf("%s = %hu\n", name, val);
+}
+
+// Specialization for unsigned char: prints the numeric value with "%hhu"
+// and, in parentheses, the same value as a character with "%c".
+template<>
+constexpr void macro_print<unsigned char>(const char *name, unsigned char val) {
+	printf("%s = %hhu ('%c')\n", name, val, val);
+}
+
+// Specialization for C strings: prints the value with "%s" wrapped in
+// double quotes.
+template<>
+constexpr void macro_print<const char *>(const char *name, const char *val) {
+	printf("%s = \"%s\"\n", name, val);
+}