user: implement mlibc as the libc, finally.

It's finally done..

Signed-off-by: kaguya <vpshinomiya@protonmail.com>
This commit is contained in:
kaguya
2026-05-02 03:31:49 -04:00
parent 2fa39ad85a
commit 9a9b91c940
2387 changed files with 152741 additions and 315 deletions
+12
View File
@@ -0,0 +1,12 @@
#!/bin/bash
# USAGE: put files and ports in the arrays below and export the abi you want to use
#
# For every (file, port) pair this creates a relative symlink
#   sysdeps/<port>/include/abi-bits/<file>  ->  abis/<abi>/<file>
# `ln -i` asks before replacing an existing entry, `-v` reports every link made.
declare -a files=()
declare -a ports=()

for file in "${files[@]}"; do
	for port in "${ports[@]}"; do
		# ${abi:?...} aborts with a message instead of silently linking to
		# "abis//<file>" when abi was not exported; the quotes keep names that
		# contain whitespace or glob characters in one piece.
		ln -rsiv "abis/${abi:?export abi before running this script}/$file" \
			"sysdeps/$port/include/abi-bits/$file"
	done
done
@@ -0,0 +1,18 @@
#!/bin/bash
# Verifies that every header under an include/ directory which uses one of the
# __MLIBC_*_OPTION macros also includes <mlibc-config.h>.
# Exits with the number of offending headers (capped at 255), 0 when all is well.

# lastpipe runs the last stage of a pipeline (the while loop below) in the
# current shell, so the counter updated inside the loop is still visible after it.
shopt -s lastpipe

errors_found=0

find . -wholename '*include/*.h' -print0 | while IFS= read -rd $'\0' file; do
	uses=$(grep -c -E "__MLIBC_(ANSI|BSD|POSIX|LINUX|GLIBC)_OPTION" "$file")
	if [ "$uses" -ne 0 ]; then
		does_include=$(grep -c "#include <mlibc-config.h>" "$file")
		if [ "$does_include" -eq 0 ]; then
			echo "'$file' does not include mlibc-config.h while it does use mlibc option macros"
			# Arithmetic increment: a plain `+=1` on a variable without the integer
			# attribute appends the character "1" to the string instead of adding one.
			errors_found=$((errors_found + 1))
		fi
	fi
done

# Exit statuses are truncated to 8 bits; clamp so a large count can never wrap to 0.
exit $((errors_found > 255 ? 255 : errors_found))
+18
View File
@@ -0,0 +1,18 @@
#! /bin/sh
# Downloads the Linux $LINUX_VERSION source tarball from cdn.kernel.org, runs the
# kernel's `headers_install` make target for $ARCH and leaves the result in
# ./linux-headers. The tarball and the extracted source tree are deleted again.
# -e: abort on the first failing command; -x: echo every command as it runs.
set -ex
LINUX_MAJOR=v6.x
LINUX_VERSION=6.16
# Use the caller's ARCH if set, otherwise fall back to the host machine name.
# NOTE(review): `uname -m` names are not guaranteed to match the kernel's ARCH
# names on every host - confirm for non-x86 machines.
: "${ARCH:=$(uname -m)}"
curl -Lo linux-$LINUX_VERSION.tar.xz https://cdn.kernel.org/pub/linux/kernel/$LINUX_MAJOR/linux-$LINUX_VERSION.tar.xz
tar -xf linux-$LINUX_VERSION.tar.xz
# The archive is no longer needed once it has been unpacked.
rm linux-$LINUX_VERSION.tar.xz
cd linux-$LINUX_VERSION
make mrproper
make ARCH=$ARCH headers_install
# Keep only *.h files in the installed header tree.
find usr/include -type f ! -name '*.h' -delete
# Move the header tree next to the source directory, then drop the sources.
mv usr/include ../linux-headers
cd ..
rm -rf linux-$LINUX_VERSION
+15
View File
@@ -0,0 +1,15 @@
[project]
name = "mlibc"
use_system_includes = false
git_repo_url = "https://github.com/managarm/mlibc/"
git_default_branch = "master"
[ignore]
paths = [
"options/ansi/musl-generic-math/"
]
[paths]
input_dir = "@source_root@"
output_dir = "@build_root@/docs/"
compile_commands = "@build_root@/compile_commands.json"
@@ -0,0 +1,303 @@
# additional includes to supply via `-I` arguments
includes: []
# paths relative to includedir whose files we don't want to parse themselves,
# but whose content we want to analyze if they're included from another header.
base_skipped_directories:
- bits
# paths relative to includedir that we want to completely ignore, even for transitive includes
base_ignored_directories:
- finclude
- gnu
# same as the two above, but for files
base_skipped_files: []
base_ignored_files: []
# record mapping from base (glibc) to mlibc
map_record_to_struct:
- "__SOCKADDR_ARG": "struct sockaddr *restrict"
- "__CONST_SOCKADDR_ARG": "const struct sockaddr *"
# struct equivalence mapping from base (glibc) to mlibc
equivalent_structs:
- "struct _IO_FILE": "struct __mlibc_file_base"
- "__mbstate_t": "struct __mlibc_mbstate"
- "cnd_t": "struct __mlibc_cond"
- "mtx_t": "struct __mlibc_mutex"
- "struct __locale_struct": "void"
- "pthread_mutexattr_t": "struct __mlibc_mutexattr"
- "pthread_mutex_t": "struct __mlibc_mutex"
- "pthread_condattr_t": "struct __mlibc_condattr"
- "pthread_barrierattr_t": "struct __mlibc_barrierattr"
- "pthread_rwlockattr_t": "struct __mlibc_rwlockattr"
- "pthread_rwlock_t": "struct __mlibc_fair_rwlock"
- "union pthread_attr_t": "struct __mlibc_threadattr"
- "pthread_barrier_t": "struct __mlibc_barrier"
- "pthread_cond_t": "struct __mlibc_cond"
- "__sigset_t": "sigset_t"
- "struct ucontext_t": "struct __ucontext"
- "struct __dirstream": "struct __mlibc_dir_struct"
- "sem_t": "struct sem_"
- "struct stat64": "struct stat"
- "struct msqid64_ds": "struct msqid_ds"
- "struct msqid_ds": "struct msqid64_ds"
- "struct rlimit64": "struct rlimit"
- "glob_t": "struct glob_t"
- "stack_t": "struct __stack"
- "__useconds_t": "useconds_t"
# files to ignore, relative to includedir
ignored_files:
- math.h
- tgmath.h
- regexp.h
- sys/asm.h
- sys/elf.h
- sys/vm86.h
ignored_structs:
# structs to ignore because we expose the kernel layout
- fd_set
- semid_ds
# libc-level constructs that are allowed to differ, and probably do
- ifaddrs
- lastlog
- regmatch_t
- sem_t
- sigevent
- utmp
- utmpx
ignored_typedefs:
# opaque structs where users can't rely on any particular layout
- cnd_t
- fpos_t
- jmp_buf
- mtx_t
- pthread_attr_t
- pthread_barrier_t
- pthread_cond_t
- pthread_condattr_t
- pthread_mutex_t
- pthread_mutexattr_t
- pthread_rwlock_t
- pthread_rwlockattr_t
- sigjmp_buf
- wordexp_t
# glibc skill issue: regoff_t should be at least ptrdiff_t or ssize_t-sized, but glibc uses int
- regmatch_t
# macros we ignore for being libc-level constructs or being allowed to diverge
ignored_macros:
- AT_XPIPE
- AT_OPENFILES
- AT_FS_SERVER
- AT_MBUS_SERVER
- _Complex_I
- FTW_F
- FTW_D
- FTW_DNR
- FTW_DP
- FTW_NS
- FTW_SL
- FTW_SLN
- FTW_PHYS
- FTW_MOUNT
- FTW_DEPTH
- FTW_CHDIR
- FTW_CONTINUE
- WRDE_APPEND
- WRDE_DOOFFS
- WRDE_NOCMD
- WRDE_REUSE
- WRDE_SHOWERR
- WRDE_UNDEF
- WRDE_SUCCESS
- WRDE_BADCHAR
- WRDE_BADVAL
- WRDE_CMDSUB
- WRDE_NOSPACE
- WRDE_SYNTAX
- PF_MAX
- SOMAXCONN
- IPPROTO_MAX
- FOPEN_MAX
- FILENAME_MAX
- TMP_MAX
- LINE_MAX
- CHARCLASS_NAME_MAX
- RE_DUP_MAX
- SIGRTMIN
- L_ctermid
- L_tmpnam
- _IOFBF
- _IOLBF
- _IONBF
- _PATH_DEFPATH
- _PATH_STDPATH
- _PATH_VARDB
- _POSIX_OPEN_MAX
- PTHREAD_DESTRUCTOR_ITERATIONS
- PTHREAD_STACK_MIN
- MINSIGSTKSZ
- SIGSTKSZ
- _PC_FILESIZEBITS
- _PC_SYMLINK_MAX
- NL_ARGMAX
- MB_LEN_MAX
- ABDAY_1
- ABDAY_2
- ABDAY_3
- ABDAY_4
- ABDAY_5
- ABDAY_6
- ABDAY_7
- DAY_1
- DAY_2
- DAY_3
- DAY_4
- DAY_5
- DAY_6
- DAY_7
- ABMON_1
- ABMON_2
- ABMON_3
- ABMON_4
- ABMON_5
- ABMON_6
- ABMON_7
- ABMON_8
- ABMON_9
- ABMON_10
- ABMON_11
- ABMON_12
- MON_1
- MON_2
- MON_3
- MON_4
- MON_5
- MON_6
- MON_7
- MON_8
- MON_9
- MON_10
- MON_11
- MON_12
- AM_STR
- PM_STR
- D_T_FMT
- D_FMT
- T_FMT
- T_FMT_AMPM
- ERA
- ERA_D_FMT
- ALT_DIGITS
- ERA_D_T_FMT
- ERA_T_FMT
- CODESET
- CRNCYSTR
- RADIXCHAR
- DECIMAL_POINT
- THOUSEP
- THOUSANDS_SEP
- YESEXPR
- NOEXPR
- LC_ALL
- LC_COLLATE
- LC_CTYPE
- LC_MONETARY
- LC_NUMERIC
- LC_TIME
- LC_MESSAGES
- LC_MEASUREMENT
- LC_GLOBAL_LOCALE
- LC_CTYPE_MASK
- LC_NUMERIC_MASK
- LC_TIME_MASK
- LC_COLLATE_MASK
- LC_MONETARY_MASK
- LC_MESSAGES_MASK
- LC_MEASUREMENT_MASK
- LC_ALL_MASK
- AI_PASSIVE
- AI_CANONNAME
- AI_NUMERICHOST
- AI_V4MAPPED
- AI_ALL
- AI_ADDRCONFIG
- AI_NUMERICSERV
- NI_NOFQDN
- NI_NUMERICHOST
- NI_NAMEREQD
- NI_NUMERICSCOPE
- NI_DGRAM
- NI_NUMERICSERV
- NI_MAXSERV
- NI_IDN
- NI_IDN_USE_STD3_ASCII_RULES
- NI_MAXHOST
- EAI_AGAIN
- EAI_BADFLAGS
- EAI_FAIL
- EAI_FAMILY
- EAI_MEMORY
- EAI_NONAME
- EAI_SERVICE
- EAI_SOCKTYPE
- EAI_SYSTEM
- EAI_OVERFLOW
- EAI_NODATA
- EAI_ADDRFAMILY
- GLOB_APPEND
- GLOB_DOOFFS
- GLOB_ERR
- GLOB_MARK
- GLOB_NOCHECK
- GLOB_NOESCAPE
- GLOB_NOSORT
- GLOB_PERIOD
- GLOB_TILDE
- GLOB_TILDE_CHECK
- GLOB_BRACE
- GLOB_NOMAGIC
- GLOB_ALTDIRFUNC
- GLOB_ONLYDIR
- GLOB_MAGCHAR
- GLOB_ABORTED
- GLOB_NOMATCH
- GLOB_NOSPACE
- GLOB_NOSYS
- BUFSIZ
- F_LOCK
- F_TEST
- F_TLOCK
- F_ULOCK
- O_LARGEFILE
- FSETLOCKING_INTERNAL
- FSETLOCKING_BYCALLER
- FSETLOCKING_QUERY
- NSS_BUFLEN_PASSWD
# enum members whose value should not be depended on
ignored_enum_constants:
- thrd_timedout
- thrd_busy
- thrd_error
- thrd_nomem
# structs that would get ignored by default (with leading double underscores) that we
# don't actually want to get ignored
forced_structs:
- "__mlibc_thread_data"
- "__mlibc_threadattr"
- "__mlibc_mutex"
- "__mlibc_mutexattr"
- "__mlibc_cond"
- "__mlibc_condattr"
- "__ucontext"
forced_typedefs:
- "__ucontext"
+934
View File
@@ -0,0 +1,934 @@
#!/usr/bin/env python3
import argparse
import os
import pathlib
import re
import subprocess
import sys
import tempfile
import typing
from dataclasses import dataclass, field
import clang.cindex
import colorama
import yaml
from clang.cindex import CursorKind, TokenKind, TypeKind
dry_run = True
errors_emitted = 0
def on_ci() -> bool:
    """Tell whether the script runs on CI, i.e. a ``CI`` environment variable exists.

    Only the presence of the variable matters; its value (even empty) is ignored.
    """
    return os.environ.get("CI") is not None
def log_err(prefix, msg):
    """Write ``<prefix>: <msg>`` to stderr and count it as one emitted error.

    Outside of CI the prefix is wrapped in colorama's red / reset sequences;
    on CI it is printed verbatim.
    """
    global errors_emitted

    if on_ci():
        label = prefix
    else:
        label = f"{colorama.Fore.RED}{prefix}{colorama.Style.RESET_ALL}"

    print(f"{label}: {msg}", file=sys.stderr)
    errors_emitted += 1
def no_system_includes(cursor, level):
    """filter out verbose stuff from system include files

    Every cursor that is not at nesting level 1 passes. A level-1 cursor passes
    only when it has a source file whose name does not start with /usr/include.
    """
    if level != 1:
        return True

    source_file = cursor.location.file
    if source_file is None:
        return False
    return not source_file.name.startswith("/usr/include")
class Type:
def __init__(self, t: clang.cindex.Type):
self.t = t
self.kind = t.kind
self.typename = (
str(self.t.spelling).removesuffix("restrict").removeprefix("const ")
)
self.compat_typename = None
match t.kind:
case TypeKind.ELABORATED:
replacement = next(
filter(
lambda x: self.typename in x.keys(),
config["map_record_to_struct"],
),
None,
)
if replacement is not None:
self.compat_typename = (
replacement[self.typename]
.removesuffix("restrict")
.removeprefix("const ")
)
case TypeKind.RECORD:
replacement = next(
filter(
lambda x: self.typename in x.keys(),
config["map_record_to_struct"],
),
None,
)
if replacement is not None:
self.kind = TypeKind.POINTER
self.compat_typename = (
replacement[self.typename]
.removesuffix("restrict")
.removeprefix("const ")
)
case TypeKind.POINTER:
ptr_type = (
self.t.get_pointee()
.spelling.removesuffix("restrict")
.removeprefix("const ")
)
replacement = next(
filter(
lambda x: ptr_type in x.keys(), config["equivalent_structs"]
),
None,
)
if replacement is not None:
self.compat_typename = (
replacement[ptr_type]
.removesuffix("restrict")
.removeprefix("const ")
+ " *"
)
@property
def canonical(self):
return Type(self.t.get_canonical())
@property
def pointee_type(self):
if self.kind == TypeKind.POINTER:
return Type(self.t.get_pointee())
if (
self.kind == TypeKind.INCOMPLETEARRAY
or self.kind == TypeKind.CONSTANTARRAY
or self.kind == TypeKind.VARIABLEARRAY
):
return Type(self.t.get_array_element_type())
if self.kind == TypeKind.ELABORATED:
return None
log_err(
"unhandled pointee resolution", str(self.kind).removeprefix("TypeKind.")
)
return None
def __str__(self):
return self.typename
def __eq__(self, other):
if self.kind != other.kind:
if (
self.kind == TypeKind.INCOMPLETEARRAY
or other.kind == TypeKind.INCOMPLETEARRAY
):
if self.pointee_type is None or other.pointee_type is None:
return False
return self.pointee_type == other.pointee_type
elif (
self.kind == TypeKind.CONSTANTARRAY
or other.kind == TypeKind.CONSTANTARRAY
):
if self.pointee_type is None or other.pointee_type is None:
return False
return self.pointee_type == other.pointee_type
elif (
self.kind == TypeKind.VARIABLEARRAY
or other.kind == TypeKind.VARIABLEARRAY
):
if self.pointee_type is None or other.pointee_type is None:
return False
return self.pointee_type == other.pointee_type
elif self.kind == TypeKind.ELABORATED or other.kind == TypeKind.ELABORATED:
return (
self.t.get_size() == other.t.get_size()
and self.t.get_align() == other.t.get_align()
)
else:
return False
if str(self) == str(other):
return True
return (
self.compat_typename == str(other)
or self.compat_typename == other.compat_typename
)
def is_valid(self):
return self.t.kind != TypeKind.INVALID
@dataclass
class Function:
    """A function declaration captured from a clang cursor.

    ``name`` is the cursor's mangled name; ``arguments`` holds one ``Type`` per
    parameter in declaration order. The originating cursor is kept as ``c``.
    """

    name: str
    linkage: clang.cindex.LinkageKind
    ret_type: clang.cindex.Type
    location: clang.cindex.SourceLocation
    arguments: typing.List[Type]

    def __init__(self, c: clang.cindex.Cursor):
        self.c = c
        self.name = c.mangled_name
        self.linkage = c.linkage
        self.ret_type = c.result_type
        self.location = c.location
        self.arguments = [Type(param.type) for param in c.get_arguments()]
@dataclass
class MacroDefinition:
    """A macro definition captured from a clang cursor, including its token list."""

    name: str
    location: clang.cindex.SourceLocation

    def __init__(self, c: clang.cindex.Cursor):
        self.c = c
        self.tokens = list(c.get_tokens())
        self.name = c.spelling
        self.location = c.location

    @property
    def first_token(self):
        """The token at index 1 of the definition, or ``None`` if there is none.

        NOTE(review): index 0 is presumably the macro's own name, which would make
        this the first token of the replacement text - confirm against libclang.
        """
        if len(self.tokens) > 1:
            return self.tokens[1]
        return None
@dataclass
class EnumDecl:
    """Name and source location of an enum-related cursor; the cursor is kept as ``c``."""

    name: str
    location: clang.cindex.SourceLocation

    def __init__(self, c: clang.cindex.Cursor):
        self.name, self.location = c.spelling, c.location
        self.c = c
@dataclass
class StructDecl:
    """A record declaration together with the layout clang reports for its type.

    ``alignment`` and ``size`` are the results of ``get_align()`` / ``get_size()``
    on the cursor's type.
    """

    name: str
    location: clang.cindex.SourceLocation

    def __init__(self, c: clang.cindex.Cursor):
        self.name, self.location = c.spelling, c.location
        self.c = c

        record_type = c.type
        self.alignment = record_type.get_align()
        self.size = record_type.get_size()
@dataclass
class Typedef:
    """A typedef declaration together with the layout clang reports for its type.

    ``alignment`` and ``size`` are the results of ``get_align()`` / ``get_size()``
    on the cursor's type.
    """

    name: str
    location: clang.cindex.SourceLocation

    def __init__(self, c: clang.cindex.Cursor):
        self.name, self.location = c.spelling, c.location
        self.c = c

        aliased_type = c.type
        self.alignment = aliased_type.get_align()
        self.size = aliased_type.get_size()
@dataclass
class State:
    """
    Represents the parsed state of a set of headers.

    ``path`` is the header root the state was collected from. Each dictionary is
    keyed by the declaration's name and starts out empty; ``State(path)`` is the
    only construction form the rest of the script relies on.
    """

    path: pathlib.Path
    functions: typing.Dict[str, Function] = field(default_factory=dict)
    macros: typing.Dict[str, MacroDefinition] = field(default_factory=dict)
    enums: typing.Dict[str, EnumDecl] = field(default_factory=dict)
    structs: typing.Dict[str, StructDecl] = field(default_factory=dict)
    # holds Typedef objects (not StructDecl)
    typedefs: typing.Dict[str, Typedef] = field(default_factory=dict)

    # No hand-written __init__: the one generated by @dataclass takes `path` and
    # builds a fresh dict per field through the default factories.
@dataclass
class Comparison:
config: dict
def is_ignored(self, typename, ignorelist, name):
if (
typename == "macros"
and (name.startswith("_") or name.startswith("MLIBC_"))
and name.endswith("_H")
):
return True
if name in ignorelist:
return True
if "forced_" + typename in config and name in config["forced_" + typename]:
return False
if name.startswith("__"):
return True
if "ignored_" + typename in config and name in config["ignored_" + typename]:
return True
return False
@staticmethod
def is_skipped_file(base_dir: pathlib.Path, file: pathlib.Path, config):
if Comparison.is_ignored_file(base_dir, file, config):
return True
if base_dir == args.reference:
for p in config["base_skipped_directories"]:
if str(file).startswith(os.path.join(base_dir, p)):
return True
for p in config["base_skipped_files"]:
stripped_file = str(file).removeprefix(str(base_dir)).removeprefix("/")
if stripped_file == p:
return True
return False
@staticmethod
def is_ignored_file(base_dir: pathlib.Path, file: pathlib.Path, config):
if not str(file).startswith(str(base_dir)):
return True
if base_dir == args.reference:
for p in config["base_ignored_directories"]:
if str(file).startswith(os.path.join(base_dir, p)):
return True
for p in config["base_ignored_files"]:
stripped_file = str(file).removeprefix(str(base_dir)).removeprefix("/")
if stripped_file == p:
return True
for p in config["ignored_files"]:
stripped_file = str(file).removeprefix(str(base_dir)).removeprefix("/")
if stripped_file == p:
return True
for p in config["includes"]:
if str(base_dir).startswith(p):
return True
return False
def from_cursor(self, base_dir, header, cursor, filter_pred, state: State, level=0):
if cursor.location.file:
f = pathlib.Path(str(cursor.location.file))
if Comparison.is_ignored_file(base_dir, f, config):
return
if filter_pred(cursor, level):
if args.dump_tree:
print(f"{"-" * level} {cursor.kind} {cursor.spelling}")
for c in cursor.get_children():
self.from_cursor(base_dir, header, c, filter_pred, state, level + 1)
match cursor.kind:
case CursorKind.TRANSLATION_UNIT:
for c in cursor.get_children():
self.from_cursor(
base_dir, header, c, filter_pred, state, level + 1
)
case CursorKind.INCLUSION_DIRECTIVE:
pass
case CursorKind.FUNCTION_DECL:
if not cursor.mangled_name.startswith("__"):
f = Function(cursor)
state.functions.update({f.name: f})
case CursorKind.STATIC_ASSERT | CursorKind.UNEXPOSED_DECL:
pass
case CursorKind.ENUM_DECL:
if not self.is_ignored("enums", [], cursor.spelling):
for x in cursor.get_children():
state.enums.update({x.spelling: EnumDecl(x)})
case CursorKind.MACRO_DEFINITION:
if not self.is_ignored("macros", [], cursor.spelling):
state.macros.update({cursor.spelling: MacroDefinition(cursor)})
case CursorKind.STRUCT_DECL:
if not self.is_ignored("structs", [], cursor.spelling):
if cursor.is_definition():
state.structs.update({cursor.spelling: StructDecl(cursor)})
case CursorKind.UNION_DECL:
if not self.is_ignored("unions", [], cursor.spelling):
if cursor.is_definition():
state.structs.update({cursor.spelling: StructDecl(cursor)})
case CursorKind.TYPEDEF_DECL:
if not self.is_ignored("typedefs", [], cursor.spelling):
children = list(cursor.get_children())
if not children:
return
state.typedefs.update({cursor.spelling: Typedef(cursor)})
if children[0].kind == CursorKind.TYPE_REF:
child_struct_name = children[0].spelling.removeprefix(
"struct "
)
if child_struct_name in state.structs:
state.structs.update(
{cursor.spelling: state.structs[child_struct_name]}
)
case CursorKind.MACRO_INSTANTIATION | CursorKind.VAR_DECL:
# don't care (for now)
pass
case _:
log_err(
"unhandled cursor type",
f"{cursor.kind} {cursor.spelling} {cursor.displayname} {cursor.location}",
)
def cc_name():
    """Command prefix for the C compiler: clang (optionally version-suffixed)
    followed by the ``--target=<arch>-linux-gnu`` option."""
    compiler = f"clang-{args.clang_version}" if args.clang_version else "clang"
    return [compiler, f"--target={args.arch}-linux-gnu"]
def cxx_name():
    """Command prefix for the C++ compiler.

    m68k uses the GNU cross compiler: clang's default code model for that target
    is a small one that does not work here, and no way to change it outside of
    `llc` was found. Every other architecture uses clang++ (optionally
    version-suffixed) with ``--target=<arch>-linux-gnu``.
    """
    if args.arch == "m68k":
        return ["m68k-linux-gnu-g++"]

    compiler = f"clang++-{args.clang_version}" if args.clang_version else "clang++"
    return [compiler, f"--target={args.arch}-linux-gnu"]
def parse(
    file: pathlib.Path, resource_dir: pathlib.Path, base_dir: pathlib.Path, state: State
):
    """Parse one header with libclang and record its declarations in ``state``.

    file          header path relative to ``base_dir``
    resource_dir  clang's builtin include directory, passed as the first ``-I``
    base_dir      root of the header tree being examined
    state         collector that receives the declarations

    A failure inside libclang is logged and the header is skipped. Skipped files
    are parsed but neither diagnosed nor recorded. Any diagnostic on a
    non-skipped header terminates the program.
    """
    index = clang.cindex.Index.create()
    target = f"{args.arch}-linux-gnu"

    clang_args = [f"-I{resource_dir}"]
    clang_args += [f"-I{p}" for p in config["includes"]]
    clang_args += [f"-I{base_dir}", f"-I{base_dir / target}", f"--target={target}"]
    clang_args += ["-D_GNU_SOURCE", "-D_FILE_OFFSET_BITS=64", "-Wno-macro-redefined"]

    try:
        tu = index.parse(
            base_dir / file,
            args=clang_args,
            options=clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
            | clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES,
        )
    except Exception as e:
        log_err("parsing error", f"{file}: {e}")
        return
    assert tu

    if Comparison.is_skipped_file(base_dir, base_dir / file, config):
        return

    if tu.diagnostics:
        for diagnostic in tu.diagnostics:
            log_err("compile error", diagnostic)
        print(f"\n{errors_emitted} errors emitted")
        # Exit statuses are truncated to 8 bits; clamp so that e.g. 256 errors
        # cannot be reported as success.
        sys.exit(min(errors_emitted, 0xFF))

    parser = Comparison(config)
    if args.verbose:
        print(f"// {tu.spelling.strip()}")
    parser.from_cursor(base_dir, file, tu.cursor, no_system_includes, state)
def _loc(decl):
    """``file:line`` of a recorded declaration."""
    return f"{decl.location.file}:{decl.location.line}"


def _print_report(title, lines):
    """Print a blank line, ``title`` and every report line - or nothing if empty."""
    if not lines:
        return
    print()
    print(title)
    for line in lines:
        print(line)


def _kind_name(t):
    """Kind of a ``Type`` without the ``TypeKind.`` prefix."""
    return str(t.kind).removeprefix("TypeKind.")


def _macro_test_flags():
    """Flags appended to every compiler invocation of the macro test.

    Each flag is its own list element: leaving out a comma would make Python
    glue two adjacent literals into one meaningless argument.
    """
    return [
        "-D_GNU_SOURCE",
        "-D_FILE_OFFSET_BITS=64",
        "-D_REGEX_LARGE_OFFSETS",
        "-Wno-macro-redefined",
    ]


def _check_function_signatures(a, b, symbols):
    """Report return type, argument count and argument type mismatches for every
    function present on both sides."""
    global errors_emitted

    lines = []
    for s in symbols:
        if s not in a.functions or s not in b.functions:
            continue

        a_func = a.functions[s]
        b_func = b.functions[s]

        a_ret_type = Type(a_func.ret_type.get_canonical())
        b_ret_type = Type(b_func.ret_type.get_canonical())
        if (
            a_ret_type != b_ret_type
            and a_func.ret_type.spelling != b_func.ret_type.spelling
        ):
            lines.append(
                f"\t{s}: mismatched return type ({a_ret_type} vs. {b_ret_type})"
            )
            errors_emitted += 1

        a_args = a_func.arguments
        b_args = b_func.arguments
        if len(a_args) != len(b_args):
            lines.append(
                f"\t{s}: argument count mismatch ({len(a_args)} vs. {len(b_args)})"
            )
            errors_emitted += 1

        # zip() stops at the shorter list, so a count mismatch (already reported
        # above) can no longer index past the end of the other side.
        for position, (at, bt) in enumerate(zip(a_args, b_args), start=1):
            if at != bt and at.canonical != bt.canonical:
                lines.append(
                    f"\t{s}: mismatched type for argument at position {position} ({at} ({_kind_name(at)}) vs. {bt} ({_kind_name(bt)}))"
                )
                errors_emitted += 1

    _print_report(
        f"checking {len(symbols)} functions for signature mismatches:", lines
    )


def _report_missing_functions(a, b, a_symbols, b_symbols):
    """List the functions that exist on one side only.

    The ``a``-only list is printed in verbose mode only, the ``b``-only list
    always. Neither counts as an error.
    """
    a_unique_symbols = [s for s in a_symbols if s not in b.functions]
    b_unique_symbols = [s for s in b_symbols if s not in a.functions]

    if args.verbose and len(a_unique_symbols) > 0:
        print()
        print(f"{len(a_unique_symbols)} symbols only defined in {a.path}:")
        for s in sorted(a_unique_symbols):
            print(f"{s} defined in {a.functions[s].location}")

    if len(b_unique_symbols) > 0:
        print()
        print(f"{len(b_unique_symbols)} symbols only defined in {b.path}:")
        for s in sorted(b_unique_symbols):
            print(f"{s} defined in {b.functions[s].location}")


def _check_struct_layouts(a, b, c):
    """Compare size and alignment of every struct known to both sides.

    Structs declared equivalent in the configuration are first made available in
    ``b.structs`` under their ``a`` name (this mutates ``b``).
    """
    global errors_emitted

    for mapping in config["equivalent_structs"]:
        ((a_name, b_name),) = mapping.items()
        a_name = a_name.removeprefix("struct ")
        b_name = b_name.removeprefix("struct ")

        if a_name not in a.structs or b_name not in b.structs:
            continue
        if c.is_ignored("structs", [], a_name) or c.is_ignored("structs", [], b_name):
            continue
        if (a_name in a.typedefs and c.is_ignored("typedefs", [], a_name)) or (
            c.is_ignored("typedefs", [], b_name)
        ):
            continue

        b.structs[a_name] = b.structs[b_name]

    common_structs = sorted(set(a.structs) & set(b.structs))
    lines = []
    for name in common_structs:
        if c.is_ignored("typedefs", [], name):
            continue

        sa = a.structs[name]
        sb = b.structs[name]
        if sa.alignment != sb.alignment:
            lines.append(
                f"\t{name}: alignment {sa.alignment} vs. {sb.alignment} ({_loc(sa)}, {_loc(sb)})"
            )
            errors_emitted += 1
        if sa.size != sb.size:
            lines.append(
                f"\t{name}: size {sa.size} vs. {sb.size} ({_loc(sa)}, {_loc(sb)})"
            )
            errors_emitted += 1

    _print_report(
        f"checking {len(common_structs)} structs for size/alignment mismatches:",
        lines,
    )


def _check_typedef_layouts(a, b, c):
    """Compare size and alignment of every typedef known to both sides.

    A difference only counts when both values are positive.
    """
    global errors_emitted

    common_typedefs = sorted(set(a.typedefs) & set(b.typedefs))
    lines = []
    for name in common_typedefs:
        if (name in a.structs or name in b.structs) and c.is_ignored(
            "structs", [], name
        ):
            continue

        ta = a.typedefs[name]
        tb = b.typedefs[name]
        if ta.alignment != tb.alignment and ta.alignment > 0 and tb.alignment > 0:
            lines.append(
                f"\t{name}: alignment {ta.alignment} vs. {tb.alignment} ({_loc(ta)}, {_loc(tb)})"
            )
            errors_emitted += 1
        if ta.size != tb.size and ta.size > 0 and tb.size > 0:
            lines.append(
                f"\t{name}: size {ta.size} vs. {tb.size} ({_loc(ta)}, {_loc(tb)})"
            )
            errors_emitted += 1

    _print_report(
        f"checking {len(common_typedefs)} typedefs for size/alignment mismatches:",
        lines,
    )


def _filter_preprocessed_file(input, output):
    """Copy only the ``const auto __v_...`` statements from ``input`` to ``output``.

    A statement that does not end in ``;`` on its first line is continued on the
    following lines until one ends in ``;``; lines starting with ``#`` inside
    such a continuation are dropped.
    """
    include_next_line = False
    with open(output, "w") as o, open(input, "r") as i:
        for line in i:
            stripped = line.strip()
            if line.startswith("const auto __v_"):
                o.write(line)
                include_next_line = not stripped.endswith(";")
            elif include_next_line and not stripped.startswith("#"):
                o.write(line)
                include_next_line = not stripped.endswith(";")


def _check_macro_definitions(a, b, c):
    """Compare the values of literal macros and enum constants common to both sides.

    For each side a small C++ program printing every tested value is generated,
    compiled and run (through qemu-user for non-x86_64 targets); the two outputs
    are diffed. Every diff line that exists only in the ``a`` output is an error.
    """
    global errors_emitted

    script_path = pathlib.Path(__file__).resolve().parent
    sides = (("a", args.reference), ("b", args.mlibc))

    # Neither side is modified below, so the unions are built once.
    a_values = a.macros | a.enums
    b_values = b.macros | b.enums

    def is_macro_literal(obj):
        if type(obj) is not MacroDefinition:
            return False
        return obj.first_token and obj.first_token.kind == TokenKind.LITERAL

    def is_enum(obj):
        return type(obj) is EnumDecl

    with tempfile.TemporaryDirectory(prefix="abichecker") as tmp:
        td = pathlib.Path(tmp)

        a_included_files = []
        b_included_files = []
        tested_macros = []

        # Step 1: one "primary" file per side that includes the needed headers
        # and binds every tested name to a `__v_<name>` constant.
        with (
            open(td / "test-a-primary.hpp", "w") as atp,
            open(td / "test-b-primary.hpp", "w") as btp,
        ):
            print(f'#include "{script_path}/linux-headers.h"', file=atp)
            print(f'#include "{script_path}/linux-headers.h"', file=btp)

            for name, bm in b_values.items():
                if name in a_values:
                    am = a_values[name]
                    header = (
                        str(am.location.file)
                        .removeprefix(str(args.reference))
                        .removeprefix("/")
                    )
                    if header not in a_included_files and not c.is_skipped_file(
                        args.reference, args.reference / header, config
                    ):
                        print(f"#include <{header}>", file=atp)
                        a_included_files.append(header)

                header = (
                    str(bm.location.file).removeprefix(str(args.mlibc)).removeprefix("/")
                )
                if header not in b_included_files:
                    print(f"#include <{header}>", file=btp)
                    b_included_files.append(header)

                if name in a_values and (
                    is_macro_literal(bm)
                    or (is_enum(bm) and not c.is_ignored("enum_constants", [], name))
                ):
                    print(f"const auto __v_{name} = {name};", file=atp)
                    print(f"const auto __v_{name} = {name};", file=btp)
                    tested_macros.append(name)

        # Step 2: preprocess so that the macro names are replaced by their values.
        # A failure is reported but not fatal.
        for side, include_dir in sides:
            preprocess = subprocess.run(
                cxx_name()
                + [
                    "-E",
                    "-std=c++23",
                    "-nostdlib",
                    f"-I{include_dir}",
                    "-o",
                    f"{tmp}/test-{side}-preprocessed.hpp",
                    f"{tmp}/test-{side}-primary.hpp",
                ]
                + _macro_test_flags(),
                capture_output=True,
            )
            if preprocess.returncode != 0:
                print(f"Preprocessing the macro list of {include_dir} failed:")
                print(f"\tCommand: '{' '.join(preprocess.args)}'")
                print(preprocess.stderr.decode("utf-8"))

        # Step 3: keep only the `__v_` definitions of the preprocessed output.
        for side, _ in sides:
            _filter_preprocessed_file(
                td / f"test-{side}-preprocessed.hpp", td / f"test-{side}-filtered.hpp"
            )

        # Step 4: generate the test programs.
        for side, included_files in (("a", a_included_files), ("b", b_included_files)):
            with open(td / f"test-{side}.cpp", "w") as out:
                print(f'#include "{script_path}/linux-headers.h"', file=out)
                for inc in included_files:
                    print(f"#include <{inc}>", file=out)
                print("", file=out)
                print(f'#include "{tmp}/test-{side}-filtered.hpp"', file=out)
                print(f'#include "{script_path}/to_integral.hpp"', file=out)
                print("int main() {", file=out)
                for name in tested_macros:
                    print(f'\tmacro_print("{name}", __v_{name});', file=out)
                print("\treturn 0;", file=out)
                print("}", file=out)

        # Step 5: compile them; a compile failure is fatal.
        for side, include_dir in sides:
            compile_result = subprocess.run(
                cxx_name()
                + [
                    "-std=c++23",
                    "-I",
                    f"{include_dir}",
                    "-o",
                    f"{tmp}/test-{side}",
                    f"{tmp}/test-{side}.cpp",
                ]
                + _macro_test_flags(),
                capture_output=True,
            )
            if compile_result.returncode != 0:
                log_err("Compiling macro test failed", f"test.cpp for {include_dir}")
                print(compile_result.stderr.decode("utf-8"))
                sys.exit(1)

        # Step 6: run both programs and diff what they print.
        qemu_cmd = []
        if args.arch != "x86_64":
            qemu_cmd = [f"qemu-{args.arch}"]
            if args.ld_lib:
                qemu_cmd += ["-L", args.ld_lib]

        with (
            tempfile.NamedTemporaryFile(dir=tmp) as test_a_file,
            tempfile.NamedTemporaryFile(dir=tmp) as test_b_file,
        ):
            for side, include_dir, sink in (
                ("a", args.reference, test_a_file),
                ("b", args.mlibc, test_b_file),
            ):
                run = subprocess.run(qemu_cmd + [f"{tmp}/test-{side}"], stdout=sink)
                if run.returncode != 0:
                    log_err("Running macro test failed", f"test for {include_dir}")

            color_output = ["--color=always"] if not on_ci() else []
            diff = subprocess.run(
                ["diff", test_a_file.name, test_b_file.name] + color_output,
                capture_output=True,
                text=True,
            )

        diff_str = diff.stdout.strip()
        if diff_str:
            print()
            print("diff of macro definitions:")
            print(diff_str)

            # strip colour sequences before looking at the diff marker
            ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
            errors_emitted += sum(
                1
                for line in diff_str.splitlines()
                if ansi_escape.sub("", line).startswith("< ")
            )


def compare_states(a, b):
    """Compare two parsed header states and report every difference found.

    ``a`` is the reference side, ``b`` the side being checked. Which checks run
    is selected by the command line flags in ``args``; mismatches are printed and
    added to the global ``errors_emitted`` counter. With the struct check enabled
    ``b.structs`` gains aliases for the configured equivalent structs.
    """
    a_symbols = sorted(a.functions.keys())
    b_symbols = sorted(b.functions.keys())
    # A new list: extending `a_symbols` in place would smuggle b's names into it
    # and leave the "only defined in b" list permanently empty.
    symbols = a_symbols + [s for s in b_symbols if s not in a.functions]

    c = Comparison(config)

    if args.function_signatures:
        _check_function_signatures(a, b, symbols)
    if args.missing_functions:
        _report_missing_functions(a, b, a_symbols, b_symbols)
    if args.structs:
        _check_struct_layouts(a, b, c)
    if args.typedefs:
        _check_typedef_layouts(a, b, c)
    if args.macro_definitions:
        _check_macro_definitions(a, b, c)
if __name__ == "__main__":
    # Command line: select the checks to run, name the configuration file and
    # the two header trees; `file` optionally restricts the run to one header.
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-m",
        dest="missing_functions",
        action="store_true",
        help="search for missing functions",
    )
    argparser.add_argument(
        "-M",
        dest="macro_definitions",
        action="store_true",
        help="compare macro definitions",
    )
    argparser.add_argument(
        "-f",
        dest="function_signatures",
        action="store_true",
        help="check function signatures",
    )
    argparser.add_argument(
        "-s", dest="structs", action="store_true", help="check structs"
    )
    argparser.add_argument(
        "-t", dest="typedefs", action="store_true", help="check typedefs"
    )
    argparser.add_argument(
        "-v", "--verbose", dest="verbose", action="store_true", help="verbose output"
    )
    argparser.add_argument(
        "-T",
        dest="dump_tree",
        action="store_true",
        help="dump tree (for debug, extremely verbose)",
    )
    argparser.add_argument(
        "--config",
        help="path to the configuration file",
        dest="config",
        type=argparse.FileType("r"),
        required=True,
    )
    argparser.add_argument(
        "--arch", help="target architecture", dest="arch", type=str, default="x86_64"
    )
    argparser.add_argument(
        "--ld-library-path",
        help="additional LD_LIBRARY_PATH to supply to qemu-user",
        dest="ld_lib",
        type=str,
    )
    argparser.add_argument(
        "--clang-version",
        help="specify which versioned clang to use",
        dest="clang_version",
        type=int,
    )
    argparser.add_argument(
        "--exit-with-zero-for-abi-mismatches",
        help="exit with zero even if ABI mismatches are detected",
        dest="exit_zero",
        action="store_true",
    )
    argparser.add_argument(
        "reference", help="path to the reference libc's sysroot", type=pathlib.Path
    )
    argparser.add_argument(
        "mlibc", help="mlibc headers to be checked", type=pathlib.Path
    )
    argparser.add_argument("file", nargs="?", help="limit scope to this file")

    colorama.just_fix_windows_console()

    # `args` and `config` become module globals read by the functions above.
    args = argparser.parse_args()
    # CSafeLoader only exists when PyYAML was built against libyaml; fall back to
    # the pure-Python safe loader, which accepts the same documents.
    config = yaml.load(args.config, getattr(yaml, "CSafeLoader", yaml.SafeLoader))

    reference_state = State(args.reference)
    mlibc_state = State(args.mlibc)

    # determine the path to clang's resource dir (like /usr/lib/clang/20/include)
    resource_dir_result = subprocess.run(
        cc_name() + ["-print-resource-dir"], capture_output=True
    )
    resource_dir = pathlib.Path(resource_dir_result.stdout.decode().strip()) / "include"

    for path, state in ((args.reference, reference_state), (args.mlibc, mlibc_state)):
        if not args.file:
            # every header below the root, handed to parse() relative to it
            for header in sorted(path.rglob("*.h")):
                parse(
                    pathlib.Path(str(header).removeprefix(str(path)).removeprefix("/")),
                    resource_dir,
                    path,
                    state,
                )
        else:
            parse(pathlib.Path(args.file), resource_dir, path, state)

    compare_states(reference_state, mlibc_state)

    if errors_emitted > 0:
        print(f"\n{errors_emitted} errors emitted.")
    else:
        print("No ABI differences found.")

    if args.exit_zero:
        sys.exit(0)
    # exit statuses are 8 bit wide; clamp instead of wrapping around
    sys.exit(min(errors_emitted, 0xFF))
@@ -0,0 +1,4 @@
{
/*
 * Hide all C++ symbols.
 * The `local:` section lists a single glob, `_Z*`, so every symbol whose name
 * starts with `_Z` is bound locally; this anonymous version node declares
 * nothing else.
 */
local: _Z*;
};
@@ -0,0 +1,14 @@
#pragma once
#include <fenv.h>
#include <getopt.h>
#include <limits.h>
#include <linux/poll.h>
#include <net/ethernet.h>
#include <sys/eventfd.h>
#include <sys/ipc.h>
#include <sys/resource.h>
#include <sys/sem.h>
#include <sys/shm.h>
#include <sys/statvfs.h>
#include <termios.h>
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,267 @@
// This file is autogenerated!
// All changes made will be lost (eventually)!
use crate::prelude::*;
use crate::sighandler_t;
pub type blkcnt64_t = i64;
// The `*64` names below are plain aliases of the regular crate types.
pub type rlimit64 = crate::rlimit;
pub type rlim64_t = crate::rlim_t;
pub type dirent64 = crate::dirent;
pub type stat64 = crate::stat;
pub type statfs64 = crate::statfs;
pub type statvfs64 = crate::statvfs;
pub type idtype_t = c_uint;
pub type Ioctl = c_ulong;
pub type pthread_t = *mut c_void;
// Fixed-size integer names in the kernel's `__uN` / `__sN` style.
pub type __u8 = c_uchar;
pub type __u16 = c_ushort;
pub type __s16 = c_short;
pub type __u32 = c_uint;
pub type __s32 = c_int;
pub type __u64 = c_ulonglong;
pub type __s64 = c_longlong;
// Null pointer constant.
pub const RTLD_DEFAULT: *mut c_void = 0i64 as *mut c_void;
// All bits set in `rlim_t`.
pub const RLIM_INFINITY: crate::rlim_t = !0;
// ELF scalar types used by the program-header structs.
pub type Elf32_Half = u16;
pub type Elf32_Word = u32;
pub type Elf32_Off = u32;
pub type Elf32_Addr = u32;
pub type Elf64_Half = u16;
pub type Elf64_Word = u32;
pub type Elf64_Off = u64;
pub type Elf64_Addr = u64;
pub type Elf64_Xword = u64;
s! {
    // 32-bit ELF program header.
    pub struct Elf32_Phdr {
        pub p_type: Elf32_Word,
        pub p_offset: Elf32_Off,
        pub p_vaddr: Elf32_Addr,
        pub p_paddr: Elf32_Addr,
        pub p_filesz: Elf32_Word,
        pub p_memsz: Elf32_Word,
        pub p_flags: Elf32_Word,
        pub p_align: Elf32_Word,
    }
    // 64-bit ELF program header. Note the different field order: `p_flags`
    // follows `p_type` directly here, whereas the 32-bit variant has it
    // second to last.
    pub struct Elf64_Phdr {
        pub p_type: Elf64_Word,
        pub p_flags: Elf64_Word,
        pub p_offset: Elf64_Off,
        pub p_vaddr: Elf64_Addr,
        pub p_paddr: Elf64_Addr,
        pub p_filesz: Elf64_Xword,
        pub p_memsz: Elf64_Xword,
        pub p_align: Elf64_Xword,
    }
}
s! {
    // Per-object record whose ELF-typed members (`dlpi_addr`, `dlpi_phdr`,
    // `dlpi_phnum`) are selected by `target_pointer_width`: the Elf64_*
    // types on 64-bit targets and the Elf32_* types on 32-bit targets.
    pub struct dl_phdr_info {
        #[cfg(target_pointer_width = "64")]
        pub dlpi_addr: Elf64_Addr,
        #[cfg(target_pointer_width = "32")]
        pub dlpi_addr: Elf32_Addr,
        pub dlpi_name: *const c_char,
        #[cfg(target_pointer_width = "64")]
        pub dlpi_phdr: *const Elf64_Phdr,
        #[cfg(target_pointer_width = "32")]
        pub dlpi_phdr: *const Elf32_Phdr,
        #[cfg(target_pointer_width = "64")]
        pub dlpi_phnum: Elf64_Half,
        #[cfg(target_pointer_width = "32")]
        pub dlpi_phnum: Elf32_Half,
        pub dlpi_adds: c_ulonglong,
        pub dlpi_subs: c_ulonglong,
        pub dlpi_tls_modid: size_t,
        pub dlpi_tls_data: *mut c_void,
    }
}
f! {
    // Returns the control-message header that follows `cmsg` inside the
    // ancillary buffer described by `mhdr`, or a null pointer when there is
    // none.
    //
    // Null is returned when `cmsg` reports a length smaller than a `cmsghdr`,
    // or when the next header (or its payload, rounded up by `CMSG_ALIGN`)
    // would end beyond `msg_control + msg_controllen`.
    pub fn CMSG_NXTHDR(mhdr: *const msghdr, cmsg: *const cmsghdr) -> *mut cmsghdr {
        if ((*cmsg).cmsg_len as usize) < mem::size_of::<cmsghdr>() {
            return 0 as *mut cmsghdr;
        }
        let next = (cmsg as usize + super::CMSG_ALIGN((*cmsg).cmsg_len as usize)) as *mut cmsghdr;
        let max = (*mhdr).msg_control as usize + (*mhdr).msg_controllen as usize;
        // `next + 1` is only used as an address for the bounds check and may
        // lie past the end of the control buffer. `offset` requires the result
        // to stay inside the allocation, so the wrapping variant is used.
        if (next.wrapping_offset(1)) as usize > max
            || next as usize + super::CMSG_ALIGN((*next).cmsg_len as usize) > max
        {
            0 as *mut cmsghdr
        } else {
            next as *mut cmsghdr
        }
    }
}
// Static initializers: each one zero-fills the `size` byte array of its
// opaque pthread type.
// NOTE(review): the lengths (16 / 12 / 12) must equal the array lengths in
// the generated type definitions, which are not visible here — confirm.
pub const PTHREAD_MUTEX_INITIALIZER: pthread_mutex_t = pthread_mutex_t {
    size: [0; 16],
};
pub const PTHREAD_COND_INITIALIZER: pthread_cond_t = pthread_cond_t {
    size: [0; 12],
};
pub const PTHREAD_RWLOCK_INITIALIZER: pthread_rwlock_t = pthread_rwlock_t {
    size: [0; 12],
};
s_no_extra_traits! {
    // NOTE(review): this is declared as a `struct`, so every `ifru_*` member
    // occupies its own storage one after another. In C these names usually
    // belong to a union inside `struct ifreq` — confirm this layout against
    // the mlibc header before relying on field offsets.
    pub struct ifreq {
        pub ifru_addr: crate::sockaddr,
        pub ifru_dstaddr: crate::sockaddr,
        pub ifru_broadaddr: crate::sockaddr,
        pub ifru_netmask: crate::sockaddr,
        pub ifru_hwaddr: crate::sockaddr,
        pub ifru_flags: c_short,
        pub ifru_ivalue: c_int,
        pub ifru_mtu: c_int,
        pub ifru_map: crate::ifmap,
        pub ifru_slave: [c_char; 16],
        pub ifru_newname: [c_char; 16],
        pub ifru_data: *mut c_char,
    }
}
safe_f! {
    // Packs a major/minor pair into a `dev_t`.
    // Major bits 0..12 land at bit 8 and the remaining major bits at bit 44;
    // minor bits 0..8 land at bit 0 and the remaining minor bits at bit 20.
    pub {const} fn makedev(major: c_uint, minor: c_uint) -> crate::dev_t {
        let hi = major as crate::dev_t;
        let lo = minor as crate::dev_t;
        ((hi & 0x00000fff) << 8)
            | ((hi & 0xfffff000) << 32)
            | ((lo & 0x000000ff) << 0)
            | ((lo & 0xffffff00) << 12)
    }
    // Extracts the major number: the inverse of the two major terms above.
    pub {const} fn major(dev: crate::dev_t) -> c_uint {
        let low_part = (dev & 0x00000000000fff00) >> 8;
        let high_part = (dev & 0xfffff00000000000) >> 32;
        (low_part | high_part) as c_uint
    }
    // Extracts the minor number: the inverse of the two minor terms above.
    pub {const} fn minor(dev: crate::dev_t) -> c_uint {
        let low_part = (dev & 0x00000000000000ff) >> 0;
        let high_part = (dev & 0x00000ffffff00000) >> 12;
        (low_part | high_part) as c_uint
    }
}
extern "C" {
    // The Rust name `strerror_r` is bound to the C symbol `__gnu_strerror_r`
    // through `link_name`.
    #[link_name = "__gnu_strerror_r"]
    pub fn strerror_r(errnum: c_int, buf: *mut c_char, buflen: size_t) -> c_int;
}
impl siginfo_t {
    /// Reads `si_status` by reinterpreting `self` as the local `repr(C)`
    /// mirror struct below.
    ///
    /// NOTE(review): assumes the real `siginfo_t` places these members at the
    /// same offsets as the mirror struct — confirm against the mlibc header.
    pub unsafe fn si_status(&self) -> c_int {
        #[repr(C)]
        struct siginfo_sigchld {
            _si_signo: c_int,
            _si_errno: c_int,
            _si_code: c_int,
            si_pid: crate::pid_t,
            si_uid: crate::uid_t,
            si_status: c_int,
            si_utime: crate::clock_t,
            si_stime: crate::clock_t,
        }
        (*(self as *const siginfo_t as *const siginfo_sigchld)).si_status
    }
    /// Reads `si_addr` by reinterpreting `self` as the local `repr(C)`
    /// mirror struct below.
    ///
    /// NOTE(review): same layout assumption as `si_status` — confirm.
    pub unsafe fn si_addr(&self) -> *mut c_void {
        #[repr(C)]
        struct siginfo_sigfault {
            _si_signo: c_int,
            _si_errno: c_int,
            _si_code: c_int,
            si_addr: *mut c_void,
        }
        (*(self as *const siginfo_t as *const siginfo_sigfault)).si_addr
    }
}
s! {
    // Netlink socket address. `nl_pad` is the only non-`pub` member, so code
    // outside this module cannot name it.
    pub struct sockaddr_nl {
        pub nl_family: crate::sa_family_t,
        nl_pad: c_ushort,
        pub nl_pid: u32,
        pub nl_groups: u32,
    }
}
// linux/netlink.h
pub const NLA_ALIGNTO: c_int = 4;
// NETLINK_* numbers 0..=21; the value 17 is not assigned here.
pub const NETLINK_ROUTE: c_int = 0;
pub const NETLINK_UNUSED: c_int = 1;
pub const NETLINK_USERSOCK: c_int = 2;
pub const NETLINK_FIREWALL: c_int = 3;
pub const NETLINK_SOCK_DIAG: c_int = 4;
pub const NETLINK_NFLOG: c_int = 5;
pub const NETLINK_XFRM: c_int = 6;
pub const NETLINK_SELINUX: c_int = 7;
pub const NETLINK_ISCSI: c_int = 8;
pub const NETLINK_AUDIT: c_int = 9;
pub const NETLINK_FIB_LOOKUP: c_int = 10;
pub const NETLINK_CONNECTOR: c_int = 11;
pub const NETLINK_NETFILTER: c_int = 12;
pub const NETLINK_IP6_FW: c_int = 13;
pub const NETLINK_DNRTMSG: c_int = 14;
pub const NETLINK_KOBJECT_UEVENT: c_int = 15;
pub const NETLINK_GENERIC: c_int = 16;
pub const NETLINK_SCSITRANSPORT: c_int = 18;
pub const NETLINK_ECRYPTFS: c_int = 19;
pub const NETLINK_RDMA: c_int = 20;
pub const NETLINK_CRYPTO: c_int = 21;
// Alias of NETLINK_SOCK_DIAG.
pub const NETLINK_INET_DIAG: c_int = NETLINK_SOCK_DIAG;
// NLM_F_* flag bits (values 1..=32).
pub const NLM_F_REQUEST: c_int = 1;
pub const NLM_F_MULTI: c_int = 2;
pub const NLM_F_ACK: c_int = 4;
pub const NLM_F_ECHO: c_int = 8;
pub const NLM_F_DUMP_INTR: c_int = 16;
pub const NLM_F_DUMP_FILTERED: c_int = 32;
// The groups below reuse the bit values 0x100 / 0x200 / 0x400.
// NOTE(review): presumably each group applies to a different kind of
// message — confirm against linux/netlink.h before combining them.
pub const NLM_F_ROOT: c_int = 0x100;
pub const NLM_F_MATCH: c_int = 0x200;
pub const NLM_F_ATOMIC: c_int = 0x400;
pub const NLM_F_DUMP: c_int = NLM_F_ROOT | NLM_F_MATCH;
pub const NLM_F_REPLACE: c_int = 0x100;
pub const NLM_F_EXCL: c_int = 0x200;
pub const NLM_F_CREATE: c_int = 0x400;
pub const NLM_F_APPEND: c_int = 0x800;
pub const NLM_F_NONREC: c_int = 0x100;
pub const NLM_F_BULK: c_int = 0x200;
pub const NLM_F_CAPPED: c_int = 0x100;
pub const NLM_F_ACK_TLVS: c_int = 0x200;
// A second NETLINK_* series numbered 1..=12, separate from the one above.
pub const NETLINK_ADD_MEMBERSHIP: c_int = 1;
pub const NETLINK_DROP_MEMBERSHIP: c_int = 2;
pub const NETLINK_PKTINFO: c_int = 3;
pub const NETLINK_BROADCAST_ERROR: c_int = 4;
pub const NETLINK_NO_ENOBUFS: c_int = 5;
pub const NETLINK_RX_RING: c_int = 6;
pub const NETLINK_TX_RING: c_int = 7;
pub const NETLINK_LISTEN_ALL_NSID: c_int = 8;
pub const NETLINK_LIST_MEMBERSHIPS: c_int = 9;
pub const NETLINK_CAP_ACK: c_int = 10;
pub const NETLINK_EXT_ACK: c_int = 11;
pub const NETLINK_GET_STRICT_CHK: c_int = 12;
// Bits 15 and 14 are flags; NLA_TYPE_MASK clears exactly those two bits.
pub const NLA_F_NESTED: c_int = 1 << 15;
pub const NLA_F_NET_BYTEORDER: c_int = 1 << 14;
pub const NLA_TYPE_MASK: c_int = !(NLA_F_NESTED | NLA_F_NET_BYTEORDER);
+722
View File
@@ -0,0 +1,722 @@
#!/bin/env python3
# HOW THIS WORKS
#
# This script takes mlibc header files and generates bindings to be used with rust's "libc" crate.
# A configuration file is needed for its proper function; an example is provided alongside this
# script. Please do note that it is used for managarm, which lives under `unix/linux_like` in the
# "libc" crate. If your OS does not live under this directory, but e.g. under just `unix` instead,
# you will need to adapt the configuration to fit your use.
#
# HOW TO USE
#
# > python rust-libc <path/to/your/installed/mlibc/headers> <cross-gcc> [<single-header.h>]
#
# By default, the script parses all header files in the directory supplied, except for when a
# single header is provided, where it will only parse that.
import argparse
import io
import os
import pathlib
import string
import subprocess
import sys
from dataclasses import dataclass, field

import clang.cindex
import colorama
import yaml
from clang.cindex import Cursor, CursorKind, TokenKind, TypeKind
# When true, emit() prints nothing. Overwritten from the `-n` flag in the
# `__main__` section.
dry_run = True
# Number of errors reported through log_err() so far.
errors_emitted = 0
def log_err(prefix, msg):
    """Write ``prefix: msg`` to stderr (prefix in red) and count the error.

    Every call increments the module-level ``errors_emitted`` counter.
    """
    global errors_emitted
    red = colorama.Fore.RED
    reset = colorama.Style.RESET_ALL
    line = f"{red}{prefix}{reset}: {msg}"
    print(line, file=sys.stderr)
    errors_emitted = errors_emitted + 1
def emit(msg):
    """Print ``msg`` to stdout, unless the module-level ``dry_run`` flag is set."""
    if dry_run:
        return
    print(msg)
def no_system_includes(cursor, level):
    """Cursor filter that drops level-1 cursors coming from ``/usr/include``.

    Cursors at any level other than 1 always pass. A level-1 cursor passes
    only when it has a source file whose name does not start with
    ``/usr/include``.
    """
    if level != 1:
        return True
    source_file = cursor.location.file
    if source_file is None:
        return False
    return not source_file.name.startswith("/usr/include")
class Type:
def __init__(
    self,
    c: clang.cindex.Cursor,
    t: clang.cindex.Type = None,
    convert_arrays_to_ptrs=False,
):
    """Wrap a cursor together with the clang type to render.

    ``t`` overrides the type to render; when it is falsy the cursor's own
    type is used. ``convert_arrays_to_ptrs`` makes constant-size arrays
    render as pointers (see ``__str__``).
    """
    self.convert_arrays_to_ptrs = convert_arrays_to_ptrs
    self.cursor = c
    self.type = t or c.type
@property
def kind(self):
    """The ``kind`` attribute of the wrapped clang type."""
    wrapped = self.type
    return wrapped.kind
def convert_ptr_type(self, c, ty, is_pointee=False):
pointee = ty if is_pointee else ty.get_pointee()
if pointee.kind == TypeKind.FUNCTIONPROTO:
arg_list = []
for f in pointee.argument_types():
arg_list.append(f"{Type(c, f)}")
args = ", ".join(arg_list)
ret_type = Type(c, pointee.get_result())
if c.semantic_parent.spelling in config["force_raw_function_pointer"]:
return f'extern "C" fn({args})' + (
f" -> {ret_type}" if str(ret_type) != "c_void" else ""
)
else:
return f'Option<unsafe extern "C" fn({args})' + (
f" -> {ret_type}>" if str(ret_type) != "c_void" else ">"
)
is_mut = not pointee.spelling.startswith("const")
prefix = "*" + ("mut" if is_mut else "const") + " "
type_iter = pointee
while type_iter.kind == TypeKind.POINTER:
prefix += "*mut "
type_iter = type_iter.get_pointee()
t = type_iter.spelling.removeprefix("const ")
tokens = t.split(" ")
match tokens:
case ["char", *_]:
return prefix + "c_char"
case ["struct", x, *_] if x in config["force_local_type"]:
return prefix + x
case ["struct", x, *_]:
return f"{prefix} crate::{x}"
case ["int", *_]:
return prefix + "c_int"
case ["unsigned", "char", *_]:
return prefix + "c_uchar"
case ["unsigned", "short", *_]:
return prefix + "c_ushort"
case ["unsigned", "int", *_]:
return prefix + "c_uint"
case ["unsigned", "long", *_]:
return prefix + "c_ulong"
case ["unsigned", *_]:
log_err("unhandled unsigned type", f"'{t}'")
case ["void", *_]:
return prefix + "c_void"
case ["double", *_]:
return prefix + "c_double"
case [*_]:
return prefix + "crate::" + t
def __str__(self):
typename = str(self.kind)
match self.kind:
case TypeKind.VOID:
typename = "c_void"
case TypeKind.LONG:
typename = "c_long"
case TypeKind.LONGLONG:
typename = "c_longlong"
case TypeKind.UINT:
typename = "c_uint"
case TypeKind.INT:
typename = "c_int"
case TypeKind.ULONG:
typename = "c_ulong"
case TypeKind.ULONGLONG:
typename = "c_ulonglong"
case TypeKind.USHORT:
typename = "c_ushort"
case TypeKind.SHORT:
typename = "c_short"
case TypeKind.CHAR_S:
typename = "c_char"
case TypeKind.UCHAR:
typename = "c_uchar"
case TypeKind.DOUBLE:
typename = "c_double"
case TypeKind.LONGDOUBLE:
typename = "c_longdouble"
case TypeKind.FLOAT:
typename = "c_float"
case TypeKind.CONSTANTARRAY:
if self.convert_arrays_to_ptrs:
typename = self.convert_ptr_type(
self.cursor, self.type.get_array_element_type(), is_pointee=True
)
else:
typename = f"[{str(Type(self.cursor, self.type.get_array_element_type()))}; {self.type.element_count}]"
case TypeKind.INCOMPLETEARRAY:
typename = "*mut " + str(
Type(self.cursor, self.type.get_array_element_type())
)
case TypeKind.ELABORATED:
if self.is_va_list():
typename = "*mut c_char"
elif self.cursor.is_anonymous():
typename = "crate::" + Type.cursor_name(self.cursor)
elif self.type.get_declaration().displayname in (
"uint8_t",
"__mlibc_uint8",
):
typename = "u8"
elif self.type.get_declaration().displayname in (
"int8_t",
"__mlibc_int8",
):
typename = "i8"
elif self.type.get_declaration().displayname in (
"uint16_t",
"__mlibc_uint16",
):
typename = "u16"
elif self.type.get_declaration().displayname in (
"int16_t",
"__mlibc_int16",
):
typename = "i16"
elif self.type.get_declaration().displayname in (
"uint32_t",
"__mlibc_uint32",
):
typename = "u32"
elif self.type.get_declaration().displayname in (
"int32_t",
"__mlibc_int32",
):
typename = "i32"
elif self.type.get_declaration().displayname in (
"uint64_t",
"__mlibc_uint64",
):
typename = "u64"
elif self.type.get_declaration().displayname in (
"int64_t",
"__mlibc_int64",
):
typename = "i64"
elif self.type.get_declaration().displayname in (
"intptr_t",
"__mlibc_intptr",
):
typename = "isize"
elif self.type.get_declaration().displayname in ("__mlibc_size"):
typename = "usize"
else:
typename = "crate::" + str(self.type.get_declaration().displayname)
case TypeKind.POINTER:
typename = self.convert_ptr_type(self.cursor, self.type)
case TypeKind.TYPEDEF:
return str(self.type.spelling)
case TypeKind.RECORD:
return ""
return typename
@property
def canonical(self):
    """Rust spelling of the canonical form of the wrapped type.

    The canonical clang type is passed as the *type* argument together with
    this object's cursor. Passing it as the cursor argument (as before) made
    the constructor read ``.type`` from a clang Type, which has no such
    attribute.
    """
    return str(Type(self.cursor, self.type.get_canonical()))
def is_valid(self):
    """True unless the wrapped type's kind is ``TypeKind.INVALID``."""
    current = self.kind
    return current != TypeKind.INVALID
def is_va_list(self):
    """True for an elaborated type whose declaration is ``__builtin_va_list``."""
    if self.kind != TypeKind.ELABORATED:
        return False
    declared_as = self.type.get_declaration().displayname
    return declared_as == "__builtin_va_list"
def escape_name(name: str):
    """Return ``name`` as a valid Rust identifier.

    C identifiers that collide with a Rust keyword are returned in raw
    identifier form (``r#name``); every other name is returned unchanged.
    ``crate``, ``self``, ``Self`` and ``super`` are deliberately not listed
    because Rust does not accept them as raw identifiers.
    """
    rust_keywords = {
        "as", "async", "await", "break", "const", "continue", "dyn", "else",
        "enum", "extern", "false", "fn", "for", "if", "impl", "in", "let",
        "loop", "match", "mod", "move", "mut", "pub", "ref", "return",
        "static", "struct", "trait", "true", "type", "unsafe", "use", "where",
        "while",
        # reserved for future use
        "abstract", "become", "box", "do", "final", "gen", "macro",
        "override", "priv", "try", "typeof", "unsized", "virtual", "yield",
    }
    if name in rust_keywords:
        return f"r#{name}"
    return name
def cursor_name(c: Cursor):
    """Name to use for the declaration behind cursor ``c``.

    Anonymous declarations get a synthetic ``anon_<file stem>_line<N>`` name
    built from their source location; everything else uses the cursor's
    display name, passed through ``Type.escape_name``.
    """
    decl = c.type.get_declaration()
    if not (decl and decl.is_anonymous()):
        return Type.escape_name(c.displayname)
    stem = pathlib.Path(str(decl.location.file)).stem
    line_no = decl.location.line
    return f"anon_{stem}_line{line_no}"
@dataclass
class State:
    """Names that have already been emitted, grouped by kind.

    Each list is a real dataclass field created per instance. Previously the
    lists were plain class attributes and therefore shared by every ``State``
    object.
    """

    functions: list = field(default_factory=list)
    macros: list = field(default_factory=list)
    types: list = field(default_factory=list)
    structs: list = field(default_factory=list)
    variables: list = field(default_factory=list)
@dataclass
class RustBindingGenerator:
config: dict
in_function_block = False
in_struct_block = False
in_union_block = False
def handle_macro(self, cursor, gen, state):
    """Translate an object-like macro into a Rust ``pub const``.

    Parameters:
        cursor: the macro definition cursor; its display name is the
            constant name.
        gen: the macro's token list. The first token (the macro name) is
            popped, so the list is modified in place.
        state: receives the macro name in ``state.macros`` once emitted.

    The constant's type is derived from integer-literal suffixes
    (``U``/``L``/``UL``/``LL``/``ULL``) and can be overridden through
    ``config["force_macro_type"]``. Macros without a body emit nothing.
    A macro containing a token kind that cannot be translated is reported
    through ``log_err`` and skipped entirely.
    """

    def is_num(s):
        if s.removeprefix("0o").isnumeric():
            return True
        if set(s.removeprefix("0x")).issubset(string.hexdigits):
            return True
        return False

    assert len(gen) >= 1
    assert gen[0].kind == TokenKind.IDENTIFIER
    gen.pop(0)
    if not gen:
        return

    tokens = []
    # Set once and kept across tokens, so the suffix of `1UL` in `(1UL)` is
    # not forgotten when the closing parenthesis is processed.
    c_type = "int"
    is_unsigned = False
    is_negative = False
    last = len(gen) - 1
    for i, tok in enumerate(gen):
        spelling = tok.spelling
        if tok.kind == TokenKind.PUNCTUATION and spelling in ("(", ")"):
            # A parenthesis in first or last position is dropped.
            if not (i == 0 or i == last):
                tokens.append(spelling)
        elif tok.kind == TokenKind.PUNCTUATION and spelling == "-":
            if tokens:
                # Minus after other tokens: keep it in the expression.
                tokens.append(spelling)
            else:
                # Leading minus: emitted as the sign of the constant.
                is_negative = True
        elif tok.kind == TokenKind.LITERAL:
            # Suffix stripping is restricted to literals. Identifiers made of
            # hex letters (e.g. `ACL`) would otherwise lose their last
            # characters.
            if spelling.endswith("ULL") and is_num(spelling[:-3]):
                spelling = spelling.removesuffix("ULL")
                c_type = "longlong"
                is_unsigned = True
            elif spelling.endswith("LL") and is_num(spelling[:-2]):
                spelling = spelling.removesuffix("LL")
                c_type = "longlong"
                is_unsigned = False
            elif spelling.endswith("UL") and is_num(spelling[:-2]):
                spelling = spelling.removesuffix("UL")
                c_type = "long"
                is_unsigned = True
            elif spelling.endswith("L") and is_num(spelling[:-1]):
                spelling = spelling.removesuffix("L")
                c_type = "long"
                is_unsigned = False
            elif spelling.endswith("U") and is_num(spelling[:-1]):
                spelling = spelling.removesuffix("U")
                is_unsigned = True
            # C octal (leading 0) becomes Rust octal (leading 0o).
            if (
                is_num(spelling)
                and spelling.startswith("0")
                and not spelling.startswith("0x")
                and spelling != "0"
            ):
                spelling = f"0o{spelling[1:]}"
            tokens.append(spelling)
        elif tok.kind in (TokenKind.IDENTIFIER, TokenKind.PUNCTUATION):
            tokens.append(spelling)
        else:
            log_err(
                f"unexpected token in macro '{cursor.displayname}'",
                f"{tok.kind} {tok.spelling} at {gen[0].location}, skipping macro",
            )
            # Really skip it: a constant built from a partial token list
            # would not be valid Rust.
            return

    c_type = "c_" + ("u" if is_unsigned else "") + c_type
    if self.is_ignored("macros", state.macros, cursor.displayname):
        return
    for name in config["force_macro_type"]:
        if cursor.displayname in config["force_macro_type"][name]:
            c_type = name
            break
    emit(
        "pub const {}: {} = {}{};".format(
            cursor.displayname,
            c_type,
            "-" if is_negative else "",
            "".join(tokens),
        )
    )
    state.macros.append(cursor.displayname)
def indent(self, level=0):
    """Indentation for the next emitted line.

    Returns ``level + 1`` tab characters while a function, struct or union
    block is open, and an empty string otherwise.
    """
    inside_block = (
        self.in_function_block or self.in_struct_block or self.in_union_block
    )
    if not inside_block:
        return ""
    return "\t" * (level + 1)
def handle_field_decl(self, cursor, c, inline_defs):
    """Emit one ``pub name: type,`` line for a struct/union member.

    Parameters:
        cursor: the enclosing struct/union declaration.
        c: the field declaration being emitted.
        inline_defs: nested declarations collected so far by the caller. The
            most recent entry is removed when a ``type`` override replaces
            the member that used it.

    ``config["force_struct_member_type"]`` may override a member, keyed by
    struct name and then member name, in one of two ways:
      * ``type`` (optionally with ``rename-to``) replaces the member's type;
      * ``replace`` substitutes a list of ``name``/``type`` members.
    An override entry carrying neither is reported through ``log_err`` and
    the member is then emitted unmodified.
    """
    tc = Type(c)
    assert tc.is_valid()
    name = str(tc)
    if c.is_anonymous():
        name = Type.cursor_name(c)
    struct_name = Type.cursor_name(cursor)
    if struct_name in config["force_struct_member_type"]:
        info = config["force_struct_member_type"][struct_name]
        detail = next((x for x in info if x["name"] == c.displayname), None)
        if detail is not None:
            if "type" in detail:
                assert "replace" not in detail
                name = (
                    detail["rename-to"] if "rename-to" in detail else c.displayname
                )
                emit(
                    self.indent(1)
                    + f"pub {Type.escape_name(name)}: {detail['type']},"
                )
                # Guard against an empty list: a member whose type was not
                # declared inline has nothing to drop.
                if (
                    inline_defs
                    and inline_defs[-1].get_usr()
                    == c.type.get_declaration().get_usr()
                ):
                    inline_defs.pop()
                return
            elif "replace" in detail:
                for member in detail["replace"]:
                    emit(
                        self.indent(1)
                        + "pub {}: {},".format(member["name"], member["type"])
                    )
                return
            else:
                log_err(
                    "invalid configuration",
                    f"missing info for override for struct '{c.displayname}'",
                )
    emit(self.indent(1) + f"pub {Type.escape_name(c.displayname)}: {name},")
def handle_data_structs(self, cursor, state, level=0):
    """Emit the Rust definition for a struct, union or enum declaration.

    Structs are emitted inside an ``s! { ... }`` block and unions inside an
    ``s_no_extra_traits! { ... }`` block. A block is opened on demand and is
    left open so that following declarations of the same kind share it; it is
    closed here as soon as a declaration of a different kind arrives.
    Nested struct/union declarations are collected and emitted after the
    enclosing definition with ``level + 1``.

    The emitted name is appended to ``state.structs`` for all three kinds.
    """
    # Nested declarations found among the children, emitted at the end.
    inline_defs = []
    children = [i for i in cursor.get_children()]
    # Declarations without members are skipped unless the config forces them.
    if (
        not children
        and Type.cursor_name(cursor) not in config["forced_empty_structs"]
    ):
        return
    # Close a still-open macro block of the other kind.
    if self.in_struct_block and cursor.kind != CursorKind.STRUCT_DECL:
        emit("}")
        self.in_struct_block = False
    if self.in_union_block and cursor.kind != CursorKind.UNION_DECL:
        emit("}")
        self.in_union_block = False
    match cursor.kind:
        case CursorKind.STRUCT_DECL:
            if not self.in_struct_block:
                emit("s! {")
                self.in_struct_block = True
            # A PACKED_ATTR child marks the struct as packed.
            packed = False
            for m in cursor.get_children():
                if CursorKind.PACKED_ATTR == m.kind:
                    packed = True
                    break
            if packed:
                emit(self.indent() + "#[repr(packed)]")
            emit(self.indent() + f"pub struct {Type.cursor_name(cursor)} {{")
            state.structs.append(Type.cursor_name(cursor))
        case CursorKind.UNION_DECL:
            if not self.in_union_block:
                emit("s_no_extra_traits! {")
                self.in_union_block = True
            emit("#[repr(C)]")
            emit(self.indent() + f"pub union {Type.cursor_name(cursor)} {{")
            state.structs.append(Type.cursor_name(cursor))
        case CursorKind.ENUM_DECL:
            if cursor.type.get_declaration().is_anonymous() and level == 1:
                # ignore anonymous enums in the global scope
                return
            emit(self.indent() + f"pub enum {Type.cursor_name(cursor)} {{")
            state.structs.append(Type.cursor_name(cursor))
        case _:
            log_err("unhandled data struct kind", f"{cursor.kind}")
    if Type.cursor_name(cursor) in config["force_struct_zero_fill"]:
        # Opaque representation: a single hidden byte array as large as the
        # C type, instead of the individual members.
        struct_size = cursor.type.get_size()
        emit("\t\t#[doc(hidden)]")
        emit(f"\t\tsize: [u8; {struct_size}],")
    else:
        for c in children:
            match c.kind:
                case CursorKind.FIELD_DECL:
                    self.handle_field_decl(cursor, c, inline_defs)
                case CursorKind.STRUCT_DECL | CursorKind.UNION_DECL:
                    inline_defs.append(c)
                case CursorKind.ENUM_CONSTANT_DECL:
                    emit(f"{c.displayname} = {c.enum_value},")
                case CursorKind.PACKED_ATTR:
                    # Already handled when the struct header was emitted.
                    pass
                case _:
                    log_err(f"unhandled {cursor.kind} member", f"kind {c.kind}")
    emit(self.indent() + "}")
    if cursor.kind == CursorKind.ENUM_DECL:
        # Enums get hand-written Copy/Clone implementations.
        emit(f"impl Copy for {Type.cursor_name(cursor)} " + "{}")
        emit(f"impl Clone for {Type.cursor_name(cursor)} " + "{")
        emit(f"\tfn clone(&self) -> {Type.cursor_name(cursor)} {{")
        emit("\t\t*self")
        emit("\t}")
        emit("}")
    for s in inline_defs:
        self.handle_data_structs(s, state, level + 1)
def is_ignored(self, typename, ignorelist, name):
    """Decide whether the entity ``name`` of category ``typename`` is skipped.

    Checked in this order:
      1. macros shaped like include guards (``_..._H``) are ignored;
      2. names already present in ``ignorelist`` are ignored;
      3. names listed under ``forced_<typename>`` in the config are kept;
      4. names starting with ``__`` are ignored;
      5. names listed under ``ignored_<typename>`` in the config are ignored.
    Anything else is kept.
    """
    looks_like_guard = (
        typename == "macros" and name.startswith("_") and name.endswith("_H")
    )
    if looks_like_guard or name in ignorelist:
        return True
    forced_key = "forced_" + typename
    if forced_key in config and name in config[forced_key]:
        return False
    if name.startswith("__"):
        return True
    ignored_key = "ignored_" + typename
    return ignored_key in config and name in config[ignored_key]
def is_ignored_file(base_dir: pathlib.Path, file: pathlib.Path, config):
    """Tell whether declarations from ``file`` should be left out.

    A file is ignored when
      * its path does not start with ``base_dir``, or
      * its path relative to ``base_dir`` equals an entry of
        ``config["ignored_files"]``, or
      * ``base_dir`` itself starts with one of ``config["includes"]``.
    """
    root = str(base_dir)
    target = str(file)
    if not target.startswith(root):
        return True
    relative = target.removeprefix(root).removeprefix("/")
    if any(relative == entry for entry in config["ignored_files"]):
        return True
    return any(root.startswith(inc) for inc in config["includes"])
def from_cursor(self, base_dir, header, cursor, filter_pred, level=0):
global state
if cursor.location.file:
f = pathlib.Path(str(cursor.location.file))
if RustBindingGenerator.is_ignored_file(base_dir, f, config):
return
if filter_pred(cursor, level):
t = Type(cursor)
if self.in_struct_block and cursor.kind != CursorKind.STRUCT_DECL:
emit("}")
self.in_struct_block = False
if self.in_union_block and cursor.kind != CursorKind.STRUCT_DECL:
emit("}")
self.in_union_block = False
if self.in_function_block and cursor.kind != CursorKind.FUNCTION_DECL:
emit("}")
self.in_function_block = False
match cursor.kind:
case CursorKind.MACRO_DEFINITION:
if not self.is_ignored("macros", [], cursor.displayname):
gen = [token for token in cursor.get_tokens()]
self.handle_macro(cursor, gen, state)
case CursorKind.STRUCT_DECL:
if self.is_ignored("structs", state.structs, cursor.displayname):
return
self.handle_data_structs(cursor, state, level)
case CursorKind.UNION_DECL:
if self.is_ignored("unions", state.structs, cursor.displayname):
return
self.handle_data_structs(cursor, state, level)
case CursorKind.ENUM_DECL:
if self.is_ignored("enums", state.structs, cursor.displayname):
return
self.handle_data_structs(cursor, state, level)
case CursorKind.TYPEDEF_DECL:
if not self.is_ignored("types", state.types, cursor.displayname):
underlying = Type(cursor, cursor.underlying_typedef_type)
self.from_cursor(
base_dir,
header,
cursor.underlying_typedef_type.get_declaration(),
filter_pred,
level,
)
if cursor.displayname not in state.structs:
emit(f"pub type {cursor.displayname} = {underlying};")
state.types.append(cursor.displayname)
case CursorKind.FUNCTION_DECL:
if self.is_ignored("functions", state.functions, cursor.spelling):
return
args = []
for c in cursor.get_arguments():
arg_name = c.displayname if c.displayname else f"arg{len(args)}"
tc = Type(c, convert_arrays_to_ptrs=True)
if tc.is_va_list():
arg_name = c.displayname if c.displayname else "arg_list"
assert tc.is_valid()
if str(tc):
args.append(f"{Type.escape_name(arg_name)}: {str(tc)}")
if cursor.type.is_function_variadic():
args.append("...")
arg_str = ", ".join(args)
ret_type = str(Type(cursor, cursor.type.get_result()))
if not self.in_function_block:
emit('extern "C" {')
self.in_function_block = True
emit(
f"\tpub fn {cursor.spelling}({arg_str})"
+ (f" -> {ret_type};" if ret_type != "c_void" else ";")
)
state.functions.append(cursor.spelling)
case CursorKind.TRANSLATION_UNIT:
for c in cursor.get_children():
self.from_cursor(base_dir, header, c, filter_pred, level + 1)
case CursorKind.INCLUSION_DIRECTIVE:
pass
case CursorKind.VAR_DECL:
if self.is_ignored(
"var_declarations", state.variables, cursor.spelling
):
return
else:
log_err(
"munhandled cursor type",
f"VAR_DECL of '{cursor.spelling}'",
)
case CursorKind.STATIC_ASSERT | CursorKind.UNEXPOSED_DECL:
pass
case CursorKind.MACRO_INSTANTIATION:
# TODO: cross-reference this with constant arrays?
pass
case _:
log_err(
"unhandled cursor type",
f"{cursor.kind} {cursor.spelling} {cursor.displayname} {cursor.location}",
)
if t.is_valid():
emit(f"type '{t}' canonical '{t.canonical}'")
if level == 0 and self.in_struct_block:
emit("}")
self.in_struct_block = False
if level == 0 and self.in_union_block:
emit("}")
self.in_union_block = False
if level == 0 and self.in_function_block:
emit("}")
self.in_function_block = False
def parse(file: pathlib.Path, base_dir: pathlib.Path):
    """Parse one header with libclang and emit its Rust bindings.

    Parameters:
        file: header path relative to ``base_dir``.
        base_dir: root of the installed headers; also passed as ``-I``.

    A header that libclang fails to load is reported through ``log_err`` and
    skipped. If a header that is not ignored produces any diagnostics, they
    are all reported and the script exits with a non-zero status (the number
    of errors, capped at 255).
    """
    index = clang.cindex.Index.create()
    try:
        tu = index.parse(
            base_dir / file,
            args=[f"-I{p}" for p in config["includes"]] + ["-I" + str(base_dir), "-D_GNU_SOURCE"],
            options=clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
            | clang.cindex.TranslationUnit.PARSE_SKIP_FUNCTION_BODIES,
        )
    except Exception as e:
        # `Exception` rather than a bare `except`, so Ctrl-C and SystemExit
        # still propagate.
        log_err("parsing error", f"{file}: {e}")
        return
    if not RustBindingGenerator.is_ignored_file(base_dir, base_dir / file, config):
        if tu.diagnostics:
            for d in tu.diagnostics:
                log_err("compile error", d)
            # log_err() has already counted every diagnostic.
            print(f"\n{errors_emitted} errors emitted")
            # Exit statuses are taken modulo 256; cap so the status can never
            # wrap around to 0.
            exit(min(errors_emitted, 0xFF))
        parser = RustBindingGenerator(config)
        emit("")
        print(f"// {tu.spelling.removeprefix(str(base_dir)).removeprefix('/')}")
        parser.from_cursor(base_dir, file, tu.cursor, no_system_includes)
def gcc_install_path(gcc: str) -> pathlib.Path | None:
try:
result = subprocess.run(
[gcc, '-print-search-dirs'],
capture_output=True,
text=True,
check=True
)
for line in result.stdout.splitlines():
if line.startswith('install:'):
return (pathlib.Path(line.removeprefix('install: ').strip()) / 'include').resolve()
except subprocess.CalledProcessError as e:
print(f"Error running {gcc}:", e)
except FileNotFoundError:
print(f"{gcc} not found")
return None
if __name__ == "__main__":
    # Command line: [-n] <mlibc include dir> <cross gcc> [<single header>]
    argparser = argparse.ArgumentParser()
    argparser.add_argument("-n", dest="dry_run", action="store_true")
    argparser.add_argument("path")
    argparser.add_argument("gcc")
    argparser.add_argument("file", nargs="?")
    args = argparser.parse_args()
    dry_run = args.dry_run
    colorama.just_fix_windows_console()
    script_dir = os.path.dirname(__file__)
    # The configuration and the static Rust header live next to this script.
    with io.open(os.path.join(script_dir, "rust-libc-config.yml"), "r") as f:
        config = yaml.load(f, yaml.CSafeLoader)
    path = pathlib.Path(args.path)
    gcc_include_path = gcc_install_path(args.gcc)
    if not gcc_include_path:
        print("could not determine gcc's include directory")
        exit(1)
    gcc_include_path = os.path.relpath(pathlib.Path(gcc_include_path), pathlib.Path.cwd())
    if "includes" not in config:
        config["includes"] = list()
    # The compiler's own include directory is searched first.
    config["includes"].insert(0, gcc_include_path)
    with io.open(os.path.join(script_dir, "rust-libc-header.rs"), "r") as f:
        emit(f.read())
    state = State()
    if not args.file:
        for header in sorted(path.rglob("*.h")):
            # relative_to() strips exactly the leading directory components.
            # String prefix stripping also ate the leading dot of names such
            # as ".hidden/x.h" when the path argument was ".".
            parse(header.relative_to(path), path)
    else:
        parse(pathlib.Path(args.file), path)
    if errors_emitted > 0:
        print(f"\n{errors_emitted} errors emitted")
        # Exit statuses are taken modulo 256: cap the value so that e.g. 256
        # errors cannot be reported as success.
        exit(min(errors_emitted, 0xFF))
@@ -0,0 +1,63 @@
#pragma once
#include <stdio.h>
// Fallback for every value type that has no explicit specialization:
// prints `name = value` with the value shown as a long.
template<typename E>
constexpr void macro_print(const char *name, E val) {
	// Convert explicitly: passing an arbitrary E through the variadic
	// argument list under "%ld" is undefined unless E is exactly long.
	printf("%s = %ld\n", name, (long)val);
}
// Explicit specializations: each one prints `name = value` using the printf
// conversion that matches its value type exactly.
// NOTE(review): `constexpr` is presumably used for its implied `inline`, so
// these header-defined specializations can be included from several
// translation units — confirm.
template<>
constexpr void macro_print<signed long long>(const char *name, signed long long val) {
	printf("%s = %lld\n", name, val);
}
template<>
constexpr void macro_print<signed long>(const char *name, signed long val) {
	printf("%s = %ld\n", name, val);
}
template<>
constexpr void macro_print<signed int>(const char *name, signed int val) {
	printf("%s = %d\n", name, val);
}
template<>
constexpr void macro_print<signed short>(const char *name, signed short val) {
	printf("%s = %hd\n", name, val);
}
template<>
constexpr void macro_print<signed char>(const char *name, signed char val) {
	printf("%s = %hhd\n", name, val);
}
template<>
constexpr void macro_print<unsigned long long>(const char *name, unsigned long long val) {
	printf("%s = %llu\n", name, val);
}
template<>
constexpr void macro_print<unsigned long>(const char *name, unsigned long val) {
	printf("%s = %lu\n", name, val);
}
template<>
constexpr void macro_print<unsigned int>(const char *name, unsigned int val) {
	printf("%s = %u\n", name, val);
}
template<>
constexpr void macro_print<unsigned short>(const char *name, unsigned short val) {
	printf("%s = %hu\n", name, val);
}
// Prints the numeric value and, in parentheses, the same byte as a character.
template<>
constexpr void macro_print<unsigned char>(const char *name, unsigned char val) {
	printf("%s = %hhu ('%c')\n", name, val, val);
}
// String-valued macros are printed inside double quotes.
template<>
constexpr void macro_print<const char *>(const char *name, const char *val) {
	printf("%s = \"%s\"\n", name, val);
}