├── python ├── .projectile ├── .envrc ├── rsysapps │ ├── __init__.py │ └── dnspython_LUA.py ├── arepl │ ├── tests │ │ ├── __init__.py │ │ └── test_repl.py │ ├── __init__.py │ ├── help.py │ ├── astcodeop.py │ ├── aeval.py │ └── acompile.py ├── requirements.txt ├── rsyscall │ ├── tests │ │ ├── __init__.py │ │ ├── test_mmap.py │ │ ├── test_eventfd.py │ │ ├── test_timerfd.py │ │ ├── test_sched.py │ │ ├── test_signal.py │ │ ├── test_pointer.py │ │ ├── test_prctl.py │ │ ├── test_allocator.py │ │ ├── test_resource.py │ │ ├── test_fd.py │ │ ├── test_chroot.py │ │ ├── test_user.py │ │ ├── test_inotify.py │ │ ├── test_benchmark.py │ │ ├── test_misc.py │ │ ├── test_repl.py │ │ ├── test_nix.py │ │ ├── test_stdinboot.py │ │ ├── test_pipe.py │ │ ├── test_cat.py │ │ ├── test_pidns.py │ │ ├── test_setuid.py │ │ ├── utils.py │ │ ├── test_proc.py │ │ ├── test_seek.py │ │ ├── test_stub.py │ │ ├── trio_test_case.py │ │ ├── test_concurrency.py │ │ ├── test_clone.py │ │ ├── test_ip.py │ │ └── test_persistent.py │ ├── doc │ │ └── __init__.py │ ├── scripts │ │ ├── __init__.py │ │ └── local_repl.py │ ├── memory │ │ ├── __init__.py │ │ ├── span.py │ │ └── allocation_interface.py │ ├── network │ │ └── __init__.py │ ├── sys │ │ ├── __init__.py │ │ ├── ioctl.py │ │ ├── eventfd.py │ │ ├── signalfd.py │ │ ├── mount.py │ │ ├── syscall.py │ │ ├── prctl.py │ │ ├── timerfd.py │ │ └── un.py │ ├── net │ │ └── __init__.py │ ├── netinet │ │ ├── __init__.py │ │ ├── ip.py │ │ └── tcp.py │ ├── limits.py │ ├── tasks │ │ ├── ssh_bootstrap.sh │ │ └── __init__.py │ ├── stdlib │ │ ├── __init__.py │ │ └── mktemp.py │ ├── linux │ │ ├── __init__.py │ │ ├── fs.py │ │ ├── rtnetlink.py │ │ ├── netlink.py │ │ ├── memfd.py │ │ └── futex.py │ ├── path.py │ ├── unistd │ │ ├── cwd.py │ │ ├── exec.py │ │ ├── credentials.py │ │ ├── pipe.py │ │ └── io.py │ ├── near │ │ └── __init__.py │ ├── command.py │ ├── fcntl.py │ └── time.py ├── README ├── MANIFEST.in ├── default.nix ├── pyproject.toml ├── dneio │ ├── outcome.py │ └── __init__.py ├── setup.py ├── setup.cfg └── package.nix ├── nixdeps ├── .envrc ├── default.nix ├── nixdeps │ ├── __init__.py │ └── load.py ├── package.nix └── setup.py ├── default.nix ├── c ├── default.nix ├── rsyscall.pc.in ├── configure.ac ├── package.nix ├── src │ ├── rsyscall_x86_64.S │ ├── rsyscall_server_main.c │ ├── rsyscall.h │ ├── rsyscall_stdin_bootstrap.c │ ├── remote_cat.c │ └── rsyscall_unix_stub.c └── Makefile.am ├── research ├── sigbovik2020 │ ├── example.sh │ ├── paper.pdf │ ├── prog.c │ ├── README.org │ └── bibliography.bib ├── rsyscall_ui.jpg ├── README.org ├── interp.nix ├── run_in_build.nix ├── default.nix ├── stat.c ├── shell.sh ├── sleeper.py ├── dirfd.c ├── check_robust_list.c ├── nowait.c ├── sockopen.c ├── mmap_growsdown.c ├── pidns.c ├── test_epoll_signalfd.c ├── getdents.c └── extensible_visitor.py ├── docs ├── perspective_intros │ ├── capability.org │ ├── README.org │ ├── infracode.org │ ├── language_implementer.org │ ├── high_performance.org │ ├── rpc.org │ └── djbware.org ├── pitch.org ├── background.org └── conceptual.org ├── flake.lock ├── overlay.nix ├── pinned.nix ├── .gitignore ├── flake.nix └── README.org /python/.projectile: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nixdeps/.envrc: -------------------------------------------------------------------------------- 1 | use_nix 2 | -------------------------------------------------------------------------------- /python/.envrc: 
-------------------------------------------------------------------------------- 1 | use_nix 2 | -------------------------------------------------------------------------------- /python/rsysapps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | import ./python 2 | -------------------------------------------------------------------------------- /python/arepl/tests/__init__.py: -------------------------------------------------------------------------------- 1 | "Tests for our async repl" 2 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | cffi 2 | trio 3 | h11 4 | typeguard 5 | -------------------------------------------------------------------------------- /python/rsyscall/tests/__init__.py: -------------------------------------------------------------------------------- 1 | "Tests for rsyscall." 2 | -------------------------------------------------------------------------------- /python/README: -------------------------------------------------------------------------------- 1 | This is the Python version of the rsyscall library. 2 | -------------------------------------------------------------------------------- /python/rsyscall/doc/__init__.py: -------------------------------------------------------------------------------- 1 | "Various prose documentation strings" 2 | -------------------------------------------------------------------------------- /c/default.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import ../pinned.nix; 3 | in 4 | pkgs.librsyscall 5 | -------------------------------------------------------------------------------- /python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include ffibuilder.py 2 | include rsyscall/tasks/ssh_bootstrap.sh 3 | -------------------------------------------------------------------------------- /research/sigbovik2020/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ls; which ls 3 | stat / 4 | foo|bar 5 | -------------------------------------------------------------------------------- /nixdeps/default.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import ../pinned.nix; 3 | in 4 | pkgs.python39Packages.nixdeps 5 | -------------------------------------------------------------------------------- /research/rsyscall_ui.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catern/rsyscall/HEAD/research/rsyscall_ui.jpg -------------------------------------------------------------------------------- /python/default.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import ../pinned.nix; 3 | in 4 | pkgs.python310Packages.rsyscall 5 | -------------------------------------------------------------------------------- /python/rsyscall/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | "Various miscellaneous command-line scripts built with rsyscall" 2 | 
-------------------------------------------------------------------------------- /research/sigbovik2020/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/catern/rsyscall/HEAD/research/sigbovik2020/paper.pdf -------------------------------------------------------------------------------- /python/rsyscall/memory/__init__.py: -------------------------------------------------------------------------------- 1 | "Classes and functions which provide the capability to interact with memory" 2 | -------------------------------------------------------------------------------- /python/rsyscall/network/__init__.py: -------------------------------------------------------------------------------- 1 | "Deals with communication between processes, even if that happens on a single host" 2 | -------------------------------------------------------------------------------- /python/rsyscall/sys/__init__.py: -------------------------------------------------------------------------------- 1 | """`#include <sys/...>` 2 | 3 | Headers for general Linux functionality 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "cffi"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /python/rsyscall/net/__init__.py: -------------------------------------------------------------------------------- 1 | """`#include <net/...>` 2 | 3 | Headers for functionality related to network devices. 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /python/rsyscall/netinet/__init__.py: -------------------------------------------------------------------------------- 1 | """`#include <netinet/...>` 2 | 3 | Headers for functionality related to the Internet. 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /python/rsyscall/limits.py: -------------------------------------------------------------------------------- 1 | "`#include <limits.h>`" 2 | from rsyscall._raw import ffi, lib # type: ignore 3 | 4 | NAME_MAX: int = lib.NAME_MAX 5 | -------------------------------------------------------------------------------- /research/README.org: -------------------------------------------------------------------------------- 1 | These are C programs, shell scripts, and notes which were written while researching or demonstrating some functionality. 
2 | -------------------------------------------------------------------------------- /docs/perspective_intros/capability.org: -------------------------------------------------------------------------------- 1 | * Perspective 2 | - Object-capability security is cool 3 | - Capsicum is cool 4 | - CloudABI is cool 5 | 6 | 7 | -------------------------------------------------------------------------------- /research/interp.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import ./pinned.nix; 3 | rsyscall = import ./python/default.nix; 4 | in 5 | pkgs.python37.withPackages (ps: [ rsyscall ]) 6 | -------------------------------------------------------------------------------- /nixdeps/nixdeps/__init__.py: -------------------------------------------------------------------------------- 1 | "Support for depending on non-Python Nix packages, with dependencies resolved at Python package build time" 2 | from nixdeps.load import PackageClosure 3 | -------------------------------------------------------------------------------- /python/rsyscall/tasks/ssh_bootstrap.sh: -------------------------------------------------------------------------------- 1 | dir="$(mktemp --directory)" 2 | cat >"$dir/bootstrap" 3 | chmod +x "$dir/bootstrap" 4 | cd "$dir" || exit 1 5 | echo "$dir" 6 | exec "$dir/bootstrap" socket 7 | -------------------------------------------------------------------------------- /docs/perspective_intros/README.org: -------------------------------------------------------------------------------- 1 | Here are introductions to rsyscall from various perspectives. 2 | 3 | First, we describe the perspective. 4 | 5 | Then, we describe rsyscall from that perspective. 6 | -------------------------------------------------------------------------------- /research/run_in_build.nix: -------------------------------------------------------------------------------- 1 | with import <nixpkgs> {}; 2 | 3 | runCommand "run_script.sh" { python = python36; } '' 4 | ls -l /proc/self/fd 5 | $python/bin/python -c 'open("/dev/stdout", "wb")' 6 | echo foo > /proc/self/fd/1 7 | echo bar 8 | '' 9 | -------------------------------------------------------------------------------- /python/rsyscall/stdlib/__init__.py: -------------------------------------------------------------------------------- 1 | """`#include <stdlib.h>` 2 | 3 | This module contains functionality built on top of `rsyscall.thread`. 4 | 5 | """ 6 | from rsyscall.stdlib.mktemp import mkdtemp 7 | 8 | __all__ = [ 9 | 'mkdtemp', 10 | ] 11 | -------------------------------------------------------------------------------- /docs/perspective_intros/infracode.org: -------------------------------------------------------------------------------- 1 | * Perspective 2 | - Infrastructure as code is cool 3 | - Writing regular code to set up my infrastructure is cool 4 | - JSON, YAML, "declarative" config files are not cool 5 | - Ansible 6 | - Pulumi is cool 7 | * rsyscall lets you 8 | -------------------------------------------------------------------------------- /python/rsyscall/linux/__init__.py: -------------------------------------------------------------------------------- 1 | """`#include <linux/...>` 2 | 3 | Headers for general Linux functionality, usually non-standard 4 | 5 | All these headers are for Linux-specific functionality, although not 6 | all Linux-specific functionality is in these headers. 
7 | """ 8 | -------------------------------------------------------------------------------- /nixdeps/package.nix: -------------------------------------------------------------------------------- 1 | { pythonPackages 2 | }: 3 | 4 | pythonPackages.buildPythonPackage { 5 | name = "nixdeps"; 6 | src = ./.; 7 | pythonImportsCheck = [ "nixdeps.setuptools" ]; 8 | propagatedBuildInputs = [ 9 | pythonPackages.setuptools 10 | ]; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /python/arepl/__init__.py: -------------------------------------------------------------------------------- 1 | """Pure functions to support creating REPLs which can run asynchronous code. 2 | 3 | There's no actual REPL in here; this is all 4 | [sans-io](https://sans-io.readthedocs.io/). 5 | 6 | """ 7 | from arepl.repl import PureREPL, ExpressionResult, run_repl, FromREPL 8 | -------------------------------------------------------------------------------- /c/rsyscall.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | pkglibexecdir=@libexecdir@/@PACKAGE@ 6 | 7 | Name: rsyscall 8 | Description: Make syscalls remotely 9 | Version: @VERSION@ 10 | Cflags: -I${includedir} 11 | Libs: -L${libdir} -lrsyscall 12 | 13 | -------------------------------------------------------------------------------- /nixdeps/nixdeps/load.py: -------------------------------------------------------------------------------- 1 | "The runtime functionality to actually use Nix dependencies" 2 | from dataclasses import dataclass 3 | from pathlib import Path 4 | import typing as t 5 | 6 | __all__ = [ 7 | 'PackageClosure', 8 | ] 9 | 10 | @dataclass 11 | class PackageClosure: 12 | path: Path 13 | closure: t.List[str] 14 | -------------------------------------------------------------------------------- /python/dneio/outcome.py: -------------------------------------------------------------------------------- 1 | "Just the outcome library, with a little more typing" 2 | from outcome import Value, Error 3 | import outcome 4 | import typing as t 5 | 6 | __all__ = [ 7 | 'Outcome', 8 | 'Value', 9 | 'Error', 10 | ] 11 | 12 | T = t.TypeVar('T') 13 | class Outcome(t.Generic[T], outcome.Outcome): 14 | pass 15 | -------------------------------------------------------------------------------- /docs/perspective_intros/language_implementer.org: -------------------------------------------------------------------------------- 1 | * Perspective 2 | - I'm a language implementer 3 | * rsyscall is a better Linux interaction model for your new language 4 | Linking against libc is so old hat, and so very un-type-safe. 5 | Don't you want to garbage collect (or reference count, or track with linearity, or whatever) file descriptors, 6 | just like you do memory? 
7 | -------------------------------------------------------------------------------- /research/default.nix: -------------------------------------------------------------------------------- 1 | { 2 | vm = (import <nixpkgs/nixos> { configuration = { 3 | virtualisation.graphics = false; 4 | services.mingetty.autologinUser = "root"; 5 | users.users.root.initialHashedPassword = ""; 6 | }; }).vm; 7 | } 8 | 9 | # SHARED_DIR=/home/sbaugh/.local/src/rsyscall ./result/bin/run-nixos-vm -kernel $(readlink -f arch/x86_64/boot/bzImage) -serial pty 10 | -------------------------------------------------------------------------------- /docs/perspective_intros/high_performance.org: -------------------------------------------------------------------------------- 1 | * Perspective 2 | - I want my code to go fast. 3 | * rsyscall decouples language concurrency and kernel parallelism 4 | You can be explicitly parallel without being concurrent. 5 | 6 | Also, you can send your syscalls to another process to execute them while your main process continues running, 7 | reducing the locality hit of entering the kernel, ala FlexSC. 8 | 9 | -------------------------------------------------------------------------------- /python/rsyscall/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | """Various process implementations with various different special abilities. 2 | 3 | Note that all process types can launch any other process type, in arbitrary combinations. 4 | 5 | Note also that users are able to implement their own types of processes, 6 | so if these available process types do not meet your needs, 7 | you can implement a new kind of process which does. 8 | 9 | """ 10 | -------------------------------------------------------------------------------- /research/sigbovik2020/prog.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main() { 6 | int rc; 7 | rc = fork(); 8 | if (rc == 0) { execlp("foo", "foo", "bar", "baz", NULL); } 9 | else { wait(NULL); } 10 | rc = fork(); 11 | if (rc == 0) { execlp("whatever", "whatever", "quux", NULL); } 12 | else { wait(NULL); } 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_mmap.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from rsyscall.tests.trio_test_case import TrioTestCase 4 | 5 | from rsyscall.sys.mman import PROT, MAP, MADV 6 | 7 | class TestMisc(TrioTestCase): 8 | async def test_madvise(self) -> None: 9 | mapping = await self.process.task.mmap(4096, PROT.READ|PROT.WRITE, MAP.SHARED) 10 | await mapping.madvise(MADV.REMOVE) 11 | await mapping.munmap() 12 | -------------------------------------------------------------------------------- /c/configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT([rsyscall], [0.0.1], [sbaugh@catern.com]) 2 | AC_CONFIG_AUX_DIR([build-aux]) 3 | AC_CONFIG_MACRO_DIRS([m4]) 4 | AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects]) 5 | AC_PROG_CC 6 | AM_PROG_AR 7 | AM_PROG_AS 8 | LT_INIT 9 | dnl workaround for https://debbugs.gnu.org/20082 10 | AC_SUBST(AR_FLAGS, [cr]) 11 | PKG_INSTALLDIR 12 | AC_CONFIG_HEADERS([config.h]) 13 | AC_CONFIG_FILES([ 14 | Makefile 15 | rsyscall.pc 16 | ]) 17 | AC_OUTPUT 18 | -------------------------------------------------------------------------------- /research/stat.c: 
-------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include /* Definition of AT_* constants */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | int main() { 15 | struct stat statbuf = {}; 16 | int ret = fstatat(AT_FDCWD, "stat.c", &statbuf, 0); 17 | if (ret != 0) { 18 | err(1, "fstatat"); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /c/package.nix: -------------------------------------------------------------------------------- 1 | { stdenv 2 | , autoreconfHook 3 | , autoconf 4 | , automake 5 | , libtool 6 | , pkgconfig 7 | , glibc 8 | }: 9 | 10 | stdenv.mkDerivation { 11 | name = "rsyscall"; 12 | src = ./.; 13 | # rsyscall needs to build some static bootstrap binaries; that requires its library be built 14 | # statically in addition to dynamically. 15 | # we'll install both static and dynamic libraries, which is fine. 16 | dontDisableStatic = true; 17 | buildInputs = [ autoreconfHook autoconf automake libtool pkgconfig glibc glibc.static ]; 18 | } 19 | -------------------------------------------------------------------------------- /research/shell.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o nounset -o errexit 3 | tmpdir=$(mktemp -d) 4 | ssh-keygen -b 1024 -q -N '' -C '' -f "$tmpdir/key" 5 | ssh -F /dev/null \ 6 | -o LogLevel=DEBUG \ 7 | -o IdentityFile="$tmpdir/key" \ 8 | -o BatchMode=yes \ 9 | -o StrictHostKeyChecking=no \ 10 | -o UserKnownHostsFile=/dev/null \ 11 | -o ProxyCommand="sshd -i -f /dev/null \ 12 | -o HostKey=$tmpdir/key \ 13 | -o AuthorizedKeysFile=$tmpdir/key.pub \ 14 | -o StrictModes=no \ 15 | -o PrintLastLog=no \ 16 | -o PrintMotd=no \ 17 | "\ 18 | localhost "$@" 19 | rm "$tmpdir/key" "$tmpdir/key.pub" 20 | rmdir "$tmpdir" 21 | -------------------------------------------------------------------------------- /research/sleeper.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tasks.local import local_process 2 | 3 | async def main(): 4 | true = await local_process.environ.which("true") 5 | sleep_inf = (await local_process.environ.which("sleep")).args("inf") 6 | for i in range(100): 7 | print("doing true", i) 8 | procs = [await (await local_process.fork()).exec(sleep_inf) for _ in range(500)] 9 | await local_process.run(true) 10 | for proc in procs: 11 | await proc.kill() 12 | await proc.wait() 13 | print("done true") 14 | 15 | if __name__ == "__main__": 16 | import trio 17 | trio.run(main) 18 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_eventfd.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.sys.eventfd import * 3 | from rsyscall.struct import Int64 4 | 5 | class TestEventfd(TrioTestCase): 6 | async def asyncSetUp(self) -> None: 7 | self.fd = await self.process.task.eventfd(0) 8 | 9 | async def asyncTearDown(self) -> None: 10 | await self.fd.close() 11 | 12 | async def test(self) -> None: 13 | inval = Int64(10) 14 | written, _ = await self.fd.write(await self.process.task.ptr(inval)) 15 | read, _ = await self.fd.read(written) 16 | self.assertEqual(inval, await read.read()) 17 | -------------------------------------------------------------------------------- 
/python/rsyscall/tests/test_timerfd.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.sys.timerfd import * 3 | 4 | class TestTimerfd(TrioTestCase): 5 | async def asyncSetUp(self) -> None: 6 | self.fd = await self.process.task.timerfd_create(CLOCK.REALTIME) 7 | 8 | async def asyncTearDown(self) -> None: 9 | await self.fd.close() 10 | 11 | async def test(self) -> None: 12 | await self.fd.timerfd_settime( 13 | TFD_TIMER.NONE, await self.process.task.ptr(Itimerspec(Timespec(0, 0), Timespec(0, 1)))) 14 | await self.fd.timerfd_gettime(await self.process.task.malloc(Itimerspec)) 15 | -------------------------------------------------------------------------------- /research/dirfd.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include /* Definition of AT_* constants */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | int main() { 17 | int ret; 18 | int dirfd; 19 | dirfd = ret = open("dir", O_DIRECTORY); 20 | if (ret < 0) err(1, "open"); 21 | char buf[4096]; 22 | ret = read(dirfd, buf, sizeof(buf)); 23 | if (ret < 0) err(1, "read"); 24 | } 25 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "nixpkgs": { 4 | "locked": { 5 | "lastModified": 1623580589, 6 | "narHash": "sha256-zkMevY2WLU+K7T/P4wVj18Ms8zyeRfp05ILf556m5Y8=", 7 | "owner": "nixos", 8 | "repo": "nixpkgs", 9 | "rev": "e38a06b96a0448d6972b7e51b2424130bb08ab1b", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "owner": "nixos", 14 | "ref": "nixos-unstable", 15 | "repo": "nixpkgs", 16 | "type": "github" 17 | } 18 | }, 19 | "root": { 20 | "inputs": { 21 | "nixpkgs": "nixpkgs" 22 | } 23 | } 24 | }, 25 | "root": "root", 26 | "version": 7 27 | } 28 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_sched.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.sched import CpuSet 3 | 4 | class TestEventfd(TrioTestCase): 5 | async def test(self) -> None: 6 | avail = await (await self.process.task.sched_getaffinity(await self.process.malloc(CpuSet))).read() 7 | with self.assertRaises(OSError, msg="calling setaffinity with an empty set should fail"): 8 | await self.process.task.sched_setaffinity(await self.process.ptr(CpuSet())) 9 | await self.process.task.sched_setaffinity(await self.process.ptr(CpuSet([list(avail)[0]]))) 10 | await self.process.task.sched_setaffinity(await self.process.ptr(avail)) 11 | -------------------------------------------------------------------------------- /python/rsyscall/netinet/ip.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from rsyscall._raw import ffi, lib # type: ignore 3 | import enum 4 | 5 | # ip.h is a superset of in.h 6 | from rsyscall.netinet.in_ import SockaddrIn, SockaddrIn6 7 | __all__ = [ 8 | "SockaddrIn", 9 | "SockaddrIn6", 10 | "IPPROTO", 11 | "IP", 12 | ] 13 | 14 | class IPPROTO(enum.IntEnum): 15 | "Used for a variety of things" 16 | ICMPV6 = lib.IPPROTO_ICMPV6 17 | 18 | class IP(enum.IntEnum): 19 | "Mostly for socket options" 20 | 
RECVERR = lib.IP_RECVERR 21 | PKTINFO = lib.IP_PKTINFO 22 | MULTICAST_TTL = lib.IP_MULTICAST_TTL 23 | MTU_DISCOVER = lib.IP_MTU_DISCOVER 24 | PMTUDISC_DONT = lib.IP_PMTUDISC_DONT 25 | -------------------------------------------------------------------------------- /overlay.nix: -------------------------------------------------------------------------------- 1 | let 2 | pythonOverrides = final: prev: { 3 | rsyscall = final.callPackage ./python/package.nix { }; 4 | nixdeps = final.callPackage ./nixdeps/package.nix { }; 5 | }; 6 | in 7 | self: super: { 8 | librsyscall = self.callPackage ./c/package.nix { }; 9 | 10 | python38 = super.python38.override { 11 | packageOverrides = pythonOverrides; 12 | }; 13 | python39 = super.python39.override { 14 | packageOverrides = pythonOverrides; 15 | }; 16 | python310 = super.python310.override { 17 | packageOverrides = pythonOverrides; 18 | }; 19 | 20 | python38Packages = self.python38.pkgs; 21 | python39Packages = self.python39.pkgs; 22 | python310Packages = self.python310.pkgs; 23 | } 24 | -------------------------------------------------------------------------------- /python/arepl/help.py: -------------------------------------------------------------------------------- 1 | """Convert `help()` output to a string. 2 | 3 | `help()` usually sends its output to stdout, which might not be where 4 | we want to write it, if our REPL is targeted somewhere else... 5 | 6 | """ 7 | 8 | import typing as t 9 | import pydoc # type: ignore 10 | 11 | __all__ = [ 12 | 'help_to_str', 13 | ] 14 | 15 | class Output: 16 | def __init__(self) -> None: 17 | self.results: t.List[str] = [] 18 | 19 | def write(self, s): 20 | self.results.append(s) 21 | 22 | def help_to_str(request: t.Any) -> str: 23 | "Call `help` on `result`, and return the result as a string" 24 | out = Output() 25 | pydoc.Helper(None, out).help(request) # type: ignore 26 | return "".join(out.results) 27 | -------------------------------------------------------------------------------- /research/check_robust_list.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | int main(int argc, char** argv) 13 | { 14 | if (argc != 2) errx(1, "usage: check_robust_list "); 15 | errno = 0; 16 | const int pid = strtol(argv[1], NULL, 0); 17 | if (errno != 0) err(1, "strtol(%s)", argv[1]); 18 | struct robust_list_head *headptr; 19 | size_t lenptr; 20 | int ret = syscall(SYS_get_robust_list, pid, &headptr, &lenptr); 21 | if (ret != 0) err(1, "get_robust_list(%d)", pid); 22 | printf("robust_list_head: %p\n", headptr); 23 | } 24 | -------------------------------------------------------------------------------- /nixdeps/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='nixdeps', 4 | version='1.0.0', 5 | description='A setuptools entrypoint to store dependencies on Nix packages at build-time', 6 | long_description='See the `nixdeps` module docstrings for more information on usage.', 7 | classifiers=[ 8 | "Programming Language :: Python :: 3", 9 | "License :: OSI Approved :: MIT License", 10 | ], 11 | url='https://github.com/catern/rsyscall', 12 | author='catern', 13 | author_email='sbaugh@catern.com', 14 | license='MIT', 15 | packages=['nixdeps'], 16 | entry_points={ 17 | 'distutils.setup_keywords': [ 18 | "nixdeps = nixdeps.setuptools:nixdeps", 19 | ], 20 | 
}, 21 | ) 22 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_signal.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.signal import * 3 | 4 | class TestSignal(TrioTestCase): 5 | async def test_sigaction(self) -> None: 6 | sa = Sigaction(Sighandler.DFL) 7 | ptr = await self.process.task.ptr(sa) 8 | await self.process.task.sigaction(SIG.WINCH, ptr, None) 9 | await self.process.task.sigaction(SIG.WINCH, None, ptr) 10 | out_sa = await ptr.read() 11 | self.assertEqual(sa.handler, out_sa.handler) 12 | self.assertEqual(sa.flags, out_sa.flags) 13 | self.assertEqual(sa.mask, out_sa.mask) 14 | self.assertEqual(sa.restorer, out_sa.restorer) 15 | 16 | # TODO test_signalblock 17 | # async def test_signalblock(self) -> None: 18 | # pass 19 | -------------------------------------------------------------------------------- /pinned.nix: -------------------------------------------------------------------------------- 1 | # Super stripped down version of nixpkgs compat 2 | # https://github.com/edolstra/flake-compat/blob/master/default.nix 3 | 4 | let 5 | lockFile = builtins.fromJSON (builtins.readFile ./flake.lock); 6 | fetchTree = info: 7 | if info.type == "github" then 8 | { 9 | outPath = builtins.fetchTarball "https://api.${info.host or "github.com"}/repos/${info.owner}/${info.repo}/tarball/${info.rev}"; 10 | rev = info.rev; 11 | shortRev = builtins.substring 0 7 info.rev; 12 | lastModified = info.lastModified; 13 | narHash = info.narHash; 14 | } 15 | else 16 | throw "flake input has unsupported input type '${info.type}'"; 17 | in 18 | import (fetchTree lockFile.nodes.nixpkgs.locked) { 19 | config = { }; 20 | overlays = [ (import ./overlay.nix) ]; 21 | } 22 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_pointer.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.handle.pointer import UseAfterFreeError 3 | 4 | class TestPointer(TrioTestCase): 5 | async def test_use_after_free_ptr(self) -> None: 6 | buf = await self.process.malloc(bytes, 16) 7 | buf.free() 8 | with self.assertRaises(UseAfterFreeError): 9 | buf.near 10 | str(buf) 11 | 12 | async def test_use_after_free_allocation(self) -> None: 13 | buf = await self.process.malloc(bytes, 16) 14 | buf.allocation.free(buf.mapping) 15 | with self.assertRaises(UseAfterFreeError): 16 | buf.near 17 | buf = await self.process.ptr(b'foo') 18 | buf.allocation.free(buf.mapping) 19 | with self.assertRaises(UseAfterFreeError): 20 | buf.near 21 | str(buf) 22 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_prctl.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from rsyscall.tests.trio_test_case import TrioTestCase 3 | from rsyscall.sys.prctl import * 4 | logger = logging.getLogger(__name__) 5 | 6 | class TestUser(TrioTestCase): 7 | async def asyncSetUp(self) -> None: 8 | self.process = await self.process.fork() 9 | 10 | async def asyncTearDown(self) -> None: 11 | await self.process.exit(0) 12 | 13 | async def test_name(self) -> None: 14 | namep = await self.process.task.prctl(PR.GET_NAME, await self.process.malloc(str, 16)) 15 | logger.info("My initial name is %s", await namep.read()) 16 | newname = "newname" 
17 | await self.process.task.prctl(PR.SET_NAME, await self.process.ptr(newname)) 18 | namep = await self.process.task.prctl(PR.GET_NAME, namep) 19 | self.assertEqual(newname, await namep.read()) 20 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_allocator.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from rsyscall.tests.trio_test_case import TrioTestCase 4 | from rsyscall.memory.allocator import BumpAllocator, OutOfSpaceError 5 | 6 | class TestMisc(TrioTestCase): 7 | async def test_madvise(self) -> None: 8 | size = 4096 9 | allocator = await BumpAllocator.make(self.process.task, size) 10 | mapping, first = await allocator.malloc(size//2, 1) 11 | _, second = await allocator.malloc(size//2, 1) 12 | with self.assertRaises(OutOfSpaceError): 13 | await allocator.malloc(size//2, 1) 14 | first.free(mapping) 15 | # this only works because the free happens immediately on the local process, 16 | # instead of waiting for the madvise response to come back, 17 | # usually free is asynchronous 18 | _, third = await allocator.malloc(size//2, 1) 19 | -------------------------------------------------------------------------------- /python/rsyscall/scripts/local_repl.py: -------------------------------------------------------------------------------- 1 | from rsyscall.wish import wish, Wish 2 | import trio 3 | import typing as t 4 | 5 | async def main() -> None: 6 | try: 7 | raise Exception("um") 8 | except Exception as exn: 9 | games = await wish(Wish(t.List[str], "i wish you would tell me ur faverut games right now!!!"), from_exn=exn) 10 | for game in games: 11 | print(game, "is fun") 12 | number = await wish(Wish(int, "i wish you would tell me a number!!!")) 13 | flavor = await wish(Wish(str, "Sorry for being so rude, spirit. Could you tell me your favorite flavor of pie?")) 14 | pies = [f"A tasty {flavor} pie."]*number 15 | await wish(Wish(type(None), 16 | f"Here you go spirit! {number} delicious {flavor} pies! Return when you're done eating them!")) 17 | print("Bye spirit! 
See you later!") 18 | 19 | if __name__ == "__main__": 20 | trio.run(main) 21 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_resource.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.sys.resource import * 3 | 4 | class TestResource(TrioTestCase): 5 | async def asyncSetUp(self) -> None: 6 | self.process = await self.process.fork() 7 | 8 | async def asyncTearDown(self) -> None: 9 | await self.process.exit(0) 10 | 11 | async def test_rlimit(self) -> None: 12 | old_rlimit = await (await self.process.task.getrlimit(RLIMIT.FSIZE, await self.process.malloc(Rlimit))).read() 13 | rlimit = Rlimit(old_rlimit.cur - 1, old_rlimit.max - 1) 14 | await self.process.task.setrlimit(RLIMIT.FSIZE, await self.process.ptr(rlimit)) 15 | new_rlimit = await (await self.process.task.getrlimit(RLIMIT.FSIZE, await self.process.malloc(Rlimit))).read() 16 | self.assertEqual(rlimit, new_rlimit) 17 | self.assertNotEqual(old_rlimit, new_rlimit) 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /c/Makefile.in 2 | /c/aclocal.m4 3 | /c/autom4te.cache/ 4 | /c/build-aux/ 5 | /c/config.h.in 6 | /c/configure 7 | /c/m4/ 8 | /c/Makefile 9 | /c/config.h 10 | /c/config.log 11 | /c/config.status 12 | /c/remote_cat 13 | /c/rsyscall-bootstrap 14 | /c/rsyscall-server 15 | /c/rsyscall-stdin-bootstrap 16 | /c/rsyscall-unix-stub 17 | *.la 18 | *.a 19 | *.lo 20 | *.o 21 | *.so 22 | .libs 23 | .deps 24 | .dirstamp 25 | /c/libtool 26 | /c/rsyscall.pc 27 | c/stamp-h1 28 | /python/build/ 29 | /python/rsyscall.egg-info/ 30 | __pycache__ 31 | /python/.mypy_cache/ 32 | /python/.pytest_cache/ 33 | /python/rsyscall/_nixdeps 34 | /python/TAGS 35 | /research/sigbovik2020/paper.aux 36 | /research/sigbovik2020/paper.bbl 37 | /research/sigbovik2020/paper.blg 38 | /research/sigbovik2020/paper.log 39 | /research/sigbovik2020/paper.out 40 | /python/strace/ 41 | result 42 | /nixdeps/dist/ 43 | /nixdeps/nixdeps.egg-info/ 44 | /python/dist/ 45 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_fd.py: -------------------------------------------------------------------------------- 1 | from rsyscall.fcntl import O 2 | from rsyscall.sched import CLONE 3 | from rsyscall.tests.trio_test_case import TrioTestCase 4 | import gc 5 | 6 | class TestFS(TrioTestCase): 7 | async def test_fd_gc(self) -> None: 8 | "Verify that file descriptors actually get GC'd." 
9 | gc.collect() 10 | await self.process.task.run_fd_table_gc() 11 | devnull = await self.process.ptr("/dev/null") 12 | first = int(await self.process.task.open(devnull, O.RDONLY)) 13 | for _ in range(5): 14 | child = await self.process.clone(CLONE.FILES) 15 | for _ in range(50): 16 | await child.task.open(await child.ptr("/dev/null"), O.RDONLY) 17 | gc.collect() 18 | await self.process.task.run_fd_table_gc() 19 | last = int(await self.process.task.open(devnull, O.RDONLY)) 20 | self.assertEqual(first, last) 21 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_chroot.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | 3 | from rsyscall.stdlib import mkdtemp 4 | from rsyscall.sys.mount import MS 5 | from rsyscall.sched import CLONE 6 | from rsyscall.unistd import O 7 | import unittest 8 | 9 | class TestChroot(TrioTestCase): 10 | async def asyncSetUp(self) -> None: 11 | self.tmpdir = await mkdtemp(self.process) 12 | self.process = await self.process.clone(CLONE.NEWUSER|CLONE.NEWNS) 13 | 14 | async def asyncTearDown(self) -> None: 15 | await self.tmpdir.cleanup() 16 | 17 | async def test_basic(self) -> None: 18 | await self.process.mkdir(self.tmpdir/"proc") 19 | await self.process.mount("/proc", self.tmpdir/"proc", "", MS.BIND, "") 20 | await self.process.task.chroot(await self.process.ptr(self.tmpdir)) 21 | await self.process.task.open(await self.process.ptr("/proc/self"), O.RDONLY) 22 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_user.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.fcntl import O 3 | from rsyscall.sys.prctl import * 4 | from rsyscall.sys.capability import * 5 | from rsyscall.sched import CLONE 6 | 7 | class TestUser(TrioTestCase): 8 | async def asyncSetUp(self) -> None: 9 | self.process = await self.process.clone(CLONE.NEWUSER) 10 | 11 | async def asyncTearDown(self) -> None: 12 | await self.process.exit(0) 13 | 14 | async def test_ambient_caps(self) -> None: 15 | hdr = await self.process.ptr(CapHeader()) 16 | data_ptr = await self.process.task.capget(hdr, await self.process.malloc(CapData)) 17 | data = await data_ptr.read() 18 | data.inheritable.add(CAP.SYS_ADMIN) 19 | await self.process.task.capset(hdr, await data_ptr.write(data)) 20 | await self.process.task.prctl(PR.CAP_AMBIENT, PR_CAP_AMBIENT.RAISE, CAP.SYS_ADMIN) 21 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_inotify.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | 3 | from rsyscall.sys.inotify import * 4 | from rsyscall.inotify_watch import Inotify 5 | from rsyscall.fcntl import O 6 | from rsyscall.stdlib import mkdtemp 7 | 8 | class TestInotify(TrioTestCase): 9 | async def asyncSetUp(self) -> None: 10 | self.tmpdir = await mkdtemp(self.process) 11 | self.ify = await Inotify.make(self.process) 12 | 13 | async def asyncTearDown(self) -> None: 14 | await self.tmpdir.cleanup() 15 | 16 | async def test_create(self) -> None: 17 | watch = await self.ify.add(self.tmpdir, IN.CREATE) 18 | name = "foo" 19 | fd = await self.process.task.open(await self.process.task.ptr(self.tmpdir/name), O.CREAT|O.EXCL) 20 | event = await 
watch.wait_until_event(IN.CREATE, name) 21 | self.assertEqual(event.name, name) 22 | self.assertEqual(event.mask, IN.CREATE) 23 | -------------------------------------------------------------------------------- /research/nowait.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main() { 11 | int fds[2]; 12 | if (pipe(fds) < 0) { 13 | err(1, "pipe failed"); 14 | }; 15 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) { 16 | err(1, "pipe failed"); 17 | }; 18 | write(fds[1], "foo", 3); 19 | char buf[4096]; 20 | struct iovec iov = { .iov_base = &buf, .iov_len = sizeof(buf) }; 21 | printf("reading\n"); 22 | int ret = preadv2(fds[0], &iov, 1, -1, RWF_NOWAIT); 23 | warn("read! got %d", ret); 24 | errno = 0; 25 | printf("reading stdin\n"); 26 | ret = preadv2(0, &iov, 1, -1, RWF_NOWAIT); 27 | warn("read! got %d", ret); 28 | printf("reading blocking...\n"); 29 | errno = 0; 30 | ret = preadv2(fds[0], &iov, 1, -1, 0); 31 | warn("read! got %d", ret); 32 | } 33 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "A very basic flake"; 3 | 4 | inputs = { nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; }; 5 | 6 | outputs = { self, nixpkgs }: 7 | 8 | let 9 | systems = [ "x86_64-linux" "aarch64-linux" "i686-linux" "x86_64-darwin" ]; 10 | forAllSystems = f: nixpkgs.lib.genAttrs systems (system: f system); 11 | nixpkgsFor = forAllSystems (system: 12 | import nixpkgs { 13 | inherit system; 14 | overlays = [ self.overlay ]; 15 | }); 16 | 17 | in 18 | { 19 | overlay = import ./overlay.nix; 20 | 21 | packages = forAllSystems (system: with (nixpkgsFor.${system}); { 22 | inherit librsyscall; 23 | rsyscall = python3Packages.rsyscall; 24 | python38-rsyscall = python38Packages.rsyscall; 25 | python39-rsyscall = python39Packages.rsyscall; 26 | }); 27 | 28 | defaultPackage = forAllSystems (system: self.packages.${system}.rsyscall); 29 | }; 30 | } 31 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='rsyscall', 4 | version='0.0.3', 5 | description='A library for making system calls remotely, through another process, which may be located on a remote host', 6 | classifiers=[ 7 | "Programming Language :: Python :: 3", 8 | "License :: OSI Approved :: MIT License", 9 | "Operating System :: POSIX :: Linux", 10 | ], 11 | keywords='linux syscall distributed', 12 | url='https://github.com/catern/rsyscall', 13 | author='catern', 14 | author_email='sbaugh@catern.com', 15 | license='MIT', 16 | cffi_modules=["ffibuilder.py:ffibuilder"], 17 | packages=find_packages(), 18 | install_requires=["cffi", "trio", "typeguard"], 19 | extras_require={ 20 | "rsysapps": ["h11"], 21 | }, 22 | nixdeps={'rsyscall._nixdeps': ['nix', 'librsyscall', 'openssh', 'coreutils']}, 23 | include_package_data=True, 24 | ) 25 | -------------------------------------------------------------------------------- /research/sockopen.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include /* Definition of AT_* constants */ 7 | #include 8 | #include 9 
| #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | int main() { 17 | int ret; 18 | int sockfd; 19 | sockfd = ret = socket(AF_UNIX, SOCK_STREAM, 0); 20 | if (ret < 0) err(1, "socket"); 21 | 22 | struct sockaddr_un sockpath = { 23 | .sun_family = AF_UNIX, 24 | .sun_path = {}, 25 | }; 26 | strcpy(sockpath.sun_path, "hello.sock"); 27 | ret = bind(sockfd, &sockpath, sizeof(sockpath)); 28 | if (ret < 0) err(1, "bind"); 29 | ret = listen(sockfd, 10); 30 | if (ret < 0) err(1, "listen"); 31 | 32 | ret = open("hello.sock", O_RDWR); 33 | if (ret < 0) err(1, "open %d", errno); 34 | } 35 | -------------------------------------------------------------------------------- /python/rsyscall/linux/fs.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from __future__ import annotations 3 | from rsyscall._raw import ffi, lib # type: ignore 4 | from rsyscall.struct import Struct 5 | import dataclasses 6 | import enum 7 | import typing as t 8 | if t.TYPE_CHECKING: 9 | from rsyscall.handle import FileDescriptor 10 | else: 11 | FileDescriptor = object 12 | 13 | class FI(enum.IntEnum): 14 | CLONERANGE = lib.FICLONERANGE 15 | CLONE = lib.FICLONE 16 | 17 | @dataclasses.dataclass 18 | class FileCloneRange(Struct): 19 | src_fd: FileDescriptor 20 | src_offset: int 21 | src_length: int 22 | dest_offset: int 23 | 24 | def to_bytes(self) -> bytes: 25 | return bytes(ffi.buffer(ffi.new('struct file_clone_range const*', { 26 | "src_fd": self.src_fd, 27 | "src_offset": self.src_offset, 28 | "src_length": self.src_length, 29 | "dest_offset": self.dest_offset, 30 | }))) 31 | 32 | @classmethod 33 | def sizeof(cls) -> int: 34 | return ffi.sizeof('struct file_clone_range') 35 | -------------------------------------------------------------------------------- /python/rsyscall/linux/rtnetlink.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | import pyroute2.netlink.rtnl as rtnl 3 | import enum 4 | 5 | __all__ = [ 6 | "RTMGRP", 7 | ] 8 | 9 | class RTMGRP(enum.IntFlag): 10 | NONE = rtnl.RTMGRP_NONE 11 | LINK = rtnl.RTMGRP_LINK 12 | NOTIFY = rtnl.RTMGRP_NOTIFY 13 | NEIGH = rtnl.RTMGRP_NEIGH 14 | TC = rtnl.RTMGRP_TC 15 | IPV4_IFADDR = rtnl.RTMGRP_IPV4_IFADDR 16 | IPV4_MROUTE = rtnl.RTMGRP_IPV4_MROUTE 17 | IPV4_ROUTE = rtnl.RTMGRP_IPV4_ROUTE 18 | IPV4_RULE = rtnl.RTMGRP_IPV4_RULE 19 | IPV6_IFADDR = rtnl.RTMGRP_IPV6_IFADDR 20 | IPV6_MROUTE = rtnl.RTMGRP_IPV6_MROUTE 21 | IPV6_ROUTE = rtnl.RTMGRP_IPV6_ROUTE 22 | IPV6_IFINFO = rtnl.RTMGRP_IPV6_IFINFO 23 | DECnet_IFADDR = rtnl.RTMGRP_DECnet_IFADDR 24 | NOP2 = rtnl.RTMGRP_NOP2 25 | DECnet_ROUTE = rtnl.RTMGRP_DECnet_ROUTE 26 | DECnet_RULE = rtnl.RTMGRP_DECnet_RULE 27 | NOP4 = rtnl.RTMGRP_NOP4 28 | IPV6_PREFIX = rtnl.RTMGRP_IPV6_PREFIX 29 | IPV6_RULE = rtnl.RTMGRP_IPV6_RULE 30 | MPLS_ROUTE = rtnl.RTMGRP_MPLS_ROUTE 31 | 32 | -------------------------------------------------------------------------------- /python/rsysapps/dnspython_LUA.py: -------------------------------------------------------------------------------- 1 | import dns.rdata 2 | import dns.tokenizer 3 | 4 | class LUA(dns.rdata.Rdata): 5 | __slots__ = ['type', 'lua'] 6 | 7 | def __init__(self, rdclass, rdtype, type: str, lua: str) -> None: 8 | super().__init__(rdclass, rdtype) 9 | self.type = type 10 | self.lua = lua 11 | 12 | def to_text(self, origin=None, relativize=True, **kw): 13 | return self.type + " \"" + self.lua + "\"" 14 | 15 | @classmethod 16 | def from_text(cls, rdclass, 
rdtype, tok: dns.tokenizer.Tokenizer, origin=None, relativize=True): 17 | typ = tok.get_identifier() 18 | # now we spin in a loop, getting tokens, skipping whitespace... 19 | # until we get an eol? 20 | # oh, until we see an eol token, yes 21 | cur = tok.get() 22 | if cur.is_eol_or_eof(): 23 | raise Exception("expecting some lua code") 24 | lua = cur.value 25 | while True: 26 | cur = tok.get() 27 | if cur.is_eol_or_eof(): 28 | break 29 | lua += " " + cur.value 30 | return cls(rdclass, rdtype, typ, lua) 31 | -------------------------------------------------------------------------------- /python/rsyscall/sys/ioctl.py: -------------------------------------------------------------------------------- 1 | """`#include ` 2 | 3 | Abandon all hope, ye who enter here. 4 | 5 | """ 6 | import typing as t 7 | import errno 8 | 9 | #### Classes #### 10 | from rsyscall.handle.fd import BaseFileDescriptor 11 | from rsyscall.handle.pointer import Pointer 12 | 13 | class IoctlFileDescriptor(BaseFileDescriptor): 14 | async def ioctl(self, request: int, arg: Pointer) -> int: 15 | self._validate() 16 | arg._validate() 17 | try: 18 | return (await _ioctl(self.task.sysif, self.near, request, arg.near)) 19 | except OSError as e: 20 | if e.errno == errno.ENOTTY: 21 | e.filename = request 22 | raise 23 | 24 | #### Raw syscalls #### 25 | import rsyscall.near.types as near 26 | from rsyscall.near.sysif import SyscallInterface 27 | from rsyscall.sys.syscall import SYS 28 | 29 | async def _ioctl(sysif: SyscallInterface, fd: near.FileDescriptor, 30 | request: int, arg: t.Optional[t.Union[int, near.Address]]=None) -> int: 31 | if arg is None: 32 | arg = 0 33 | return (await sysif.syscall(SYS.ioctl, fd, request, arg)) 34 | 35 | -------------------------------------------------------------------------------- /research/mmap_growsdown.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void touch(char* p) { 7 | printf("writing to %p\n", p); 8 | strcpy(p, "hello world"); 9 | printf("written data: %s\n", p); 10 | }; 11 | 12 | int main() { 13 | char *buf, *p; 14 | buf = mmap((void *) 0x80000000, 4096, PROT_READ|PROT_WRITE, 15 | MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN, -1, 0); 16 | printf("addr: %p\n", buf); 17 | 18 | // man mmap says, when MAP_GROWSDOWN is set: "The return address 19 | // is one page lower than the memory area that is actually created 20 | // in the process's virtual address space." 21 | 22 | // buf, therefore is one page lower than our mapping: so our mapping is buf + 4096. 23 | // So this should succeed. 24 | touch(buf + 4096 + 128); 25 | 26 | // man mmap says, when MAP_GROWSDOWN is set: "Touching an address 27 | // in the "guard" page below the mapping will cause the mapping to 28 | // grow by a page. 29 | // So this should grow the mapping: 30 | touch(buf + 1024); 31 | // and so this should succeed. 
32 | touch(buf - 1024); 33 | } 34 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_benchmark.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall import Process, Command 3 | from rsyscall.sys.wait import W 4 | import typing as t 5 | import cProfile 6 | import pstats 7 | 8 | async def rsys_run(parent: Process, cmd: Command, count: int) -> None: 9 | for _ in range(count): 10 | process = await parent.fork() 11 | child = await process.exec(cmd) 12 | await child.waitpid(W.EXITED) 13 | 14 | class TestBenchmark(TrioTestCase): 15 | async def test_bench(self): 16 | "Run a simple clone and exec case inside cProfile (without asserting the results)" 17 | 18 | await self.process.environ.as_arglist(self.process.task) 19 | cmd = (await self.process.environ.which("echo")).args("-n") 20 | pr = cProfile.Profile() 21 | warm_up_runs = 1 22 | real_runs = 1 23 | await rsys_run(self.process, cmd, warm_up_runs) 24 | pr.enable() 25 | await rsys_run(self.process, cmd, real_runs) 26 | pr.disable() 27 | # pr.print_stats(sort='cumtime') 28 | # ps = pstats.Stats(pr).strip_dirs().sort_stats('cumulative') 29 | # ps.print_callees() 30 | -------------------------------------------------------------------------------- /python/rsyscall/sys/eventfd.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from __future__ import annotations 3 | from rsyscall._raw import lib # type: ignore 4 | from rsyscall.near.sysif import SyscallInterface 5 | from rsyscall.sys.syscall import SYS 6 | import enum 7 | import rsyscall.near.types as near 8 | import typing as t 9 | from rsyscall.handle.fd import BaseFileDescriptor, FileDescriptorTask 10 | 11 | __all__ = [ 12 | "EFD", 13 | "EventFileDescriptor", 14 | "EventfdTask", 15 | ] 16 | 17 | class EFD(enum.IntFlag): 18 | NONE = 0 19 | CLOEXEC = lib.EFD_CLOEXEC 20 | NONBLOCK = lib.EFD_NONBLOCK 21 | SEMAPHORE = lib.EFD_SEMAPHORE 22 | 23 | async def _eventfd(sysif: SyscallInterface, initval: int, flags: EFD) -> near.FileDescriptor: 24 | "The raw, near, eventfd syscall." 
25 | return near.FileDescriptor(await sysif.syscall(SYS.eventfd2, initval, flags)) 26 | 27 | T_fd = t.TypeVar('T_fd', bound='EventFileDescriptor') 28 | class EventFileDescriptor(BaseFileDescriptor): 29 | pass 30 | 31 | class EventfdTask(FileDescriptorTask[T_fd]): 32 | async def eventfd(self, initval: int, flags: EFD=EFD.NONE) -> T_fd: 33 | return self.make_fd_handle(await _eventfd(self.sysif, initval, flags|EFD.CLOEXEC)) 34 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_misc.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import unittest 3 | 4 | from rsyscall.tests.trio_test_case import TrioTestCase 5 | 6 | from rsyscall.thread import do_cloexec_except 7 | 8 | from rsyscall.tests.utils import do_async_things 9 | from rsyscall.fcntl import O 10 | from rsyscall.unistd import Pipe 11 | from rsyscall.sched import CLONE 12 | 13 | class TestMisc(TrioTestCase): 14 | async def asyncSetUp(self) -> None: 15 | self.process = await self.process.fork() 16 | 17 | async def asyncTearDown(self) -> None: 18 | await self.process.exit(0) 19 | 20 | async def test_do_cloexec_except(self) -> None: 21 | pipe = await (await self.process.task.pipe(await self.process.task.malloc(Pipe))).read() 22 | close_set = set([fd.near for fd in self.process.task.fd_handles]) 23 | close_set.remove(pipe.read.near) 24 | await do_cloexec_except(self.process, close_set) 25 | 26 | data = await self.process.task.ptr(b"foo") 27 | with self.assertRaises(OSError): 28 | # this side was closed due to being cloexec 29 | await pipe.read.read(data) 30 | with self.assertRaises(BrokenPipeError): 31 | # this side is still open, but gets EPIPE 32 | await pipe.write.write(data) 33 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_repl.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from rsyscall.tests.trio_test_case import TrioTestCase 3 | from arepl import * 4 | from rsyscall.wish import serve_repls 5 | from rsyscall.stdlib import mkdtemp 6 | from rsyscall.sys.socket import AF, SOCK 7 | from rsyscall.sys.un import SockaddrUn 8 | import unittest 9 | import typing as t 10 | 11 | class TestREPL(TrioTestCase): 12 | async def asyncSetUp(self) -> None: 13 | self.tmpdir = await mkdtemp(self.process, "test_stub") 14 | self.sock_path = self.tmpdir/"repl.sock" 15 | 16 | async def test_repl(self) -> None: 17 | sockfd = await self.process.make_afd(await self.process.socket(AF.UNIX, SOCK.STREAM|SOCK.NONBLOCK)) 18 | addr = await self.process.task.ptr(await SockaddrUn.from_path(self.process, self.sock_path)) 19 | await sockfd.handle.bind(addr) 20 | await sockfd.handle.listen(10) 21 | clientfd = await self.process.make_afd(await self.process.socket(AF.UNIX, SOCK.STREAM|SOCK.NONBLOCK)) 22 | await clientfd.connect(addr) 23 | await clientfd.write_all_bytes(b"foo = 11\n") 24 | await clientfd.write_all_bytes(b"return foo * 2\n") 25 | ret = await serve_repls(sockfd, {'locals': locals()}, int, "hello") 26 | self.assertEqual(ret, 22) 27 | -------------------------------------------------------------------------------- /python/rsyscall/path.py: -------------------------------------------------------------------------------- 1 | "A slightly improved version of `pathlib.PurePosixPath`" 2 | from __future__ import annotations 3 | import pathlib 4 | 5 | class Path(pathlib.PurePosixPath): 6 | 
"""A version of `pathlib.PurePosixPath` which is safe to inherit from 7 | 8 | `pathlib` does a lot of crazy stuff which makes it hard to inherit from. This 9 | class insulates us from that stuff, so it can be inherited from naively. 10 | 11 | We use this as Path, rather than using `pathlib.Path`, to avoid confusion about 12 | `pathlib.Path`'s filesystem-interaction methods, which are not rsyscall-aware. 13 | 14 | """ 15 | def __new__(cls, *args, **kwargs) -> Path: 16 | """Override `pathlib.PurePath.__new__` to restore default behavior 17 | 18 | `pathlib.PurePath` inherits from `object`, so we just use `object.__new__`. 19 | """ 20 | return object.__new__(cls) 21 | 22 | def __init__(self, *args) -> None: 23 | """Override `pathlib.PurePath.__init__` to create more sane behavior 24 | 25 | We copy a small amount of code from `pathlib.PurePath._from_parts` to implement this 26 | method. 27 | """ 28 | drv, root, parts = self._parse_args(args) # type: ignore 29 | self._drv = drv 30 | self._root = root 31 | self._parts = parts 32 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_nix.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | 3 | from rsyscall.nix import * 4 | from rsyscall.sched import CLONE 5 | from rsyscall.stdlib import mkdtemp 6 | import rsyscall._nixdeps.nix 7 | 8 | class TestNix(TrioTestCase): 9 | async def asyncSetUp(self) -> None: 10 | self.parent, self.process = self.process, await self.process.fork() 11 | self.tmpdir = await mkdtemp(self.parent, "test_nix") 12 | await enter_nix_container(self.parent, rsyscall._nixdeps.nix.closure, self.process, self.tmpdir) 13 | 14 | async def asyncTearDown(self) -> None: 15 | await self.tmpdir.cleanup() 16 | 17 | async def test_true(self) -> None: 18 | true = (await deploy(self.process, rsyscall._nixdeps.coreutils.closure)).bin('true') 19 | await self.process.run(true) 20 | 21 | async def test_with_daemon(self) -> None: 22 | nix_daemon = (await deploy(self.process, rsyscall._nixdeps.nix.closure)).bin("nix-daemon") 23 | nd_child = await (await self.process.fork()).exec(nix_daemon) 24 | self.process.environ['NIX_REMOTE'] = 'daemon' 25 | true = (await deploy(self.process, rsyscall._nixdeps.coreutils.closure)).bin('true') 26 | await self.process.run(true) 27 | await nd_child.kill() 28 | await nd_child.wait() 29 | -------------------------------------------------------------------------------- /python/rsyscall/unistd/cwd.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | 3 | #### Classes #### 4 | from rsyscall.handle.pointer import WrittenPointer 5 | from rsyscall.handle.fd import BaseFileDescriptor, FileDescriptorTask 6 | import os 7 | 8 | class CWDTask(FileDescriptorTask): 9 | async def chdir(self, path: WrittenPointer[t.Union[str, os.PathLike]]) -> None: 10 | with path.borrow(self) as path_n: 11 | await _chdir(self.sysif, path_n) 12 | 13 | async def fchdir(self, fd: BaseFileDescriptor) -> None: 14 | with fd.borrow(self) as fd_n: 15 | await _fchdir(self.sysif, fd_n) 16 | 17 | async def chroot(self, path: WrittenPointer[t.Union[str, os.PathLike]]) -> None: 18 | with path.borrow(self) as path_n: 19 | await _chroot(self.sysif, path_n) 20 | 21 | #### Raw syscalls #### 22 | import rsyscall.near.types as near 23 | from rsyscall.near.sysif import SyscallInterface 24 | from rsyscall.sys.syscall import SYS 25 | 26 | async def _chdir(sysif: 
SyscallInterface, path: near.Address) -> None: 27 | await sysif.syscall(SYS.chdir, path) 28 | 29 | async def _fchdir(sysif: SyscallInterface, fd: near.FileDescriptor) -> None: 30 | await sysif.syscall(SYS.fchdir, fd) 31 | 32 | async def _chroot(sysif: SyscallInterface, path: near.Address) -> None: 33 | await sysif.syscall(SYS.chroot, path) 34 | -------------------------------------------------------------------------------- /python/rsyscall/linux/netlink.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from rsyscall._raw import ffi, lib # type: ignore 3 | from rsyscall.sys.socket import Sockaddr, AF, _register_sockaddr 4 | import typing as t 5 | import enum 6 | from dataclasses import dataclass 7 | 8 | __all__ = [ 9 | "NETLINK", 10 | "SockaddrNl", 11 | ] 12 | 13 | class NETLINK(enum.IntEnum): 14 | ROUTE = lib.NETLINK_ROUTE 15 | 16 | @dataclass 17 | class SockaddrNl(Sockaddr): 18 | # not an actual process pid, but rather "port id", which is unique per netlink socket 19 | pid: int 20 | groups: int 21 | family = AF.NETLINK 22 | 23 | def to_bytes(self) -> bytes: 24 | struct = ffi.new('struct sockaddr_nl*', (AF.NETLINK, 0, self.pid, self.groups)) 25 | return bytes(ffi.buffer(struct)) 26 | 27 | T = t.TypeVar('T', bound='SockaddrNl') 28 | @classmethod 29 | def from_bytes(cls: t.Type[T], data: bytes) -> T: 30 | if len(data) < cls.sizeof(): 31 | raise Exception("data too small", data) 32 | struct = ffi.cast('struct sockaddr_nl*', ffi.from_buffer(data)) 33 | cls.check_family(AF(struct.nl_family)) 34 | return cls(struct.nl_pid, struct.nl_groups) 35 | 36 | @classmethod 37 | def sizeof(cls) -> int: 38 | return ffi.sizeof('struct sockaddr_nl') 39 | _register_sockaddr(SockaddrNl) 40 | -------------------------------------------------------------------------------- /research/pidns.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int child(void *arg) { 11 | sleep(999); 12 | return 0; 13 | } 14 | 15 | int main(int argc, char **argv) { 16 | int ret; 17 | bool should_use_newpid = false; 18 | if (argc > 1) { 19 | should_use_newpid = true; 20 | } 21 | sigset_t sigset; 22 | ret = sigaddset(&sigset, SIGCHLD); 23 | if (ret < 0) err(1, "sigaddset"); 24 | ret = sigprocmask(SIG_BLOCK, &sigset, NULL); 25 | if (ret < 0) err(1, "sigprocmask"); 26 | int flags = CLONE_VM|CLONE_SIGHAND|CLONE_NEWUSER|SIGCHLD; 27 | if (should_use_newpid) { 28 | printf("using CLONE_NEWPID\n"); 29 | flags |= CLONE_NEWPID; 30 | } else { 31 | printf("not using CLONE_NEWPID\n"); 32 | } 33 | printf("pid: %d\n", getpid()); 34 | int child_pid = ret = clone(child, malloc(4096), flags, 0); 35 | if (ret < 0) err(1, "clone"); 36 | printf("child_pid: %d\n", child_pid); 37 | ret = kill(child_pid, SIGKILL); 38 | if (ret < 0) err(1, "kill"); 39 | int signum; 40 | printf("waiting for signal\n"); 41 | ret = sigwait(&sigset, &signum); 42 | if (ret < 0) err(1, "sigwait"); 43 | printf("got signal: %d\n", signum); 44 | } 45 | -------------------------------------------------------------------------------- /python/setup.cfg: -------------------------------------------------------------------------------- 1 | [pydocstyle] 2 | match=(?!(test_|near)).*\.py 3 | # D105: I don't see the need for docstrings on magic methods 4 | # D203, D204, D213: Insists on adding unnecessary empty lines 5 | # D300: insists on using """ not ", but " is nicer for one-liners 
6 | ignore=D105,D203,D204,D213,D300 7 | ignore_decorators=overload 8 | 9 | [mypy] 10 | python_version = 3.9 11 | check_untyped_defs = True 12 | 13 | [mypy-trio._core._run] 14 | ignore_missing_imports = True 15 | 16 | [mypy-trio] 17 | ignore_missing_imports = True 18 | 19 | [mypy-trio.hazmat] 20 | ignore_missing_imports = True 21 | 22 | [mypy-prctl] 23 | ignore_missing_imports = True 24 | 25 | [mypy-typeguard] 26 | ignore_missing_imports = True 27 | 28 | [mypy-h11] 29 | ignore_missing_imports = True 30 | 31 | [mypy-pyroute2] 32 | ignore_missing_imports = True 33 | 34 | [mypy-pyroute2.netlink] 35 | ignore_missing_imports = True 36 | 37 | [mypy-pyroute2.netlink.rtnl] 38 | ignore_missing_imports = True 39 | 40 | [mypy-dns] 41 | ignore_missing_imports = True 42 | 43 | [mypy-dns.rdata] 44 | ignore_missing_imports = True 45 | 46 | [mypy-dns.tokenizer] 47 | ignore_missing_imports = True 48 | 49 | [mypy-dns.zone] 50 | ignore_missing_imports = True 51 | 52 | [mypy-dns.rdataset] 53 | ignore_missing_imports = True 54 | 55 | [mypy-outcome] 56 | ignore_missing_imports = True 57 | 58 | [mypy-nixdeps] 59 | ignore_missing_imports = True 60 | -------------------------------------------------------------------------------- /python/package.nix: -------------------------------------------------------------------------------- 1 | { pythonPackages 2 | , librsyscall 3 | , nix 4 | , socat 5 | , pkg-config 6 | , openssh 7 | , coreutils 8 | }: 9 | 10 | with pythonPackages; 11 | buildPythonPackage { 12 | name = "rsyscall"; 13 | src = ./.; 14 | checkInputs = [ 15 | pydocstyle 16 | mypy 17 | typing-extensions 18 | pytest 19 | socat 20 | ]; 21 | # ssh tests don't work because the build user's login shell is /noshell :( 22 | # net tests don't work because /dev/net/tun doesn't exist 23 | # nix tests don't work because something about "error: creating directory '/nix/var': Permission denied" 24 | # test_pgid doesn't work because /proc/sys/kernel/ns_last_pid isn't available for some reason 25 | # fuse tests don't work because /dev/fuse doesn't exist 26 | checkPhase = '' 27 | cd $out 28 | pytest -k 'not test_ssh and not test_net and not test_nix and not test_pgid and not test_fuse' 29 | ''; 30 | nativeBuildInputs = [ 31 | pkg-config 32 | ipython 33 | (pdoc3.overridePythonAttrs (_: { doCheck = false; })) 34 | ]; 35 | buildInputs = [ 36 | cffi 37 | librsyscall 38 | ]; 39 | propagatedBuildInputs = [ 40 | trio 41 | typeguard 42 | pyroute2 43 | outcome 44 | nixdeps 45 | ]; 46 | exportReferencesGraph = [ 47 | "nix" nix 48 | "librsyscall" librsyscall 49 | "openssh" openssh 50 | "coreutils" coreutils 51 | ]; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /python/arepl/astcodeop.py: -------------------------------------------------------------------------------- 1 | """Like the stdlib codeop module, but returning an AST instead. 2 | 3 | This is useful because we can properly deal with `await`s at the AST level. 4 | 5 | We lack functionality precisely equivalent to `codeop.Compile` or `codeop.CommandCompiler`, 6 | because the AST object returned from `compile(ONLY_AST)` doesn't expose the information to 7 | us about what `__future__` statements the compile process has seen. To properly implement 8 | those classes, either the return value of `compile(ONLY_AST)` needs to contain that 9 | information, or we need to reimplement the simple `__future__` statement scanner contained 10 | in the Python core. 
11 | 12 | """ 13 | import codeop 14 | import ast 15 | import typing as t 16 | 17 | def _ast_compile(source, filename, symbol) -> t.Any: 18 | PyCF_DONT_IMPLY_DEDENT = codeop.PyCF_DONT_IMPLY_DEDENT # type: ignore 19 | return compile(source, filename, symbol, ast.PyCF_ONLY_AST|PyCF_DONT_IMPLY_DEDENT) 20 | 21 | def ast_compile_command(source: str, filename="", symbol="single") -> t.Any: 22 | "Like codeop.compile_command, but returns an AST instead." 23 | _maybe_compile = codeop._maybe_compile # type: ignore 24 | return _maybe_compile(_ast_compile, source, filename, symbol) 25 | 26 | def ast_compile_interactive(source: str) -> t.Optional[ast.Interactive]: 27 | "Compiles this single interactive statement into an AST" 28 | return ast_compile_command(source, "", "single") 29 | -------------------------------------------------------------------------------- /research/sigbovik2020/README.org: -------------------------------------------------------------------------------- 1 | This directory contains information 2 | related to the "Type-directed decompilation of shell scripts" paper in SIGBOVIK 2020. 3 | A copy of the paper is in [[file:paper.pdf][paper.pdf]]. 4 | 5 | The tool described in that paper is called "symsh", 6 | so named because, in some sense, 7 | it's providing the ability to pass-in an arbitrary "*symantics*" (to use Oleg's TFS terminology) 8 | for Unix *shell* scripts. 9 | 10 | Really it works for arbitrary Unix executables, and the symantics match the Unix interface, 11 | so symunix might be a better name, 12 | but that doesn't roll off the tongue quite as well. 13 | 14 | Note that the rest of this repo is essentially unrelated to symsh, 15 | except that it's some pre-existing research which we used to implement symsh. 16 | (It would be uncouth to shill for one's real research in a SIGBOVIK paper, but... 17 | rsyscall sure made it a lot easier to implement this!) 18 | 19 | The implementation is entirely contained in 20 | [[file:../../python/rsyscall/scripts/symsh.py][symsh.py]]. 21 | 22 | A simple example can be run with 23 | =python -m rsyscall.scripts.symsh example=. 24 | 25 | You can use symsh on your own executables with 26 | =python -m rsyscall.scripts.symsh exec [path_to_some_executable]=; 27 | for example, =example.sh= in this directory. 28 | 29 | Of course, as the paper describes, it can be used for more than just shell scripts. 30 | Try compiling =prog.c= in this directory and running symsh on it. 
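(For instance: =gcc -o prog prog.c=, then =python -m rsyscall.scripts.symsh exec ./prog=;
the exact compiler invocation here is only illustrative.)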
31 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_stdinboot.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from rsyscall.tests.trio_test_case import TrioTestCase 4 | from rsyscall.tasks.stdin_bootstrap import * 5 | 6 | from rsyscall.tests.utils import do_async_things 7 | from rsyscall import Command, AsyncChildPid, Process 8 | 9 | class TestStdinboot(TrioTestCase): 10 | command: Command 11 | local_child: AsyncChildPid 12 | remote: Process 13 | 14 | @classmethod 15 | async def asyncSetUpClass(cls) -> None: 16 | path = await stdin_bootstrap_path_with_nix(cls.process) 17 | cls.command = Command(path, ['rsyscall-stdin-bootstrap'], {}) 18 | cls.local_child, cls.remote = await stdin_bootstrap(cls.process, cls.command) 19 | 20 | @classmethod 21 | async def asyncTearDownClass(cls) -> None: 22 | await cls.remote.exit(0) 23 | await cls.local_child.wait() 24 | 25 | async def test_async(self) -> None: 26 | await do_async_things(self, self.remote.epoller, self.remote) 27 | 28 | async def test_nest(self) -> None: 29 | child, new_process = await stdin_bootstrap(self.remote, self.command) 30 | async with child: 31 | await do_async_things(self, new_process.epoller, new_process) 32 | 33 | async def test_nest_multiple(self) -> None: 34 | for i in range(5): 35 | child = await self.remote.fork() 36 | await do_async_things(self, child.epoller, child) 37 | await child.exit(0) 38 | -------------------------------------------------------------------------------- /python/rsyscall/unistd/exec.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | import trio 3 | 4 | #### Raw syscalls #### 5 | import rsyscall.near.types as near 6 | from rsyscall.near.sysif import SyscallInterface, SyscallHangup 7 | from rsyscall.sys.syscall import SYS 8 | 9 | async def _execve(sysif: SyscallInterface, 10 | path: near.Address, argv: near.Address, envp: near.Address) -> None: 11 | def handle(exn): 12 | if isinstance(exn, SyscallHangup): 13 | return None 14 | else: 15 | return exn 16 | with trio.MultiError.catch(handle): 17 | await sysif.syscall(SYS.execve, path, argv, envp) 18 | 19 | async def _execveat(sysif: SyscallInterface, 20 | dirfd: t.Optional[near.FileDescriptor], path: near.Address, 21 | argv: near.Address, envp: near.Address, flags: int) -> None: 22 | if dirfd is None: 23 | dirfd = AT.FDCWD # type: ignore 24 | def handle(exn): 25 | if isinstance(exn, SyscallHangup): 26 | return None 27 | else: 28 | return exn 29 | with trio.MultiError.catch(handle): 30 | await sysif.syscall(SYS.execveat, dirfd, path, argv, envp, flags) 31 | 32 | async def _exit(sysif: SyscallInterface, status: int) -> None: 33 | def handle(exn): 34 | if isinstance(exn, SyscallHangup): 35 | return None 36 | else: 37 | return exn 38 | with trio.MultiError.catch(handle): 39 | await sysif.syscall(SYS.exit, status) 40 | -------------------------------------------------------------------------------- /python/rsyscall/near/__init__.py: -------------------------------------------------------------------------------- 1 | """Definitions of namespace-local identifiers, syscalls, and SyscallInterface 2 | 3 | These namespace-local identifiers are like near pointers, in systems 4 | with segmented memory. They are valid only within a specific segment 5 | (namespace). 6 | 7 | The syscalls are instructions, operating on near pointers and other 8 | arguments. 
9 | 10 | The SyscallInterface is the segment register override prefix, which is 11 | used with the instruction to say which segment register to use for the 12 | syscall. 13 | 14 | We don't know from a segment register override prefix alone that the 15 | near pointers we are passing to an instruction are valid pointers in 16 | the segment currently contained in the segment register. 17 | 18 | In terms of our actual classes: We don't know from a SyscallInterface 19 | alone that the identifiers we are passing to a syscall match the 20 | namespaces active in the task behind the SyscallInterface. 21 | 22 | (The task is like the segment register, in this analogy.) 23 | 24 | """ 25 | # re-exported namepsace-local identifiers 26 | from rsyscall.near.types import ( 27 | FileDescriptor, 28 | WatchDescriptor, 29 | Address, 30 | MemoryMapping, 31 | Pid, 32 | Pgid, 33 | ) 34 | # re-exported SyscallInterface 35 | from rsyscall.near.sysif import SyscallInterface, SyscallHangup 36 | __all__ = [ 37 | 'FileDescriptor', 38 | 'WatchDescriptor', 39 | 'Address', 40 | 'MemoryMapping', 41 | 'Pid', 42 | 'Pgid', 43 | 'SyscallInterface', 'SyscallHangup', 44 | ] 45 | -------------------------------------------------------------------------------- /python/arepl/tests/test_repl.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from arepl import * 3 | import typing as t 4 | import arepl.aeval 5 | import arepl.astcodeop 6 | 7 | T = t.TypeVar('T') 8 | def await_pure(awaitable: t.Awaitable[T]) -> T: 9 | iterable = awaitable.__await__() 10 | try: 11 | next(iterable) 12 | except StopIteration as e: 13 | return e.value 14 | else: 15 | raise Exception("this awaitable actually is impure! it yields!") 16 | 17 | async def anoop() -> None: 18 | return None 19 | 20 | class TestPure(unittest.TestCase): 21 | def test_add(self) -> None: 22 | async def test() -> None: 23 | repl = PureREPL({'anoop': anoop}) 24 | async def eval(line: str) -> t.Any: 25 | result = await repl.add_line(line + '\n') 26 | if isinstance(result, ExpressionResult): 27 | return result.value 28 | else: 29 | raise Exception("unexpected", result) 30 | self.assertEqual(await eval('1'), 1) 31 | self.assertEqual(await eval('1+1'), 2) 32 | await repl.add_line('foo = 1\n') 33 | self.assertEqual(await eval('foo*4'), 4) 34 | self.assertEqual(await eval('await anoop()'), None) 35 | await_pure(test()) 36 | 37 | def test_newlocals(self) -> None: 38 | astob = arepl.astcodeop.ast_compile_interactive("foo = 42") 39 | global_vars = {} 40 | await_pure(arepl.aeval.eval_single(astob, global_vars)) 41 | self.assertEqual(global_vars['foo'], 42) 42 | -------------------------------------------------------------------------------- /python/arepl/aeval.py: -------------------------------------------------------------------------------- 1 | "An async-supporting equivalent of eval(..., 'single')" 2 | from dataclasses import dataclass 3 | from arepl.acompile import compile_to_awaitable, _InternalResult 4 | import ast 5 | import typing as t 6 | 7 | class Result: 8 | pass 9 | 10 | @dataclass 11 | class ReturnResult(Result): 12 | "The statement returned a value." 13 | value: t.Any 14 | 15 | @dataclass 16 | class ExceptionResult(Result): 17 | "The statement raised an exception." 18 | exception: BaseException 19 | 20 | @dataclass 21 | class ExpressionResult(Result): 22 | "The statement was actually an expression, and evaluated to a value." 
23 | value: t.Any 24 | 25 | @dataclass 26 | class FallthroughResult(Result): 27 | "The statement was an assignment, or pass, or something, and we've fallen through, with nothing to print." 28 | pass 29 | 30 | async def eval_single(astob: ast.Interactive, global_vars: t.Dict[str, t.Any]) -> Result: 31 | "Compile and evaluate this snippet of AST, with these globals, and return its result" 32 | awaitable = compile_to_awaitable(astob, global_vars) 33 | try: 34 | val = await awaitable 35 | except _InternalResult as e: 36 | if e.is_expression: 37 | return ExpressionResult(e.value) 38 | else: 39 | return FallthroughResult() 40 | except BaseException as e: 41 | # We want to skip the innermost frame of the traceback, which shows "await awaitable". 42 | e.__traceback__ = e.__traceback__.tb_next # type: ignore 43 | return ExceptionResult(e) 44 | else: 45 | return ReturnResult(val) 46 | -------------------------------------------------------------------------------- /research/test_epoll_signalfd.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | int try(int ret) { 12 | if (ret < 0) err(EXIT_FAILURE, NULL); 13 | return ret; 14 | } 15 | 16 | void parent(long epfd) { 17 | printf("parent pid is %d\n", getpid()); 18 | sleep(1); 19 | printf("raising in parent\n"); 20 | // try(kill(getpid(), SIGPIPE)); 21 | printf("calling epoll_wait\n"); 22 | struct epoll_event receive_event; 23 | int events = try(epoll_wait(epfd, &receive_event, 1, -1)); 24 | printf("got %d events back from epoll_wait\n", events); 25 | } 26 | 27 | int child(void *arg) { 28 | printf("child pid is %d\n", getpid()); 29 | long epfd = (long) arg; 30 | sigset_t mask = {}; 31 | try(sigaddset(&mask, SIGPIPE)); 32 | try(sigprocmask(SIG_BLOCK, &mask, NULL)); 33 | int sigfd = try(signalfd(-1, &mask, SFD_NONBLOCK)); 34 | struct epoll_event monitor_event = { .events = EPOLLIN, .data = 0 }; 35 | try(epoll_ctl(epfd, EPOLL_CTL_ADD, sigfd, &monitor_event)); 36 | printf("signalfd added to epfd\n"); 37 | printf("raising in child\n"); 38 | // try(kill(getpid(), SIGPIPE)); 39 | sleep(100); 40 | } 41 | 42 | int main() { 43 | long epfd = try(epoll_create1(0)); 44 | sigset_t mask = {}; 45 | void *stack = malloc(4096) + 4096; 46 | try(sigaddset(&mask, SIGPIPE)); 47 | try(sigprocmask(SIG_BLOCK, &mask, NULL)); 48 | try(clone(child, stack, CLONE_VM|CLONE_SIGHAND, (void *) epfd)); 49 | parent(epfd); 50 | } 51 | -------------------------------------------------------------------------------- /research/getdents.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include /* Definition of AT_* constants */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include /* For SYS_xxx definitions */ 15 | 16 | struct linux_dirent64 { 17 | ino64_t d_ino; /* 64-bit inode number */ 18 | off64_t d_off; /* 64-bit offset to next structure */ 19 | unsigned short d_reclen; /* Size of this dirent */ 20 | unsigned char d_type; /* File type */ 21 | char d_name[]; /* Filename (null-terminated) */ 22 | }; 23 | 24 | int getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count) { 25 | return syscall(SYS_getdents64, fd, dirp, count); 26 | }; 27 | 28 | void print_dirent(struct linux_dirent64 *p) { 29 | warnx("dirent: inode %lu off %lu reclen %d type 
%d", p->d_ino, p->d_off, p->d_reclen, p->d_type); 30 | warnx("dirent: name %s", p->d_name); 31 | } 32 | 33 | int main() { 34 | int ret = open(".", O_DIRECTORY); 35 | if (ret < 0) err(1, "open"); 36 | char buf[4096]; 37 | ret = getdents64(ret, (struct linux_dirent64 *) buf, sizeof(buf)); 38 | if (ret < 0) err(1, "getdents"); 39 | warnx("getdents result %d", ret); 40 | struct linux_dirent64 *cur; 41 | char *curp = buf; 42 | int i = 0; 43 | while (curp < (buf+ret)) { 44 | cur = (struct linux_dirent64 *) curp; 45 | print_dirent(cur); 46 | curp = curp + cur->d_reclen; 47 | i++; 48 | if (i > 30) break; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_pipe.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | 3 | from rsyscall.sys.socket import MSG 4 | from rsyscall.sys.uio import IovecList 5 | from rsyscall.unistd import Pipe 6 | from rsyscall.fcntl import O 7 | 8 | class TestPipe(TrioTestCase): 9 | async def asyncSetUp(self) -> None: 10 | self.pipe = await (await self.process.task.pipe(await self.process.malloc(Pipe))).read() 11 | 12 | async def asyncTearDown(self) -> None: 13 | await self.pipe.read.close() 14 | await self.pipe.write.close() 15 | 16 | async def test_read_write(self): 17 | in_data = await self.process.ptr(b"hello") 18 | written, _ = await self.pipe.write.write(in_data) 19 | valid, _ = await self.pipe.read.read(written) 20 | self.assertEqual(in_data.value, await valid.read()) 21 | 22 | async def test_readv_writev(self): 23 | in_data = [b"hello", b"world"] 24 | iov = await self.process.ptr(IovecList([await self.process.ptr(data) for data in in_data])) 25 | written, partial, rest = await self.pipe.write.writev(iov) 26 | read, partial, rest = await self.pipe.read.readv(written) 27 | self.assertEqual(in_data, [await ptr.read() for ptr in read.value]) 28 | 29 | async def test_recv(self) -> None: 30 | """Sadly, recv doesn't work on pipes 31 | 32 | Which is a major bummer, because that would allow us to avoid 33 | messing with O_NONBLOCK stuff 34 | 35 | """ 36 | in_data = await self.process.ptr(b"hello") 37 | written, _ = await self.pipe.write.write(in_data) 38 | with self.assertRaises(OSError): 39 | valid, _ = await self.pipe.read.recv(written, MSG.NONE) 40 | -------------------------------------------------------------------------------- /c/src/rsyscall_x86_64.S: -------------------------------------------------------------------------------- 1 | #define __ASSEMBLY__ 2 | #include 3 | #include 4 | 5 | .globl rsyscall_raw_syscall 6 | rsyscall_raw_syscall: 7 | // Most of our parameters are already in the correct registers. 8 | // Fourth parameter for function calls is in %rcx, but should be in %r10 for system calls. 
9 | movq %rcx, %r10 10 | // System call number is passed on the stack 11 | movq 8(%rsp), %rax 12 | syscall 13 | ret 14 | 15 | .globl rsyscall_trampoline 16 | rsyscall_trampoline: 17 | // call prctl(PR_SET_PDEATHSIG, SIGTERM) so we die when our parent does 18 | movq $1, %rdi 19 | movq $SIGKILL, %rsi 20 | movq $__NR_prctl, %rax 21 | syscall 22 | // load argument registers from the stack 23 | pop %rdi 24 | pop %rsi 25 | pop %rdx 26 | pop %rcx 27 | pop %r8 28 | pop %r9 29 | // load function to call from stack 30 | pop %rax 31 | // call function; we assume we're aligned properly 32 | call *%rax 33 | // store return value on stack 34 | push %rax 35 | // exit thread cleanly 36 | movq $0, %rdi 37 | movq $0, %rsi 38 | movq $__NR_exit, %rax 39 | syscall 40 | 41 | .globl rsyscall_futex_helper 42 | rsyscall_futex_helper: 43 | // safely stash the 44 | movq %rdi, %r8 // futex address and 45 | movq %rsi, %r9 // futex value. 46 | // get my tid 47 | movq $__NR_gettid, %rax 48 | syscall 49 | // SIGSTOP myself 50 | movq %rax, %rdi 51 | movq $SIGSTOP, %rsi 52 | movq $__NR_tkill, %rax 53 | syscall 54 | // futex(futex_address, FUTEX_WAIT, futex_value) 55 | movq %r8, %rdi 56 | movq $0, %rsi // FUTEX_WAIT == 0 57 | movq %r9, %rdx 58 | movq $__NR_futex, %rax 59 | syscall 60 | // exit(0) 61 | movq $0, %rdi 62 | movq $__NR_exit, %rax 63 | syscall 64 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_cat.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | 3 | from rsyscall.unistd import Pipe 4 | from rsyscall.fcntl import O 5 | from rsyscall.sched import CLONE 6 | 7 | class TestCat(TrioTestCase): 8 | async def asyncSetUp(self) -> None: 9 | self.cat = await self.process.environ.which("cat") 10 | self.pipe_in = await (await self.process.task.pipe(await self.process.task.malloc(Pipe))).read() 11 | self.pipe_out = await (await self.process.task.pipe(await self.process.task.malloc(Pipe))).read() 12 | process = await self.process.fork() 13 | await process.task.inherit_fd(self.pipe_in.read).dup2(process.stdin) 14 | await process.task.inherit_fd(self.pipe_out.write).dup2(process.stdout) 15 | self.child = await process.exec(self.cat) 16 | 17 | async def test_cat_pipe(self) -> None: 18 | in_data = await self.process.task.ptr(b"hello") 19 | written, _ = await self.pipe_in.write.write(in_data) 20 | valid, _ = await self.pipe_out.read.read(written) 21 | self.assertEqual(in_data.value, await valid.read()) 22 | 23 | await self.pipe_in.write.close() 24 | await self.child.check() 25 | 26 | async def test_cat_async(self) -> None: 27 | stdin = await self.process.make_afd(self.pipe_in.write, set_nonblock=True) 28 | stdout = await self.process.make_afd(self.pipe_out.read, set_nonblock=True) 29 | in_data = await self.process.task.ptr(b"hello") 30 | written, _ = await stdin.write(in_data) 31 | valid, _ = await stdout.read(written) 32 | self.assertEqual(in_data.value, await valid.read()) 33 | 34 | await self.pipe_in.write.close() 35 | await self.child.check() 36 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_pidns.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.sys.socket import AF, SOCK, Socketpair 3 | from rsyscall.unistd import Pipe 4 | from rsyscall.fcntl import O 5 | 6 | from rsyscall.sched import CLONE 7 | 
from rsyscall.tests.utils import assert_process_works 8 | 9 | class TestPidns(TrioTestCase): 10 | async def asyncSetUp(self) -> None: 11 | self.init = await self.process.clone(CLONE.NEWUSER|CLONE.NEWPID|CLONE.FILES) 12 | 13 | async def test_cat(self) -> None: 14 | cat = await self.process.environ.which('cat') 15 | pair = await (await self.process.task.socketpair( 16 | AF.UNIX, SOCK.STREAM, 0, await self.process.task.malloc(Socketpair))).read() 17 | child = await self.init.fork() 18 | child_side = child.task.inherit_fd(pair.first) 19 | # close in parent so we'll get EOF on other side when cat dies 20 | await pair.first.close() 21 | await child_side.dup2(child.stdin) 22 | await child_side.dup2(child.stdout) 23 | child_pid = await child.exec(cat) 24 | await self.init.exit(0) 25 | # cat dies, get EOF on socket 26 | read, _ = await pair.second.read(await self.process.task.malloc(bytes, 16)) 27 | self.assertEqual(read.size(), 0) 28 | 29 | async def test_sleep(self) -> None: 30 | pipe = await (await self.process.task.pipe(await self.process.task.malloc(Pipe))).read() 31 | child = await self.init.fork() 32 | child_fd = child.task.inherit_fd(pipe.write) 33 | await pipe.write.close() 34 | await child_fd.disable_cloexec() 35 | child_pid = await child.exec(child.environ.sh.args('-c', '{ sleep inf & } &')) 36 | await child_pid.check() 37 | await self.init.exit(0) 38 | read, _ = await pipe.read.read(await self.process.task.malloc(bytes, 1)) 39 | self.assertEqual(read.size(), 0) 40 | -------------------------------------------------------------------------------- /python/rsyscall/linux/memfd.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from rsyscall._raw import ffi, lib # type: ignore 3 | import os 4 | import typing as t 5 | import enum 6 | 7 | class MFD(enum.IntFlag): 8 | NONE = 0 9 | CLOEXEC = lib.MFD_CLOEXEC 10 | ALLOW_SEALING = lib.MFD_ALLOW_SEALING 11 | HUGETLB = lib.MFD_HUGETLB 12 | HUGE_2MB = lib.MFD_HUGE_2MB 13 | HUGE_1GB = lib.MFD_HUGE_1GB 14 | 15 | #### Classes #### 16 | from rsyscall.handle.fd import T_fd, FileDescriptorTask 17 | from rsyscall.handle.pointer import WrittenPointer 18 | 19 | class MemfdTask(FileDescriptorTask[T_fd]): 20 | @t.overload 21 | async def memfd_create(self, name: str | os.PathLike, flags: MFD=MFD.NONE) -> T_fd: ... 22 | @t.overload 23 | async def memfd_create(self, name: WrittenPointer[str | os.PathLike], flags: MFD=MFD.NONE) -> T_fd: ... 
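    # The overloads above only refine the type: callers may pass either a plain
    # path-like name (which we allocate and write into memory ourselves) or an
    # already-written pointer to one.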
24 | 25 | async def memfd_create(self, name: str | os.PathLike | WrittenPointer[str | os.PathLike], flags: MFD=MFD.NONE) -> T_fd: 26 | if isinstance(name, WrittenPointer): 27 | return await memfd_create(self, name, flags=flags) 28 | else: 29 | return await memfd_create(self, await self.ptr(name), flags=flags) 30 | 31 | #### Pointer-taking syscalls #### 32 | async def memfd_create(task: FileDescriptorTask[T_fd], 33 | name: WrittenPointer[t.Union[str, os.PathLike]], flags: MFD=MFD.NONE) -> T_fd: 34 | with name.borrow(task) as name_n: 35 | fd = await _memfd_create(task.sysif, name_n, flags|MFD.CLOEXEC) 36 | return task.make_fd_handle(fd) 37 | 38 | 39 | #### Raw syscalls #### 40 | import rsyscall.near.types as near 41 | from rsyscall.near.sysif import SyscallInterface 42 | from rsyscall.sys.syscall import SYS 43 | 44 | async def _memfd_create(sysif: SyscallInterface, 45 | name: near.Address, flags: MFD) -> near.FileDescriptor: 46 | return near.FileDescriptor(await sysif.syscall(SYS.memfd_create, name, flags)) 47 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_setuid.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.unistd import O, SEEK 3 | import logging 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | class TestSetuid(TrioTestCase): 8 | async def test_getdent_proc_pid_fd_after_setuid(self) -> None: 9 | "For some reason, we can still getdent on /proc/pid/fd after the target process setuids" 10 | other = await self.process.clone() 11 | other_file = await other.task.memfd_create("foo") 12 | logger.info("Disable cloexec so other_file actually gets inherited.") 13 | await other_file.disable_cloexec() 14 | 15 | logger.info("We can open /proc/pid/fd just fine right now...") 16 | proc_fds = await self.process.task.open(other.task.pid.as_proc_path()/"fd", O.RDONLY|O.DIRECTORY) 17 | logger.info("...and see other_file there.") 18 | for ent in await (await proc_fds.getdents()).read(): 19 | if ent.name == str(int(other_file.near)): 20 | break 21 | else: 22 | raise AssertionError("expected to see", other_file, "in other's /proc/pid/fd") 23 | 24 | logger.info("Once we sudo, however...") 25 | sudo = await other.environ.which("sudo") 26 | async with await other.exec(sudo.args("sleep", "inf")): 27 | logger.info("...we can still see other_file in our old opened copy of /proc/pid/fd...") 28 | await proc_fds.lseek(0, SEEK.SET) 29 | for ent in await (await proc_fds.getdents()).read(): 30 | if ent.name == str(int(other_file.near)): 31 | break 32 | else: 33 | raise AssertionError("expected to see", other_file, "in other's /proc/pid/fd") 34 | 35 | logger.info("...but if we try to open /proc/pid/fd again, we get EPERM.") 36 | with self.assertRaises(PermissionError): 37 | await self.process.task.open(other.task.pid.as_proc_path()/"fd", O.RDONLY|O.DIRECTORY) 38 | -------------------------------------------------------------------------------- /python/rsyscall/tests/utils.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | import trio 3 | from rsyscall.epoller import Epoller, AsyncFileDescriptor 4 | from rsyscall.unistd import Pipe 5 | from rsyscall.fcntl import O 6 | from rsyscall.thread import Process 7 | 8 | import logging 9 | logger = logging.getLogger(__name__) 10 | # logging.basicConfig(level=logging.DEBUG) 11 | 12 | import unittest 13 | async def do_async_things(self: 
unittest.TestCase, epoller: Epoller, thr: Process, i: int=0, 14 | *, task_status=trio.TASK_STATUS_IGNORED) -> None: 15 | logger.debug("Setting up for do_async_things(%d)", i) 16 | pipe = await (await thr.task.pipe(await thr.task.malloc(Pipe), O.NONBLOCK)).read() 17 | async_pipe_rfd = await AsyncFileDescriptor.make(epoller, pipe.read) 18 | async_pipe_wfd = await AsyncFileDescriptor.make(epoller, pipe.write) 19 | task_status.started(None) 20 | data = b"hello world" 21 | logger.debug("Starting do_async_things(%d)", i) 22 | async def stuff(): 23 | logger.debug("do_async_things(%d): read(%s): starting", i, async_pipe_rfd.handle.near) 24 | result = await async_pipe_rfd.read_some_bytes() 25 | logger.debug("do_async_things(%d): read(%s): returned", i, async_pipe_rfd.handle.near) 26 | self.assertEqual(result, data) 27 | async with trio.open_nursery() as nursery: 28 | nursery.start_soon(stuff) 29 | await trio.sleep(0.0001) 30 | # hmmm MMM MMMmmmm MMM mmm MMm mm MM mmm MM mm MM 31 | # does this make sense? 32 | logger.debug("do_async_things(%d): write(%s): starting", i, async_pipe_wfd.handle.near) 33 | await async_pipe_wfd.write_all_bytes(data) 34 | logger.debug("do_async_things(%d): write(%s): returned", i, async_pipe_wfd.handle.near) 35 | await async_pipe_rfd.close() 36 | await async_pipe_wfd.close() 37 | logger.debug("Done with do_async_things(%d)", i) 38 | 39 | async def assert_process_works(self: unittest.TestCase, thr: Process) -> None: 40 | await do_async_things(self, thr.epoller, thr) 41 | -------------------------------------------------------------------------------- /c/src/rsyscall_server_main.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "rsyscall.h" 12 | 13 | struct options { 14 | int describefd; 15 | int infd; 16 | int outfd; 17 | }; 18 | 19 | struct options parse_options(int argc, char** argv) 20 | { 21 | if (argc < 3) { 22 | errx(1, "Usage: %s [passedfd [passedfd [passedfd...]]]", 23 | argc ? 
argv[0] : "rsyscall-server"); 24 | } 25 | errno = 0; 26 | const int describefd = strtol(argv[1], NULL, 0); 27 | if (errno != 0) err(1, "strtol(%s)", argv[1]); 28 | const int infd = strtol(argv[2], NULL, 0); 29 | if (errno != 0) err(1, "strtol(%s)", argv[2]); 30 | const int outfd = strtol(argv[3], NULL, 0); 31 | if (errno != 0) err(1, "strtol(%s)", argv[3]); 32 | for (int i = 4; i < argc; i++) { 33 | // re-enable cloexec on all passed-in fds 34 | const int passedfd = strtol(argv[i], NULL, 0); 35 | if (errno != 0) err(1, "strtol(argv[%d] = %s)", i, argv[i]); 36 | if (fcntl(passedfd, F_SETFD, FD_CLOEXEC) < 0) err(1, "fcntl(%d, F_SETFD, FD_CLOEXEC)", passedfd); 37 | } 38 | const struct options opt = { 39 | .describefd = describefd, 40 | .infd = infd, 41 | .outfd = outfd, 42 | }; 43 | return opt; 44 | } 45 | 46 | int main(int argc, char** argv) 47 | { 48 | const struct options opt = parse_options(argc, argv); 49 | fcntl(opt.infd, F_SETFL, fcntl(opt.infd, F_GETFL) & ~O_NONBLOCK); 50 | fcntl(opt.outfd, F_SETFL, fcntl(opt.outfd, F_GETFL) & ~O_NONBLOCK); 51 | struct rsyscall_symbol_table table = rsyscall_symbol_table(); 52 | // TODO could partial write 53 | int ret = write(opt.describefd, &table, sizeof(table)); 54 | if (ret != sizeof(table)) err(1, "write(describefd, table, sizeof(table))"); 55 | rsyscall_server(opt.infd, opt.outfd); 56 | } 57 | -------------------------------------------------------------------------------- /c/Makefile.am: -------------------------------------------------------------------------------- 1 | # We're trying, with these cflags, to make a dynamic library which, when loaded, has no 2 | # dependencies on PLT/GOT/whatever other stuff, which are normally accessed through TLS. 3 | # We want to work even if TLS isn't set up. 4 | AM_CFLAGS = -Wall -Werror -Wl,-znow -g -O0 5 | AM_LDFLAGS = -Wl,-znow -O0 -g 6 | 7 | # Library 8 | pkgconfig_DATA = rsyscall.pc 9 | lib_LTLIBRARIES = librsyscall.la 10 | 11 | librsyscall_la_SOURCES = src/rsyscall.c src/rsyscall_x86_64.S 12 | include_HEADERS = src/rsyscall.h 13 | 14 | # We want executables which are fully static and have no dependencies on libc; we aren't 15 | # actually achieving that because we make libc calls in a few places. We don't want to 16 | # achieve this by statically linking libc because that's unnecessary bloat - we're trying 17 | # to replace libc anyway. 18 | pkglibexec_PROGRAMS = rsyscall-server rsyscall-bootstrap rsyscall-stdin-bootstrap rsyscall-unix-stub 19 | 20 | rsyscall_stdin_bootstrap_SOURCES = src/rsyscall_stdin_bootstrap.c 21 | rsyscall_stdin_bootstrap_LDADD = librsyscall.la 22 | 23 | rsyscall_server_SOURCES = src/rsyscall_server_main.c 24 | rsyscall_server_LDADD = librsyscall.la 25 | 26 | # This is the only executable that really really needs to be static. The real use case that demands that is when using 27 | # it to bootstrap an rsyscall-controlled process over ssh, where it will be copied from the source host to a target host 28 | # which may have a completely different library setup, or no glibc, or other issues like that. 29 | # Another use case for a static executable might be, for example, to use it as init and start it in an empty filesystem; 30 | # but we probably aren't going to be able to use the same executable for that purpose as we use for ssh bootstrapping, 31 | # as nice as that would be. 
32 | rsyscall_bootstrap_SOURCES = src/rsyscall_bootstrap.c 33 | rsyscall_bootstrap_LDADD = librsyscall.la 34 | rsyscall_bootstrap_LDFLAGS = -all-static 35 | 36 | rsyscall_unix_stub_SOURCES = src/rsyscall_unix_stub.c 37 | rsyscall_unix_stub_LDADD = librsyscall.la 38 | 39 | noinst_PROGRAMS = remote_cat 40 | remote_cat_SOURCES = src/remote_cat.c 41 | -------------------------------------------------------------------------------- /c/src/rsyscall.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | struct rsyscall_syscall { 6 | int64_t sys; 7 | int64_t args[6]; 8 | }; 9 | 10 | int rsyscall_server(const int infd, const int outfd); 11 | int rsyscall_persistent_server(int infd, int outfd, const int listensock); 12 | 13 | /* Assembly-language routines: */ 14 | /* careful: the syscall number is the last arg, to make the assembly more convenient. */ 15 | long rsyscall_raw_syscall(long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long sys); 16 | /* SIGSTOPs itself when it starts up, then waits on the futex, then does exit(0). */ 17 | void rsyscall_futex_helper(void *futex_addr); 18 | /* A trampoline useful when used with clone to call arbitrary functions. */ 19 | void rsyscall_trampoline(void); 20 | 21 | /* The stack should be set up as follows to use rsyscall_trampoline. */ 22 | struct rsyscall_trampoline_stack { 23 | int64_t rdi; 24 | int64_t rsi; 25 | int64_t rdx; 26 | int64_t rcx; 27 | int64_t r8; 28 | int64_t r9; 29 | void* function; 30 | }; 31 | 32 | /* A symbol table and a routine for dumping it. */ 33 | struct rsyscall_symbol_table { 34 | void* rsyscall_server; 35 | void* rsyscall_persistent_server; 36 | void* rsyscall_futex_helper; 37 | void* rsyscall_trampoline; 38 | }; 39 | struct rsyscall_symbol_table rsyscall_symbol_table(); 40 | 41 | struct rsyscall_bootstrap { 42 | struct rsyscall_symbol_table symbols; 43 | pid_t pid; 44 | int listening_sock; 45 | int syscall_sock; 46 | int data_sock; 47 | size_t envp_count; 48 | }; 49 | struct rsyscall_stdin_bootstrap { 50 | struct rsyscall_symbol_table symbols; 51 | pid_t pid; 52 | int syscall_fd; 53 | int data_fd; 54 | int futex_memfd; 55 | int connecting_fd; 56 | size_t envp_count; 57 | }; 58 | struct rsyscall_unix_stub { 59 | struct rsyscall_symbol_table symbols; 60 | pid_t pid; 61 | int syscall_fd; 62 | int data_fd; 63 | int futex_memfd; 64 | int connecting_fd; 65 | size_t argc; 66 | size_t envp_count; 67 | uint64_t sigmask; 68 | }; 69 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_proc.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from rsyscall.tests.trio_test_case import TrioTestCase 3 | from rsyscall.sys.socket import AF, SOCK, Socketpair 4 | from rsyscall.unistd import Pipe, SEEK 5 | from rsyscall.fcntl import O 6 | from rsyscall.sys.wait import W, Siginfo 7 | 8 | from rsyscall.sched import CLONE 9 | from rsyscall.handle import Pid 10 | from rsyscall.tests.utils import assert_process_works 11 | from rsyscall.signal import SIG 12 | 13 | class TestProc(TrioTestCase): 14 | async def asyncSetUp(self) -> None: 15 | self.init = await self.process.clone(CLONE.NEWUSER|CLONE.NEWPID) 16 | # set up proc 17 | 18 | async def test_pgid(self) -> None: 19 | try: 20 | last_pid = await self.init.task.open(await self.init.ptr("/proc/sys/kernel/ns_last_pid"), O.WRONLY) 21 | except FileNotFoundError: 22 | raise unittest.SkipTest("Requires 
/proc/sys/kernel/ns_last_pid, which requires CONFIG_CHECKPOINT_RESTORE") 23 | 24 | pgldr = await self.init.fork() 25 | await pgldr.task.setpgid() 26 | pgflr = await self.init.fork() 27 | await pgflr.task.setpgid(pgldr.pid.pid) 28 | self.assertEqual(int(await pgflr.task.getpgid()), 2) 29 | await pgldr.exit(0) 30 | await pgldr.pid.waitpid(W.EXITED) 31 | self.assertIsNotNone(pgldr.pid.pid.death_state) 32 | if pgldr.pid.pid.death_state is None: raise Exception # for mypy 33 | self.assertEqual(pgldr.pid.pid.death_state.pid, 2) 34 | self.assertTrue(pgldr.pid.pid.death_state.died()) 35 | self.assertEqual(int(await pgflr.task.getpgid()), 2) 36 | 37 | await self.init.spit(last_pid, b"1\n") 38 | 39 | with self.assertRaises(ProcessLookupError): 40 | await self.init.task._make_pid(2).kill(SIG.NONE) 41 | pg_two = await self.init.fork() 42 | with self.assertRaises(ProcessLookupError): 43 | await self.init.task._make_pid(2).kill(SIG.NONE) 44 | # Linux skips right over process 2, even though it's dead, because it's still used by the process group 45 | self.assertEqual(int(pg_two.task.pid.near), 3) 46 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_seek.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.sys.stat import Stat 3 | from rsyscall.linux.fs import FI, FileCloneRange 4 | import errno 5 | 6 | class TestSeek(TrioTestCase): 7 | async def asyncSetUp(self) -> None: 8 | self.file = await self.process.task.memfd_create(await self.process.ptr("file")) 9 | 10 | async def asyncTearDown(self) -> None: 11 | await self.file.close() 12 | 13 | async def test_preadwrite(self) -> None: 14 | stat = await (await self.file.fstat(await self.process.malloc(Stat))).read() 15 | self.assertEqual(stat.size, 0) 16 | read, _ = await self.file.pread(await self.process.malloc(bytes, 16), offset=0) 17 | self.assertEqual(read.size(), 0) 18 | read, _ = await self.file.pread(await self.process.malloc(bytes, 16), offset=1) 19 | self.assertEqual(read.size(), 0) 20 | # we can write to an offset past the end 21 | data = b'abc' 22 | wrote, _ = await self.file.pwrite(await self.process.ptr(data), offset=1) 23 | self.assertEqual(wrote.size(), len(data)) 24 | # the data is written fine 25 | read, _ = await self.file.pread(await self.process.malloc(bytes, 16), offset=1) 26 | self.assertEqual(await read.read(), data) 27 | # the earlier bytes are now zeros 28 | read, _ = await self.file.pread(await self.process.malloc(bytes, 16), offset=0) 29 | self.assertEqual(await read.read(), b'\0' + data) 30 | # size is now 4 31 | stat = await (await self.file.fstat(await self.process.malloc(Stat))).read() 32 | self.assertEqual(stat.size, 4) 33 | 34 | async def test_ficlonerange(self) -> None: 35 | await self.file.ftruncate(4096*2) 36 | with self.assertRaises(OSError) as cm: 37 | await self.file.ioctl(FI.CLONERANGE, await self.process.ptr(FileCloneRange( 38 | src_fd=self.file, 39 | src_offset=4096, 40 | src_length=4096, 41 | dest_offset=0, 42 | ))) 43 | self.assertEqual(cm.exception.errno, errno.EOPNOTSUPP) 44 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_stub.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from rsyscall.tests.trio_test_case import TrioTestCase 4 | from rsyscall.tasks.stub import * 5 | 6 | import rsyscall.nix as nix 7 | 
8 | from rsyscall.tests.utils import do_async_things 9 | from rsyscall.command import Command 10 | from rsyscall.stdlib import mkdtemp 11 | 12 | import os 13 | 14 | class TestStub(TrioTestCase): 15 | async def asyncSetUp(self) -> None: 16 | self.tmpdir = await mkdtemp(self.process, "test_stub") 17 | # make sure that this name doesn't collide with shell builtins 18 | # so it can be run from the shell in test_read_stdin 19 | self.stub_name = "dummy_stub" 20 | self.server = await StubServer.make(self.process, self.tmpdir, self.stub_name) 21 | self.exec_process = await self.process.fork() 22 | 23 | async def asyncTearDown(self) -> None: 24 | await self.tmpdir.cleanup() 25 | 26 | async def test_exit(self) -> None: 27 | command = Command(self.tmpdir/self.stub_name, [self.stub_name], {}) 28 | child = await self.exec_process.exec(command) 29 | self.nursery.start_soon(child.check) 30 | argv, new_process = await self.server.accept() 31 | await new_process.exit(0) 32 | 33 | async def test_async(self) -> None: 34 | command = Command(self.tmpdir/self.stub_name, [self.stub_name], {}) 35 | child = await self.exec_process.exec(command) 36 | self.nursery.start_soon(child.check) 37 | argv, new_process = await self.server.accept() 38 | await do_async_things(self, new_process.epoller, new_process) 39 | 40 | async def test_read_stdin(self) -> None: 41 | data_in = "hello" 42 | command = self.exec_process.environ.sh.args( 43 | "-c", f"printf {data_in} | {self.stub_name}" 44 | ).env(PATH=os.fsdecode(self.tmpdir)) 45 | child = await self.exec_process.exec(command) 46 | self.nursery.start_soon(child.check) 47 | argv, new_process = await self.server.accept() 48 | valid, _ = await new_process.stdin.read( 49 | await new_process.task.malloc(bytes, len(data_in))) 50 | self.assertEqual(data_in, (await valid.read()).decode()) 51 | 52 | -------------------------------------------------------------------------------- /docs/pitch.org: -------------------------------------------------------------------------------- 1 | This is our stock elevator pitch for rsyscall. 2 | 3 | * slogan 4 | In rsyscall, you explicitly pass the process when making a syscall, 5 | instead of implicitly operating on the current process. 6 | 7 | * fork 8 | In other operating systems APIs, when you make a syscall, 9 | you implicitly make that syscall in the context of the current process. 10 | In rsyscall, you explicitly pass a process to operate on when making a syscall. 11 | 12 | This has many benefits; for example, in process creation. 13 | Unlike posix_spawn, 14 | which requires explicit support for each attribute of the new process that you want to configure, 15 | or fork, 16 | which is inefficient, because you need to copy resources, 17 | and is complicated, because fork returns twice leading to tricky coordination between the parent and child process, 18 | including when the child process fails to initialize, 19 | rsyscall allows a straightforward API for process creation. 20 | 21 | Simply create a new process, which starts out inert, 22 | then explicitly make arbitrary syscalls in the new process to set it up, 23 | finishing off with an exec or some other function. 24 | 25 | rsyscall seeks to provide complete coverage of the Linux API, 26 | immediately supporting new syscalls as they come out. 27 | An rsyscall library is currently available for Python; 28 | as an ancillary benefit, it has excellent support for Python 3 type-checking annotations, 29 | providing a type-safe API for Linux in Python. 
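As a rough sketch of what that looks like with the Python library
(the method names below follow this repository's test suite, and should be read as illustrative rather than as a frozen API):
#+begin_src python
import rsyscall
from rsyscall.sys.wait import W

async def spawn_echo() -> None:
    # Create a new process, which starts out inert.
    proc = await rsyscall.local_process.fork()
    # Set it up by making ordinary calls on it; here we just resolve a command.
    echo = (await proc.environ.which("echo")).args("hello", "world")
    # Finish off with an exec, then wait for the child to exit.
    child = await proc.exec(echo)
    await child.waitpid(W.EXITED)
#+end_src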
30 | * language-based system 31 | Making the process explicit has other benefits as well. 32 | 33 | There are cases where we need to access resources from another processes; 34 | for example, when operating on resources inside a container or on another host. 35 | 36 | With rsyscall, instead of writing a separate program to run in a separate process, 37 | and communicating with it by passing strings on the command line and/or using various limited RPC protocols, 38 | we can have a single program which operates in many processes, 39 | and which uses language-level abstractions like function calls or classes or monads to provide modularity. 40 | 41 | You can operate on a process on a remote system, 42 | and use that process as your interface to that system, 43 | starting and supervising other processes as children of that process. 44 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | * Summary 2 | rsyscall is a library which provides a interface to an ever-growing subset of Linux system calls. 3 | This interface is: 4 | - *process-independent*: all system calls are called as methods on process objects, 5 | which can refer to the "local" process or to other processes under our control, possibly on remote hosts. 6 | - *type-safe*: many Linux API constraints, which are usually left to user code to enforce, 7 | are made explicit in the type system. 8 | - *low-level*: any action which is possible with the underlying Linux APIs, 9 | is possible with rsyscall; nothing is forbidden or discouraged. 10 | 11 | For more detail on the concepts and motivation behind rsyscall, 12 | read the [[file:docs/conceptual.org][conceptual introduction]]. 13 | * Quick Start 14 | Check out the docs at [[http://rsyscall.org/rsyscall][rsyscall.org]]. 15 | 16 | Note that the rsyscall Python API uses some recent Python 3 features: 17 | async/await and type annotations. 18 | If you haven't seen those before, skim the [[file:docs/background.org][background tutorial]] first. 19 | * Installation 20 | There's no official release of rsyscall yet. 21 | 22 | But, if you want to try it, you can do so with Nix: 23 | #+begin_src bash 24 | git clone https://github.com/catern/rsyscall 25 | cd rsyscall/python 26 | nix-shell 27 | python 28 | #+end_src 29 | 30 | Or you can build it by hand in a virtual environment: 31 | #+begin_src bash 32 | git clone https://github.com/catern/rsyscall 33 | cd rsyscall 34 | python3 -m venv venv 35 | . venv/bin/activate 36 | pip install -U pip 37 | 38 | cd c 39 | autoreconf -fvi 40 | ./configure --prefix=$PWD/../venv 41 | make install 42 | cd .. 43 | export LD_LIBRARY_PATH=$PWD/venv/lib 44 | export PKG_CONFIG_PATH=$PWD/venv/lib/pkgconfig 45 | 46 | pip install -e python/ 47 | python 48 | #+end_src 49 | * Getting started 50 | rsyscall is an async library that uses [[https://trio.readthedocs.io][Trio]], so the easiest way to 51 | try it out is with an async Python REPL. [[https://ipython.org/][IPython]] provides a convenient 52 | REPL: 53 | #+begin_example 54 | $ pip install ipython 55 | $ ipython 56 | In [1]: %autoawait trio 57 | 58 | In [2]: import rsyscall 59 | 60 | In [3]: t = await rsyscall.local_process.fork() 61 | 62 | In [4]: await t.stdout.write(await t.ptr("Hello world!\n")) 63 | Hello world! 
64 | #+end_example 65 | * Discussion 66 | irc://irc.oftc.net/#rsyscall 67 | 68 | -------------------------------------------------------------------------------- /python/rsyscall/unistd/credentials.py: -------------------------------------------------------------------------------- 1 | "Named for the excellent manpage about these process attributes, credentials(7)" 2 | from __future__ import annotations 3 | import rsyscall.near as near 4 | import typing as t 5 | 6 | #### Classes #### 7 | if t.TYPE_CHECKING: 8 | from rsyscall.handle.process import ChildPid 9 | import rsyscall.far 10 | 11 | class CredentialsTask(rsyscall.far.Task): 12 | async def getuid(self) -> int: 13 | return (await _getuid(self.sysif)) 14 | 15 | async def getgid(self) -> int: 16 | return (await _getgid(self.sysif)) 17 | 18 | async def getpid(self) -> near.Pid: 19 | return (await _getpid(self.sysif)) 20 | 21 | async def getpgid(self) -> near.Pgid: 22 | return (await _getpgid(self.sysif, None)) 23 | 24 | async def setpgid(self, pgid: t.Optional[ChildPid]=None) -> None: 25 | if pgid is None: 26 | await _setpgid(self.sysif, None, None) 27 | else: 28 | if pgid.task.pidns != self.pidns: 29 | raise rsyscall.far.NamespaceMismatchError( 30 | "different pid namespaces", pgid.task.pidns, self.pidns) 31 | with pgid.borrow(): 32 | await _setpgid(self.sysif, None, pgid._as_process_group()) 33 | 34 | async def setsid(self) -> int: 35 | return (await _setsid(self.sysif)) 36 | 37 | #### Raw syscalls #### 38 | from rsyscall.near.sysif import SyscallInterface 39 | from rsyscall.sys.syscall import SYS 40 | 41 | async def _getuid(sysif: SyscallInterface) -> int: 42 | return (await sysif.syscall(SYS.getuid)) 43 | 44 | async def _getgid(sysif: SyscallInterface) -> int: 45 | return (await sysif.syscall(SYS.getgid)) 46 | 47 | async def _getpid(sysif: SyscallInterface) -> near.Pid: 48 | return near.Pid(await sysif.syscall(SYS.getpid)) 49 | 50 | async def _getpgid(sysif: SyscallInterface, pid: t.Optional[near.Pid]) -> near.Pgid: 51 | if pid is None: 52 | pid = 0 # type: ignore 53 | return near.Pgid(await sysif.syscall(SYS.getpgid, pid)) 54 | 55 | async def _setpgid(sysif: SyscallInterface, 56 | pid: t.Optional[near.Pid], pgid: t.Optional[near.Pgid]) -> None: 57 | if pid is None: 58 | pid = 0 # type: ignore 59 | if pgid is None: 60 | pgid = 0 # type: ignore 61 | await sysif.syscall(SYS.setpgid, pid, pgid) 62 | 63 | async def _setsid(sysif: SyscallInterface) -> int: 64 | return (await sysif.syscall(SYS.setsid)) 65 | -------------------------------------------------------------------------------- /docs/background.org: -------------------------------------------------------------------------------- 1 | #+TITLE: Python background 2 | 3 | * async 4 | In the tutorials, you can safely ignore the presence of the "async" and "await" keywords. 5 | We don't use any actual async features in the tutorials, 6 | so everything we're doing is equivalent to regular, non-async Python. 7 | 8 | Python 3 async/await syntax looks like this: 9 | #+BEGIN_SRC python 10 | async def f(x): 11 | return await some_library_function(x, x) 12 | 13 | async def main(): 14 | print(await f("hello")) 15 | #+END_SRC 16 | 17 | Any async function =f= (a function defined with =async def f(args)=) 18 | has to be called with =await f(args)=. 19 | =await= can only be used in an async function, 20 | so async functions can only be called by async functions. 21 | 22 | The main async function is run by a special non-async function provided by an async library (such as trio or asyncio). 
23 | At the top level of an rsyscall program is therefore a call to =trio.run= or =asyncio.run=. 24 | 25 | Async functions are, from the user perspective, equivalent to normal functions. 26 | Only when using async library features does a difference appear, 27 | which, as previously mentioned, 28 | we don't do in the tutorials. 29 | rsyscall uses Python async/await primarily so that users can use async/await, 30 | not because rsyscall gets much utility out of it. 31 | 32 | For more information on async/await syntax in Python, 33 | check out the [[https://trio.readthedocs.io/en/latest/tutorial.html][trio tutorial]]. 34 | While rsyscall uses trio, 35 | it isn't necessary or useful to know anything about the trio library itself to use rsyscall. 36 | As shown in the [[file:single_threaded.org][single-threaded tutorial]], 37 | rsyscall provides its own interfaces for IO, 38 | and makes no use of trio's interfaces for IO. 39 | * types 40 | You can safely ignore the Python 3 type annotations, both in the tutorials and in general; 41 | in Python, type annotations are purely for use by static analyzers, and have no runtime significance. 42 | 43 | Python 3 type annotations appear in function declarations like this: 44 | #+BEGIN_SRC python 45 | def f(x: int, y: str, z: t.List[str]) -> str: ... 46 | #+END_SRC 47 | 48 | This function =f= takes three arguments, 49 | an int (=x: int=), a string (=y: str=), and a list of strings (=z: t.List[str]=), 50 | and returns another string (=-> str=). 51 | 52 | For more information on type annotations in Python, 53 | check out the [[https://mypy.readthedocs.io/en/stable/getting_started.html][mypy manual]]. 54 | -------------------------------------------------------------------------------- /python/rsyscall/sys/signalfd.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | import typing as t 3 | from rsyscall._raw import ffi, lib # type: ignore 4 | from rsyscall.struct import Struct 5 | from dataclasses import dataclass 6 | 7 | from rsyscall.signal import SIG, Sigset 8 | import enum 9 | 10 | class SFD(enum.IntFlag): 11 | NONE = 0 12 | NONBLOCK = lib.SFD_NONBLOCK 13 | CLOEXEC = lib.SFD_CLOEXEC 14 | 15 | @dataclass 16 | class SignalfdSiginfo(Struct): 17 | # TODO fill in the rest of the data 18 | # (even though we don't use any of it ourselves) 19 | signo: SIG 20 | 21 | def to_bytes(self) -> bytes: 22 | struct = ffi.new('struct signalfd_siginfo*') 23 | struct.ssi_signo = self.signo 24 | return bytes(ffi.buffer(struct)) 25 | 26 | T = t.TypeVar('T', bound='SignalfdSiginfo') 27 | @classmethod 28 | def from_bytes(cls: t.Type[T], data: bytes) -> T: 29 | struct = ffi.cast('struct signalfd_siginfo const*', ffi.from_buffer(data)) 30 | return cls( 31 | signo=SIG(struct.ssi_signo), 32 | ) 33 | 34 | @classmethod 35 | def sizeof(cls) -> int: 36 | return ffi.sizeof('struct signalfd_siginfo') 37 | 38 | #### Classes #### 39 | from rsyscall.handle.fd import BaseFileDescriptor, FileDescriptorTask 40 | from rsyscall.handle.pointer import Pointer 41 | 42 | T_fd = t.TypeVar('T_fd', bound='SignalFileDescriptor') 43 | class SignalFileDescriptor(BaseFileDescriptor): 44 | async def signalfd(self, mask: Pointer[Sigset], flags: SFD) -> None: 45 | self._validate() 46 | with mask.borrow(self.task) as mask_n: 47 | await _signalfd(self.task.sysif, self.near, mask_n, mask.size(), flags) 48 | 49 | class SignalfdTask(FileDescriptorTask[T_fd]): 50 | async def signalfd(self, mask: Pointer[Sigset], flags: SFD=SFD.NONE) -> T_fd: 51 | with 
mask.borrow(self) as mask_n: 52 | fd = await _signalfd(self.sysif, None, mask_n, mask.size(), flags|SFD.CLOEXEC) 53 | return self.make_fd_handle(fd) 54 | 55 | #### Raw syscalls #### 56 | import rsyscall.near.types as near 57 | from rsyscall.near.sysif import SyscallInterface 58 | from rsyscall.sys.syscall import SYS 59 | 60 | async def _signalfd(sysif: SyscallInterface, fd: t.Optional[near.FileDescriptor], 61 | mask: near.Address, sizemask: int, flags: SFD) -> near.FileDescriptor: 62 | if fd is None: 63 | fd = -1 # type: ignore 64 | return near.FileDescriptor(await sysif.syscall(SYS.signalfd4, fd, mask, sizemask, flags)) 65 | -------------------------------------------------------------------------------- /research/sigbovik2020/bibliography.bib: -------------------------------------------------------------------------------- 1 | @InProceedings{bashcc, 2 | author="Baugh, Spencer", 3 | title="bashcc: Multi-prompt one-shot delimited continuations for Bash", 4 | booktitle="Proceedings of SIGBOVIK 2018", 5 | year="2018", 6 | publisher="Association for Computational Heresy", 7 | address="Pittsburgh, Pennsylvania", 8 | pages="161--164", 9 | } 10 | @book{frankenstein, 11 | place={London}, title={Frankenstein: or The modern prometheus}, publisher={Lackington, Hughes, Harding, Mavor, and Jones}, author={Shelley, Mary W.}, year=1818} 12 | 13 | @Inbook{oleg, 14 | author="Kiselyov, Oleg", 15 | editor="Gibbons, Jeremy", 16 | title="Typed Tagless Final Interpreters", 17 | bookTitle="Generic and Indexed Programming: International Spring School, SSGIP 2010, Oxford, UK, March 22-26, 2010, Revised Lectures", 18 | year="2012", 19 | publisher="Springer Berlin Heidelberg", 20 | address="Berlin, Heidelberg", 21 | pages="130--174", 22 | abstract="The so-called `typed tagless final' approach of [6] has collected and polished a number of techniques for representing typed higher-order languages in a typed metalanguage, along with type-preserving interpretation, compilation and partial evaluation. The approach is an alternative to the traditional, or `initial' encoding of an object language as a (generalized) algebraic data type. Both approaches permit multiple interpretations of an expression, to evaluate it, pretty-print, etc. The final encoding represents all and only typed object terms without resorting to generalized algebraic data types, dependent or other fancy types. The final encoding lets us add new language forms and interpretations without breaking the existing terms and interpreters.", 23 | isbn="978-3-642-32202-0", 24 | doi="10.1007/978-3-642-32202-0_3", 25 | url="https://doi.org/10.1007/978-3-642-32202-0_3" 26 | } 27 | 28 | @InProceedings{danvy, 29 | author="Danvy, Olivier", 30 | editor="Hatcliff, John 31 | and Mogensen, Torben {\AE} 32 | and Thiemann, Peter", 33 | title="Type-Directed Partial Evaluation", 34 | booktitle="Partial Evaluation", 35 | year="1999", 36 | publisher="Springer Berlin Heidelberg", 37 | address="Berlin, Heidelberg", 38 | pages="367--411", 39 | abstract="Type-directed partial evaluation uses a normalization function to achieve partial evaluation. These lecture notes review its background, foundations, practice, and applications. 
Of specific interest is the modular technique of offline and online type-directed partial evaluation in Standard ML of New Jersey.", 40 | isbn="978-3-540-47018-2" 41 | } 42 | -------------------------------------------------------------------------------- /python/rsyscall/stdlib/mktemp.py: -------------------------------------------------------------------------------- 1 | "Functions for making temporary directories." 2 | import random 3 | import string 4 | from rsyscall.thread import Process 5 | from rsyscall.path import Path 6 | from rsyscall.handle import WrittenPointer 7 | import os 8 | import typing as t 9 | 10 | def random_string(k: int=8) -> str: 11 | "Return a random string - useful for making files that don't conflict with others." 12 | return ''.join(random.choices(string.ascii_letters + string.digits, k=k)) 13 | 14 | async def update_symlink(process: Process, path: WrittenPointer[Path], 15 | target: t.Union[str, os.PathLike]) -> WrittenPointer[Path]: 16 | "Atomically update this path to contain a symlink pointing at this target." 17 | tmpname = path.value.name + ".updating." + random_string(k=8) 18 | tmppath = await process.task.ptr(path.value.parent/tmpname) 19 | await process.task.symlink(await process.task.ptr(target), tmppath) 20 | await process.task.rename(tmppath, path) 21 | return path 22 | 23 | async def mkdtemp(process: Process, prefix: str="mkdtemp") -> 'TemporaryDirectory': 24 | "Make a temporary directory in process.environ.tmpdir." 25 | parent = process.environ.tmpdir 26 | name = prefix+"."+random_string(k=8) 27 | await process.task.mkdir(await process.task.ptr(parent/name), 0o700) 28 | return TemporaryDirectory(process, parent, name) 29 | 30 | class TemporaryDirectory(Path): 31 | "A temporary directory we've created and are responsible for cleaning up." 32 | def __init__(self, process: Process, parent: Path, name: str) -> None: 33 | "Don't directly instantiate, use rsyscall.mktemp.mkdtemp to create this class." 34 | self.process = process 35 | super().__init__(parent, name) 36 | 37 | async def cleanup(self) -> None: 38 | """Delete this temporary directory and everything inside it. 39 | 40 | We do this cleanup by execing sh; that's the cheapest way to do it. We have to 41 | chmod -R +w the directory before we rm -rf it, because the directory might contain 42 | files without the writable bit set, which would prevent us from deleting it. 43 | 44 | """ 45 | # TODO would be nice if not sharing the fs information gave us a cap to chdir 46 | cleanup = await self.process.fork() 47 | await cleanup.task.chdir(await cleanup.task.ptr(self.parent)) 48 | child = await cleanup.exec(self.process.environ.sh.args( 49 | '-c', f"chmod -R +w -- {self.name} && rm -rf -- {self.name}")) 50 | await child.check() 51 | 52 | async def __aenter__(self) -> Path: 53 | return self 54 | 55 | async def __aexit__(self, *args, **kwargs): 56 | await self.cleanup() 57 | -------------------------------------------------------------------------------- /python/rsyscall/command.py: -------------------------------------------------------------------------------- 1 | "Provides the Command class, which is a convenient representation of the arguments to execve." 2 | import typing as t 3 | from rsyscall.path import Path 4 | import os 5 | 6 | T_command = t.TypeVar('T_command', bound="Command") 7 | class Command: 8 | "A convenient builder-pattern representation of the arguments to execve." 
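    # A rough usage sketch (illustrative only, not from the original source;
    # the example paths and values are made up):
    #
    #   sh = Command("/bin/sh", ["sh"], {})
    #   cmd = sh.args("-c", "echo hello").env(LANG="C")
    #   cmd.in_shell_form()  # -> 'LANG=C /bin/sh -c echo hello'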
9 | def __init__(self, 10 | executable_path: t.Union[str, os.PathLike], 11 | arguments: t.List[t.Union[str, os.PathLike]], 12 | env_updates: t.Mapping[str, t.Union[str, os.PathLike]]) -> None: 13 | self.executable_path = executable_path 14 | self.arguments = arguments 15 | self.env_updates = env_updates 16 | 17 | def args(self: T_command, *args: t.Union[str, os.PathLike]) -> T_command: 18 | "Add more arguments to this Command." 19 | return type(self)(self.executable_path, 20 | [*self.arguments, *args], 21 | self.env_updates) 22 | 23 | def env(self: T_command, env_updates: t.Mapping[str, t.Union[str, os.PathLike]]={}, 24 | **updates: t.Union[str, os.PathLike]) -> T_command: 25 | """Add more environment variable updates to this Command. 26 | 27 | There are two ways to pass arguments to this method (which can be used simultaneously): 28 | - you can pass a dictionary of environment updates, 29 | - or you can provide your environment updates as keyword arguments. 30 | Both are necessary, since there are many valid environment variable 31 | names which are not valid Python keyword argument names. 32 | 33 | """ 34 | return type(self)(self.executable_path, 35 | self.arguments, 36 | {**self.env_updates, **env_updates, **updates}) 37 | 38 | def in_shell_form(self) -> str: 39 | "Render this Command as a string which could be passed to a shell." 40 | ret = "" 41 | for key, value in self.env_updates.items(): 42 | ret += os.fsdecode(key) + "=" + os.fsdecode(value) + " " 43 | ret += os.fsdecode(self.executable_path) 44 | # skip first argument 45 | for arg in self.arguments[1:]: 46 | ret += " " + os.fsdecode(arg) 47 | return ret 48 | 49 | def __str__(self) -> str: 50 | ret = "Command(" 51 | for key, value in self.env_updates.items(): 52 | ret += f"{key}={value} " 53 | ret += f"{os.fsdecode(self.executable_path)}," 54 | for arg in self.arguments: 55 | ret += " " + os.fsdecode(arg) 56 | ret += ")" 57 | return ret 58 | 59 | def __repr__(self) -> str: 60 | return str(self) 61 | -------------------------------------------------------------------------------- /python/arepl/acompile.py: -------------------------------------------------------------------------------- 1 | """Async equivalent of compile, which takes an AST and returns an awaitable 2 | 3 | """ 4 | from dataclasses import dataclass 5 | import ast 6 | import builtins 7 | import types 8 | import inspect 9 | import typing as t 10 | 11 | @dataclass 12 | class _InternalResult(Exception): 13 | is_expression: bool 14 | value: t.Any 15 | 16 | __result_exception__ = _InternalResult 17 | 18 | def compile_to_awaitable(astob: ast.Interactive, 19 | global_vars: t.Dict[str, t.Any]) -> t.Awaitable: 20 | """Compile this AST, which may contain await statements, to an awaitable. 21 | 22 | - If the AST calls return, then a value is returned from the awaitable. 23 | - If the AST raises an exception, then the awaitable raises that exception. 24 | - If the AST neither returns a value nor raises an exception, then __result_exception__ is 25 | raised. 26 | - If the last statement in the AST is an expression, then on the __result_exception__ 27 | exception, is_expression is set and value contains the value of the expression. 28 | - If the last statement in the AST is not an expression, then on the __result_exception__ 29 | exception, is_expression is False and value contains None. 
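    A rough usage sketch (illustrative only; assumes an async context, such as
    the surrounding REPL loop):

        awaitable = compile_to_awaitable(ast.parse("1 + 1", mode="single"), {})
        try:
            await awaitable
        except __result_exception__ as e:
            assert e.is_expression and e.value == 2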
30 | 31 | """ 32 | wrapper_name = "__toplevel__" 33 | # we rely on the user not messing with __builtins__ in the REPL; that's something you 34 | # really aren't supposed to do, so I think that's fine. 35 | wrapper = ast.parse(f""" 36 | async def {wrapper_name}(): 37 | try: 38 | pass 39 | finally: 40 | __builtins__.globals().update(__builtins__.locals()) 41 | """, filename="", mode="single") 42 | try_block = wrapper.body[0].body[0] # type: ignore 43 | try_block.body = astob.body 44 | if isinstance(try_block.body[-1], (ast.Expr, ast.Await)): 45 | # if the last statement in the AST is an expression, then have its value be 46 | # propagated up by throwing it from the __result_exception__ exception. 47 | wrapper_raise = ast.parse("raise __result_exception__(True, None)", filename="", mode="single").body[0] # type: ignore 48 | wrapper_raise.exc.args[1] = try_block.body[-1].value # type: ignore 49 | try_block.body[-1] = wrapper_raise 50 | else: 51 | wrapper_raise = ast.parse("raise __result_exception__(False, None)", filename="", mode="single").body[0] # type: ignore 52 | try_block.body.append(wrapper_raise) 53 | global_vars.update({ 54 | '__builtins__': builtins, 55 | '__result_exception__': __result_exception__, 56 | }) 57 | exec(compile(wrapper, '', 'single'), global_vars) 58 | func = global_vars[wrapper_name] 59 | del global_vars[wrapper_name] 60 | return func() 61 | -------------------------------------------------------------------------------- /python/rsyscall/memory/span.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from rsyscall.memory.allocation_interface import AllocationInterface 3 | from rsyscall.handle import Pointer 4 | import typing as t 5 | 6 | @dataclass 7 | class SpanAllocation(AllocationInterface): 8 | """An allocation which is a subspan of some other allocation, and can be split freely 9 | 10 | This should be built into our allocation system. In fact, it is: This is what split is 11 | for. But the ownership is tricky: Splitting an allocation consumes it. We aren't 12 | supposed to take ownership of the pointers passed to us for write/read, so 13 | we can't naively split the pointers. Instead, we use to_span, below, to make them use 14 | SpanAllocation, so we can split them freely without taking ownership. 15 | 16 | We should make it possible to split an allocation without consuming it, or otherwise 17 | have multiple references to the same allocation, then we can get rid of this. 
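    As an illustrative sketch, assuming `ptr` is some existing Pointer, wrapping
    it with to_span (defined below) lets us split it without invalidating the
    original:

        left, right = to_span(ptr).split(64)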
18 | 19 | """ 20 | alloc: AllocationInterface 21 | _offset: int 22 | _size: int 23 | 24 | def __post_init__(self) -> None: 25 | if self._offset + self._size > self.alloc.size(): 26 | raise Exception("span falls off the end of the underlying allocation", 27 | self._offset, self._size, self.alloc.size()) 28 | 29 | def offset(self) -> int: 30 | return self.alloc.offset() + self._offset 31 | 32 | def size(self) -> int: 33 | return self._size 34 | 35 | def split(self, size: int) -> t.Tuple[AllocationInterface, AllocationInterface]: 36 | if size > self.size(): 37 | raise Exception("called split with size", size, "greater than this allocation's total size", self.size()) 38 | return (SpanAllocation(self.alloc, self._offset, size), 39 | SpanAllocation(self.alloc, self._offset + size, self._size - size)) 40 | 41 | def merge(self, other: AllocationInterface) -> AllocationInterface: 42 | if not isinstance(other, SpanAllocation): 43 | raise Exception("can only merge SpanAllocation with SpanAllocation, not", other) 44 | if self.alloc == other.alloc: 45 | if self._offset + self._size == other._offset: 46 | return SpanAllocation(self.alloc, self._offset, self._size + other._size) 47 | else: 48 | raise Exception("spans are not adjacent") 49 | else: 50 | raise Exception("can't merge spans over two different allocations") 51 | 52 | def free(self, mapping) -> None: 53 | pass 54 | 55 | def to_span(ptr: Pointer) -> Pointer: 56 | "Wraps the pointer's allocation in SpanAllocation so it can be split freely" 57 | return Pointer( 58 | ptr.mapping, 59 | ptr.serializer, 60 | SpanAllocation(ptr.allocation, 0, ptr.allocation.size()), 61 | ptr.typ) 62 | -------------------------------------------------------------------------------- /python/rsyscall/linux/futex.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from __future__ import annotations 3 | import typing as t 4 | from rsyscall._raw import ffi, lib # type: ignore 5 | import enum 6 | from dataclasses import dataclass 7 | from rsyscall.struct import Struct 8 | from rsyscall.handle import Pointer, WrittenPointer 9 | 10 | FUTEX_WAITERS: int = lib.FUTEX_WAITERS 11 | FUTEX_TID_MASK: int = lib.FUTEX_TID_MASK 12 | 13 | @dataclass 14 | class FutexNode(Struct): 15 | # this is our bundle of struct robust_list with a futex. since it's tricky to handle the 16 | # reference management of taking a reference to just one field in a structure (the futex, in 17 | # cases where we don't care about the robust list), we always deal in the entire FutexNode 18 | # structure whenever we talk about futexes. that's a bit of overhead but we barely use futexes, 19 | # so it's fine. 20 | next: t.Optional[Pointer[FutexNode]] 21 | futex: int 22 | 23 | def to_bytes(self) -> bytes: 24 | struct = ffi.new('struct futex_node*', { 25 | # technically we're supposed to have a pointer to the first node in the robust list to 26 | # indicate the end. but that's tricky to do. so instead let's just use a NULL pointer; 27 | # the kernel will EFAULT when it hits the end. make sure not to map 0, or we'll 28 | # break. 
https://imgflip.com/i/2zwysg 29 | 'list': (ffi.cast('struct robust_list*', int(self.next.near)) if self.next else ffi.NULL,), 30 | 'futex': self.futex, 31 | }) 32 | return bytes(ffi.buffer(struct)) 33 | 34 | @classmethod 35 | def sizeof(cls) -> int: 36 | return ffi.sizeof('struct futex_node') 37 | 38 | @dataclass 39 | class RobustListHead(Struct): 40 | first: WrittenPointer[FutexNode] 41 | 42 | def to_bytes(self) -> bytes: 43 | struct = ffi.new('struct robust_list_head*', { 44 | 'list': (ffi.cast('struct robust_list*', int(self.first.near)),), 45 | 'futex_offset': ffi.offsetof('struct futex_node', 'futex'), 46 | 'list_op_pending': ffi.NULL, 47 | }) 48 | return bytes(ffi.buffer(struct)) 49 | 50 | @classmethod 51 | def sizeof(cls) -> int: 52 | return ffi.sizeof('struct robust_list_head') 53 | 54 | #### Classes #### 55 | import rsyscall.far 56 | from rsyscall.handle.pointer import WrittenPointer 57 | 58 | class FutexTask(rsyscall.far.Task): 59 | async def set_robust_list(self, head: WrittenPointer[RobustListHead]) -> None: 60 | with head.borrow(self): 61 | await _set_robust_list(self.sysif, head.near, head.size()) 62 | 63 | #### Raw syscalls #### 64 | import rsyscall.near.types as near 65 | from rsyscall.near.sysif import SyscallInterface 66 | from rsyscall.sys.syscall import SYS 67 | 68 | async def _set_robust_list(sysif: SyscallInterface, head: near.Address, len: int) -> None: 69 | await sysif.syscall(SYS.set_robust_list, head, len) 70 | -------------------------------------------------------------------------------- /python/rsyscall/sys/mount.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from rsyscall._raw import lib # type: ignore 3 | import os 4 | import typing as t 5 | import enum 6 | 7 | class MS(enum.IntFlag): 8 | NONE = 0 9 | BIND = lib.MS_BIND 10 | DIRSYNC = lib.MS_DIRSYNC 11 | LAZYTIME = lib.MS_LAZYTIME 12 | MANDLOCK = lib.MS_MANDLOCK 13 | MOVE = lib.MS_MOVE 14 | NODEV = lib.MS_NODEV 15 | NOEXEC = lib.MS_NOEXEC 16 | NOSUID = lib.MS_NOSUID 17 | RDONLY = lib.MS_RDONLY 18 | REC = lib.MS_REC 19 | RELATIME = lib.MS_RELATIME 20 | REMOUNT = lib.MS_REMOUNT 21 | SILENT = lib.MS_SILENT 22 | SLAVE = lib.MS_SLAVE 23 | STRICTATIME = lib.MS_STRICTATIME 24 | SYNCHRONOUS = lib.MS_SYNCHRONOUS 25 | UNBINDABLE = lib.MS_UNBINDABLE 26 | 27 | class UMOUNT(enum.IntFlag): 28 | NONE = 0 29 | FORCE = lib.MNT_FORCE 30 | DETACH = lib.MNT_DETACH 31 | EXPIRE = lib.MNT_EXPIRE 32 | NOFOLLOW = lib.UMOUNT_NOFOLLOW 33 | 34 | #### Classes #### 35 | import rsyscall.far 36 | from rsyscall.handle.pointer import WrittenPointer 37 | 38 | class MountTask(rsyscall.far.Task): 39 | async def mount(self, 40 | source: WrittenPointer[t.Union[str, os.PathLike]], 41 | target: WrittenPointer[t.Union[str, os.PathLike]], 42 | filesystemtype: WrittenPointer[t.Union[str, os.PathLike]], 43 | mountflags: MS, 44 | data: WrittenPointer[t.Union[str, os.PathLike]]) -> None: 45 | with source.borrow(self): 46 | with target.borrow(self): 47 | with filesystemtype.borrow(self): 48 | with data.borrow(self): 49 | try: 50 | return (await _mount( 51 | self.sysif, 52 | source.near, target.near, filesystemtype.near, 53 | mountflags, data.near)) 54 | except OSError as exn: 55 | exn.filename = source.value 56 | exn.filename2 = (target.value, filesystemtype.value, data.value) 57 | raise 58 | 59 | async def umount(self, target: WrittenPointer[t.Union[str, os.PathLike]], flags: UMOUNT=UMOUNT.NONE) -> None: 60 | with target.borrow(self): 61 | await _umount2(self.sysif, target.near, flags) 62 | 63 | 
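# A rough usage sketch (illustrative only; assumes `task` is a MountTask and
# that `task.ptr` is the pointer-allocating helper used elsewhere in rsyscall):
#
#   await task.mount(await task.ptr("none"), await task.ptr("/proc"),
#                    await task.ptr("proc"), MS.NONE, await task.ptr(""))
#
# which would mount a fresh procfs at /proc in this task's mount namespace.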
#### Raw syscalls #### 64 | import rsyscall.near.types as near 65 | from rsyscall.near.sysif import SyscallInterface 66 | from rsyscall.sys.syscall import SYS 67 | 68 | async def _mount(sysif: SyscallInterface, source: near.Address, target: near.Address, 69 | filesystemtype: near.Address, mountflags: MS, 70 | data: near.Address) -> None: 71 | await sysif.syscall(SYS.mount, source, target, filesystemtype, mountflags, data) 72 | 73 | async def _umount2(sysif: SyscallInterface, target: near.Address, flags: UMOUNT) -> None: 74 | await sysif.syscall(SYS.umount2, target, flags) 75 | -------------------------------------------------------------------------------- /c/src/rsyscall_stdin_bootstrap.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "rsyscall.h" 12 | 13 | static void receive_fds(const int sock, int *fds, int n) { 14 | union { 15 | struct cmsghdr hdr; 16 | char buf[CMSG_SPACE(sizeof(int) * n)]; 17 | } cmsg; 18 | char waste_data; 19 | struct iovec io = { 20 | .iov_base = &waste_data, 21 | .iov_len = sizeof(waste_data), 22 | }; 23 | struct msghdr msg = { 24 | .msg_name = NULL, 25 | .msg_namelen = 0, 26 | .msg_iov = &io, 27 | .msg_iovlen = 1, 28 | .msg_control = &cmsg, 29 | .msg_controllen = sizeof(cmsg), 30 | }; 31 | if (recvmsg(sock, &msg, MSG_CMSG_CLOEXEC) < 0) { 32 | err(1, "recvmsg(sock=%d)", sock); 33 | } 34 | if (msg.msg_controllen != sizeof(cmsg)) { 35 | err(1, "Message has wrong controllen"); 36 | } 37 | // if (cmsg.hdr.cmsg_len != sizeof(cmsg.buf)) { 38 | // err(1, "Control message has wrong length"); 39 | // } 40 | if (cmsg.hdr.cmsg_level != SOL_SOCKET) { 41 | err(1, "Control message has wrong level"); 42 | } 43 | if (cmsg.hdr.cmsg_type != SCM_RIGHTS) { 44 | err(1, "Control message has wrong type"); 45 | } 46 | memcpy(fds, CMSG_DATA(&cmsg.hdr), sizeof(int) * n); 47 | } 48 | 49 | int main(int argc, char** argv, char** envp) 50 | { 51 | if (argc != 1) errx(1, "usage: %s", argv[0]); 52 | const int connsock = 0; 53 | const int nfds = 3; 54 | int fds[nfds]; 55 | receive_fds(connsock, fds, nfds); 56 | const int syscall_fd = fds[0]; 57 | const int data_fd = fds[1]; 58 | const int connecting_fd = fds[2]; 59 | size_t envp_count = 0; 60 | for (; envp[envp_count] != NULL; envp_count++); 61 | struct rsyscall_stdin_bootstrap describe = { 62 | .symbols = rsyscall_symbol_table(), 63 | .pid = getpid(), 64 | .syscall_fd = syscall_fd, 65 | .data_fd = data_fd, 66 | .connecting_fd = connecting_fd, 67 | .envp_count = envp_count, 68 | }; 69 | int ret = write(data_fd, &describe, sizeof(describe)); 70 | if (ret != sizeof(describe)) { 71 | err(1, "write(data_fd, &describe, sizeof(describe))"); 72 | } 73 | for (; *envp != NULL; envp++) { 74 | char* cur = *envp; 75 | size_t size = strlen(cur); 76 | ret = write(data_fd, &size, sizeof(size)); 77 | if (ret != sizeof(size)) { 78 | err(1, "write(data_fd, &size, sizeof(size))"); 79 | } 80 | while (size > 0) { 81 | ret = write(data_fd, cur, size); 82 | if (ret < 0) { 83 | err(1, "write(data_fd=%d, cur, size=%lu)", data_fd, size); 84 | } 85 | size -= ret; 86 | cur += ret; 87 | } 88 | } 89 | rsyscall_server(syscall_fd, syscall_fd); 90 | } 91 | -------------------------------------------------------------------------------- /python/rsyscall/unistd/pipe.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 
from rsyscall._raw import ffi, lib # type: ignore 3 | from dataclasses import dataclass 4 | from rsyscall.struct import FixedSize, Serializer 5 | import rsyscall.near.types as near 6 | from rsyscall.handle.fd import FileDescriptorTask 7 | import typing as t 8 | if t.TYPE_CHECKING: 9 | from rsyscall.handle import FileDescriptor 10 | 11 | T_pipe = t.TypeVar('T_pipe', bound='Pipe') 12 | @dataclass 13 | class Pipe(FixedSize): 14 | "A pair of file descriptors, as written by pipe." 15 | read: FileDescriptor 16 | write: FileDescriptor 17 | 18 | def __getitem__(self, idx: int) -> FileDescriptor: 19 | if idx == 0: 20 | return self.read 21 | elif idx == 1: 22 | return self.write 23 | else: 24 | raise IndexError("only index 0 or 1 are valid for Pipe:", idx) 25 | 26 | def __iter__(self) -> t.Iterable[FileDescriptor]: 27 | return iter([self.read, self.write]) 28 | 29 | @classmethod 30 | def sizeof(cls) -> int: 31 | return ffi.sizeof('struct fdpair') 32 | 33 | @classmethod 34 | def get_serializer(cls: t.Type[T_pipe], task: FileDescriptorTask[FileDescriptor]) -> Serializer[T_pipe]: 35 | return PipeSerializer(cls, task) 36 | 37 | @dataclass 38 | class PipeSerializer(Serializer[T_pipe]): 39 | cls: t.Type[T_pipe] 40 | task: FileDescriptorTask[FileDescriptor] 41 | 42 | def to_bytes(self, pair: T_pipe) -> bytes: 43 | struct = ffi.new('struct fdpair*', (pair.read, pair.write)) 44 | return bytes(ffi.buffer(struct)) 45 | 46 | def from_bytes(self, data: bytes) -> T_pipe: 47 | struct = ffi.cast('struct fdpair const*', ffi.from_buffer(data)) 48 | def make(n: int) -> FileDescriptor: 49 | return self.task.make_fd_handle(near.FileDescriptor(int(n))) 50 | return self.cls(make(struct.first), make(struct.second)) 51 | 52 | #### Classes #### 53 | from rsyscall.fcntl import O 54 | from rsyscall.handle.pointer import Pointer, LinearPointer 55 | 56 | class PipeTask(FileDescriptorTask): 57 | async def pipe(self, buf: Pointer[Pipe], flags: O=O(0)) -> LinearPointer[Pipe]: 58 | """create pipe 59 | 60 | manpage: pipe2(2) 61 | """ 62 | # TODO we should force the serializer for the pipe to be using this task... 63 | # otherwise it could get deserialized by a task with which we share memory, 64 | # but not share file descriptor tables. 65 | # Maybe we could create the Serializer right here, and discard 66 | # the passed-in one? That wouldn't allow a different task in 67 | # the same fd table to receive the handles though. 
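        # What happens below: the kernel writes a struct fdpair into buf, and the
        # returned LinearPointer can then be deserialized through Pipe's serializer,
        # turning the two raw fd numbers into FileDescriptor handles (owned by
        # whichever task the serializer was created with, hence the concern above).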
68 | with buf.borrow(self): 69 | await _pipe(self.sysif, buf.near, flags|O.CLOEXEC) 70 | return buf._linearize() 71 | 72 | #### Raw syscalls #### 73 | from rsyscall.near.sysif import SyscallInterface 74 | from rsyscall.sys.syscall import SYS 75 | 76 | async def _pipe(sysif: SyscallInterface, pipefd: near.Address, flags: O) -> None: 77 | await sysif.syscall(SYS.pipe2, pipefd, flags) 78 | -------------------------------------------------------------------------------- /c/src/remote_cat.c: -------------------------------------------------------------------------------- 1 | /* Just an exploration of what using rsyscall in C looks like */ 2 | #define _GNU_SOURCE 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include /* For SYS_xxx definitions */ 11 | #include "rsyscall.h" 12 | 13 | struct remote_connection { 14 | int tofd; 15 | int fromfd; 16 | }; 17 | 18 | struct options { 19 | struct remote_connection remote; 20 | int infd; 21 | int outfd; 22 | }; 23 | 24 | int to_int(const char *s) { 25 | errno = 0; 26 | const int ret = strtol(s, NULL, 0); 27 | if (errno != 0) err(1, "strtol(%s)", s); 28 | return ret; 29 | } 30 | 31 | struct options parse_options(int argc, char** argv) 32 | { 33 | if (argc != 3) { 34 | errx(1, "Usage: %s ", argc ? argv[0] : "remote_cat"); 35 | } 36 | struct remote_connection remote = { 37 | .tofd = to_int(argv[1]), 38 | .fromfd = to_int(argv[2]), 39 | }; 40 | const struct options opt = { 41 | .remote = remote, 42 | .infd = 0, 43 | .outfd = 1, 44 | }; 45 | return opt; 46 | } 47 | 48 | long rsyscall(struct remote_connection remote, long number, 49 | long arg1, long arg2, long arg3, long arg4, long arg5, long arg6) { 50 | const struct rsyscall_syscall request = { 51 | .sys = number, 52 | .args = { arg1, arg2, arg3, arg4, arg5, arg6 }, 53 | }; 54 | int ret; 55 | ret = write(remote.tofd, &request, sizeof(request)); 56 | if (ret < 0) err(1, "write(remote.tofd, &request, sizeof(request))"); 57 | if (ret != sizeof(request)) errx(1, "write(remote.tofd, &request, sizeof(request)) partial write"); 58 | 59 | int64_t response; 60 | ret = read(remote.fromfd, &response, sizeof(response)); 61 | if (ret < 0) err(1, "read(remote.fromfd, &response, sizeof(response)) failed"); 62 | if (ret != sizeof(response)) err(1, "read(remote.fromfd, &response, sizeof(response)) partial read"); 63 | 64 | if (response < 0) { 65 | errno = -response; 66 | return -1; 67 | } else { 68 | return response; 69 | } 70 | } 71 | 72 | void do_remote_splice(struct remote_connection remote, int infd, int outfd) { 73 | int ret = rsyscall(remote, SYS_splice, infd, 0, outfd, 0, 4096, 0); 74 | if (ret < 0) err(1, "remote splice(%d, NULL, %d, NULL, NULL, 4096, 0) failed", infd, outfd); 75 | if (ret != 4096) warnx("remote splice(%d, NULL, %d, NULL, NULL, 4096, 0) partial splice of %d", infd, outfd, ret); 76 | if (ret == 0) exit(0); 77 | } 78 | 79 | noreturn void remote_cat(struct options opt) 80 | { 81 | for (;;) { 82 | do_remote_splice(opt.remote, opt.infd, opt.outfd); 83 | } 84 | } 85 | 86 | int main(int argc, char** argv) 87 | { 88 | const struct options opt = parse_options(argc, argv); 89 | remote_cat(opt); 90 | } 91 | -------------------------------------------------------------------------------- /python/rsyscall/fcntl.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from rsyscall._raw import lib, ffi # type: ignore 3 | import enum 4 | import typing as t 5 | 6 | class AT(enum.IntFlag): 7 | """The flags argument to 
many *at syscall; mostly specifies changes to path resolution. 8 | 9 | Except for `AT.REMOVEDIR`, these are all path resolution changes. 10 | 11 | """ 12 | NONE = 0 13 | FDCWD = lib.AT_FDCWD 14 | REMOVEDIR = lib.AT_REMOVEDIR 15 | "When passed to `FileDescriptor.unlinkat`, remove directories instead of linking files." 16 | EMPTY_PATH = lib.AT_EMPTY_PATH 17 | SYMLINK_NOFOLLOW = lib.AT_SYMLINK_NOFOLLOW 18 | SYMLINK_FOLLOW = lib.AT_SYMLINK_FOLLOW 19 | 20 | class O(enum.IntFlag): 21 | "The flags argument to open and some other syscalls." 22 | RDONLY = lib.O_RDONLY 23 | WRONLY = lib.O_WRONLY 24 | RDWR = lib.O_RDWR 25 | CREAT = lib.O_CREAT 26 | EXCL = lib.O_EXCL 27 | NOCTTY = lib.O_NOCTTY 28 | TRUNC = lib.O_TRUNC 29 | APPEND = lib.O_APPEND 30 | NONBLOCK = lib.O_NONBLOCK 31 | DSYNC = lib.O_DSYNC 32 | DIRECT = lib.O_DIRECT 33 | LARGEFILE = lib.O_LARGEFILE 34 | DIRECTORY = lib.O_DIRECTORY 35 | NOFOLLOW = lib.O_NOFOLLOW 36 | NOATIME = lib.O_NOATIME 37 | CLOEXEC = lib.O_CLOEXEC 38 | SYNC = lib.O_SYNC 39 | PATH = lib.O_PATH 40 | TMPFILE = lib.O_TMPFILE 41 | # internal kernel flags, visible through FUSE and possibly other places 42 | FMODE_EXEC = 0x20 43 | FMODE_NONOTIFY = 0x4000000 44 | 45 | class F(enum.IntEnum): 46 | "The cmd argument to fcntl; specifies what fcntl operation we want to do." 47 | SETFD = lib.F_SETFD 48 | GETFD = lib.F_GETFD 49 | SETFL = lib.F_SETFL 50 | ADD_SEALS = lib.F_ADD_SEALS 51 | GET_SEALS = lib.F_GET_SEALS 52 | 53 | class F_SEAL(enum.IntEnum): 54 | "The single argument used with fcntl F.ADD_SEALS and F.GET_SEALS" 55 | SEAL = lib.F_SEAL_SEAL 56 | SHRINK = lib.F_SEAL_SHRINK 57 | GROW = lib.F_SEAL_GROW 58 | WRITE = lib.F_SEAL_WRITE 59 | FUTURE_WRITE = lib.F_SEAL_FUTURE_WRITE 60 | 61 | class FD(enum.IntFlag): 62 | """The argument to fcntl F.SETFD and return value of fcntl F.GETFD 63 | 64 | This is totally different from `rsyscall.FileDescriptor`, despite 65 | "FD" being a frequently-used abbreviation for that class name. 66 | 67 | """ 68 | CLOEXEC = lib.FD_CLOEXEC 69 | 70 | #### Classes #### 71 | from rsyscall.handle.fd import BaseFileDescriptor 72 | 73 | class FcntlFileDescriptor(BaseFileDescriptor): 74 | async def fcntl(self, cmd: F, arg: t.Optional[int]=None) -> int: 75 | self._validate() 76 | try: 77 | return (await _fcntl(self.task.sysif, self.near, cmd, arg)) 78 | except OSError as exn: 79 | exn.filename = (self, cmd, arg) 80 | raise 81 | 82 | #### Raw syscalls #### 83 | import rsyscall.near.types as near 84 | from rsyscall.near.sysif import SyscallInterface 85 | from rsyscall.sys.syscall import SYS 86 | 87 | async def _fcntl(sysif: SyscallInterface, fd: near.FileDescriptor, 88 | cmd: F, arg: t.Optional[t.Union[int, near.Address]]=None) -> int: 89 | if arg is None: 90 | arg = 0 91 | return (await sysif.syscall(SYS.fcntl, fd, cmd, arg)) 92 | -------------------------------------------------------------------------------- /python/rsyscall/memory/allocation_interface.py: -------------------------------------------------------------------------------- 1 | "Defines AllocationInterface." 2 | from __future__ import annotations 3 | import abc 4 | import typing as t 5 | if t.TYPE_CHECKING: 6 | from rsyscall.handle import Task 7 | from rsyscall.sys.mman import MemoryMapping 8 | 9 | class UseAfterFreeError(Exception): 10 | pass 11 | 12 | class AllocationInterface: 13 | """Represents an allocation of a range of bytes in some file 14 | 15 | The file is typically mapped into memory. Essentially, this is a single allocation 16 | returned by malloc; but it's not necessarily tied to memory. 
17 | 18 | """ 19 | @abc.abstractmethod 20 | def offset(self) -> int: 21 | """Get the offset of this allocation in its memory mapping; throws if this allocation has been invalidated. 22 | 23 | TODO this should return the offset of the allocation 24 | 25 | """ 26 | pass 27 | @abc.abstractmethod 28 | def size(self) -> int: 29 | "Get the size of this allocation." 30 | pass 31 | @abc.abstractmethod 32 | def split(self, size: int) -> t.Tuple[AllocationInterface, AllocationInterface]: 33 | """Invalidate this allocation and split it into two adjacent allocations. 34 | 35 | These two allocations can be independently freed, or split again, ad infinitum; 36 | they can also be merged back together with merge. 37 | 38 | """ 39 | pass 40 | @abc.abstractmethod 41 | def merge(self, other: AllocationInterface) -> AllocationInterface: 42 | """Invalidate these two adjacent allocations and merge them into one; only works if they came from split. 43 | 44 | Call this on the left allocation returned from split, and pass the right allocation. 45 | 46 | Depending on the characteristics of the underlying allocator, this may also work 47 | for two unrelated allocations rather than just ones that came from split, but you 48 | certainly shouldn't try. 49 | 50 | """ 51 | pass 52 | @abc.abstractmethod 53 | def free(self, mapping: MemoryMapping) -> None: 54 | "Invalidate this allocation and return its range for re-allocation; must be called explicitly." 55 | pass 56 | 57 | class OutOfSpaceError(Exception): 58 | "Raised by malloc if the allocation request couldn't be satisfied." 59 | pass 60 | 61 | class AllocatorInterface: 62 | "A memory allocator; raises OutOfSpaceError if there's no more space." 63 | async def bulk_malloc(self, sizes: t.List[t.Tuple[int, int]]) -> t.Sequence[t.Tuple[MemoryMapping, AllocationInterface]]: 64 | # A naive bulk allocator 65 | return [await self.malloc(size, alignment) for size, alignment in sizes] 66 | 67 | @abc.abstractmethod 68 | async def malloc(self, size: int, alignment: int) -> t.Tuple[MemoryMapping, AllocationInterface]: ... 69 | 70 | def inherit(self, task: Task) -> AllocatorInterface: 71 | raise Exception("can't be inherited:", self) 72 | 73 | class UnusableAllocator(AllocatorInterface): 74 | "A memory allocator; raises OutOfSpaceError if there's no more space." 
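    # Note: despite the docstring above, this allocator never succeeds; malloc
    # always raises NotImplementedError, presumably so it can stand in where no
    # allocation should ever be attempted.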
75 | async def malloc(self, size: int, alignment: int) -> t.Tuple[MemoryMapping, AllocationInterface]: 76 | raise NotImplementedError 77 | 78 | -------------------------------------------------------------------------------- /python/rsyscall/tests/trio_test_case.py: -------------------------------------------------------------------------------- 1 | "A trio-enabled variant of unittest.TestCase" 2 | import trio 3 | import unittest 4 | import contextlib 5 | import functools 6 | import sys 7 | import types 8 | import warnings 9 | from rsyscall import local_process, Process 10 | 11 | @contextlib.contextmanager 12 | def raise_unraisables(): 13 | unraisables = [] 14 | try: 15 | orig_unraisablehook, sys.unraisablehook = sys.unraisablehook, unraisables.append 16 | yield 17 | finally: 18 | sys.unraisablehook = orig_unraisablehook 19 | if unraisables: 20 | raise trio.MultiError([unr.exc_value for unr in unraisables]) 21 | 22 | class TrioTestCase(unittest.TestCase): 23 | "A trio-enabled variant of unittest.TestCase" 24 | nursery: trio.Nursery 25 | process: Process = local_process 26 | 27 | async def asyncSetUp(self) -> None: 28 | "Asynchronously set up resources for tests in this TestCase" 29 | pass 30 | 31 | async def asyncTearDown(self) -> None: 32 | "Asynchronously clean up resources for tests in this TestCase" 33 | pass 34 | 35 | @classmethod 36 | async def asyncSetUpClass(cls) -> None: 37 | "Asynchronously set up class-level resources for tests in this TestCase" 38 | pass 39 | 40 | @classmethod 41 | async def asyncTearDownClass(cls) -> None: 42 | "Asynchronously clean up class-level resources for tests in this TestCase" 43 | pass 44 | 45 | @classmethod 46 | def setUpClass(cls) -> None: 47 | trio.run(cls.asyncSetUpClass) 48 | 49 | @classmethod 50 | def tearDownClass(cls) -> None: 51 | trio.run(cls.asyncTearDownClass) 52 | 53 | def __init__(self, methodName='runTest') -> None: 54 | test = getattr(type(self), methodName) 55 | @functools.wraps(test) 56 | async def test_with_setup() -> None: 57 | async with trio.open_nursery() as nursery: 58 | self.nursery = nursery 59 | await self.asyncSetUp() 60 | try: 61 | await test(self) 62 | finally: 63 | await self.asyncTearDown() 64 | nursery.cancel_scope.cancel() 65 | @functools.wraps(test_with_setup) 66 | def sync_test_with_setup(self) -> None: 67 | # Throw an exception if there were any "coroutine was never awaited" warnings, to fail the test. 68 | # See https://github.com/python-trio/pytest-trio/issues/86 69 | # We also need raise_unraisables, otherwise the exception is suppressed, since it's in __del__ 70 | with raise_unraisables(): 71 | # Restore the old warning filter after the test. 72 | with warnings.catch_warnings(): 73 | warnings.filterwarnings('error', message='.*was never awaited', category=RuntimeWarning) 74 | trio.run(test_with_setup) 75 | setattr(self, methodName, types.MethodType(sync_test_with_setup, self)) 76 | super().__init__(methodName) 77 | 78 | class Test(unittest.TestCase): 79 | def test_coro_warning(self) -> None: 80 | class Test(TrioTestCase): 81 | async def test(self): 82 | trio.sleep(0) 83 | with self.assertRaises(RuntimeWarning): 84 | Test('test').test() 85 | -------------------------------------------------------------------------------- /docs/perspective_intros/rpc.org: -------------------------------------------------------------------------------- 1 | * The perspective of the reader 2 | - My favorite language has lots of cool ways to abstract and simplify functionality. 
3 | - Functions are cool, types are cool, objects are cool, modules are cool, etc. 4 | - There are resources on remote hosts that I want to access, 5 | like CPU time, or storage space. 6 | - HTTP, RPC, etc, are not cool, 7 | because I can't use my normal way to abstract things. 8 | I (usually) can't pass functions or objects, 9 | I can't use my language's rich type system which is much more powerful than my RPC system, 10 | I don't have the modularity and abstraction features I like. 11 | - JSON, YAML, Protocol Buffers, etc, are not cool, 12 | for the same reasons. 13 | - [[https://github.com/pkamenarsky/replica][Replica]] and [[https://github.com/dbohdan/liveviews][Phoenix LiveView]] and the like are cool, 14 | because they let you write a single program which runs in one place, on your server, 15 | which nevertheless can use and manipulate remote resources: 16 | the DOM in a user's browser tab. 17 | - Mobile code is cool, 18 | other forms of automatically compiling a program 19 | into a distributed system which transparently uses RPC to communicate are cool, 20 | but they require a lot of runtime and language-level support, 21 | which is uncommon. 22 | Even basic support for remote manipulation is better. 23 | - Practically, I want to access resources and run processes across multiple Linux hosts, 24 | and I'd like to use all the abstraction techniques of my favorite language to do that, 25 | rather than write out a static, serialized, lowest-common-denominator description, 26 | or using some constrained DSL separate from my usual language. 27 | * rsyscall allows a single Python program to work with Linux resources across multiple nodes 28 | We have to run processes on multiple nodes, 29 | which communicate over the narrow channel of RPC. 30 | All the beautiful abstraction of our favorite language is lost 31 | when splitting things out over RPC. 32 | 33 | And what's worse, 34 | we still have to find out some way to run these processes on these hosts, 35 | and get them set up with the resources they need. 36 | 37 | We could write out some static configuration and hand it to some service, 38 | but that's the same defeat again; 39 | we can't use the language features to abstract and simplify things, 40 | because we ultimately have to describe everything 41 | in a static, lowest-common-denominator format. 42 | We could use some constrained DSL; 43 | but that's not our actual language, with all the abstraction features we want. 44 | 45 | If your preferred language is Python, or if Python is close enough, 46 | you're in luck. 47 | rsyscall allows a single Python program to work with Linux resources across multiple nodes. 48 | 49 | We can write completely conventional Python, 50 | running in a single Python interpreter in a single place, 51 | and use Python-level features 52 | like functions, objects, generators, coroutines, 53 | and (with Python 3 type annotations) static types. 54 | 55 | And we can use Linux resources, including child processes, across multiple nodes 56 | in the same way we would use them locally. 57 | For example, we can write a function which starts up a process, 58 | and call it for each entry in a list of hosts, 59 | and then run a coroutine to monitor that process and restart it if it dies. 
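As a rough sketch of what that last example could look like (illustrative only:
it assumes =procs= is a list of already-connected rsyscall =Process= objects,
one per host, and =cmd= is a =Command= to run on each):
#+begin_src python
import trio

async def supervise(proc, cmd):
    # Run cmd as a child of this (possibly remote) process,
    # and restart it whenever it exits.
    while True:
        child_proc = await proc.fork()
        child = await child_proc.exec(cmd)
        try:
            await child.check()  # waits for exit; raises if it failed
        except Exception:
            pass                 # either way, loop around and restart it

async def main(procs, cmd):
    async with trio.open_nursery() as nursery:
        for proc in procs:
            nursery.start_soon(supervise, proc, cmd)
#+end_src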
60 | -------------------------------------------------------------------------------- /python/rsyscall/sys/syscall.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from rsyscall._raw import ffi, lib # type: ignore 3 | import enum 4 | 5 | class SYS(enum.IntEnum): 6 | """The syscall number argument passed to the low-level `syscall` method and underlying instruction 7 | 8 | Passing one of these numbers is how a userspace program indicates to the kernel which 9 | syscall it wants to call. 10 | 11 | """ 12 | accept4 = lib.SYS_accept4 13 | bind = lib.SYS_bind 14 | capget = lib.SYS_capget 15 | capset = lib.SYS_capset 16 | chdir = lib.SYS_chdir 17 | chroot = lib.SYS_chroot 18 | clone = lib.SYS_clone 19 | close = lib.SYS_close 20 | connect = lib.SYS_connect 21 | dup3 = lib.SYS_dup3 22 | epoll_create1 = lib.SYS_epoll_create1 23 | epoll_ctl = lib.SYS_epoll_ctl 24 | epoll_wait = lib.SYS_epoll_wait 25 | eventfd2 = lib.SYS_eventfd2 26 | execve = lib.SYS_execve 27 | execveat = lib.SYS_execveat 28 | exit = lib.SYS_exit 29 | faccessat = lib.SYS_faccessat 30 | fchdir = lib.SYS_fchdir 31 | fchmod = lib.SYS_fchmod 32 | fcntl = lib.SYS_fcntl 33 | fstat = lib.SYS_fstat 34 | ftruncate = lib.SYS_ftruncate 35 | getdents64 = lib.SYS_getdents64 36 | getgid = lib.SYS_getgid 37 | getpeername = lib.SYS_getpeername 38 | getpgid = lib.SYS_getpgid 39 | getpid = lib.SYS_getpid 40 | getpriority = lib.SYS_getpriority 41 | getsockname = lib.SYS_getsockname 42 | getsockopt = lib.SYS_getsockopt 43 | getuid = lib.SYS_getuid 44 | inotify_add_watch = lib.SYS_inotify_add_watch 45 | inotify_init1 = lib.SYS_inotify_init1 46 | inotify_rm_watch = lib.SYS_inotify_rm_watch 47 | ioctl = lib.SYS_ioctl 48 | kill = lib.SYS_kill 49 | linkat = lib.SYS_linkat 50 | listen = lib.SYS_listen 51 | lseek = lib.SYS_lseek 52 | memfd_create = lib.SYS_memfd_create 53 | mkdirat = lib.SYS_mkdirat 54 | madvise = lib.SYS_madvise 55 | mmap = lib.SYS_mmap 56 | mount = lib.SYS_mount 57 | munmap = lib.SYS_munmap 58 | openat = lib.SYS_openat 59 | pipe2 = lib.SYS_pipe2 60 | prctl = lib.SYS_prctl 61 | pread64 = lib.SYS_pread64 62 | preadv2 = lib.SYS_preadv2 63 | prlimit64 = lib.SYS_prlimit64 64 | pwrite64 = lib.SYS_pwrite64 65 | pwritev2 = lib.SYS_pwritev2 66 | read = lib.SYS_read 67 | readlinkat = lib.SYS_readlinkat 68 | recvfrom = lib.SYS_recvfrom 69 | recvmsg = lib.SYS_recvmsg 70 | renameat2 = lib.SYS_renameat2 71 | rt_sigaction = lib.SYS_rt_sigaction 72 | rt_sigprocmask = lib.SYS_rt_sigprocmask 73 | sched_setaffinity = lib.SYS_sched_setaffinity 74 | sched_getaffinity = lib.SYS_sched_getaffinity 75 | sendmsg = lib.SYS_sendmsg 76 | sendto = lib.SYS_sendto 77 | set_robust_list = lib.SYS_set_robust_list 78 | set_tid_address = lib.SYS_set_tid_address 79 | setns = lib.SYS_setns 80 | setpgid = lib.SYS_setpgid 81 | setpriority = lib.SYS_setpriority 82 | setsid = lib.SYS_setsid 83 | setsockopt = lib.SYS_setsockopt 84 | shutdown = lib.SYS_shutdown 85 | signalfd4 = lib.SYS_signalfd4 86 | socket = lib.SYS_socket 87 | socketpair = lib.SYS_socketpair 88 | symlinkat = lib.SYS_symlinkat 89 | timerfd_create = lib.SYS_timerfd_create 90 | timerfd_gettime = lib.SYS_timerfd_gettime 91 | timerfd_settime = lib.SYS_timerfd_settime 92 | umount2 = lib.SYS_umount2 93 | unlinkat = lib.SYS_unlinkat 94 | unshare = lib.SYS_unshare 95 | waitid = lib.SYS_waitid 96 | write = lib.SYS_write 97 | -------------------------------------------------------------------------------- /docs/perspective_intros/djbware.org: 
-------------------------------------------------------------------------------- 1 | * The perspective of the reader 2 | - Shell pipelines are cool 3 | - UCSPI (tcpclient, tcpserver, etc.) is cool 4 | - s6, execline, etc. are cool 5 | - [[http://www.catb.org/~esr/writings/taoup/html/ch06s06.html][Bernstein chaining]] is cool 6 | - inetd is cool 7 | - "Daemons" should just run in the foreground, not fork away into the background. 8 | - Software should operate on the file descriptors and environment it inherits, 9 | instead of duplicating the logic to set everything up. 10 | - Don't open a network socket and bind it to a host and port specified in some configuration; 11 | inherit the socket from your parent instead, 12 | let them set it up, 13 | and let them choose whether you listen on the network or on a Unix socket or something else. 14 | - Software should do one thing and do it well. 15 | - Unix gives us a toolbox full of tools that we can compose to build functionality; 16 | we should build software using the tools that Unix already gives us. 17 | - To achieve some purpose, 18 | we should prefer to add new orthogonal tools to this toolbox, 19 | and then compose them together with existing tools using scripts. 20 | * rsyscall allows composing Unix tools using *all* the features of Unix 21 | We want to compose together tools from the Unix toolbox to build our systems. 22 | Each tool runs in a separate process, 23 | and communicates over pipes or other communication mechanisms set up by the shell. 24 | 25 | But the traditional Unix shell is limited in what it can express. 26 | There are many features in Unix which we can't use from the shell. 27 | So we add more tools, such as tcpclient or s6 or execline, 28 | which let us use those features. 29 | 30 | Things like creating a socketpair with =socketpair=, 31 | so that two processes can communicate bidirectionally; 32 | or creating a listening socket with =socket=, =bind=, and =listen=, 33 | which a process can inherit and accept connections on; 34 | or opening a file with =open(..., O_EXCL|O_CREAT)= to create it atomically. 35 | 36 | These Unix syscalls are easy to use from C. 37 | But it's not easy, in general, to compose processes in C. 38 | Creating processes with =fork= makes our code complex and hard to read, 39 | but we have no other choice if we want the full power of Unix; 40 | we need to be able to call arbitrary system calls in our children to set them up. 41 | 42 | rsyscall is a new shell which provides access to all the Unix system calls, like C, 43 | but which lets you start processes with normal, straight-line, fork-free code, 44 | like the traditional shell. 45 | 46 | With rsyscall, 47 | we can create our child processes and call system calls from inside them to set them up. 48 | Our process creation logic is simple, 49 | while still allowing us to use any Unix feature. 50 | 51 | We can compose tools from the Unix toolbox 52 | using all the system calls Unix provides, 53 | instead of the limited subset provided by the shell and various wrappers. 54 | 55 | In this way, we can easily build arbitrarily complex systems out of Unix tools. 56 | 57 | The increased ability to manipulate the environment ahead of time 58 | will encourage Unix hackers to rely on "already correct" environment, 59 | so their software tools will be smaller, more composable, and more reusable. 
60 | The result will be a move away from 61 | large, monolithic software packages that manage "everything", 62 | towards smaller general-purpose tools, 63 | composed together by purpose-specific scripts. 64 | -------------------------------------------------------------------------------- /python/rsyscall/unistd/io.py: -------------------------------------------------------------------------------- 1 | "The subset of functionality in unistd.h which relates to IO" 2 | from __future__ import annotations 3 | from rsyscall._raw import lib # type: ignore 4 | import enum 5 | import typing as t 6 | 7 | __all__ = [ 8 | "SEEK", 9 | "IOFileDescriptor", 10 | "SeekableFileDescriptor", 11 | ] 12 | 13 | class SEEK(enum.IntEnum): 14 | "The whence argument to lseek." 15 | SET = lib.SEEK_SET 16 | CUR = lib.SEEK_CUR 17 | END = lib.SEEK_END 18 | DATA = lib.SEEK_DATA 19 | HOLE = lib.SEEK_HOLE 20 | 21 | #### Classes #### 22 | from rsyscall.handle.fd import BaseFileDescriptor, FileDescriptorTask 23 | from rsyscall.handle.pointer import Pointer, WrittenPointer, ReadablePointer 24 | 25 | T = t.TypeVar('T') 26 | class IOFileDescriptor(BaseFileDescriptor): 27 | async def read(self, buf: Pointer[T]) -> t.Tuple[ReadablePointer[T], Pointer[T]]: 28 | """read from a file descriptor 29 | 30 | manpage: read(2) 31 | """ 32 | self._validate() 33 | buf.check_address_space(self.task) 34 | ret = await _read(self.task.sysif, self.near, buf.near, buf.size()) 35 | return buf.readable_split(ret) 36 | 37 | async def write(self, buf: Pointer[T]) -> t.Tuple[Pointer[T], Pointer[T]]: 38 | """write to a file descriptor 39 | 40 | manpage: write(2) 41 | """ 42 | self._validate() 43 | buf.check_address_space(self.task) 44 | ret = await _write(self.task.sysif, self.near, buf.near, buf.size()) 45 | return buf.split(ret) 46 | 47 | class SeekableFileDescriptor(IOFileDescriptor): 48 | async def pread(self, buf: Pointer[T], offset: int) -> t.Tuple[ReadablePointer[T], Pointer[T]]: 49 | self._validate() 50 | with buf.borrow(self.task): 51 | ret = await _pread(self.task.sysif, self.near, buf.near, buf.size(), offset) 52 | return buf.readable_split(ret) 53 | 54 | async def pwrite(self, buf: Pointer[T], offset: int) -> t.Tuple[Pointer[T], Pointer[T]]: 55 | self._validate() 56 | with buf.borrow(self.task): 57 | ret = await _pwrite(self.task.sysif, self.near, buf.near, buf.size(), offset) 58 | return buf.split(ret) 59 | 60 | async def lseek(self, offset: int, whence: SEEK) -> int: 61 | self._validate() 62 | return (await _lseek(self.task.sysif, self.near, offset, whence)) 63 | 64 | #### Raw syscalls #### 65 | import rsyscall.near.types as near 66 | from rsyscall.near.sysif import SyscallInterface 67 | from rsyscall.sys.syscall import SYS 68 | 69 | async def _read(sysif: SyscallInterface, fd: near.FileDescriptor, 70 | buf: near.Address, count: int) -> int: 71 | return (await sysif.syscall(SYS.read, fd, buf, count)) 72 | 73 | async def _write(sysif: SyscallInterface, fd: near.FileDescriptor, 74 | buf: near.Address, count: int) -> int: 75 | return (await sysif.syscall(SYS.write, fd, buf, count)) 76 | 77 | async def _pread(sysif: SyscallInterface, fd: near.FileDescriptor, 78 | buf: near.Address, count: int, offset: int) -> int: 79 | return (await sysif.syscall(SYS.pread64, fd, buf, count, offset)) 80 | 81 | async def _pwrite(sysif: SyscallInterface, fd: near.FileDescriptor, 82 | buf: near.Address, count: int, offset: int) -> int: 83 | return (await sysif.syscall(SYS.pwrite64, fd, buf, count, offset)) 84 | 85 | async def _lseek(sysif: SyscallInterface, fd: 
near.FileDescriptor, 86 | offset: int, whence: SEEK) -> int: 87 | return (await sysif.syscall(SYS.lseek, fd, offset, whence)) 88 | -------------------------------------------------------------------------------- /python/rsyscall/sys/prctl.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from rsyscall._raw import lib # type: ignore 3 | import typing as t 4 | import enum 5 | 6 | __all__ = [ 7 | "PR", 8 | "PR_CAP_AMBIENT", 9 | "PrctlTask", 10 | ] 11 | 12 | class PR(enum.IntEnum): 13 | GET_NAME = lib.PR_GET_NAME 14 | SET_NAME = lib.PR_SET_NAME 15 | SET_PDEATHSIG = lib.PR_SET_PDEATHSIG 16 | CAP_AMBIENT = lib.PR_CAP_AMBIENT 17 | 18 | class PR_CAP_AMBIENT(enum.IntEnum): 19 | RAISE = lib.PR_CAP_AMBIENT_RAISE 20 | 21 | #### Classes #### 22 | import rsyscall.far 23 | from rsyscall.handle.pointer import Pointer, WrittenPointer, ReadablePointer 24 | from rsyscall.sys.capability import CAP 25 | from rsyscall.signal import SIG 26 | 27 | class PrctlTask(rsyscall.far.Task): 28 | "A base class providing the `prctl` syscall" 29 | 30 | async def prctl_set_pdeathsig(self, option: t.Literal[PR.SET_PDEATHSIG], arg2: t.Union[SIG, t.Literal[0]]) -> None: 31 | await _prctl(self.sysif, option, arg2) 32 | 33 | async def prctl_cap_ambient(self, option: t.Literal[PR.CAP_AMBIENT], arg2: PR_CAP_AMBIENT, arg3: CAP) -> int: 34 | return await _prctl(self.sysif, option, arg2, arg3) 35 | 36 | async def prctl_get_name(self, option: t.Literal[PR.GET_NAME], arg2: Pointer[str]) -> ReadablePointer[str]: 37 | with arg2.borrow(self) as arg2_n: 38 | await _prctl(self.sysif, option, arg2_n) 39 | return arg2.readable_split(16)[0] 40 | 41 | async def prctl_set_name(self, option: t.Literal[PR.SET_NAME], arg2: WrittenPointer[str]) -> None: 42 | with arg2.borrow(self) as arg2_n: 43 | await _prctl(self.sysif, option, arg2_n) 44 | return None 45 | 46 | @t.overload 47 | async def prctl(self, option: t.Literal[PR.SET_PDEATHSIG], arg2: t.Union[SIG, t.Literal[0]]) -> None: ... 48 | @t.overload 49 | async def prctl(self, option: t.Literal[PR.CAP_AMBIENT], arg2: PR_CAP_AMBIENT, arg3: CAP) -> int: ... 50 | @t.overload 51 | async def prctl(self, option: t.Literal[PR.GET_NAME], arg2: Pointer[str]) -> ReadablePointer[str]: ... 52 | @t.overload 53 | async def prctl(self, option: t.Literal[PR.SET_NAME], arg2: WrittenPointer[str]) -> None: ... 54 | 55 | async def prctl(self, option: PR, arg2, arg3=0, arg4=0, arg5=0) -> t.Union[int, Pointer, None]: 56 | """operations on a process or process 57 | 58 | This has overloads for each prctl option, so it's type-safe to use this method. 
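        A rough usage sketch (illustrative only):

            await task.prctl(PR.SET_PDEATHSIG, SIG.KILL)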
59 | 60 | man: prctl(2) 61 | """ 62 | if option is PR.SET_PDEATHSIG: 63 | return await self.prctl_set_pdeathsig(option, arg2) 64 | elif option is PR.CAP_AMBIENT: 65 | return await self.prctl_cap_ambient(option, arg2, arg3) 66 | elif option is PR.GET_NAME: 67 | return await self.prctl_get_name(option, arg2) 68 | elif option is PR.SET_NAME: 69 | return await self.prctl_set_name(option, arg2) 70 | else: 71 | return await _prctl(self.sysif, option, arg2, arg3, arg4, arg5) 72 | 73 | #### Raw syscalls #### 74 | import rsyscall.near.types as near 75 | from rsyscall.near.sysif import SyscallInterface 76 | from rsyscall.sys.syscall import SYS 77 | 78 | async def _prctl(sysif: SyscallInterface, option: PR, arg2: t.Union[int, near.Address], 79 | arg3: t.Optional[t.Union[int, near.Address]]=None, arg4: t.Optional[t.Union[int, near.Address]]=None, 80 | arg5: t.Optional[t.Union[int, near.Address]]=None) -> int: 81 | if arg3 is None: 82 | arg3 = 0 83 | if arg4 is None: 84 | arg4 = 0 85 | if arg5 is None: 86 | arg5 = 0 87 | return (await sysif.syscall(SYS.prctl, option, arg2, arg3, arg4, arg5)) 88 | -------------------------------------------------------------------------------- /python/rsyscall/sys/timerfd.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from __future__ import annotations 3 | from rsyscall._raw import lib # type: ignore 4 | from rsyscall.near.sysif import SyscallInterface 5 | from rsyscall.sys.syscall import SYS 6 | import enum 7 | import rsyscall.near.types as near 8 | import typing as t 9 | from rsyscall.handle.fd import BaseFileDescriptor, FileDescriptorTask 10 | from rsyscall.handle.pointer import Pointer, WrittenPointer 11 | 12 | # re-exported 13 | from rsyscall.time import Timespec, Itimerspec 14 | 15 | __all__ = [ 16 | "CLOCK", 17 | "TFD", 18 | "TFD_TIMER", 19 | "TimerFileDescriptor", 20 | "Timespec", 21 | "Itimerspec", 22 | ] 23 | 24 | class CLOCK(enum.IntEnum): 25 | REALTIME = lib.CLOCK_REALTIME 26 | MONOTONIC = lib.CLOCK_MONOTONIC 27 | BOOTTIME = lib.CLOCK_BOOTTIME 28 | REALTIME_ALARM = lib.CLOCK_REALTIME_ALARM 29 | BOOTTIME_ALARM = lib.CLOCK_BOOTTIME_ALARM 30 | 31 | class TFD(enum.IntFlag): 32 | NONE = 0 33 | CLOEXEC = lib.EFD_CLOEXEC 34 | NONBLOCK = lib.EFD_NONBLOCK 35 | 36 | class TFD_TIMER(enum.IntFlag): 37 | NONE = 0 38 | ABSTIME = lib.TFD_TIMER_ABSTIME 39 | CANCEL_ON_SET = lib.TFD_TIMER_CANCEL_ON_SET 40 | 41 | async def _timerfd_create(sysif: SyscallInterface, clockid: CLOCK, flags: TFD) -> near.FileDescriptor: 42 | return near.FileDescriptor(await sysif.syscall(SYS.timerfd_create, clockid, flags)) 43 | 44 | async def _timerfd_settime(sysif: SyscallInterface, fd: near.FileDescriptor, 45 | flags: TFD_TIMER, 46 | new_value: near.Address, old_value: t.Optional[near.Address]) -> None: 47 | if old_value is None: 48 | old_value = 0 # type: ignore 49 | await sysif.syscall(SYS.timerfd_settime, fd, flags, new_value, old_value) 50 | 51 | async def _timerfd_gettime(sysif: SyscallInterface, fd: near.FileDescriptor, 52 | curr_value: near.Address) -> None: 53 | await sysif.syscall(SYS.timerfd_gettime, fd, curr_value) 54 | 55 | 56 | T_fd = t.TypeVar('T_fd', bound='TimerFileDescriptor') 57 | class TimerFileDescriptor(BaseFileDescriptor): 58 | @t.overload 59 | async def timerfd_settime( 60 | self, flags: TFD_TIMER, new_value: WrittenPointer[Itimerspec]) -> None: ... 
61 | 62 | @t.overload 63 | async def timerfd_settime( 64 | self, flags: TFD_TIMER, new_value: WrittenPointer[Itimerspec], 65 | old_value: Pointer[Itimerspec]) -> Pointer[Itimerspec]: ... 66 | 67 | async def timerfd_settime( 68 | self, flags: TFD_TIMER, new_value: WrittenPointer[Itimerspec], 69 | old_value: t.Optional[Pointer[Itimerspec]]=None) -> t.Optional[Pointer[Itimerspec]]: 70 | self._validate() 71 | with new_value.borrow(self.task): 72 | if old_value: 73 | with old_value.borrow(self.task): 74 | await _timerfd_settime( 75 | self.task.sysif, self.near, flags, new_value.near, old_value.near) 76 | return old_value 77 | else: 78 | await _timerfd_settime( 79 | self.task.sysif, self.near, flags, new_value.near, None) 80 | return None 81 | 82 | async def timerfd_gettime(self, curr_value: Pointer[Itimerspec]) -> Pointer[Itimerspec]: 83 | self._validate() 84 | with curr_value.borrow(self.task): 85 | await _timerfd_gettime(self.task.sysif, self.near, curr_value.near) 86 | return curr_value 87 | 88 | class TimerfdTask(FileDescriptorTask[T_fd]): 89 | async def timerfd_create(self, clockid: CLOCK, flags: TFD=TFD.NONE) -> T_fd: 90 | return self.make_fd_handle(await _timerfd_create(self.sysif, clockid, flags|TFD.CLOEXEC)) 91 | -------------------------------------------------------------------------------- /python/rsyscall/netinet/tcp.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from rsyscall._raw import ffi, lib # type: ignore 3 | import enum 4 | 5 | __all__ = [ 6 | 'TCP', 7 | ] 8 | 9 | class TCP(enum.IntEnum): 10 | "User-settable options (used with setsockopt)" 11 | NODELAY = lib.TCP_NODELAY # Don't delay send to coalesce packets 12 | MAXSEG = lib.TCP_MAXSEG # Set maximum segment size 13 | CORK = lib.TCP_CORK # Control sending of partial frames 14 | KEEPIDLE = lib.TCP_KEEPIDLE # Start keeplives after this period 15 | KEEPINTVL = lib.TCP_KEEPINTVL # Interval between keepalives 16 | KEEPCNT = lib.TCP_KEEPCNT # Number of keepalives before death 17 | SYNCNT = lib.TCP_SYNCNT # Number of SYN retransmits 18 | LINGER2 = lib.TCP_LINGER2 # Life time of orphaned FIN-WAIT-2 state 19 | DEFER_ACCEPT = lib.TCP_DEFER_ACCEPT # Wake up listener only when data arrive 20 | WINDOW_CLAMP = lib.TCP_WINDOW_CLAMP # Bound advertised window 21 | INFO = lib.TCP_INFO # Information about this connection. 22 | QUICKACK = lib.TCP_QUICKACK # Bock/reenable quick ACKs. 23 | CONGESTION = lib.TCP_CONGESTION # Congestion control algorithm. 24 | MD5SIG = lib.TCP_MD5SIG # TCP MD5 Signature (RFC2385) 25 | COOKIE_TRANSACTIONS = lib.TCP_COOKIE_TRANSACTIONS # TCP Cookie Transactions 26 | THIN_LINEAR_TIMEOUTS = lib.TCP_THIN_LINEAR_TIMEOUTS # Use linear timeouts for thin streams 27 | THIN_DUPACK = lib.TCP_THIN_DUPACK # Fast retrans. after 1 dupack 28 | USER_TIMEOUT = lib.TCP_USER_TIMEOUT # How long for loss retry before timeout 29 | REPAIR = lib.TCP_REPAIR # TCP sock is under repair right now 30 | REPAIR_QUEUE = lib.TCP_REPAIR_QUEUE # Set TCP queue to repair 31 | QUEUE_SEQ = lib.TCP_QUEUE_SEQ # Set sequence number of repaired queue. 32 | REPAIR_OPTIONS = lib.TCP_REPAIR_OPTIONS # Repair TCP connection options 33 | FASTOPEN = lib.TCP_FASTOPEN # Enable FastOpen on listeners 34 | TIMESTAMP = lib.TCP_TIMESTAMP # TCP time stamp 35 | NOTSENT_LOWAT = lib.TCP_NOTSENT_LOWAT # Limit number of unsent bytes in write queue. 36 | CC_INFO = lib.TCP_CC_INFO # Get Congestion Control (optional) info. 37 | SAVE_SYN = lib.TCP_SAVE_SYN # Record SYN headers for new connections. 
38 | SAVED_SYN = lib.TCP_SAVED_SYN # Get SYN headers recorded for connection. 39 | REPAIR_WINDOW = lib.TCP_REPAIR_WINDOW # Get/set window parameters. 40 | FASTOPEN_CONNECT = lib.TCP_FASTOPEN_CONNECT # Attempt FastOpen with connect. 41 | ULP = lib.TCP_ULP # Attach a ULP to a TCP connection. 42 | MD5SIG_EXT = lib.TCP_MD5SIG_EXT # TCP MD5 Signature with extensions. 43 | FASTOPEN_KEY = lib.TCP_FASTOPEN_KEY # Set the key for Fast Open (cookie). 44 | FASTOPEN_NO_COOKIE = lib.TCP_FASTOPEN_NO_COOKIE # Enable TFO without a TFO cookie. 45 | ZEROCOPY_RECEIVE = lib.TCP_ZEROCOPY_RECEIVE # Perform a zerocopy receive 46 | INQ = lib.TCP_INQ # Notify bytes available to read as a cmsg on read. 47 | CM_INQ = lib.TCP_CM_INQ # CMSG type for bytes available 48 | TX_DELAY = lib.TCP_TX_DELAY # Delay outgoing packets by XX usec. 49 | -------------------------------------------------------------------------------- /research/extensible_visitor.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import typing as t 3 | import abc 4 | T = t.TypeVar('T') 5 | 6 | #### Dog is one variant 7 | class DogImpl: 8 | def bark(self) -> str: 9 | return "bark bark bark" 10 | 11 | def accept(self, visitor: DogVisitor[T]) -> T: 12 | return visitor.visit_dog(self) 13 | 14 | class DogVisitor(t.Generic[T]): 15 | # The visitor method doesn't have to have a different name per variant, but 16 | # overloading is a hassle in Python, so we use different names for this example. 17 | @abc.abstractmethod 18 | def visit_dog(self, dog: DogImpl) -> T: ... 19 | 20 | #### Cat is another variant 21 | class CatImpl: 22 | def lives(self) -> int: 23 | return 9 24 | 25 | def accept(self, visitor: CatVisitor[T]) -> T: 26 | return visitor.visit_cat(self) 27 | 28 | class CatVisitor(t.Generic[T]): 29 | @abc.abstractmethod 30 | def visit_cat(self, cat: CatImpl) -> T: ... 31 | 32 | #### Mammal can be a Cat or a Dog 33 | class Mammal: 34 | @abc.abstractmethod 35 | def accept(self, visitor: MammalVisitor[T]) -> T: ... 36 | 37 | class MammalVisitor(t.Generic[T], DogVisitor[T], CatVisitor[T]): 38 | pass 39 | 40 | # "constructors" for the Mammal variant 41 | class MammalCat(CatImpl, Mammal): 42 | pass 43 | 44 | class MammalDog(DogImpl, Mammal): 45 | pass 46 | 47 | # we can write normal programs using the visitor pattern 48 | class Petter(MammalVisitor[str]): 49 | def visit_cat(self, cat: CatImpl) -> str: 50 | return f"petting this cat with {cat.lives()} lives" 51 | 52 | def visit_dog(self, dog: DogImpl) -> str: 53 | return f"petting this dog; it says {dog.bark()}" 54 | 55 | def pet_mammals(mammals: t.List[Mammal]) -> None: 56 | petter = Petter() 57 | for mammal in mammals: 58 | print(mammal.accept(petter)) 59 | 60 | my_mammals: t.List[Mammal] = [MammalDog(), MammalCat(), MammalDog()] 61 | 62 | pet_mammals(my_mammals) 63 | 64 | #### Fish is a third new variant 65 | class FishImpl: 66 | def weight(self) -> float: 67 | return 2.334 68 | 69 | def accept(self, visitor: FishVisitor[T]) -> T: 70 | return visitor.visit_fish(self) 71 | 72 | class FishVisitor(t.Generic[T]): 73 | @abc.abstractmethod 74 | def visit_fish(self, fish: FishImpl) -> T: ... 75 | 76 | #### Animal can be Cat, Dog, or Fish 77 | class Animal: 78 | @abc.abstractmethod 79 | def accept(self, visitor: AnimalVisitor[T]) -> T: ... 
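# Note: AnimalVisitor below inherits from both MammalVisitor and FishVisitor, so existing MammalVisitor-based code (like Petter above) keeps working unchanged, while new visitors can additionally handle Fish.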
80 | 81 | class AnimalVisitor(t.Generic[T], MammalVisitor[T], FishVisitor[T]): 82 | pass 83 | 84 | class AnimalCat(CatImpl, Animal): 85 | pass 86 | 87 | class AnimalDog(DogImpl, Animal): 88 | pass 89 | 90 | class AnimalFish(FishImpl, Animal): 91 | pass 92 | 93 | # our old programs still work; 94 | pet_mammals(my_mammals) 95 | # but we can write new programs too that work on the new expanded class 96 | class Weigher(AnimalVisitor[float]): 97 | def visit_cat(self, cat: CatImpl) -> float: 98 | return float(cat.lives()) 99 | 100 | def visit_dog(self, dog: DogImpl) -> float: 101 | # estimate weight by size of bark 102 | return float(len(dog.bark())) 103 | 104 | def visit_fish(self, fish: FishImpl) -> float: 105 | return fish.weight() 106 | 107 | def total_weight(animals: t.List[Animal]) -> float: 108 | weigher = Weigher() 109 | return sum(animal.accept(weigher) for animal in animals) 110 | 111 | these_animals: t.List[Animal] = [AnimalDog(), AnimalFish(), AnimalCat()] 112 | 113 | print("these animals weigh", sum(animal.accept(Weigher()) for animal in these_animals)) 114 | # and our new programs work on the smaller old classes 115 | print("my mammals weigh", sum(animal.accept(Weigher()) for animal in my_mammals)) 116 | -------------------------------------------------------------------------------- /python/rsyscall/sys/un.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from __future__ import annotations 3 | import typing as t 4 | from rsyscall._raw import ffi, lib # type: ignore 5 | from rsyscall.sys.socket import AF, Sockaddr, _register_sockaddr 6 | from rsyscall.path import Path 7 | from dataclasses import dataclass 8 | import os 9 | from rsyscall.fcntl import O 10 | if t.TYPE_CHECKING: 11 | from rsyscall.handle import FileDescriptor 12 | from rsyscall.thread import Process 13 | 14 | __all__ = [ 15 | "PathTooLongError", 16 | "SockaddrUn", 17 | ] 18 | 19 | class PathTooLongError(ValueError): 20 | pass 21 | 22 | @dataclass 23 | class SockaddrUn(Sockaddr): 24 | path: bytes 25 | 26 | family = AF.UNIX 27 | def __post_init__(self) -> None: 28 | if len(self.path) > 108: 29 | raise PathTooLongError("path", self.path, "is longer than the maximum unix address size") 30 | 31 | @staticmethod 32 | async def from_path(thr: Process, path: t.Union[str, os.PathLike]) -> SockaddrUn: 33 | """Turn this path into a SockaddrUn, hacking around the 108 byte limit on socket addresses. 34 | 35 | If the passed path is too long to fit in an address, this function will open the parent 36 | directory with O_PATH and return SockaddrUn("/proc/self/fd/n/name"). 
37 | 38 | """ 39 | try: 40 | return SockaddrUn(os.fsencode(path)) 41 | except PathTooLongError: 42 | ppath = Path(path) 43 | fd = await thr.task.open(await thr.task.ptr(ppath.parent), O.PATH) 44 | return SockaddrUnProcFd(fd, ppath.name) 45 | 46 | T = t.TypeVar('T', bound='SockaddrUn') 47 | @classmethod 48 | def from_bytes(cls: t.Type[T], data: bytes) -> T: 49 | header = ffi.sizeof('sa_family_t') 50 | if len(data) < header: 51 | raise Exception("data too small", data) 52 | struct = ffi.cast('struct sockaddr_un*', ffi.from_buffer(data)) 53 | cls.check_family(AF(struct.sun_family)) 54 | if len(data) == header: 55 | # unnamed socket, name is empty 56 | length = 0 57 | elif struct.sun_path[0] == b'\0': 58 | # abstract socket, entire buffer is part of path 59 | length = len(data) - header 60 | else: 61 | # TODO handle the case where there's no null terminator 62 | # pathname socket, path is null-terminated 63 | length = lib.strlen(struct.sun_path) 64 | return cls(bytes(ffi.buffer(struct.sun_path, length))) 65 | 66 | def to_bytes(self) -> bytes: 67 | addr = ffi.new('struct sockaddr_un*', (AF.UNIX, self.path)) 68 | real_length = ffi.sizeof('sa_family_t') + len(self.path) + 1 69 | return bytes(ffi.buffer(addr))[:real_length] 70 | 71 | @classmethod 72 | def sizeof(cls) -> int: 73 | return ffi.sizeof('struct sockaddr_un') 74 | 75 | def __str__(self) -> str: 76 | return f"SockaddrUn({self.path!r})" 77 | 78 | async def close(self) -> None: 79 | pass 80 | _register_sockaddr(SockaddrUn) 81 | 82 | class SockaddrUnProcFd(SockaddrUn): 83 | def __init__(self, fd: FileDescriptor, name: str) -> None: 84 | super().__init__(os.fsencode(f"/proc/self/fd/{int(fd)}/{name}")) 85 | self.fd = fd 86 | self.name = name 87 | 88 | async def close(self) -> None: 89 | await self.fd.close() 90 | 91 | 92 | #### Tests #### 93 | from unittest import TestCase 94 | class TestUn(TestCase): 95 | def test_sockaddrun(self) -> None: 96 | initial = SockaddrUn(b"asefliasjeflsaifje0.1") 97 | output = SockaddrUn.from_bytes(initial.to_bytes()) 98 | self.assertEqual(initial, output) 99 | from rsyscall.sys.socket import SockaddrStorage 100 | out = SockaddrStorage.from_bytes(initial.to_bytes()).parse() 101 | self.assertEqual(initial, out) 102 | 103 | -------------------------------------------------------------------------------- /docs/conceptual.org: -------------------------------------------------------------------------------- 1 | #+title: Conceptual Introduction 2 | In most operating system APIs, when you run a syscall, 3 | you implicitly operate on the current process. 4 | In rsyscall, we explicitly specify the process in which we want to run a syscall. 5 | 6 | For rsyscall to create a new process in which the user can run syscalls, 7 | it starts a process running the "rsyscall server stub", 8 | which reads syscall requests from a file descriptor and sends back responses. 9 | 10 | Several parts of the Linux API are non-trivial to use in such an environment; 11 | fork, clone and exec, among others. 12 | We have designed and implemented clean methods for using such syscalls, 13 | and make them all available to the user. 14 | 15 | The rsyscall API allows us to initialize new processes in a new way. 16 | Some other systems specify all the characteristics of the new process up front, 17 | like NT's =CreateProcess= or POSIX's =posix_spawn=; 18 | but that requires explicit support for everything we want to change about the new process.
19 | Still other systems copy the attributes for the new process from the current process, like =fork=; 20 | but [[https://lwn.net/Articles/785430/][that's inefficient]]. 21 | 22 | rsyscall allows you to create a new process which shares everything 23 | (address space, file descriptor tables, etc) 24 | with the current process, and so is cheap to create, as with traditional threading models. 25 | Then, you can mutate this process by calling arbitrary syscalls inside of it, 26 | and gradually unshare things through calls to =unshare= and =execve=. 27 | This is more efficient than =fork=, and more powerful than =posix_spawn=. 28 | 29 | Besides the efficiency benefit, 30 | there's also a comprehensibility benefit of rsyscall 31 | relative to =fork= (and similar calls like =vfork=). 32 | =fork= is a single system call which returns twice, 33 | and splits your program into two contexts of execution; 34 | among other things, this makes it difficult to coordinate actions between both contexts. 35 | 36 | In rsyscall there is a single running program, 37 | which is a completely conventional straight-line program. 38 | This single running program explicitly acts on both processes in a "single-threaded" manner, 39 | freely interleaving actions in either process. 40 | For example, 41 | one line can open a file in the child process, 42 | then the next line can use that file in the parent process, 43 | since the file descriptor table is shared. 44 | 45 | In the classification of threading systems as [[https://en.wikipedia.org/wiki/Thread_(computing)#Models]["1:1", "N:1", and "M:N"]], 46 | rsyscall is in a new category: "1:N". 47 | A single "application-level thread" maps on to multiple "kernel-level entities". 48 | This is possible and useful because 49 | we explicitly denote which "kernel-level entity" (which process) 50 | we want to run a syscall in. 51 | 52 | In addition to the process creation and manipulation benefits, 53 | we also provide robust, wide-ranging, low-level support for many Linux features. 54 | rsyscall is useful for writing many kinds of scripts and applications 55 | which make heavy use of the Linux API. 56 | Such programs don't have to give up on high-level language features and use C, 57 | nor do they have to use other languages 58 | with complex runtimes that can silently interfere with features like =unshare= and =clone=. 59 | 60 | We've implemented the rsyscall API initially in Python, 61 | along with a limited C API, 62 | to maximize its usability. 63 | 64 | In Python, we treat each syscall as a method on some object, 65 | such as Process or FileDescriptor or ChildPid. 66 | We've also used the type annotation features of Python 3. 67 | This is idiomatic for Python, 68 | but we'd love to support other languages. 69 | Other languages will likely have different approaches to the API. 70 | If you'd like to work on supporting other languages, just file an issue. 71 | Any language is interesting, but particularly interesting would be 72 | languages with rich type systems, 73 | like Haskell and OCaml, 74 | and languages that currently have poor APIs for interacting with Linux, 75 | like Java. 76 | 77 | To learn more about the specifics of the API, 78 | take a look at the documentation. 
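As a small taste of what this looks like in practice, here is a minimal sketch using the Python API.
The names follow the usage in this repository's test suite;
=process= is assumed to be a =Process= you already have in hand, and error handling is omitted.

#+begin_src python
from rsyscall.sched import CLONE

async def demo(process) -> None:
    # Create a child process sharing the file descriptor table
    # (and everything else) with `process`; this is cheap, like a thread.
    child = await process.clone(CLONE.FILES)
    # Mutate the child by running syscalls in it: here, give it its own
    # private file descriptor table...
    await child.unshare(CLONE.FILES)
    # ...and then turn it into a conventional subprocess with exec.
    proc = await child.exec(child.environ.sh.args('-c', 'true'))
    # The parent keeps running as ordinary straight-line code, and can
    # wait for the child and check its exit status.
    await proc.check()
#+end_src

Note that there is no second flow of control here:
the clone, the unshare, and the exec are all just method calls made from one coroutine.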
79 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_concurrency.py: -------------------------------------------------------------------------------- 1 | from dneio import RequestQueue, reset, Event 2 | from rsyscall.tests.trio_test_case import TrioTestCase 3 | import outcome 4 | import trio 5 | 6 | class MyException(Exception): 7 | pass 8 | 9 | async def sleep_and_throw() -> None: 10 | async with trio.open_nursery() as nursery: 11 | async def thing1() -> None: 12 | await trio.sleep(0) 13 | raise MyException("ha ha") 14 | async def thing2() -> None: 15 | await trio.sleep(1000) 16 | nursery.start_soon(thing1) 17 | nursery.start_soon(thing2) 18 | 19 | class TestConcurrency(TrioTestCase): 20 | async def test_nursery(self) -> None: 21 | async with trio.open_nursery() as nursery: 22 | async def a1() -> None: 23 | await trio.sleep(10) 24 | async def a2() -> None: 25 | try: 26 | await sleep_and_throw() 27 | except MyException: 28 | pass 29 | finally: 30 | nursery.cancel_scope.cancel() 31 | nursery.start_soon(a1) 32 | nursery.start_soon(a2) 33 | 34 | async def test_nest_cancel_inside_shield(self) -> None: 35 | "If we cancel_scope.cancel() inside a CancelScope which is shielded, it works." 36 | with trio.CancelScope(shield=True): 37 | async with trio.open_nursery() as nursery: 38 | nursery.start_soon(trio.sleep_forever) 39 | nursery.cancel_scope.cancel() 40 | 41 | def failing_function(x): 42 | raise Exception("failed with", x) 43 | 44 | class TestConcur(TrioTestCase): 45 | async def _first_runner(self, queue: RequestQueue) -> None: 46 | while True: 47 | many = await queue.get_many() 48 | for val, coro in many[::-1]: 49 | if val == 1337: 50 | try: 51 | failing_function(x) # type: ignore 52 | except Exception as e: 53 | coro.resume(outcome.Error(e)) 54 | else: 55 | coro.resume(outcome.Value(val + 10)) 56 | 57 | async def _second_runner(self, queue: RequestQueue[int, int]) -> None: 58 | while True: 59 | many = await queue.get_many() 60 | for val, coro in many[::-1]: 61 | try: 62 | result = await self.queue.request(val + 100) 63 | except Exception as e: 64 | coro.resume(outcome.Error(e)) 65 | else: 66 | coro.resume(outcome.Value(result)) 67 | 68 | async def asyncSetUp(self) -> None: 69 | self.queue = RequestQueue[int, int]() 70 | reset(self._first_runner(self.queue)) 71 | self.second_queue = RequestQueue[int, int]() 72 | reset(self._second_runner(self.second_queue)) 73 | 74 | async def test_parallel(self) -> None: 75 | async def req(i: int): 76 | ret = await self.queue.request(i) 77 | self.assertEqual(i+10, ret) 78 | async with trio.open_nursery() as nursery: 79 | for i in range(3): 80 | nursery.start_soon(req, i) 81 | 82 | async def test_through_multiple(self) -> None: 83 | self.assertEqual(await self.second_queue.request(1), 111) 84 | 85 | async def test_event(self) -> None: 86 | ev = Event() 87 | async with trio.open_nursery() as nursery: 88 | @nursery.start_soon 89 | async def foo(): 90 | await trio.sleep(0) 91 | ev.set() 92 | await ev.wait() 93 | 94 | async def test_failure(self) -> None: 95 | should_fail = 1237 96 | def length_traceback(tb) -> int: 97 | if tb is None: 98 | return 0 99 | else: 100 | return 1 + length_traceback(tb.tb_next) 101 | try: 102 | await self.second_queue.request(should_fail) 103 | except Exception as e: 104 | self.assertLess(length_traceback(e.__traceback__), 10) 105 | 106 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_clone.py: 
-------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | from rsyscall.tests.utils import do_async_things 3 | from rsyscall.epoller import Epoller 4 | from rsyscall.monitor import AsyncSignalfd 5 | 6 | from rsyscall.sched import CLONE 7 | from rsyscall.signal import SIG, Sigset 8 | from rsyscall.stdlib import mkdtemp 9 | from rsyscall.sys.signalfd import SignalfdSiginfo 10 | from rsyscall.sys.wait import CalledProcessError 11 | 12 | class TestClone(TrioTestCase): 13 | async def asyncSetUp(self) -> None: 14 | self.child = await self.process.clone(CLONE.FILES) 15 | 16 | async def test_exit(self) -> None: 17 | await self.child.exit(0) 18 | 19 | async def test_nest_exit(self) -> None: 20 | process = await self.child.clone(CLONE.FILES) 21 | await process.exit(0) 22 | 23 | async def test_nest_multiple(self) -> None: 24 | for i in range(5): 25 | child = await self.child.fork() 26 | await do_async_things(self, child.epoller, child) 27 | await child.exit(0) 28 | 29 | async def test_two_children_exec(self) -> None: 30 | """Start two child and exec in each of them. 31 | 32 | This test would (sometimes) catch a race condition we had where waitpid 33 | on one child would consume the SIGCHLD for another child, and our logic 34 | to eagerly call waitid was wrong, so waitpid on the other child would 35 | block forever. 36 | 37 | """ 38 | thr2 = await self.process.fork() 39 | cmd = self.child.environ.sh.args('-c', 'true') 40 | child1 = await self.child.exec(cmd) 41 | child2 = await thr2.exec(cmd) 42 | await child1.check() 43 | await child2.check() 44 | 45 | async def test_async(self) -> None: 46 | epoller = await Epoller.make_root(self.child.task) 47 | await do_async_things(self, epoller, self.child) 48 | 49 | async def test_nest_async(self) -> None: 50 | process = await self.child.clone(CLONE.FILES) 51 | epoller = await Epoller.make_root(process.task) 52 | await do_async_things(self, epoller, process) 53 | await process.exit(0) 54 | 55 | async def test_unshare_async(self) -> None: 56 | await self.child.unshare(CLONE.FILES) 57 | process = await self.child.clone(CLONE.FILES) 58 | epoller = await Epoller.make_root(process.task) 59 | await process.unshare(CLONE.FILES) 60 | await do_async_things(self, epoller, process) 61 | await process.exit(0) 62 | 63 | async def test_exec(self) -> None: 64 | child = await self.child.exec(self.child.environ.sh.args('-c', 'false')) 65 | with self.assertRaises(CalledProcessError): 66 | await child.check() 67 | 68 | async def test_check_in_nursery(self) -> None: 69 | "We broke this with some concurrency refactoring once" 70 | child = await self.child.exec(self.child.environ.sh.args('-c', 'sleep inf')) 71 | self.nursery.start_soon(child.check) 72 | 73 | async def test_nest_exec(self) -> None: 74 | child = await self.child.fork() 75 | grandchild = await child.fork() 76 | cmd = self.child.environ.sh.args('-c', 'true') 77 | await (await child.exec(cmd)).check() 78 | await (await grandchild.exec(cmd)).check() 79 | 80 | async def test_mkdtemp(self) -> None: 81 | async with (await mkdtemp(self.child)): 82 | pass 83 | 84 | async def test_signal_queue(self) -> None: 85 | epoller = await Epoller.make_root(self.child.task) 86 | sigfd = await AsyncSignalfd.make(self.child.task, epoller, Sigset({SIG.INT})) 87 | sigevent = sigfd.next_signal 88 | await self.child.pid.kill(SIG.INT) 89 | await sigevent.wait() 90 | 91 | class TestCloneUnshareFiles(TrioTestCase): 92 | async def asyncSetUp(self) -> None: 93 | 
self.child = await self.process.fork() 94 | 95 | async def asyncTearDown(self) -> None: 96 | await self.child.exit(0) 97 | 98 | async def test_nest_async(self) -> None: 99 | process = await self.child.fork() 100 | epoller = await Epoller.make_root(process.task) 101 | await do_async_things(self, epoller, process) 102 | await process.exit(0) 103 | -------------------------------------------------------------------------------- /c/src/rsyscall_unix_stub.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "rsyscall.h" 14 | 15 | static void receive_fds(const int sock, int *fds, int n) { 16 | union { 17 | struct cmsghdr hdr; 18 | char buf[CMSG_SPACE(sizeof(int) * n)]; 19 | } cmsg; 20 | char waste_data; 21 | struct iovec io = { 22 | .iov_base = &waste_data, 23 | .iov_len = sizeof(waste_data), 24 | }; 25 | struct msghdr msg = { 26 | .msg_name = NULL, 27 | .msg_namelen = 0, 28 | .msg_iov = &io, 29 | .msg_iovlen = 1, 30 | .msg_control = &cmsg, 31 | .msg_controllen = sizeof(cmsg), 32 | }; 33 | if (recvmsg(sock, &msg, MSG_CMSG_CLOEXEC) < 0) { 34 | err(1, "recvmsg(sock=%d)", sock); 35 | } 36 | if (msg.msg_controllen != sizeof(cmsg)) { 37 | err(1, "Message has wrong controllen"); 38 | } 39 | // if (cmsg.hdr.cmsg_len != sizeof(cmsg.buf)) { 40 | // err(1, "Control message has wrong length"); 41 | // } 42 | if (cmsg.hdr.cmsg_level != SOL_SOCKET) { 43 | err(1, "Control message has wrong level"); 44 | } 45 | if (cmsg.hdr.cmsg_type != SCM_RIGHTS) { 46 | err(1, "Control message has wrong type"); 47 | } 48 | memcpy(fds, CMSG_DATA(&cmsg.hdr), sizeof(int) * n); 49 | } 50 | 51 | void write_null_terminated_array(int fd, char** argv) 52 | { 53 | int ret; 54 | for (; *argv != NULL; argv++) { 55 | char* cur = *argv; 56 | size_t size = strlen(cur); 57 | ret = write(fd, &size, sizeof(size)); 58 | if (ret != sizeof(size)) { 59 | err(1, "write(fd=%d, &size, sizeof(size))", fd); 60 | } 61 | while (size > 0) { 62 | ret = write(fd, cur, size); 63 | if (ret < 0) { 64 | err(1, "write(fd=%d, cur, size=%lu)", fd, size); 65 | } 66 | size -= ret; 67 | cur += ret; 68 | } 69 | } 70 | } 71 | 72 | static int connect_unix_socket(struct sockaddr_un addr) { 73 | int sockfd = socket(AF_UNIX, SOCK_STREAM, 0); 74 | if (sockfd < 0) { 75 | err(1, "socket"); 76 | } 77 | if (connect(sockfd, &addr, sizeof(addr)) < 0) { 78 | err(1, "bind"); 79 | } 80 | return sockfd; 81 | } 82 | 83 | int main(int argc, char** argv, char** envp) 84 | { 85 | const char *sock_path = getenv("RSYSCALL_UNIX_STUB_SOCK_PATH"); 86 | if (!sock_path) { 87 | err(1, "missing environment variable RSYSCALL_UNIX_STUB_SOCK_PATH"); 88 | } 89 | const int sock_path_fd = open(sock_path, O_CLOEXEC|O_PATH); 90 | struct sockaddr_un pass_addr = { .sun_family = AF_UNIX, .sun_path = {}}; 91 | snprintf(pass_addr.sun_path, sizeof(pass_addr.sun_path), "/proc/self/fd/%d", sock_path_fd); 92 | const int connsock = connect_unix_socket(pass_addr); 93 | 94 | const int nfds = 4; 95 | int fds[nfds]; 96 | receive_fds(connsock, fds, nfds); 97 | const int syscall_fd = fds[0]; 98 | const int data_fd = fds[1]; 99 | const int futex_memfd = fds[2]; 100 | const int connecting_fd = fds[3]; 101 | size_t envp_count = 0; 102 | for (; envp[envp_count] != NULL; envp_count++); 103 | sigset_t sigmask; 104 | int ret; 105 | ret = sigprocmask(-1, NULL, &sigmask); 106 | if (ret < 0) { 107 | err(1, 
"sigprocmask(-1, NULL, &sigmask)"); 108 | } 109 | struct rsyscall_unix_stub describe = { 110 | .symbols = rsyscall_symbol_table(), 111 | .pid = getpid(), 112 | .syscall_fd = syscall_fd, 113 | .data_fd = data_fd, 114 | .futex_memfd = futex_memfd, 115 | .connecting_fd = connecting_fd, 116 | .argc = argc, 117 | .envp_count = envp_count, 118 | .sigmask = *((uint64_t*) &sigmask), 119 | }; 120 | ret = write(data_fd, &describe, sizeof(describe)); 121 | if (ret != sizeof(describe)) { 122 | err(1, "write(data_fd, &describe, sizeof(describe))"); 123 | } 124 | write_null_terminated_array(data_fd, argv); 125 | write_null_terminated_array(data_fd, envp); 126 | rsyscall_server(syscall_fd, syscall_fd); 127 | } 128 | -------------------------------------------------------------------------------- /python/dneio/__init__.py: -------------------------------------------------------------------------------- 1 | """Concurrency based on `shift`/`reset` and object-oriented effect handlers 2 | 3 | We implement a portable `shift`/`reset`[0] for async Python, which works when running 4 | under any supported async runner. We also build several concurrent communication 5 | mechanisms with `shift`/`reset`. 6 | 7 | Delimited continuations, such as `shift`/`reset`, can be used to transform a 8 | callback-based concurrency system into a more ergonomic direct-style system.[1] 9 | For example, we can go from: 10 | 11 | ``` 12 | def cb(data): 13 | if data: 14 | more_work(data) 15 | file.read_cb(cb) 16 | ``` 17 | 18 | to: 19 | 20 | ``` 21 | data = await shift(file.read_cb) 22 | if data: 23 | more_work(data) 24 | ``` 25 | 26 | We can then construct an ergonomic direct-style concurrency system by layering 27 | delimited continuations on top of a traditional callback-based concurrency 28 | system. This has a number of advantages. 29 | 30 | Instead of a coroutine performing a blocking operation by yielding up to a 31 | global event loop, a coroutine performs a blocking operation by calling `shift` 32 | with any object that accepts a callback. Context switching between running 33 | coroutines happens automatically, in a distributed fashion, as objects receive 34 | events and call callbacks. Instead of relying implicitly on a global event loop 35 | and global scheduler, a coroutine explicitly selects what object is responsible 36 | for scheduling it, simply by making a call into that object. That object may 37 | perform its duties by calling into other objects or by performing blocking 38 | system calls itself, and ultimately resumes the coroutine with the result by 39 | calling the callback. 40 | 41 | With callback-based concurrency, an object can guarantee that when callbacks are 42 | registered in a certain order, the underlying operations are performed in that 43 | order; and that when events happen in some order, the registered callbacks are 44 | called in that same order. This preserves information about event ordering and 45 | allows for much simpler bookkeeping of state. This guarantee is preserved when 46 | using `shift`/`reset`. 47 | 48 | For example, suppose some underlying bit of state is either "True" or "False", 49 | and we read that state on each related operation, and maintain a Python boolean 50 | which is supposed to match the underlying state. If callbacks are called in 51 | order, we know that the underlying state is always equal to the state at the 52 | time of the last callback, so we can just set the tracking bool to the bit we 53 | read. 
Thus, we can safely perform multiple operations at a time, in parallel, 54 | from unrelated coroutines, with no explicit sequencing in user code. Without 55 | this guarantee, we would have to explicitly track the order in which operations 56 | are performed, or, more easily, lock the underlying state so that we only 57 | perform one operation at a time, reducing parallelism. 58 | 59 | Our object-oriented style also gives us effect handlers which are not based on a 60 | stack discipline. We don't need to be concerned about computations being 61 | performed in or out of the scope of handlers. Effect handlers are simply 62 | regular garbage collected objects: An effect handler stays alive as long as 63 | there is something with a reference to it. 64 | 65 | We also get an object-capability-style effect system for free. Since we don't 66 | rely on an implicit global event loop, there are no implicit global asynchronous 67 | effects. To allow a function to perform an asynchronous effect, we pass it (as 68 | a normal argument) the object that implements that effect. The type of an 69 | effectful function is simply a regular function type, accepting one or more 70 | objects with which it can perform effects.[2] 71 | 72 | [0]: If you aren't familiar with delimited continuations or shift/reset, 73 | read this tutorial: 74 | http://pllab.is.ocha.ac.jp/~asai/cw2011tutorial/main-e.pdf 75 | 76 | [1]: Using delimited continuations to pass callbacks is a well known technique; 77 | here's one post about it: 78 | http://www.gregrosenblatt.com/writing/reinvert-control-delim-cont.html 79 | 80 | [2]: Since we can close over effect handlers (they're regular objects), a single 81 | object or function might abstract over multiple effects. Is this a bad thing? 82 | Some say yes, but it seems like a pretty normal form of abstraction to me. 
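Putting the pieces together, here is a minimal end-to-end sketch of the
interface described above; everything other than `shift` and `reset` is
invented purely for illustration:

```
class OneShot:
    "A callback-based event source; not part of dneio, just an example."
    def __init__(self):
        self._callbacks = []
    def register(self, cb):
        # callback-style interface: cb will later be called with a value
        self._callbacks.append(cb)
    def deliver(self, value):
        callbacks, self._callbacks = self._callbacks, []
        for cb in callbacks:
            cb(value)

async def consumer(source):
    # Suspend until `source` calls the callback that shift hands it;
    # the value passed to that callback becomes the result of the await.
    value = await shift(source.register)
    print("consumer got", value)

source = OneShot()
reset(consumer(source))  # start the coroutine; it suspends inside shift
source.deliver(42)       # invoking the callback resumes it with 42
```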
83 | 84 | """ 85 | from dneio.core import shift, reset, Continuation, is_running_directly_under_trio 86 | from dneio.concur import RequestQueue, Event, Future, make_n_in_parallel, run_all 87 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_ip.py: -------------------------------------------------------------------------------- 1 | from rsyscall.tests.trio_test_case import TrioTestCase 2 | 3 | from rsyscall.sys.socket import * 4 | from rsyscall.netinet.in_ import * 5 | from rsyscall import Process, Pointer, FileDescriptor 6 | import trio 7 | import unittest 8 | 9 | class TestIP(TrioTestCase): 10 | async def test_stream_listen(self) -> None: 11 | sockfd = await self.process.task.socket(AF.INET, SOCK.STREAM) 12 | addr = await self.process.bind_getsockname(sockfd, SockaddrIn(0, '127.0.0.1')) 13 | await sockfd.listen(10) 14 | 15 | real_addr = await self.process.task.ptr(addr) 16 | clientfd = await self.process.task.socket(AF.INET, SOCK.STREAM) 17 | await clientfd.connect(real_addr) 18 | connfd = await sockfd.accept() 19 | 20 | in_data = await self.process.task.ptr(b"hello") 21 | written, _ = await clientfd.write(in_data) 22 | valid, _ = await connfd.read(written) 23 | self.assertEqual(in_data.value, await valid.read()) 24 | 25 | async def test_dgram_connect(self) -> None: 26 | sockfd = await self.process.task.socket(AF.INET, SOCK.DGRAM) 27 | addr = await self.process.bind_getsockname(sockfd, SockaddrIn(0, '127.0.0.1')) 28 | 29 | real_addr = await self.process.task.ptr(addr) 30 | clientfd = await self.process.task.socket(AF.INET, SOCK.DGRAM) 31 | await clientfd.connect(real_addr) 32 | 33 | in_data = await self.process.task.ptr(b"hello") 34 | written, _ = await clientfd.write(in_data) 35 | valid, _ = await sockfd.read(written) 36 | self.assertEqual(in_data.value, await valid.read()) 37 | 38 | async def test_write_to_unconnected(self) -> None: 39 | sockfd = await self.process.task.socket(AF.INET, SOCK.STREAM) 40 | with self.assertRaises(BrokenPipeError): 41 | await sockfd.write(await self.process.task.ptr(b"hello")) 42 | 43 | @unittest.skip("This test is slow and non-deterministic") 44 | async def test_send_is_not_atomic(self) -> None: 45 | """send does, in fact, do partial writes, at least when set to NONBLOCK 46 | 47 | That is to say, it won't return anything less than sending the whole pointer, or (in the 48 | case of NONBLOCK) sending none of the pointer. This is supported by the manpage, which says: 49 | "When the message does not fit into the send buffer of the socket, send() normally blocks". 50 | 51 | Unfortunately, it's still not atomic. If multiple processes are sending at once, the data can 52 | be interleaved. 
53 | 54 | """ 55 | sockfd = await self.process.task.socket(AF.INET, SOCK.STREAM) 56 | addr = await self.process.bind_getsockname(sockfd, SockaddrIn(0, '127.0.0.1')) 57 | await sockfd.listen(10) 58 | 59 | real_addr = await self.process.task.ptr(addr) 60 | clientfd = await self.process.task.socket(AF.INET, SOCK.STREAM) 61 | await clientfd.connect(real_addr) 62 | connfd = await sockfd.accept() 63 | orig_in_fd = clientfd 64 | orig_out_fd = connfd 65 | data = "".join(str(i) for i in range(8000)).encode() 66 | 67 | count = 100 68 | processes = [await self.process.fork() for _ in range(10)] 69 | in_ptrs = [await thr.ptr(data) for thr in processes] 70 | handles = [thr.task.inherit_fd(orig_in_fd) for thr in processes] 71 | async def run_send(process: Process, in_ptr: Pointer, fd: FileDescriptor) -> None: 72 | for i in range(count): 73 | in_ptr, rest = await fd.write(in_ptr) 74 | if rest.size() != 0: 75 | print("failure! rest.size() is", rest.size()) 76 | self.assertEqual(rest.size(), 0) 77 | read_process = await self.process.fork() 78 | out_buf = await read_process.malloc(bytes, len(data)) 79 | out_fd = read_process.inherit_fd(orig_out_fd) 80 | 81 | had_interleaving = False 82 | async with trio.open_nursery() as nursery: 83 | for process, in_ptr, fd in zip(processes, in_ptrs, handles): 84 | nursery.start_soon(run_send, process, in_ptr, fd) 85 | for i in range(len(processes) * count): 86 | out_buf, rest = await out_fd.recv(out_buf, MSG.WAITALL) 87 | self.assertEqual(rest.size(), 0) 88 | if not had_interleaving: 89 | indata = await out_buf.read() 90 | if indata != data: 91 | # oops, looks like the data from multiple processes was interleaved 92 | had_interleaving = True 93 | for process in processes: 94 | await process.exit(0) 95 | await read_process.exit(0) 96 | self.assertTrue(had_interleaving) 97 | -------------------------------------------------------------------------------- /python/rsyscall/tests/test_persistent.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from rsyscall import AsyncChildPid, Process 3 | from rsyscall.tests.trio_test_case import TrioTestCase 4 | from rsyscall.tasks.persistent import * 5 | from rsyscall.tasks.ssh import make_local_ssh, SSHHost 6 | from rsyscall.near.sysif import SyscallHangup 7 | from rsyscall.tests.utils import assert_process_works 8 | from rsyscall.sched import CLONE 9 | from rsyscall.signal import SIG 10 | from rsyscall.stdlib import mkdtemp 11 | import unittest 12 | 13 | class TestPersistent(TrioTestCase): 14 | host: SSHHost 15 | local_child: AsyncChildPid 16 | remote: Process 17 | 18 | @classmethod 19 | async def asyncSetUpClass(cls) -> None: 20 | cls.host = await make_local_ssh(cls.process) 21 | cls.local_child, cls.remote = await cls.host.ssh(cls.process) 22 | 23 | @classmethod 24 | async def asyncTearDownClass(cls) -> None: 25 | await cls.local_child.kill() 26 | 27 | async def asyncSetUp(self) -> None: 28 | self.tmpdir = await mkdtemp(self.process, "test_stub") 29 | self.sock_path = self.tmpdir/"persist.sock" 30 | 31 | async def asyncTearDown(self) -> None: 32 | await self.tmpdir.cleanup() 33 | 34 | async def test_reconnect_exit(self) -> None: 35 | per_thr = await clone_persistent(self.process, self.sock_path) 36 | await assert_process_works(self, per_thr) 37 | await per_thr.reconnect(self.process) 38 | await assert_process_works(self, per_thr) 39 | await per_thr.exit(0) 40 | 41 | async def test_exit_reconnect(self) -> None: 42 | process = await self.process.fork() 43 | 
per_thr = await clone_persistent(self.process, self.sock_path) 44 | await per_thr.prep_for_reconnect() 45 | await per_thr.exit(0) 46 | # when we try to reconnect, we'll fail 47 | with self.assertRaises(ConnectionRefusedError): 48 | await per_thr.reconnect(self.process) 49 | 50 | async def test_nest_exit(self) -> None: 51 | per_thr = await clone_persistent(self.process, self.sock_path) 52 | process = await per_thr.clone(CLONE.FILES) 53 | await per_thr.reconnect(self.process) 54 | await assert_process_works(self, process) 55 | await process.exit(0) 56 | 57 | async def test_nest_unshare_files_exit(self) -> None: 58 | per_thr = await clone_persistent(self.process, self.sock_path) 59 | process = await per_thr.fork() 60 | await per_thr.reconnect(self.process) 61 | await assert_process_works(self, process) 62 | await process.exit(0) 63 | 64 | async def test_ssh_same(self) -> None: 65 | per_thr = await clone_persistent(self.remote, self.sock_path) 66 | await per_thr.reconnect(self.remote) 67 | await assert_process_works(self, per_thr) 68 | await per_thr.exit(0) 69 | 70 | async def test_ssh_new(self) -> None: 71 | "Start the persistent process from one ssh process, then reconnect to it from a new ssh process." 72 | per_thr = await clone_persistent(self.remote, self.sock_path) 73 | 74 | local_child, new_remote = await self.host.ssh(self.process) 75 | await per_thr.reconnect(new_remote) 76 | await assert_process_works(self, per_thr) 77 | 78 | await per_thr.exit(0) 79 | 80 | async def test_no_make_persistent(self) -> None: 81 | pidns_thr = await self.process.clone(CLONE.NEWUSER|CLONE.NEWPID) 82 | sacr_thr = await pidns_thr.fork() 83 | await sacr_thr.task.setpgid() 84 | per_thr = await clone_persistent(sacr_thr, self.sock_path) 85 | # kill sacr_thr's process group to kill per_thr too 86 | await sacr_thr.pid.killpg(SIG.KILL) 87 | # the persistent process is dead, we can't reconnect to it 88 | with self.assertRaises(BaseException): # type: ignore 89 | await per_thr.reconnect(self.process) 90 | 91 | async def test_make_persistent(self) -> None: 92 | # use a pidns so that the persistent task will be killed after all 93 | pidns_thr = await self.process.clone(CLONE.NEWUSER|CLONE.NEWPID) 94 | sacr_thr = await pidns_thr.fork() 95 | await sacr_thr.task.setpgid() 96 | per_thr = await clone_persistent(sacr_thr, self.sock_path) 97 | # make the persistent process, actually persistent. 98 | await per_thr.make_persistent() 99 | # kill sacr_thr's process group 100 | await sacr_thr.pid.killpg(SIG.KILL) 101 | # the persistent process is still around! 102 | await per_thr.reconnect(self.process) 103 | await assert_process_works(self, per_thr) 104 | await per_thr.exit(0) 105 | -------------------------------------------------------------------------------- /python/rsyscall/time.py: -------------------------------------------------------------------------------- 1 | "`#include `" 2 | from __future__ import annotations 3 | from rsyscall._raw import lib, ffi # type: ignore 4 | from rsyscall.struct import Struct 5 | import typing as t 6 | from decimal import Decimal 7 | import decimal 8 | import math 9 | from dataclasses import dataclass 10 | 11 | NSEC_PER_SEC = 1_000_000_000 12 | 13 | @dataclass 14 | class Timespec: 15 | """struct timespec, as used by several time-related system-calls. 16 | 17 | This struct specifies time with nanosecond precision, but there's no good standard way 18 | in Python to represent such times. 
The growing standard is to use an integer number of 19 | nanoseconds, but that's easy to confuse with an integer number of seconds, and most 20 | functions don't take number-of-nanoseconds, they take number-of-seconds. 21 | 22 | So, this class supports conversion to and from a bunch of other formats. 23 | 24 | See the proposal of using Decimal to represent nanosecond timestamps: 25 | 26 | https://www.python.org/dev/peps/pep-0410/ 27 | https://www.python.org/dev/peps/pep-0564/ 28 | https://vstinner.github.io/python37-pep-564-nanoseconds.html 29 | 30 | The rejection of that proposal by Guido: 31 | 32 | https://mail.python.org/pipermail/python-dev/2012-February/116837.html 33 | https://bugs.python.org/issue23084 34 | 35 | """ 36 | sec: int 37 | nsec: int 38 | 39 | def to_decimal(self) -> Decimal: 40 | raise NotImplementedError 41 | 42 | def to_nanos(self) -> int: 43 | raise NotImplementedError 44 | 45 | def _to_cffi_dict(self) -> t.Dict[str, int]: 46 | return { 47 | "tv_sec": self.sec, 48 | "tv_nsec": self.nsec, 49 | } 50 | 51 | def to_bytes(self) -> bytes: 52 | return bytes(ffi.buffer(ffi.new('struct timespec const*', self._to_cffi_dict()))) 53 | 54 | T = t.TypeVar('T', bound='Timespec') 55 | @classmethod 56 | def from_float(cls: t.Type[T], value: t.Union[float, Decimal, Timespec]) -> T: 57 | if isinstance(value, Timespec): 58 | return cls(value.sec, value.nsec) 59 | elif isinstance(value, Decimal): 60 | i, frac = decimal.getcontext().divmod(value, Decimal(1)) 61 | return cls(int(i), int(frac*NSEC_PER_SEC)) 62 | else: 63 | fractional, integer = math.modf(value) 64 | return cls(int(integer), int(fractional*NSEC_PER_SEC)) 65 | 66 | @classmethod 67 | def from_nanos(cls: t.Type[T], nsec: int) -> T: 68 | raise NotImplementedError 69 | 70 | @classmethod 71 | def from_cffi(cls: t.Type[T], cffi_value: t.Any) -> T: 72 | return cls(cffi_value.tv_sec, cffi_value.tv_nsec) 73 | 74 | @classmethod 75 | def from_bytes(cls: t.Type[T], data: bytes) -> T: 76 | struct = ffi.cast('struct timespec*', ffi.from_buffer(data)) 77 | return cls.from_cffi(struct) 78 | 79 | @classmethod 80 | def sizeof(cls) -> int: 81 | return ffi.sizeof('struct timespec') 82 | 83 | @dataclass 84 | class Itimerspec(Struct): 85 | interval: Timespec 86 | value: Timespec 87 | 88 | def __init__(self, interval: t.Union[float, Decimal, Timespec], 89 | value: t.Union[float, Decimal, Timespec]) -> None: 90 | self.interval = Timespec.from_float(interval) 91 | self.value = Timespec.from_float(value) 92 | 93 | def _to_cffi_dict(self) -> t.Dict[str, t.Dict[str, int]]: 94 | return { 95 | "it_interval": self.interval._to_cffi_dict(), 96 | "it_value": self.value._to_cffi_dict(), 97 | } 98 | 99 | def to_bytes(self) -> bytes: 100 | return bytes(ffi.buffer(ffi.new('struct itimerspec const*', self._to_cffi_dict()))) 101 | 102 | T = t.TypeVar('T', bound='Itimerspec') 103 | @classmethod 104 | def from_bytes(cls: t.Type[T], data: bytes) -> T: 105 | struct = ffi.cast('struct itimerspec*', ffi.from_buffer(data)) 106 | return cls(Timespec.from_cffi(struct.it_interval), Timespec.from_cffi(struct.it_value)) 107 | 108 | @classmethod 109 | def sizeof(cls) -> int: 110 | return ffi.sizeof('struct itimerspec') 111 | 112 | #### Tests #### 113 | from unittest import TestCase 114 | class TestTime(TestCase): 115 | def test_timespec(self) -> None: 116 | initial = Timespec.from_float(Decimal('4.1')) 117 | output = Timespec.from_bytes(initial.to_bytes()) 118 | self.assertEqual(initial, output) 119 | 120 | def test_itimerspec(self) -> None: 121 | initial =
Itimerspec(Timespec.from_float(Decimal('1.2')), Timespec.from_float(Decimal('3.4'))) 122 | output = Itimerspec.from_bytes(initial.to_bytes()) 123 | self.assertEqual(initial, output) 124 | --------------------------------------------------------------------------------