├── .gitignore ├── Makefile ├── README.md ├── TODO ├── memory.rkt └── slave.c /.gitignore: -------------------------------------------------------------------------------- 1 | compiled/ 2 | slave.S 3 | slave 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -Os -W -Wall -Wextra -Werror 3 | LINK_FLAGS = -ldl 4 | 5 | slave: slave.c 6 | $(CC) $(CFLAGS) -o $@ $< $(LINK_FLAGS) 7 | 8 | slave.S: slave.c 9 | $(CC) $(CFLAGS) -S -o $@ $< 10 | 11 | clean: 12 | git clean -xfd 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | My Own Linear Lisp 2 | ================== 3 | 4 | Goal: low key experiment with a Linear Lisp, create a maru-style evaluator. 5 | 6 | * Implementation language: racket? gerbil? maru? SBCL? ocaml? Coq? 7 | 8 | 9 | Slave 10 | ----- 11 | 12 | The slave implements a trivial low-level protocol 13 | for a slave process to be commanded by a master process, 14 | by default via pipes for stdin and stdout. 15 | The master process is supposed to do all the hard work, 16 | including compiling code to binary, and 17 | either linking it or making it position-independent. 18 | The master may query for protocol version 19 | and for machine architecture data from uname, in case it's unsure 20 | (for instance, if the slave is on a remote machine). 21 | A vector is provided for either master- or slave- side linking. 22 | The slave makes all dynamic libraries available for use, 23 | which is great for practical purposes, but 24 | can also make security analysis harder. 
25 | 26 | For static analysis, a better slave would be a specially crafted 27 | architecture-specific statically-linked assembly program 28 | that contains but one exit instruction, 29 | with all the program being ptrace'd into existence — 30 | the master would fork, PTRACE_TRACEME in the slave, and waitpid in the parent, 31 | then exec the slave and use ptrace to modify it into something useful and minimal, 32 | the minimal aspect making whole-program static analysis easier. 33 | However, that's harder to work with, and cannot reuse any dynamic library 34 | (short of reimplementing and maintaining a compatible dynamic linker, 35 | or an entire compilation toolchain). 36 | Therefore, at least to start with, we have this trivial protocol. 37 | The program main may still mmap existing things out of existence, 38 | if it wants its image to be minimal. 39 | 40 | 41 | Bibliography 42 | ------------ 43 | 44 | Linear Lisp 45 | 46 | * Henry Baker's paper trove 47 | for basic ideas about a Linear Lisp 48 | http://home.pipeline.com/~hbaker1/ 49 | 50 | * Alan Bawden's thesis (is it "Linear Graph Reduction: Confronting the Cost of Naming" ?) 51 | for a complete Scheme implementation on top of a Linear Graph Reduction engine 52 | 53 | * Jesse A. Tov's thesis "Practical Programming with Substructural Types" 54 | for a practical approach to dealing with linearity in a static language 55 | http://users.eecs.northwestern.edu/~jesse/pubs/dissertation/tov-dissertation-screen.pdf 56 | 57 | * ATS 58 | for a complete static language with dependent types and linear types 59 | http://www.ats-lang.org/ 60 | 61 | * Computability Logic 62 | For the game semantics interpretation of linear operators, and 63 | the universal abstraction quantifier vs the regular computational quantifiers. 
64 | http://www.csc.villanova.edu/~japaridz/CL/clx.html#Lecture_notes 65 | 66 | 67 | Lisp implementations to draw inspiration from or embrace 68 | 69 | * Ian Piumarta's Maru 70 | for its short bootstrapping loop 71 | http://piumarta.com/hg/maru 72 | 73 | * Abdulaziz Ghuloum's Ikarus Scheme 74 | for its incremental approach to compilation 75 | "An Incremental Approach to Compiler Construction", 76 | and extended draft tutorial, "Compilers: Backend to Frontend and Back to Front Again". 77 | https://github.com/namin/inc 78 | 79 | * Marc Feeley's Gambit 80 | for clean virtual machine design. 81 | 82 | * PLT Racket 83 | for its well-layered "module" system that makes it a multi-language platform. 84 | 85 | * Dimitris Vyzovitis's Gerbil 86 | for its attempt to cleanly bootstrap a Racket-like platform on top of Gambit. 87 | 88 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | * Complete the bibliography 2 | 3 | * Write a variant of libgc suitable for the linear lisp 4 | 5 | * Write a variant of the core language interpreter, 6 | * in the style of maru? of gambit? 7 | 8 | * Bootstrap a fuller Linear Lisp on top of the core language 9 | * in the style of gerbil? 10 | 11 | * Use ptrace and/or /proc for editing memory in a foreign process 12 | * by default manipulate an external slave, don't modify current image 13 | * initial state for the external slave is a mindless worker waiting 14 | * still have special ways to work in the same process? atomically? 15 | By creating a slave that hooks back onto the detached parent? 16 | * Bibliography: 17 | http://dev.exherbo.org/~alip/pinktrace/ 18 | http://www.linuxjournal.com/article/6100 19 | http://www.linuxjournal.com/node/6210 20 | * Use SYSV shared memory (or mmap to same file?) to share data with child. 21 | beware read/write barriers and icache flush.
22 | 23 | * Reuse the insns.dat from nasm and write an assembler / disassembler 24 | * have a maru-style assembler, except defined based on the nasm database 25 | * use some reversible computing framework if possible 26 | * use a framework that makes partial evaluation easier 27 | so we can assemble faster by eliminating unused cases. 28 | * if that helps, start by implementing 64-bit mode only 29 | * General x86 information: http://www.sandpile.org/ 30 | * Nasm: git://repo.or.cz/nasm.git 31 | 32 | * Bootstrap with some maru-style emit.l based on the above assembler 33 | 34 | * Maintain a content-addressed database of slave processes vs source 35 | * where "source" means "all the things that led to building that" 36 | * where "source" gives us complete offline debug information 37 | * where we also have intent-addressed mapping of source to object 38 | 39 | * Profit! 40 | -------------------------------------------------------------------------------- /memory.rkt: --------------------------------------------------------------------------------
#lang racket

;;; Simple (slow) virtual low-level memory access
;;;
;;; The memory is a single mutable byte string of +memory-size-in-bytes+
;;; bytes; addresses are zero-based indices into it.  Multi-byte integers
;;; are stored little-endian (least significant byte at the lowest address).

(provide +word-size-in-bytes+ +memory-size-in-bytes+
         peek poke peek-unsigned poke-unsigned peek-word poke-word
         peek-u8 poke-u8 peek-u16 poke-u16 peek-u32 poke-u32 peek-u64 poke-u64
         peek-bytes poke-bytes
         peek-string/latin-1 poke-string/latin-1 peek-string/utf-8 poke-string/utf-8)

(define +word-size-in-bytes+ 8)
(define +memory-size-in-bytes+ (expt 2 15)) ; 32KiB

(define *memory* (make-bytes +memory-size-in-bytes+ 0))


;; Read / write the single byte stored at ADDRESS.
(define (peek address) (bytes-ref *memory* address))
(define (poke address value) (bytes-set! *memory* address value))

;; Return the LENGTH bytes starting at ADDRESS as one unsigned integer,
;; little-endian.  Note the argument order: LENGTH comes first here,
;; whereas peek-bytes takes the address first.
(define (peek-unsigned length address)
  ;; Walk from the most significant byte (highest address) down to the
  ;; least significant one, shifting the accumulator left as we go.
  (for/fold ([sum 0])
            ([offset (in-range (- length 1) -1 -1)])
    (+ (arithmetic-shift sum 8) (peek (+ address offset)))))

;; Store the low LENGTH bytes of VALUE at ADDRESS, little-endian.
;; Higher-order bits of VALUE that do not fit in LENGTH bytes are dropped.
(define (poke-unsigned length address value)
  (for ([offset (in-range length)])
    (poke (+ address offset)
          (bitwise-and (arithmetic-shift value (* -8 offset)) 255))))

;; Fixed-width accessors.  Every peek-uN returns an integer and every
;; poke-uN stores one; all of them go through peek-unsigned/poke-unsigned
;; (or peek/poke for a single byte).
(define (peek-u8 address) (peek address))
(define (poke-u8 address value) (poke address value))
(define (peek-u16 address) (peek-unsigned 2 address))
(define (poke-u16 address value) (poke-unsigned 2 address value))
(define (peek-u32 address) (peek-unsigned 4 address))
(define (poke-u32 address value) (poke-unsigned 4 address value))
(define (peek-u64 address) (peek-unsigned 8 address))
(define (poke-u64 address value) (poke-unsigned 8 address value))

;; Machine-word accessors, +word-size-in-bytes+ bytes wide.
(define (peek-word address) (peek-unsigned +word-size-in-bytes+ address))
(define (poke-word address value) (poke-unsigned +word-size-in-bytes+ address value))

;; Return a fresh byte string holding the LENGTH bytes starting at ADDRESS.
(define (peek-bytes address length)
  (subbytes *memory* address (+ address length)))
;; Copy the byte string VALUE into memory at ADDRESS;
;; return the number of bytes written.
(define (poke-bytes address value)
  (bytes-copy! *memory* address value)
  (bytes-length value))

;; String accessors: decode LENGTH bytes at ADDRESS, or encode VALUE and
;; store it at ADDRESS (returning the encoded length in bytes).
(define (peek-string/latin-1 address length)
  (bytes->string/latin-1 (peek-bytes address length)))
(define (poke-string/latin-1 address value)
  (poke-bytes address (string->bytes/latin-1 value)))
(define (peek-string/utf-8 address length)
  (bytes->string/utf-8 (peek-bytes address length)))
(define (poke-string/utf-8 address value)
  (poke-bytes address (string->bytes/utf-8 value)))
-------------------------------------------------------------------------------- /slave.c: -------------------------------------------------------------------------------- 1 | /* 2 | A mindless slave, waiting to be taken over by another process 3 | like a Rykor waiting for a Kaldane.
/*
  This slave assumes a byte-addressed machine with basic Unix runtime,
  and word size 64 bits or smaller.
  The master better know about the low-level details of the slave machine.
  If it's remote, it can use the 'u' command to get uname details.
*/

#include <dlfcn.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <unistd.h>


#define PROTOCOL_VERSION 1

// Size of command packets in 8-byte words, then in bytes.
// It better be less than 512 octets, so it's portably smaller than PIPE_BUF
#define PACKET_WORDS 10
#define PACKET_BYTES (8*PACKET_WORDS)

// Routine invoked by the 'c' command: eight word arguments, one word result.
typedef uint64_t (*routine_fn)(uint64_t, uint64_t, uint64_t, uint64_t,
                               uint64_t, uint64_t, uint64_t, uint64_t);

// Entry point invoked by the 'p' command: mapping base and linking vector.
typedef void (*entry_fn)(void*, void*[]);

/*
  Write exactly `count` bytes from `buffer` to file descriptor `out`.
  Short writes are continued and EINTR is retried; any other failure
  (or a zero-length write) terminates the process with status 101.
*/
void write_buffer (int out, const void* buffer, size_t count)
{
  const uint8_t* buf = buffer;

  while (count) {
    ssize_t n = write(out, buf, count);
    if (n < 0 && errno == EINTR) {
      continue;
    }
    // A negative n must never reach the pointer arithmetic below.
    if (n <= 0) {
      exit(101);
    }
    buf += n;
    count -= (size_t)n;
  }
}

/*
  Write the 64-bit word `w` to `out` as 8 bytes, least significant first.
*/
void write_word (int out, uint64_t w)
{
  uint8_t le[8];
  unsigned i;

  // Serialise byte by byte so the wire format is little-endian on any
  // host, without relying on non-standard byte-order macros.
  for (i = 0; i < sizeof(le); i++) {
    le[i] = (uint8_t)(w >> (8 * i));
  }
  write_buffer(out, le, sizeof(le));
}

/*
  Write `count` as a little-endian word, followed by `count` bytes of `buffer`.
*/
void write_counted_buffer (int out, void* buffer, size_t count)
{
  write_word(out, count);
  write_buffer(out, buffer, count);
}

/*
  Send the raw `struct utsname` of this machine as a counted buffer.
  Terminates the process with status 102 if uname() fails.
*/
void write_uname (int out)
{
  struct utsname u;

  // uname() reports failure with a negative value; zero is success.
  if (uname(&u) < 0) {
    exit(102);
  }
  write_counted_buffer(out, &u, sizeof(u));
}


/*
  Read exactly `count` bytes from file descriptor `in` into `buffer`.
  Short reads are continued and EINTR is retried; end of file or any
  other failure terminates the process with status 103.
*/
void read_buffer (int in, void* buffer, size_t count)
{
  uint8_t* buf = buffer;

  while (count) {
    ssize_t n = read(in, buf, count);
    if (n < 0 && errno == EINTR) {
      continue;
    }
    // n == 0 is end of file, n < 0 an error: both are fatal here.
    if (n <= 0) {
      exit(103);
    }
    buf += n;
    count -= (size_t)n;
  }
}

/*
  Parse a decimal file descriptor number from a command-line operand.
  Terminates the process with status 104 (bad arguments) if the text is
  not a non-negative integer that fits in an int.
*/
static int parse_fd (const char* text)
{
  char* end;
  long value;

  errno = 0;
  value = strtol(text, &end, 10);
  if (end == text || *end != '\0' || errno == ERANGE
      || value < 0 || value > INT_MAX) {
    exit(104);
  }
  return (int)value;
}

/*
  Command loop.

  Options: `-i FD` and `-o FD` select the input and output descriptors
  (default 0 and 1); anything else exits with status 104.

  Each command is one PACKET_BYTES packet whose first byte selects the
  operation and whose words 1..9 carry its arguments:
    'V'  reply with the protocol version
    'u'  reply with the counted uname structure
    'v'  reply with the counted dynamic linking vector
    'c'  call the routine at w[1] with w[2..9], reply with its result
    'r'  reply with w[2] bytes of memory starting at w[1]
    'w'  read w[2] bytes from the input into memory at w[1]
    'p'  mmap(w[1..6]), read w[8] bytes at base+w[7], then either call
         base+w[9] (if non-zero) or reply with the base address
  Unknown commands are ignored.
*/
int main (int argc, char**argv)
{
  // NOTE(review): dlsym occupies two slots; possibly one was meant to be
  // a different dl* function. The layout is kept unchanged because the
  // master addresses entries by index.
  void* vector[] = { (void*)(uintptr_t)PROTOCOL_VERSION, &vector,
                     (void*)&read_buffer, (void*)&write_buffer,
                     (void*)&dlopen, (void*)&dlsym, (void*)&dlerror,
                     (void*)&dlsym, (void*)&dlclose };
  int in = 0;
  int out = 1;
  int n;
  // NOTE(review): packet words are used in host byte order while replies
  // are little-endian; the two agree only on little-endian hosts.
  union packet_t {
    uint8_t c[PACKET_BYTES];
    uint64_t w[PACKET_WORDS];
  } p;
  uint8_t* base;

  // Parse arguments: alternate i/o fds.
  // argv[argc] is NULL, so the loop must stop before it; each option
  // also needs an operand following it.
  for (n = 1; n < argc; n++) {
    if (strcmp(argv[n], "-i") == 0 && n + 1 < argc) {
      in = parse_fd(argv[++n]);
    } else if (strcmp(argv[n], "-o") == 0 && n + 1 < argc) {
      out = parse_fd(argv[++n]);
    } else {
      exit(104);
    }
  }

  // Command loop
  while (1) {
    read_buffer(in, &p, PACKET_BYTES);
    switch (p.c[0]) {
    case 'V': // Version
      write_word(out, PROTOCOL_VERSION);
      break;
    case 'u': // uname
      write_uname(out);
      break;
    case 'v': // dynamic linking vector
      write_counted_buffer(out, vector, sizeof(vector));
      break;
    case 'c': { // call routine with (up to) 8 arguments, output result
      routine_fn routine = (routine_fn)(uintptr_t)p.w[1];
      write_word(out, routine(p.w[2], p.w[3], p.w[4], p.w[5],
                              p.w[6], p.w[7], p.w[8], p.w[9]));
      break;
    }
    case 'r': // read (peek) data
      write_buffer(out, (void*)(uintptr_t)p.w[1], (size_t)p.w[2]);
      break;
    case 'w': // write (poke) data: the payload arrives on the input fd
      read_buffer(in, (void*)(uintptr_t)p.w[1], (size_t)p.w[2]);
      break;
    case 'p': // mmap program into existence, all in one swoop
      base = mmap((void*)(uintptr_t)p.w[1], (size_t)p.w[2],
                  (int)p.w[3], (int)p.w[4], (int)p.w[5], (off_t)p.w[6]);
      if (base == MAP_FAILED) {
        exit(105);
      }
      read_buffer(in, base + p.w[7], (size_t)p.w[8]);
      // if given a start vector, call it, if not, output base address
      if (p.w[9]) {
        entry_fn start = (entry_fn)(uintptr_t)(base + p.w[9]);
        start(base, vector);
      } else {
        write_word(out, (uint64_t)(uintptr_t)base);
      }
      break;
    default: // unknown command: ignored
      break;
    }
  }
}