├── .circleci └── config.yml ├── .gitignore ├── .gitmodules ├── .hgignore ├── .hgtags ├── Documentation ├── allocsld-debugging.txt ├── allocsld.txt ├── bigallocs.txt ├── custom-allocators.md ├── init-order.txt ├── malloc-indexing.txt ├── malloc-overview.txt ├── malloc-wrapping.txt ├── overview-runtime.txt ├── overview-toolchain.txt ├── projects.md ├── static-symbols.txt └── subrepo-structure.svg ├── LICENSE ├── LICENSE.gpl3 ├── LICENSE.lgpl3 ├── Makefile.am ├── README.md ├── allocsld ├── Makefile ├── allocinstr.c ├── asmutil.h ├── chain.c ├── cover-tracks.h ├── cover-tracks.inc.c ├── malloc-patcher.c └── return-hook.S ├── autogen.sh ├── buildtest ├── debian-buster │ └── Dockerfile ├── debian-stretch │ └── Dockerfile ├── ubuntu-18.04 │ └── Dockerfile └── ubuntu-20.04 │ └── Dockerfile ├── config.mk.in ├── configure.ac ├── contrib └── Makefile ├── examples ├── Makefile ├── client-allocator-elf.c ├── elf-allocators.c ├── elf-allocators.h ├── elf-refs.c ├── elf-refs.h ├── emit-asm.c └── emit-asm.h ├── include ├── allocmeta.h ├── allocs.h ├── allocsites.h ├── fake-libunwind.h ├── generic_malloc_index.h ├── liballocs.h ├── liballocs_cil_inlines.h ├── liballocs_config.h.in ├── liballocs_ext.h ├── linear_malloc_index.h ├── malloc-meta.h ├── memtable.h ├── metavec.h ├── pageindex.h ├── uniqtype-bfs.h └── vec.h ├── lib └── Makefile ├── liballocs.pc.in ├── src ├── Makefile ├── addrlist.c ├── allocators │ ├── alloca.c │ ├── auxv.c │ ├── brk.c │ ├── default-lib-malloc.c │ ├── generic_small.c │ ├── ld-so-malloc.c │ ├── mmap.c │ ├── packed-seq.c │ ├── stack.c │ ├── stackframe.c │ ├── static-file.c │ ├── static-section.c │ ├── static-segment.c │ └── static-symbol.c ├── allocsites.c ├── cache.c ├── counters.c ├── dlmalloc-ext.c ├── dlmalloc-includes.h ├── dummy_thread_locals.c ├── dummyweaks.c ├── err.c ├── fake-libunwind.c ├── ifunc.c ├── liballocs.c ├── liballocs.so ├── liballocs_private.h ├── lifetime_policies.c ├── meta-dso.c ├── nonshared_hook_wrappers.c ├── pageindex.c ├── 
preload.c ├── private-libc.c ├── private-malloc-plain.lds ├── private-malloc-wrapped.lds ├── query.c ├── rt-uniqtypes.c ├── stubs.c.example ├── systrap.c ├── systrap_noop.c ├── uniqtype-bfs.c ├── uniqtype-util.c ├── util.c ├── walk.c └── workarounds.c ├── tests ├── Makefile ├── abort-alloca-clang │ ├── abort-alloca-clang.c │ └── mk.inc ├── addrtaken-allocator │ ├── addrtaken-allocator.c │ └── mk.inc ├── alloca │ ├── alloca.c │ └── mk.inc ├── allocsite-id │ └── allocsite-id.c ├── anon-aliases │ ├── anon-aliases.c │ ├── header.h │ ├── lib1.c │ ├── lib2.c │ ├── lib3.c │ ├── mk.inc │ ├── sameheader.h │ └── subdir ├── bit-fields │ └── bit-fields.c ├── char-allocs │ └── char-allocs.c ├── hello-via-wrapper │ ├── hello-via-wrapper.c │ └── mk.inc ├── lib-test │ ├── lib-test.c │ └── mk.inc ├── malloc-from-dso │ ├── dso.c │ ├── malloc-from-dso.c │ └── mk.inc ├── malloc-in-exe │ ├── dlmalloc.c │ ├── malloc-in-exe.c │ └── mk.inc ├── metavec-layout │ └── metavec-layout.c ├── no-unbind-self │ ├── mk.inc │ └── no-unbind-self.c ├── nopreload-alloca │ └── nopreload-alloca.c ├── offsetof │ ├── mk.inc │ └── offsetof.c ├── packed-seq-walk │ └── packed-seq-walk.c ├── plugin-hello │ ├── mk.inc │ └── plugin-hello.c ├── ptr-to-opaque │ ├── mk.inc │ ├── other-cu.c │ └── ptr-to-opaque.c ├── realloc-multi-union │ └── realloc-multi-union.c ├── section-group │ ├── Makefile │ ├── lib1.c │ ├── lib1a.c │ ├── lib2.c │ ├── lib2a.c │ └── section-group.c ├── simple-client │ ├── mk.inc │ └── simple-client.c ├── simple-multi-alloc │ ├── mk.inc │ └── simple-multi-alloc.c ├── sizeof-subtract │ └── sizeof-subtract.c ├── sloppy-dumptypes │ └── sloppy-dumptypes.c ├── stack-types │ └── stack-types.c ├── stack-walk │ ├── mk.inc │ └── stack-walk.c ├── string-lit │ ├── mk.inc │ └── string-lit.c ├── uniqtype-make-precise │ └── uniqtype-make-precise.c ├── uniqtype-walk │ ├── mk.inc │ └── uniqtype-walk.c └── unit-tests │ └── Makefile └── tools ├── Makefile.meta ├── alias-linker-opts-for-base-types.sh ├── alloc.c 
├── allocs-cflags ├── allocs-install ├── allocs-ldflags ├── allocs-wrapper ├── allocscompilerwrapper.py ├── allocsites.cpp ├── allocstubs.c ├── alloctypes.cpp ├── callsites.cpp ├── compilerwrapper.py ├── cufiles.cpp ├── debian-print-srcpkgs.sh ├── debug-funcs.sh ├── dump-symlinks.sh ├── dumpptrs.cpp ├── dwarf-machine.hpp ├── dwarftypes.cpp ├── extrasyms.cpp ├── find-allocated-type-size.cpp ├── frametypes.cpp ├── frametypes2.cpp ├── gather-srcallocs.sh ├── gather-srcmemacc.sh ├── gold-plugin.cpp ├── guess-allocsite-types.sh ├── ifacetypes.cpp ├── interp-pad.S ├── lang ├── Makefile ├── c++ │ ├── bin │ │ ├── allocsc++ │ │ └── link-used-types │ └── lib │ │ └── allocscxx.py ├── c │ ├── Makefile │ ├── base-type-equivs.txt │ ├── bin │ │ ├── allocscc │ │ ├── c-gather-srcallocs │ │ ├── c-gather-srcmemacc │ │ └── link-used-types │ ├── cilallocs │ │ ├── .merlin │ │ └── cilallocs.ml │ ├── dumpallocs │ │ ├── .merlin │ │ ├── dumpallocs-gdb │ │ └── dumpallocs.ml │ ├── dumpmemacc │ │ └── dumpmemacc.ml │ ├── lib │ │ ├── allocscc.py │ │ ├── debian-build-funcs.sh │ │ └── symname-funcs.sh │ ├── monalloca │ │ └── monalloca.ml │ ├── src │ │ ├── Makefile │ │ └── base-types-translation.cpp │ └── trapptrwrites │ │ ├── .merlin │ │ ├── tests │ │ ├── Makefile │ │ ├── arrays.c │ │ ├── closure.c │ │ ├── log_ptr_writes.c │ │ └── writer.c │ │ └── trapptrwrites.ml └── fortran │ ├── bin │ └── allocsfc │ └── lib │ └── allocsfc.py ├── ldd-funcs.sh ├── merge-allocs.sh ├── metavector.cpp ├── noopgen.cpp ├── objcopy-unbind-syms-naive.sh ├── objcopy-unbind-syms-real.sh ├── objcopy-unbind-syms.sh ├── objdeps ├── objdumpallocs ├── objdumpmemacc ├── objdumpmeta ├── objdumpmeta.awk ├── objdumpmeta.sh ├── pervasive-types.cpp ├── pubsyms.sh ├── starts.cpp ├── strip-non-dynamic-relocs.sh ├── stubgen.h ├── to-globalize.sh ├── used-types-funcs.sh └── usedtypes.cpp /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.0 2 | jobs: 3 | build: 4 | 
docker: 5 | - image: ubuntu:24.04 6 | steps: 7 | - run: 8 | name: Install dependencies 9 | command: | 10 | apt update 11 | DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y \ 12 | build-essential libbsd-dev libelf-dev libdw-dev binutils-dev \ 13 | autoconf automake libtool pkg-config autoconf-archive \ 14 | g++-10 gcc-10 ocaml ocamlbuild ocaml-findlib libnum-ocaml-dev \ 15 | default-jdk-headless python3 \ 16 | make git gawk gdb wget \ 17 | libunwind-dev libc6-dev-i386 zlib1g-dev libc6-dbg \ 18 | libboost-iostreams-dev libboost-regex-dev \ 19 | libboost-serialization-dev libboost-filesystem-dev libffi-dev 20 | cd /usr/bin && ln -sf gcc-10 gcc 21 | cd /usr/bin && ln -sf g++-10 g++ 22 | - run: 23 | name: Report OCaml version for debugging 24 | command: | 25 | dpkg -l | grep ocaml 26 | - checkout 27 | - run: 28 | name: Update submodules 29 | command: git submodule update --init --recursive 30 | - run: 31 | name: Build submodules 32 | command: make -C contrib -j 2 33 | - run: 34 | name: Build project 35 | command: | 36 | . 
contrib/env.sh 37 | ./autogen.sh 38 | ./configure 39 | make -j 2 40 | #- run: # now our glibc is built with DWARF 5, so don't do this 41 | # name: Create liballocs metadata 42 | # no_output_timeout: 25m 43 | # command: | 44 | # mkdir -p /usr/lib/meta && \ 45 | # make -f tools/Makefile.meta \ 46 | # $(for libname in `ldd /bin/true | sed -En '/[[:blank:]]*([^[:blank:]]* => )?(.*) \(0x[0-9a-f]+\)/ {s//\2/;p}' | egrep 'libc\.so\.6|ld-linux.*\.so' | xargs readlink -f`; do echo "/usr/lib/meta${libname}-meta.so"; done) 47 | - persist_to_workspace: 48 | root: / 49 | paths: root/project usr/lib/meta 50 | test: 51 | requires: 52 | - build 53 | docker: 54 | - image: ubuntu:24.04 55 | steps: 56 | - run: 57 | name: Install dependencies 58 | command: | 59 | apt update 60 | DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y \ 61 | ca-certificates make build-essential g++-10 autoconf automake libtool \ 62 | libelf-dev python3 git gawk libunwind-dev \ 63 | ocaml ocamlbuild ocaml-findlib \ 64 | libdw-dev binutils-dev libffi-dev \ 65 | libboost-iostreams-dev libboost-regex-dev \ 66 | libboost-serialization-dev libboost-filesystem-dev \ 67 | pkg-config libglib2.0-dev # <-- for 'simple-multi-alloc' test case only 68 | cd /usr/bin && ln -sf gcc-10 gcc 69 | cd /usr/bin && ln -sf g++-10 g++ 70 | - attach_workspace: 71 | at: / 72 | - run: 73 | name: Run tests 74 | command: make -C /root/project/tests -j 2 -k 75 | 76 | workflows: 77 | version: 2 78 | default: 79 | jobs: 80 | - build 81 | - test: 82 | requires: 83 | - build 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.allocstubs.c 2 | *.allocstubs.s 3 | *.cil.c 4 | *.cil.i 5 | *.cil.s 6 | *.cma 7 | *.cmi 8 | *.cmo 9 | *.cmx 10 | *.cmxs 11 | *.i 12 | *.i.allocs 13 | *.o.fixuplog 14 | *.swp 15 | 16 | /tests/alloca/alloca 17 | /tests/offsetof/offsetof 18 | /tests/section-group/section-group 19 | 
/tests/simple-client/simple-client 20 | /tests/simple-multi-alloc/simple-multi-alloc 21 | /tests/sloppy-dumptypes/sloppy-dumptypes 22 | /tools/allocsites 23 | /tools/dumptypes 24 | /tools/ifacetypes 25 | /tools/find-allocated-type-size 26 | /tools/lang/c/bin/base-types-translation 27 | /tools/lang/c/src/base-types-translation 28 | /tools/usedtypes 29 | /tools/lang/c/*/*.d 30 | /tools/cufiles 31 | /tools/dumpptrs 32 | /tools/*.s 33 | 34 | /contrib/antlr-3.4-complete.jar 35 | /contrib/antlr-3.4.tar.gz 36 | /contrib/config.mk 37 | /contrib/env.sh 38 | /contrib/libantlr3c-3.4/ 39 | /libltdl/ 40 | 41 | /lib/outdir 42 | 43 | /config.h 44 | /config.h.in 45 | /config.mk 46 | /include/liballocs_config.h 47 | 48 | 49 | /Makefile 50 | 51 | __pycache__/ 52 | *.py[cod] 53 | 54 | .deps/ 55 | .libs/ 56 | 57 | 58 | # http://www.gnu.org/software/automake 59 | 60 | Makefile.in 61 | 62 | # http://www.gnu.org/software/autoconf 63 | 64 | .dirstamp 65 | /autom4te.cache 66 | /aclocal.m4 67 | /compile 68 | /configure 69 | /depcomp 70 | /install-sh 71 | /missing 72 | /stamp-h1 73 | /include/stamp-h2 74 | /m4/ 75 | /config.guess 76 | /config.sub 77 | /config.status 78 | /config.log 79 | /ltmain.sh 80 | /libtool 81 | 82 | 83 | # Compiled Object files 84 | *.slo 85 | *.lo 86 | *.o 87 | *.obj 88 | 89 | *.pc 90 | 91 | # Dependencies 92 | .*.d 93 | 94 | # Precompiled Headers 95 | *.gch 96 | *.pch 97 | 98 | # Compiled Dynamic libraries 99 | *.so 100 | *.dylib 101 | *.dll 102 | 103 | # Fortran module files 104 | *.mod 105 | 106 | # Compiled Static libraries 107 | *.lai 108 | *.la 109 | *.a 110 | *.lib 111 | 112 | # Executables 113 | *.exe 114 | *.out 115 | *.app 116 | 117 | 118 | # Backup files 119 | *~ 120 | \#*\# 121 | .\#* 122 | 123 | # WTF Dropbox 124 | 125 | .fuse_hidden* 126 | .nfs* 127 | .dropbox* 128 | 129 | # we might have .s files, but not in tools/ 130 | /tools/*.s 131 | 132 | /config.mk 133 | /tests/addrtaken-allocator/addrtaken-allocator 134 | /tools/alloctypes 135 | /tools/dwarftypes 
136 | /tools/extrasyms 137 | /tools/frametypes 138 | /tools/metavector 139 | 140 | /tests/unit-tests/metavec 141 | /tests/unit-tests/metavec-debug 142 | /tests/unit-tests/*.s 143 | /tests/unit-tests/*.h 144 | 145 | tmp-*.txt 146 | 147 | /tools/pervasive-types 148 | /tools/roottypes.c 149 | 150 | /tests/alloca/-ldl.res 151 | /tests/allocsite-id/allocsite-id 152 | .tests/bit-fields/bit-fields 153 | /tests/char-allocs/char-allocs 154 | /tests/lib-test/lib-test 155 | /tests/malloc-in-exe/malloc-in-exe 156 | /tests/metavec-layout/metavec-layout 157 | /tests/no-unbind-self/no-unbind-self 158 | /tests/nopreload-alloca/nopreload-alloca 159 | /tests/ptr-to-opaque/ptr-to-opaque 160 | /tests/realloc-multi-union/realloc-multi-union 161 | /tests/relf-auxv-dynamic/relf-auxv-dynamic 162 | /tests/sizeof-subtract/sizeof-subtract 163 | /tests/stack-types/stack-types 164 | /tests/stack-walk/stack-walk 165 | /tests/string-lit/string-lit 166 | /tests/uniqtype-make-precise/uniqtype-make-precise 167 | /tests/uniqtype-walk/uniqtype-walk 168 | 169 | /tools/lang/c/cil.stamp 170 | 171 | /allocsld/allocsld.lds 172 | 173 | config.h.in 174 | contrib/config.mk 175 | contrib/env.sh 176 | 177 | /allocsld/*.s 178 | /TODO.srk 179 | /00*.patch 180 | *.ii 181 | /build 182 | /configure.cmd* 183 | /contrib/cil.stamp 184 | /examples/elf-file-symbolised* 185 | /examples/elf-file-vanilla* 186 | /examples/elf-file.test 187 | /examples/test* 188 | /tests/lib-test/elf-file-symbolised* 189 | /tests/lib-test/elf-file-vanilla* 190 | /tests/lib-test/elf-file.test 191 | /tests/lib-test/test* 192 | examples/client-allocator-elf+meta 193 | examples/test* 194 | oopsla*.ps 195 | *.rej 196 | *.orig 197 | /tests/bit-fields/bit-fields 198 | /tests/packed-seq-walk/packed-seq-walk 199 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "contrib/libmallochooks"] 2 | path = 
contrib/libmallochooks 3 | url = https://github.com/stephenrkell/libmallochooks 4 | [submodule "contrib/libdlbind"] 5 | path = contrib/libdlbind 6 | url = https://github.com/stephenrkell/libdlbind.git 7 | [submodule "contrib/liballocstool"] 8 | path = contrib/liballocstool 9 | url = https://github.com/stephenrkell/liballocstool.git 10 | [submodule "contrib/libsystrap"] 11 | path = contrib/libsystrap 12 | url = https://github.com/stephenrkell/libsystrap.git 13 | [submodule "contrib/toolsub"] 14 | path = contrib/toolsub 15 | url = https://github.com/stephenrkell/toolsub.git 16 | [submodule "contrib/donald"] 17 | path = contrib/donald 18 | url = https://github.com/stephenrkell/donald.git 19 | [submodule "contrib/cil"] 20 | path = contrib/cil 21 | url = https://github.com/stephenrkell/cil.git 22 | [submodule "contrib/elftin"] 23 | path = contrib/elftin 24 | url = https://github.com/stephenrkell/elftin.git 25 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | *.cil.c 3 | *.cil.s 4 | *.o 5 | *.i 6 | makefile 7 | *.orig 8 | logwrites.ml 9 | *.cmx 10 | *.cma 11 | *.cmo 12 | *.cmi 13 | Old/* 14 | *.allocs 15 | *.d 16 | lib/liballocs.so 17 | test/hello/hello 18 | allocsites/allocsites 19 | allocsites/dumptypes 20 | allocsites/git-types.c 21 | allocsites/liballocs.a 22 | allocsites/subtle-questions.txt 23 | allocsites/test-simple.c 24 | allocsites/test-simple.out-types.c 25 | allocsites/test.out-types.c 26 | Notes/* 27 | cachegrind.out.* 28 | test/section-group/lib1.so 29 | test/section-group/lib2.so 30 | test/section-group/section-group 31 | test/hello-stubgen/hello-stubgen 32 | allocsites/usedtypes 33 | lib/liballocs_noop.so 34 | lib/liballocs_preload.so 35 | src/liballocs_noop.so 36 | src/liballocs_preload.so 37 | test/sloppy-dumptypes/sloppy-dumptypes 38 | tools/lang/c/src/base-types-translation 39 | *.s 40 | 
tools/find-allocated-type-size 41 | test/simple-multi-alloc/simple-multi-alloc 42 | src/libdumpsmaps.so 43 | tests/heap-index/multi-level 44 | lib/liballocs.a 45 | lib/liballocs_preload.a 46 | src/liballocs.a 47 | src/liballocs_preload.a 48 | tests/section-group/lib1.so 49 | tests/section-group/lib2.so 50 | tests/section-group/section-group 51 | tests/simple-multi-alloc/simple-multi-alloc 52 | tests/sloppy-dumptypes/sloppy-dumptypes 53 | tools/allocsites 54 | tools/dumptypes 55 | tools/liballocstool.a 56 | tools/usedtypes 57 | *.pyc 58 | tools/lang/c/cil* 59 | tools/lang/c/bin/base-types-translation 60 | src/*_fast.c 61 | *debuglog.txt 62 | filemap-libcrunch 63 | filemap-libcrunch-norenames 64 | filemap-pmirror 65 | lib/liballocs_nonshared.a 66 | lib/liballocs_noop.a 67 | *.rej 68 | src/liballocs_nonshared.a 69 | src/liballocs_noop.a 70 | *.allocstubs.c 71 | *.o.fixuplog 72 | tests/offsetof/offsetof 73 | tests/simple-client/simple-client 74 | *.ltrans.out 75 | tests/sloppy-dumptypes/errvar 76 | tools/lang/c++/notes.txt 77 | *.cmxs 78 | free.res 79 | tests/alloca/alloca 80 | Makefile 81 | Makefile.in 82 | aclocal.m4 83 | autom4te.cache/* 84 | config.guess 85 | config.log 86 | config.status 87 | config.sub 88 | configure 89 | configure.cmd 90 | depcomp 91 | install-sh 92 | liballocs.pc 93 | libtool 94 | ltmain.sh 95 | m4/libtool.m4 96 | m4/ltoptions.m4 97 | m4/ltsugar.m4 98 | m4/ltversion.m4 99 | m4/lt~obsolete.m4 100 | missing 101 | tests/abort-alloca-clang/abort-alloca-clang 102 | tests/nopreload-alloca/nopreload-alloca 103 | tests/relf-auxv-dynamic/relf-auxv-dynamic 104 | tests/relf-auxv-static/relf-auxv-static 105 | tests/uniqtype-walk/uniqtype-walk 106 | tools/.deps/* 107 | .dirstamp 108 | tools/ifacetypes 109 | contrib/libantlr3c-3.4/* 110 | contrib/antlr-3.4.tar.gz 111 | contrib/antlr-3.4-complete.jar 112 | contrib/config.mk 113 | contrib/env.sh 114 | lib/liballocs_dummyweaks.so 115 | core.* 116 | src/dlmalloc.a 117 | gdb.log 118 | src/liballocs_dummyweaks.so 
119 | stashed-*.patch 120 | tools/.libs/* 121 | tools/dumpptrs 122 | compile 123 | config.h 124 | config.h.in 125 | configure.cmd~ 126 | git-mapfile-orig 127 | libltdl/* 128 | m4/argz.m4 129 | m4/ltdl.m4 130 | */oprofile_data/* 131 | src/liballocs_pic.a 132 | src/liballocs_test.so 133 | stamp-h1 134 | *.i.memacc 135 | */-ldl.res 136 | tools/gold-plugin.la 137 | tools/gold-plugin.lo 138 | tools/gold-plugin.so 139 | tools/objdumpallocs-llvm 140 | oprofile_data/* 141 | tests/lib-test/lib-test 142 | tests/malloc-in-exe/malloc-in-exe 143 | tests/uniqtype-make-precise/uniqtype-make-precise 144 | tmp.patch 145 | tools/dumptypes.static 146 | tools/dwarfc/dwarf-machine 147 | tools/dwarfc/frame-machine 148 | tools/dwarfc/stack-machine 149 | contrib/libsrk31c++ 150 | contrib/libc++fileno 151 | *.bc 152 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 3c8db164b2bc76393b87eeaef38f7d298dcd46d2 default/oldmaster 2 | 6d7006829700c5f290e84f166223579db63b7b72 default/master 3 | -------------------------------------------------------------------------------- /Documentation/allocsld-debugging.txt: -------------------------------------------------------------------------------- 1 | allocsld.so is implemented as a 'chain loader' which secretly loads the 2 | 'real' dynamic linker and gets it to link the program. 3 | 4 | All this has the capacity to really confuse a debugger. A debugger 5 | attached to allocsld.so would not know /a priori/ anything about other 6 | binaries that get loaded. 7 | 8 | Debuggers learn about dynamically linked libraries by finding a 'struct 9 | r_debug' instance pointed to by the DT_DEBUG entry of the executable's 10 | .dynamic section (or by a symbol named _r_debug). 11 | 12 | So the short summary is that we give allocsld.so such a DT_DEBUG entry, 13 | but point it at the 'struct r_debug' in the *real* dynamic linker. 
That 14 | way, if a debugger looks in allocsld.so, thinking it the executable, it 15 | will find the real link map. 16 | 17 | It's not quite that easy. A debugger looking for a DT_DEBUG will also 18 | expect to find a _dl_debug_state() function in the same binary (actually 19 | it can have several names; see allocsld/chain.c). This is a no-op 20 | function on which the debugger secretly sets a breakpoint; the dynamic 21 | linker calls it whenever it loads or unloads something, so the debugger 22 | can stay aware. In our case, the 'real' dynamic linker is doing all the 23 | work and will happily call its own _dl_debug_state function, but the 24 | debugger will not know to set a breakpoint on it because it might look 25 | only at the 'apparent' dynamic linker, allocsld. We have no choice but to 26 | define our own _dl_debug_state function. We can set the r_brk field 27 | in the _r_debug, defined by the real dynamic linker, to point to our 28 | own function. (It appears gdb is wrong to behave this way. A comment in 29 | glibc's dl-debug.c attests: "The debugger is supposed to find this 30 | function's address by examining the r_brk member of struct r_debug, but 31 | GDB 4.15 in fact looks for this particular symbol name in the PT_INTERP 32 | file.") 33 | 34 | Note that simply symbol-overriding _dl_debug_state does not work. Even 35 | though it has default (global) visibility so appears subject to overriding, 36 | allocsld doesn't actually exist as far as the link map is concerned, so 37 | defining our own there is no good. In any case, glibc's dynamic linker 38 | seems to make internal calls to its own function, which are not overridable. 39 | And once we've "covered our tracks", the debugger will only find the "real" 40 | dynamic linker anyway. Our solution is to overwrite the 'real' ld.so's 41 | _dl_debug_state so that it calls ours. Since functions have 16-byte alignment 42 | on x86-64, we have 16 bytes to play with. 
We make the real ld.so 43 | _dl_debug_state tail-call (jump) into ours in allocsld. Our function is empty 44 | and just returns to the caller. Calling the ld.so's _dl_debug_state will 45 | activate *both* functions, so we've covered both debugger-bases. 46 | -------------------------------------------------------------------------------- /Documentation/bigallocs.txt: -------------------------------------------------------------------------------- 1 | Liballocs conceptually divides the allocation tree into "big" 2 | allocations nearer the top, and "non-big" allocations at the 3 | leaves. A special data structure -- the bigallocs table, combined 4 | with the pageindex -- records details of all existing big 5 | allocations. 6 | 7 | The idea here is to (1) optimise handling of queries that want to 8 | hit the leaves, e.g. type information for program variables, (2) 9 | represent allocation nesting in a uniform but flexible way, and (3) 10 | keep liballocs mostly out of the allocation path in the case of 11 | "small" allocations, which are inherently more frequent than big 12 | allocations. 13 | 14 | Liballocs assumes that allocations are properly nested. This is 15 | typically true, but requires a bit of manoeuvring in the case of 16 | memory mappings: the unit of allocation by the mmap allocator is a 17 | "mapping sequence" not a single mapping, so that multiple 18 | contiguous mappings make up a bigger allocation that can then be 19 | nested within without regard for the internal boundary between 20 | originally mmap'd pieces. 21 | 22 | Since allocations form a tree, allocations generally higher up the 23 | tree are "big". This means they appear in the "big allocations" 24 | table. 25 | 26 | Traditionally, there has been an invariant that bigallocness ends 27 | roughly where type information begins. More precisely: if an 28 | allocation has a uniqtype, then it may or may not be a big 29 | allocation, but there will be no nested allocations under it. 
30 | "Type information is at the leaves." 31 | 32 | It's likely that the design will have to be relaxed so that 33 | allocations can also nest underneath the uniqtype level. See GitHub 34 | issues #53, #82. The necessary design change seems to be to 35 | recognise that uniqtypes are themselves a kind of allocator. 36 | 37 | Despite their name, there is no minimum size for a bigalloc. 38 | 39 | One example of a bigalloc is a memory mapping sequence. Another is 40 | a data segment, or maybe a section within that segment. In these 41 | cases, the allocation itself has no type information, but type 42 | information begins at the next level down. 43 | 44 | Another is a big malloc chunk that is being used for a nested 45 | allocator. A custom allocator might get hold of memory by doing a 46 | big malloc, then nesting its allocations within that. The big chunk 47 | could conceivably have type info, although that would be unusual. 48 | 49 | To record the fact that other allocations are nested within a 50 | bigalloc, it has a nominated "suballocator", i.e. a pointer to the 51 | "struct allocator" describing the allocator that is parcelling out 52 | the allocation's space. 53 | 54 | It follows that any allocation arena is a bigalloc (but not all 55 | bigallocs are allocation arenas). 56 | 57 | Bigallocs may or may not be suballocated, but they are never 58 | sub-sub-allocated. Rather, if we have a malloc within a malloc, we 59 | promote the nested chunk to be its own bigalloc. 
60 | 61 | bigalloc at level n (nested allocations shown with 'x') 62 | +---------------------------------------------------+ 63 | |xxxx| |xx| |xx| |xxxxxxxxxxxxxxxxxxxxxxxxxxxx|...| 64 | +---------------------------------------------------+ 65 | ^ ----bigalloc at level n+1 ^ (nested allocations shown with 'y') 66 | +----------------------------+ 67 | | |yy| |yy| |yy| | 68 | +----------------------------+ 69 | 70 | This kind of structure could be made recursively, reaching 71 | arbitrarily many layers down. 72 | 73 | This means that we can have one bigalloc that is "mostly" 74 | suballocated into non-big allocations, but a specific chunk within 75 | it, allocated by that suballocator and subsequently used for 76 | parcelling out by a nested allocator, is itself a bigalloc. In 77 | other words, a bigalloc may have both a suballocator (general) and 78 | child bigallocs (promoted). 79 | 80 | How to support queries in such a world is tricky. If we query the 81 | free space at level n+1, do we get the metadata (e.g. type info) 82 | from the malloc chunk at level n? Most likely, each query should be 83 | viewed as "cut off" at some level on the tree, possibly bounded 84 | both above and below. Cut-offs below can account for e.g. run-time 85 | type checking within an allocator's code, where its own bookkeeping 86 | should be subject to run-time checks but it should not know or care 87 | what structure lies below. Cut-offs above would handle the converse 88 | case where querying free space at level n+1 might be best not to 89 | return the containing allocation. 90 | 91 | -------------------------------------------------------------------------------- /Documentation/custom-allocators.md: -------------------------------------------------------------------------------- 1 | To declare wrappers, set environment variables something like this at 2 | build time. 3 | 4 | LIBALLOCS_ALLOC_FNS="mymalloc(Z)p mycalloc(zZ)p myrealloc(pZ)p" 5 | LIBALLOCS_FREE_FNS="myfree(P)" 6 | 7 | ... 
where in the signature mini-language, "z" stands for "size_t", "p" 8 | for any pointer, and "i" for int. Capitals denote the significant 9 | argument (type-determining size or alloc-being-freed). 10 | 11 | This is necessary for C code, to analyse the use of `sizeof` which 12 | provides type information for allocator calls. 13 | 14 | To declare allocators, use something like this. 15 | 16 | LIBALLOCS_SUBALLOC_FNS="mysubmalloc(Z)p" 17 | LIBALLOCS_SUBFREE_FNS="mysubfree(P)->mysubmalloc" 18 | 19 | This will ensure that these functions are wrapped such that new 20 | allocations are indexed in a generic (but slow!) structure. If code 21 | changes are feasible, it is also possible to write a custom index and 22 | perform the wrapping yourself. 23 | 24 | The above facilities are very ad-hoc and will at some point be 25 | redesigned into something more general (contributions welcome!). 26 | -------------------------------------------------------------------------------- /Documentation/malloc-overview.txt: -------------------------------------------------------------------------------- 1 | There can be many malloc implementations in a process. For example, there 2 | may be the libc's malloc in a shared library, a custom malloc in the 3 | executable, and (in our case) liballocs's private malloc. Each of these 4 | malloc implementations has its own entry points which need to be 5 | interposed on separately. Each also has its own 'struct allocator', 6 | giving it its own identity at run time. 7 | 8 | To do the necessary interposition, there is more than one kind of 9 | 'wrapper function' involved. In fact the word 'wrapper' is horribly 10 | overloaded in the context of malloc and we shouldn't use it. It can mean 11 | (leading with a better word in each case) any of the following. 12 | 13 | - 'facade': a function in user code which backs onto malloc (et al) or 14 | transitively, another facade. 
These 'wrappers' are features of the client 15 | program, rather than something we generate. They need special treatment 16 | at present because we classify with a 'sizeofness' only the ultimate call 17 | site to any allocator or facade, i.e. the code that is actually using 18 | 'sizeof' to compute a size and pass it down to the allocation operation. 19 | 20 | - 'stub' or 'caller-side wrapper': these are the functions, with names such 21 | as __wrap_malloc, we generate in order to latch the (outermost) 22 | allocation call site address. This is how we infer what type is being 23 | allocated -- the outermost allocator-or-facade call site is the one whose 24 | classified sizeofness reveals the type. In some obscure cases there can 25 | be more than one classified allocator call on the stack at once (e.g. a 26 | facade that allocates a big array but also separately allocates a 'spine 27 | vector' of pointers into that array), hence "outermost". Subject to 28 | performance experiments, I would like to eliminate caller-side wrappers 29 | altogether, at a cost of sometimes walking the stack all the way to the 30 | top (looking for the outermost site). 31 | 32 | - an 'entry point' or 'callee-side wrapper': these generated functions 33 | stand in for the real allocator but additionally perform indexing 34 | operations. We generate them at link time. 35 | 36 | Conceptually, stubs (caller-side wrappers) are inserted when we link the 37 | calling code using the liballocs toolchain, and entry points (callee-side 38 | wrappers) are likewise inserted when we link the called allocator code 39 | using the liballocs toolchain. However, in order to support the common 40 | case of malloc in libc, which we do /not/ assume was built using the 41 | liballocs toolchain, two exceptions are made. 
42 | 43 | - liballocs preloads its own malloc which performs caller-side allocation 44 | site latching if no allocation site is currently latched (this is in a 45 | thread-local variable), albeit on the *callee* side. 46 | 47 | - the same arrangement is used to insert indexing operations around the 48 | libc malloc. 49 | 50 | When the 'real' global malloc is built in to the executable, preempting 51 | the ones in libc and liballocs, we require that the executable was built 52 | with the liballocs toolchain. That means it will have both kinds of 53 | wrappers built in to it. (To work with such executables unmodified, we 54 | could imagine getting into binary instrumentation techniques, but that is 55 | not attempted yet.) 56 | 57 | See malloc-indexing.txt for a diagram of how this works in two common 58 | cases. 59 | -------------------------------------------------------------------------------- /Documentation/projects.md: -------------------------------------------------------------------------------- 1 | For smaller projects (and some larger!) please check the issues on 2 | GitHub. This includes some quality-of-implementation issues, 3 | performance optimisations and usability improvements. 4 | 5 | Existing use cases: 6 | * run-time type checking: libcrunch 7 | * linking-related 8 | - Jon French's use of liballocs tools to create a run-time interface model of the Linux kernel (more soon!) 9 | * multi-language programming without foreign function interfacing APIs 10 | - CPython extension module (Guillaume Bertholon) 11 | * fine-grained versioning and adaptation of binary interfaces 12 | - see liballocs's ancestor project, Cake (https://humprog.org/~stephen/research/cake) 13 | 14 | For possible large-scale use cases, the list in the README is the 15 | definitive list. 
16 | -------------------------------------------------------------------------------- /Documentation/static-symbols.txt: -------------------------------------------------------------------------------- 1 | "Segments" are contiguous regions of virtual address space defined 2 | by a loaded DSO (executable or shared object)... on ELF platforms 3 | they correspond to a LOAD program header. Meanwhile "symbols" are 4 | subdivisions of segments, with (usually) associated type 5 | information since they correspond to a program function or 6 | variable. 7 | 8 | The main data structure used to index a segment is a "metavector". 9 | This is an array of short records sorted by address. Lookup 10 | proceeds by binary search. 11 | 12 | Metavector records are intentionally small, for time- and 13 | space-efficiency. They do not duplicate information held in the 14 | ELF symbol, for example. This is covered below. 15 | 16 | There may or may not be a "sections" layer in between segments and 17 | symbols (or, perhaps, in between *some* symbols and the segment, 18 | but not all). I have also contemplated creating a further 19 | "subsection" layer, recording the original section boundaries prior 20 | to linking; if a link map is available, this could be used to infer 21 | which parts of a section are padding and therefore available to be 22 | allocated e.g. for hot-patching purposes. 23 | 24 | "Symbols" is interpreted widely, in order to ensure that type 25 | information can be attached to all allocations that have a 26 | meaningful type. An ELF symbol is of course a symbol -- provided it 27 | is of non-zero st_size and has ELF symbol type STT_FUNC or 28 | STT_OBJECT. But other chunks of data can be treated as "symbols" 29 | too. There are two notable kinds: string literals, and static 30 | locals. 31 | 32 | String literals are treated as quasi-symbols whose boundaries and 33 | types are inferred from relocation records (this is assuming -q was 34 | passed at link time). 
Any region of memory not covered by a symbol 35 | but referenced by a relocation record is assumed to form an 36 | allocation, spanning from the referenced point to the next distinct 37 | point that is referenced by some other relocation record. 38 | 39 | Note that this is a heuristic and currently may be defeated by 40 | string suffix merging: if a relocation record points into the 41 | middle of a longer string, this will create an allocation boundary 42 | and could give rise to reports of bounds errors, say by libcrunch. 43 | A better heuristic could perhaps be devised, or additional metadata 44 | propagated. (Since string literals' sizes are statically known at 45 | the relocation site, this could be propagated somehow and then the 46 | overlap detected after linking, once string merging has happened.) 47 | 48 | Static locals typically don't have an ELF symbol but can be 49 | recovered from debugging information. This yields a second kind of 50 | quasi-symbols: "debug-only". 51 | 52 | As mentioned above, the metavector does not contain most metadata, 53 | e.g. for a 64-bit ELF symbol, it does not repeat the Elf64_Sym's 54 | contents. Rather, it's assumed the symbol table is mapped and 55 | available. For dynamic symbols (ELF .dynsym) this is always the 56 | case. For "static" symbols (ELF .symtab), the table is mapped by 57 | liballocs if it does not fall within a segment of the file already. 58 | For quasi-symbols, a "fake" symbol table called "extrasyms" is 59 | created by the liballocs tools, and stored in the meta-DSO. 60 | 61 | This assumption that a symbol record is always available enables a 62 | uniform yet compact representation in the metavector. To describe 63 | the object's address, size and (where present) name, it only needs 64 | to record which of these three symbol tables to consult, and the 65 | index into that table. The rest of the record encodes a pointer to 66 | the uniqtype, which of course is not present in a symbol record. 
On 67 | current 64-bit x86 platforms with only 47 bits of user address 68 | space, a uniqtype pointer requires only 44 bits (accounting for 69 | 8-byte alignment), meaning that in one word there remain 20 bits to 70 | identify the symbol table and index within it. These are assumed to 71 | be sufficient, i.e. there can be no more than about a million 72 | symbols of each kind per DSO. 73 | 74 | A minor exception to the above is that relocation quasi-symbols are 75 | currently not promoted to extrasyms. Instead the base address and 76 | size of the target memory are encoded directly into the metavector, 77 | and this memory is assumed to have array-of-character type. This 78 | may need to be revisited if non-character data is delineated only 79 | by relocs. (Perhaps on certain architectures that have limited 80 | immediate modes, constants of various types are stored at non-fixed 81 | offsets from the referencing instruction? Even if they are 82 | intermingled with the instructions, it would be good to have 83 | information about these. This has minor consequences for the 84 | packed_sequence that is the instruction stream... it consists not 85 | only of instructions but also of primitive constants.) 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is experimental research software! If you're interested in 2 | licensing it, please contact me, the author, Stephen Kell 3 | . 4 | 5 | Software licensing is complicated; licensing of research software is 6 | doubly so. If you care, please read my thoughts on this matter. 7 | 8 | 9 | Since people seem interested in using this software, as of the current 10 | revision I am happy to license it under the terms of GNU Lesser General 11 | Public License version 3, whose text is in the file LICENSE.lgpl3. 12 | 13 | If you want it under a different license, please contact me.
14 | 15 | Nothing about this licensing arrangement is final, nor exclusive of 16 | other options. For example, I may be willing to license the software *to 17 | you* on customised terms. And I may also change how I license subsequent 18 | changes to the work. All this is in aid of maximising public good. 19 | 20 | Stephen Kell 21 | 2nd December 2015 22 | 23 | 24 | Addendum: several programs in this distribution generate code as output. 25 | I hereby disclaim any copyright interest in any code generated in this 26 | way, by this code or any derivative of it. 27 | 28 | Stephen Kell 29 | 21st July 2016 30 | 31 | 32 | Another addendum: in the above I omitted to mention that separate 33 | licensing terms apply to a few parts of the code, as commented in the 34 | source file themselves. In particular, CIL tools (OCaml code in 35 | tools/lang/c/*) are licensed under the same licence as CIL, and bear 36 | comments to that effect. 37 | 38 | Stephen Kell 39 | 15th November 2017 40 | -------------------------------------------------------------------------------- /allocsld/asmutil.h: -------------------------------------------------------------------------------- 1 | #ifndef ASMUTIL_H_ 2 | #define ASMUTIL_H_ 3 | 4 | #include /* for ElfW and link map structures */ 5 | #include /* for ELFW_* */ 6 | 7 | #ifndef stringify 8 | #define stringifx(x) #x 9 | #define stringify(x) stringifx(x) 10 | #endif 11 | /* To embed a relocation type in our assembly strings, 12 | * we break out of the string but immediately stringify 13 | * the relocation type's preprocessor token, after expansion. 14 | * e.g. "..."R_(X86_64_PC32)"..." */ 15 | #define R_(x) stringify(R_ ## x) 16 | 17 | /* A cpp-macro for a stretch of inline assembly that 18 | * assembles into a .rodata section. In parallel, it assembles 19 | * a relocation table... the assembly invokes an assembly-macro when it 20 | * wants to emit a relocation, done using pushsection/popsection. 
21 | * 22 | * FIXME: the following bakes in Elf64_Rela in the .8byte sequence. 23 | * Better to cpp-macro-abstract this, and ideally also share these 24 | * cpp macro helpers with the code generated by tools/* (e.g. see extrasym). */ 25 | #define INSTRS_FROM_ASM(symname, asmstr) \ 26 | extern ElfW(Rela) symname ## _relocs[]; /*__attribute__((section(".rodata_" #symname "_relocs" ))); */ \ 27 | extern char symname []; /* __attribute__((section(".rodata_" #symname))); */ \ 28 | __asm__( \ 29 | ".pushsection .rodata_" #symname "_relocs, \"a\", @progbits \n\ 30 | "#symname "_relocs: \n\ 31 | .popsection \n\ 32 | .pushsection .rodata_" #symname ", \"a\", @progbits \n\ 33 | "#symname ": \n\ 34 | .popsection \n\ 35 | .set nrelocs, 0 \n\ 36 | .macro reloc offs kind symidx addend=0 \n\ 37 | .set offsval, \\offs - " #symname " \n\ 38 | .pushsection .rodata_" #symname "_relocs, \"a\", @progbits \n\ 39 | .8byte offsval \n\ 40 | .8byte \\kind | (\\symidx << 32) \n\ 41 | .8byte \\addend \n\ 42 | .popsection \n\ 43 | .set nrelocs, nrelocs + 1 \n\ 44 | .endm \n\ 45 | .pushsection .rodata_" #symname ", \"a\", @progbits \n\ 46 | " asmstr " \n\ 47 | .size " #symname ", . - " #symname "\n\ 48 | "#symname "_size:\n\ 49 | .8byte . - " #symname "\n\ 50 | "#symname "_nrelocs:\n\ 51 | .8byte nrelocs\n\ 52 | .popsection \n\ 53 | .pushsection " ".rodata_" #symname "_relocs, \"a\", @progbits\n\ 54 | .size " #symname "_relocs, . - " #symname "_relocs \n\ 55 | .popsection \n\ 56 | .purgem reloc\n" \ 57 | ) 58 | 59 | // Use it like the following: 60 | // INSTRS_FROM_ASM (bytes, /* FIXME: sysdep */ " \ 61 | // 1: movabs 0x123456789abcdef0,%rax # 48 b8 f0 de bc 9a 78 56 34 12 \n\ 62 | // RELOC 1b + 2, "STR(R_X86_64_64)", 0 "/* symidx */", 0 "/* addend */" \n\ 63 | // jmpq *%rax \n\ 64 | // "); 65 | // 66 | // ... now we have "bytes" as a char[] and "bytes_relocs" as a ElfW(Rela)[] 67 | 68 | /* What about applying relocs? 69 | * We could define memcpy_and_relocate(dest, src, n, relocs, symaddr...) 
70 | * ... how do we terminate symaddr? 71 | memcpy_and_relocate(dest, src, n, relocs, nsyms, symaddr...) 72 | * Another problem with our helper is that now 'sizeof bytes' does not give us 73 | * the memcpying, as distinct from 74 | */ 75 | 76 | static inline void apply_one_reloc(void *buf, ElfW(Rela) rel, uintptr_t *symaddrs) 77 | { 78 | char *tgt = buf + rel.r_offset; 79 | unsigned symidx = ELFW_R_SYM(rel.r_info); 80 | unsigned long s = symaddrs[symidx]; 81 | unsigned long a = rel.r_addend; 82 | unsigned long long utmp; 83 | long stmp; 84 | switch (ELFW_R_TYPE(rel.r_info)) 85 | { 86 | case R_X86_64_PC32: utmp = s - (uintptr_t) tgt + a; memcpy(tgt, &utmp, 4); break; 87 | case R_X86_64_64: utmp = s + a; memcpy(tgt, &utmp, 8); break; 88 | case R_X86_64_32: utmp = s + a; memcpy(tgt, &utmp, 4); break; 89 | case R_X86_64_32S: stmp = (int32_t) s + (int32_t) a; memcpy(tgt, &stmp, 4); break; 90 | case R_X86_64_TPOFF32:utmp = s + a; memcpy(tgt, &utmp, 4); break; 91 | 92 | default: abort(); 93 | } 94 | } 95 | static inline unsigned long read_one_relocated_field(void *buf, ElfW(Rela) rel) 96 | { 97 | char *tgt = buf + rel.r_offset; 98 | unsigned long long utmp = 0; 99 | unsigned utmp32 = 0; 100 | long long stmp = 0; 101 | unsigned stmp32 = 0; 102 | switch (ELFW_R_TYPE(rel.r_info)) 103 | { 104 | case R_X86_64_PC32: memcpy(&stmp, tgt, 4); return (uintptr_t) tgt + stmp; 105 | case R_X86_64_64: memcpy(&utmp, tgt, 8); return utmp; 106 | case R_X86_64_32: memcpy(&utmp32, tgt, 4); return utmp32; 107 | case R_X86_64_32S: memcpy(&stmp32, tgt, 4); return stmp32; 108 | case R_X86_64_TPOFF32:memcpy(&utmp32, tgt, 4); return utmp32; 109 | default: abort(); 110 | } 111 | } 112 | #define memcpy_and_relocate(dest, srcident, ...) 
do { \ 113 | uintptr_t addrlist[] = { __VA_ARGS__ }; \ 114 | extern size_t srcident ## _size; \ 115 | extern size_t srcident ## _nrelocs; \ 116 | extern ElfW(Rela) srcident ## _relocs[]; \ 117 | memcpy(dest, srcident, srcident ## _size); \ 118 | for (unsigned i = 0; i < srcident ## _nrelocs; ++i) { \ 119 | apply_one_reloc(dest, srcident ## _relocs[i], addrlist); \ 120 | } \ 121 | } while (0) 122 | 123 | #endif 124 | -------------------------------------------------------------------------------- /allocsld/cover-tracks.h: -------------------------------------------------------------------------------- 1 | struct link_map; 2 | extern struct link_map fake_ld_so_link_map; 3 | 4 | void cover_tracks(_Bool we_are_the_program, ElfW(Phdr) *program_phdrs, unsigned program_phnum, const char *ldso_path, uintptr_t inferior_dynamic_vaddr, uintptr_t base_addr); 5 | 6 | void instrument_ld_so_allocators(uintptr_t ld_so_load_addr); 7 | -------------------------------------------------------------------------------- /allocsld/cover-tracks.inc.c: -------------------------------------------------------------------------------- 1 | /* We are a fragment of C code, called from a late pre-entry context in donald. 2 | * We care about "program" phdrs because it's "the program" that needs a DT_DEBUG 3 | * entry. It might be they're the ones where we need to 4 | * */ 5 | cover_tracks(we_are_the_program, program_phdrs, 6 | program_phnum, SYSTEM_LDSO_PATH, 7 | inferior.dynamic_vaddr, inferior.base_addr); 8 | -------------------------------------------------------------------------------- /allocsld/return-hook.S: -------------------------------------------------------------------------------- 1 | # We have a thread-local whose job is to store the real 2 | # return address. Our trampoline will write into this 3 | # immediately before it clobbers the on-stack return address. 
# Storage for the thread-local: an 8-byte, 8-byte-aligned, zero-initialised
# object in .tbss, exported under the name real_return_address.
.globl real_return_address
.section .tbss,"awT",@nobits
.align 8
.type real_return_address, @object
.size real_return_address, 8
real_return_address:
	.zero 8

.text
.globl generic_return_hook
.type generic_return_hook, @function
.align 16
# generic_return_hook: fetch the value stored in this thread's
# real_return_address, push it, and 'retq' to it.
# Register effects visible below: %rax is saved and restored; %rdi is
# overwritten with the address of the thread-local and NOT restored.
# The thread-local itself is only read here, never cleared.
generic_return_hook:
	# We can treat %rdi as a scratch register because it's not callee-saved.
	pushq %rax                # preserve %rax; it is used as scratch below
	# --- begin 16-byte TLS magic
	# .byte 0x66
	# leaq real_return_address@tlsgd(%rip), %rdi
	# .word 0x6666
	# rex64 call __tls_get_addr@plt
	# --- end 16-byte TLS magic
	# This "leaq; call" sequence will get turned into a "mov; lea" sequence
	# by the linker, if it knows that the sought TLS entry is module-local.
	# The initial 'mov' materialises the base address of the TLS block,
	# and the following 'lea' applies an offset that reaches the variable.
	# This pair of instructions are also 16 bytes.
	# Let's just include them directly.
	mov %fs:0x0,%rax                           # thread pointer
	lea real_return_address@tpoff(%rax),%rax   # + fixed offset = &real_return_address
	movq %rax, %rdi # TLS address is in %rdi now
	popq %rax # Now %rax has its original value
	pushq (%rdi) # Now the saved return address is back on the stack
	# ___ <-- in here we can do what we like -- call to an outside helper?
	retq                      # pops the address just pushed and jumps to it
	# FIXME: write some DWARF CFI for this code.
	# Can we set up the CFI s.t. it loads the
	# return address from the TLS var just like we do?
	# Obviously, that will no longer apply at the final 'retq'.
42 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | aclocal && \ 3 | autoconf && \ 4 | libtoolize --force --ltdl && \ 5 | (test -f libltdl/ltdl.mk || (cd libltdl && ln -sf Makefile.inc ltdl.mk)) && \ 6 | autoheader && \ 7 | automake --add-missing && \ 8 | automake 9 | 10 | # autoreconf --force --install -I config -I m4 && \ 11 | -------------------------------------------------------------------------------- /buildtest/debian-buster/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:buster 2 | 3 | ARG user 4 | RUN apt-get update && apt-get install -y sudo 5 | RUN adduser ${user:-user} && \ 6 | echo "${user:-user} ALL=(root) NOPASSWD:ALL" > /etc/sudoers && \ 7 | chmod 0440 /etc/sudoers 8 | RUN mkdir -p /usr/local/src && chown root:user /usr/local/src && \ 9 | chmod g+w /usr/local/src 10 | USER ${user:-user} 11 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:staff /usr/lib/meta && \ 12 | sudo chmod g+w /usr/lib/meta 13 | RUN sudo apt-get install -y git build-essential libelf-dev libdw-dev binutils-dev \ 14 | autoconf automake libtool pkg-config autoconf-archive \ 15 | g++ ocaml ocamlbuild ocaml-findlib \ 16 | default-jdk-headless python3 python3-distutils python \ 17 | make git gawk gdb wget \ 18 | libunwind-dev libc6-dev-i386 zlib1g-dev libc6-dbg \ 19 | libboost-iostreams-dev libboost-regex-dev libboost-serialization-dev libboost-filesystem-dev 20 | RUN cd /usr/local/src && git clone https://github.com/stephenrkell/liballocs.git 21 | RUN cd /usr/local/src/liballocs && \ 22 | git submodule update --init --recursive && \ 23 | make -C contrib -j4 24 | RUN cd /usr/local/src/liballocs && \ 25 | ./autogen.sh && \ 26 | (. 
contrib/env.sh && ./configure --prefix=/usr/local) && \ 27 | make -j4 28 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:user /usr/lib/meta && \ 29 | sudo chmod g+w /usr/lib/meta 30 | RUN cd /usr/local/src/liballocs && \ 31 | make -f tools/Makefile.meta \ 32 | $(for libname in `ldd /bin/true | sed -En '/[[:blank:]]*([^[:blank:]]* => )?(.*) \(0x[0-9a-f]+\)/ {s//\2/;p}' | egrep 'libc\.so\.6|ld-linux.*\.so' | xargs readlink -f`; do echo "/usr/lib/meta${libname}-meta.so"; done) 33 | 34 | -------------------------------------------------------------------------------- /buildtest/debian-stretch/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:stretch 2 | 3 | ARG user 4 | RUN apt-get update && apt-get install -y sudo 5 | RUN adduser ${user:-user} && \ 6 | echo "${user:-user} ALL=(root) NOPASSWD:ALL" > /etc/sudoers && \ 7 | chmod 0440 /etc/sudoers 8 | RUN mkdir -p /usr/local/src && chown root:user /usr/local/src && \ 9 | chmod g+w /usr/local/src 10 | USER ${user:-user} 11 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:staff /usr/lib/meta && \ 12 | sudo chmod g+w /usr/lib/meta 13 | RUN sudo apt-get install -y git build-essential libelf-dev libdw-dev binutils-dev \ 14 | autoconf automake libtool pkg-config autoconf-archive \ 15 | g++ ocaml ocaml-findlib \ 16 | default-jdk-headless python3 python \ 17 | make git gawk gdb wget \ 18 | libunwind-dev libc6-dev-i386 zlib1g-dev libc6-dbg \ 19 | libboost-iostreams-dev libboost-regex-dev libboost-serialization-dev libboost-filesystem-dev 20 | RUN cd /usr/local/src && git clone https://github.com/stephenrkell/liballocs.git 21 | RUN cd /usr/local/src/liballocs && \ 22 | git submodule update --init --recursive && \ 23 | make -C contrib -j4 24 | RUN cd /usr/local/src/liballocs && \ 25 | ./autogen.sh && \ 26 | (. 
contrib/env.sh && ./configure --prefix=/usr/local) && \ 27 | make -j4 28 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:user /usr/lib/meta && \ 29 | sudo chmod g+w /usr/lib/meta 30 | RUN cd /usr/local/src/liballocs && \ 31 | make -f tools/Makefile.meta \ 32 | $(for libname in `ldd /bin/true | sed -En '/[[:blank:]]*([^[:blank:]]* => )?(.*) \(0x[0-9a-f]+\)/ {s//\2/;p}' | egrep 'libc\.so\.6|ld-linux.*\.so' | xargs readlink -f`; do echo "/usr/lib/meta${libname}-meta.so"; done) 33 | -------------------------------------------------------------------------------- /buildtest/ubuntu-18.04/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ARG user 4 | RUN apt-get update && apt-get install -y sudo 5 | RUN adduser ${user:-user} && \ 6 | echo "${user:-user} ALL=(root) NOPASSWD:ALL" > /etc/sudoers && \ 7 | chmod 0440 /etc/sudoers 8 | RUN mkdir -p /usr/local/src && chown root:user /usr/local/src && \ 9 | chmod g+w /usr/local/src 10 | USER ${user:-user} 11 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:staff /usr/lib/meta && \ 12 | sudo chmod g+w /usr/lib/meta 13 | RUN sudo apt-get install -y build-essential libbsd-dev \ 14 | libelf-dev libdw-dev binutils-dev \ 15 | autoconf automake libtool pkg-config autoconf-archive \ 16 | g++ ocaml ocamlbuild ocaml-findlib \ 17 | default-jdk-headless python3 python \ 18 | make git gawk gdb wget \ 19 | libunwind-dev libc6-dev-i386 zlib1g-dev libc6-dbg \ 20 | libboost-iostreams-dev libboost-regex-dev \ 21 | libboost-serialization-dev libboost-filesystem-dev libffi6 libffi-dev 22 | RUN cd /usr/local/src && git clone https://github.com/stephenrkell/liballocs.git 23 | RUN cd /usr/local/src/liballocs && \ 24 | git submodule update --init --recursive && \ 25 | make -C contrib -j4 26 | RUN cd /usr/local/src/liballocs && \ 27 | ./autogen.sh && \ 28 | (. 
contrib/env.sh && ./configure --prefix=/usr/local) && \ 29 | make -j4 30 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:user /usr/lib/meta && \ 31 | sudo chmod g+w /usr/lib/meta 32 | RUN cd /usr/local/src/liballocs && \ 33 | make -f tools/Makefile.meta \ 34 | $(for libname in `ldd /bin/true | sed -En '/[[:blank:]]*([^[:blank:]]* => )?(.*) \(0x[0-9a-f]+\)/ {s//\2/;p}' | egrep 'libc\.so\.6|ld-linux.*\.so' | xargs readlink -f`; do echo "/usr/lib/meta${libname}-meta.so"; done) 35 | -------------------------------------------------------------------------------- /buildtest/ubuntu-20.04/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ARG user 4 | RUN apt-get update && apt-get install -y sudo 5 | RUN adduser ${user:-user} && \ 6 | echo "${user:-user} ALL=(root) NOPASSWD:ALL" > /etc/sudoers && \ 7 | chmod 0440 /etc/sudoers 8 | RUN mkdir -p /usr/local/src && chown root:user /usr/local/src && \ 9 | chmod g+w /usr/local/src 10 | USER ${user:-user} 11 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:staff /usr/lib/meta && \ 12 | sudo chmod g+w /usr/lib/meta 13 | RUN sudo env DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y \ 14 | build-essential libbsd-dev libelf-dev libdw-dev binutils-dev \ 15 | autoconf automake libtool pkg-config autoconf-archive \ 16 | g++ ocaml ocamlbuild ocaml-findlib libnum-ocaml-dev \ 17 | default-jdk-headless python3 \ 18 | make git gawk gdb wget \ 19 | libunwind-dev libc6-dev-i386 zlib1g-dev libc6-dbg \ 20 | libboost-iostreams-dev libboost-regex-dev \ 21 | libboost-serialization-dev libboost-filesystem-dev libffi-dev \ 22 | ca-certificates \ 23 | pkg-config libglib2.0-dev # <-- for 'simple-multi-alloc' test case only 24 | RUN cd /usr/local/src && git clone https://github.com/stephenrkell/liballocs.git 25 | RUN cd /usr/local/src/liballocs && \ 26 | git submodule update --init --recursive && \ 27 | make -C contrib -j4 28 | RUN cd /usr/local/src/liballocs && 
\ 29 | ./autogen.sh && \ 30 | (. contrib/env.sh && ./configure --prefix=/usr/local) && \ 31 | make -j4 32 | RUN sudo mkdir -p /usr/lib/meta && sudo chown root:user /usr/lib/meta && \ 33 | sudo chmod g+w /usr/lib/meta 34 | RUN cd /usr/local/src/liballocs && \ 35 | make -f tools/Makefile.meta \ 36 | $(for libname in `ldd /bin/true | sed -En '/[[:blank:]]*([^[:blank:]]* => )?(.*) \(0x[0-9a-f]+\)/ {s//\2/;p}' | egrep 'libc\.so\.6|ld-linux.*\.so' | xargs readlink -f`; do echo "/usr/lib/meta${libname}-meta.so"; done) 37 | -------------------------------------------------------------------------------- /config.mk.in: -------------------------------------------------------------------------------- 1 | LIBMALLOCHOOKS := @libmallochooks@ 2 | LIBSYSTRAP := @libsystrap@ 3 | LIBRUNT := @librunt@ 4 | LIBDLBIND := @libdlbind@ 5 | DONALD := @donald@ 6 | TOOLSUB := @toolsub@ 7 | ELFTIN := @elftin@ 8 | CIL_INSTALL := @cil_install@ 9 | LIBALLOCSTOOL_CFLAGS := @liballocstool_cflags@ 10 | # export anything needed during build of liballocs-enabled executables 11 | export CIL_INSTALL 12 | export TOOLSUB 13 | export ELFTIN 14 | export LIBMALLOCHOOKS 15 | export LIBALLOCSTOOL_CFLAGS 16 | 17 | XWRAP_LDPLUGIN := $(ELFTIN)/xwrap-ldplugin/xwrap-ldplugin.so 18 | export XWRAP_LDPLUGIN 19 | 20 | # tests/Makefile and src/Makefile include us 21 | CPPFLAGS := -I@librunt@/include -I@libmallochooks@/include @LIBALLOCSTOOL_CFLAGS@ 22 | export CPPFLAGS 23 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | THIS_MAKEFILE := $(lastword $(MAKEFILE_LIST)) 2 | srcroot := $(realpath $(dir $(THIS_MAKEFILE))/..) 
3 | -include $(srcroot)/config.mk 4 | 5 | # HACK while LTO is broken 6 | CFLAGS += -fno-lto 7 | LDFLAGS += -fno-lto 8 | 9 | CLANG ?= clang 10 | 11 | # HACK while gcc can't grok our case / atomify4 metaprogramming 12 | # SIGH: without -fPIC, with clang we hit the infamous 'copy reloc of protected 13 | # symbols' bug, because it generates R_X86_64_64 direct to big_allocations 14 | # that it thinks will be copy-reloc'd into the executable, but the real liballocs 15 | # is using its own copy 16 | CC := $(CLANG) -fPIC -fno-lto 17 | 18 | META_BASE ?= /usr/lib/meta 19 | 20 | .PHONY: default 21 | default: client-allocator-elf+meta $(META_BASE)$(realpath $(dir $(THIS_MAKEFILE)))/client-allocator-elf+meta-meta.so 22 | 23 | $(META_BASE)$(realpath $(dir $(THIS_MAKEFILE)))/client-allocator-elf+meta-meta.so: client-allocator-elf+meta 24 | $(MAKE) -f $(srcroot)/tools/Makefile.meta $@ 25 | 26 | CFLAGS += -I$(srcroot)/include -g $(LIBALLOCSTOOL_CFLAGS) 27 | LDFLAGS += -L$(srcroot)/lib -L$(LIBDLBIND)/lib -Wl,--dynamic-linker,$(srcroot)/lib/allocsld.so $(srcroot)/lib/interp-pad.o 28 | LDLIBS += -lallocs -ldlbind -ldl -lbsd 29 | 30 | client-allocator-elf+meta: client-allocator-elf.o elf-allocators+meta.o elf-refs.o emit-asm.o 31 | $(CC) -o $@ $+ $(LDFLAGS) $(LDLIBS) 32 | 33 | %+meta.o: %.o 34 | cp "$<" "$@" && ../tools/lang/c/bin/link-used-types "$@" || (rm -f "$@"; false) 35 | 36 | .PHONY: clean 37 | clean: 38 | rm -f client-allocator-elf.o client-allocator-elf+meta.o client-allocator-elf+meta 39 | -------------------------------------------------------------------------------- /examples/elf-allocators.h: -------------------------------------------------------------------------------- 1 | #ifndef ELF_ALLOCATORS_H_ 2 | #define ELF_ALLOCATORS_H_ 3 | 4 | #include "allocmeta.h" 5 | 6 | struct allocated_chunk {}; // FIXME: move this elsewhere 7 | _Static_assert(sizeof (struct allocated_chunk) == 0, 8 | "struct allocated_chunk's size should be zero"); 9 | 10 | struct elf_allocated_chunk 11 
| { 12 | struct allocated_chunk empty; 13 | }; 14 | _Static_assert(sizeof (struct elf_allocated_chunk) == 0, 15 | "struct elf_allocated_chunk's size should be zero"); 16 | 17 | /* How does our code become aware of an mmap'd ELF file? 18 | * It might do it itself, or 19 | * it might be given a mapping made externally 20 | * and expected to 'bless' it as an ELF file, which would 21 | * involve creating the . 22 | * Since the 'external blessing' pattern is how most of our 23 | * current allocators work, we follow that pattern here. */ 24 | 25 | #define SHDR_IS_MANIFEST(shdr) \ 26 | ((shdr).sh_type != SHT_NOBITS && (shdr).sh_size != 0) 27 | 28 | #ifndef stringify 29 | #define stringify(cond) #cond 30 | #endif 31 | // stringify expanded 32 | #ifndef stringifx 33 | #define stringifx(cond) stringify(cond) 34 | #endif 35 | 36 | #define GET_UNIQTYPE_PTR(tfrag) ({ \ 37 | void *ret = fake_dlsym(RTLD_DEFAULT, "__uniqtype__" stringifx(tfrag)); \ 38 | if (ret == (void*)-1) ret = NULL; \ 39 | ret; }) 40 | 41 | #define ElfW_with_data(t) catx(ElfW(t), _with_data) 42 | #define elf_file_data_types(v) \ 43 | v(EHDR, ElfW(Ehdr), ElfW(Ehdr), /* is array? 
*/ 0) \ 44 | v(SHDRS, ElfW(Shdr), ElfW(Shdr), 1) \ 45 | v(PHDRS, ElfW(Phdr), ElfW(Phdr), 1) \ 46 | v(NHDR, ElfW(Nhdr), ElfW_with_data(Nhdr), 0) \ 47 | v(SYMS, ElfW(Sym), ElfW(Sym), 1) \ 48 | v(RELAS, ElfW(Rela), ElfW(Rela), 1) \ 49 | v(RELS, ElfW(Rel), ElfW(Rel), 1) \ 50 | v(DYNAMICS, ElfW(Dyn), ElfW(Dyn), 1) \ 51 | v(FUNPTRVVS, funptr_t, __PTR___FUN_FROM___FUN_TO_void, 1) \ 52 | v(BYTES, unsigned char, unsigned_char$$8, 1) 53 | 54 | // define an enum -- ignoring the second argument 55 | #define elf_file_data_types_enum_entry(tag, ctype, tfrag, tisarray) \ 56 | ELF_DATA_ ## tag , 57 | enum elf_file_data_type 58 | { 59 | ELF_DATA_NONE, 60 | elf_file_data_types(elf_file_data_types_enum_entry) 61 | ELF_DATA_NTYPES 62 | }; 63 | 64 | extern struct uniqtype *elf_file_type_table[ELF_DATA_NTYPES]; 65 | 66 | extern struct allocator __elf_file_allocator; 67 | extern struct allocator __elf_element_allocator; 68 | 69 | struct elf_file_metadata 70 | { 71 | void *alloc_site; 72 | }; 73 | 74 | struct elf_elements_metadata 75 | { 76 | /* With the metavector, what we get is a collection of 'elements' 77 | * each with a file offset, a type_idx and a size in bytes. 78 | * However, we might want to correlate these back to (especially) 79 | * the section headers, so store this too. 
*/ 80 | ElfW(Shdr) *shdrs; 81 | unsigned nshdr; 82 | unsigned char *shstrtab_data; 83 | unsigned metavector_size; 84 | struct elf_metavector_entry *metavector; 85 | bitmap_word_t bitmap[]; 86 | }; 87 | 88 | struct big_allocation *elf_adopt_mapping_sequence(void *mapping_start, 89 | size_t mapping_len, 90 | size_t trailing_mapping_len); 91 | struct uniqtype *elf_get_type(void *obj); 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /examples/elf-refs.h: -------------------------------------------------------------------------------- 1 | #ifndef ELF_REFS_H_ 2 | #define ELF_REFS_H_ 3 | 4 | #include 5 | #include "allocmeta.h" 6 | 7 | struct elf_walk_refs_state 8 | { 9 | struct walk_refs_state ref; 10 | struct big_allocation *file_bigalloc; 11 | struct elf_reference *buf; // don't copy this; we need to realloc it 12 | unsigned buf_capacity; 13 | unsigned buf_used; 14 | }; 15 | 16 | struct elf_reference 17 | { 18 | unsigned long source_file_offset; 19 | struct uniqtype *reference_type; 20 | unsigned long target_file_offset; // may be -1, in theory (shouldn't be, for us) 21 | const char *target_alloc_name; 22 | unsigned target_offset_from_alloc_start; 23 | struct uniqtype *referenced_type; 24 | intptr_t interp_how; 25 | // HMM: more here 26 | }; 27 | 28 | intptr_t can_interp_elf_offset_or_pointer(void *exp, struct uniqtype *exp_t, 29 | struct alloc_tree_link *link); 30 | 31 | void *do_interp_elf_offset_or_pointer(void *exp, struct uniqtype *exp_t, struct alloc_tree_link *link, intptr_t how); 32 | 33 | _Bool may_contain_elf_offset_or_pointer(void *exp, struct uniqtype *exp_t, struct alloc_tree_link *link); 34 | 35 | uintptr_t is_environ_elf_offset_or_pointer(void *exp, struct uniqtype *exp_t, struct alloc_tree_link *link); 36 | 37 | int seen_elf_reference_or_pointer_cb(struct big_allocation *maybe_the_allocation, 38 | void *obj, struct uniqtype *t, const void *allocsite, 39 | struct alloc_tree_link *link_to_here, 40 | 
void *elf_walk_refs_state_as_void); 41 | 42 | int seen_elf_environ_cb(struct big_allocation *maybe_the_allocation, 43 | void *obj, struct uniqtype *t, const void *allocsite, 44 | struct alloc_tree_link *link_to_here, 45 | void *environ_elt_cb_arg_as_void); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /examples/emit-asm.h: -------------------------------------------------------------------------------- 1 | #ifndef EMIT_ASM_H_ 2 | #define EMIT_ASM_H_ 3 | 4 | struct elf_walk_refs_state; 5 | struct big_allocation; 6 | 7 | struct emit_asm_ctxt 8 | { 9 | void *start_address; 10 | unsigned long emitted_up_to_offset; 11 | unsigned depth; 12 | // need to thread through a summary of incoming references, 13 | // so that we can emit labels as we go along 14 | struct elf_walk_refs_state *references; 15 | // to simulate a post-order traversal given only in-order traversal, 16 | // we queue up post-order output, which gets flushed 17 | // (1) on output at or below its depth, and 18 | // (2) at the end of the traversal. 
19 | struct { 20 | unsigned depth; 21 | char *output; 22 | } *queued_end_output; 23 | unsigned queue_size; 24 | unsigned queue_nused; 25 | struct big_allocation *file_bigalloc; 26 | }; 27 | 28 | int compare_reference_source_address(const void *refent1_as_void, const void *refent2_as_void); 29 | int compare_reference_target_address(const void *refent1_as_void, const void *refent2_as_void); 30 | 31 | int __liballocs_name_ref_targets_cb(struct big_allocation *maybe_the_allocation, 32 | void *obj, struct uniqtype *t, const void *allocsite, 33 | struct alloc_tree_link *link_to_here, 34 | void *elf_walk_refs_state_as_void); 35 | 36 | int emit_memory_asm_cb(struct big_allocation *maybe_the_allocation, 37 | void *obj, struct uniqtype *t, const void *allocsite, 38 | struct alloc_tree_link *link_to_here, void *emit_asm_ctxt_as_void); 39 | 40 | void drain_queued_output(struct emit_asm_ctxt *ctxt, unsigned depth); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /include/allocs.h: -------------------------------------------------------------------------------- 1 | liballocs.h -------------------------------------------------------------------------------- /include/allocsites.h: -------------------------------------------------------------------------------- 1 | #ifndef LIBALLOCS_ALLOCSITES_H_ 2 | #define LIBALLOCS_ALLOCSITES_H_ 3 | 4 | #include "allocmeta.h" 5 | 6 | /* Each allocation site has a metadata record generated for it 7 | * by our toolchain extensions. 8 | * These come out in one big array, so naturally a given record 9 | * has some index within that array. 10 | * To create a global identifier, we just remember a "start" value 11 | * for each loaded object. The global index of an allocation site 12 | * is its per-object index plus that object's "start" value. 13 | * 14 | * If we want to look up an alloc site's address by its index, 15 | * we get the base index for its object 16 | * and then add its offset. 
17 | * 18 | * We can also look up an alloc site by its address. 19 | * Since each object's allocation site metadata records are also 20 | * sorted by the site address within that object, we simply 21 | * have to identify the object and then binary-search within 22 | * that object's sorted vector of allocation site records. 23 | * 24 | * FIXME: this is a bit too separate from the bigalloc stuff for my liking. 25 | * Is "allocation site" a central concept? Arguably so. 26 | * 27 | * FIXME: what if we want to redefine a given function / move its allocsites / insert a new one? 28 | * If we split the id into segment#, per-segment-id#, is that workable? 29 | * Suppose we use 10 bits for segment. Then we have 6 bits. So no. Even 8-8 is no-go. 30 | * (Arguably 16 bits for the id is not enough. But it's only for callers 31 | * of malloc/similar, remember.) 32 | * Maybe on updating some code, a global rewrite of allocsite ids could 33 | * be done. This is all academic for now. Much of the other static metadata stuff, 34 | * i.e. the sorted meta-vector, is also per-file or per-segment. 35 | * 36 | * FIXME: this should probably be part of the extrasyms or at least the 37 | * sorted meta-vector. Remember we have 38 | * 39 | * alloc sites (heap) <-- we're thinking about these 40 | * static alloc sites (being replaced by sorted meta vec) 41 | * frame alloc sites (like heap alloc sites but one extra field) 42 | * 43 | * and for the static alloc sites, we add type info *and* link back to 44 | * a symbol (or a spent reloc record? No; the meta-info is about the *target* 45 | * of the reloc, which may be the target of *many* reloc records; unclear 46 | * what it would mean to pick one... I think that field should just be 0). 47 | 48 | struct sym_or_reloc_rec 49 | { 50 | unsigned kind:2; // an instance of sym_or_reloc_kind 51 | unsigned idx:18; // i.e. at most 256K symbols of each kind, per file 52 | unsigned long uniqtype_ptr_bits:44; // i.e. 
the low-order 3 bits are 0 53 | } *sorted_meta_vec; // addr-sorted list of relevant dynsym/symtab/extrasym/reloc entries 54 | 55 | * Note that the sorted meta vec entry doesn't store the address of the 56 | * object it describes -- for that we have to indirect into the symtab 57 | * or to use the 'starts' bitmap. 58 | * 59 | * These patterns -- spines, sorted arrays, bitmaps and "next index" shortcut 60 | * arrays -- seem to be popping up in a few places. It would be good to have 61 | * one implementation of them. 62 | * Our plan for the starts bitmap was 63 | * for that. 64 | * So in the case of relocs, we have to encode the section/symbol *and* 65 | * (perhaps) addend that gives us the target address. 66 | * 67 | * 68 | * We could make this *sorted_meta_vec[N_META_KINDS] 69 | * and allow additional per-file metadata kinds, of which heap allocsites 70 | * would naturally be one, and frame allocsites would naturally be another. 71 | 72 | * 73 | */ 74 | 75 | 76 | /* This is effectively a "spine" linking all the per-file 77 | * allocsite metadata vectors. */ 78 | struct allocsites_vectors_by_base_id_entry 79 | { 80 | allocsite_id_t start_id; 81 | allocsite_id_t count; 82 | uintptr_t file_base_addr; 83 | struct allocsite_entry *ptr; 84 | }; 85 | #define ALLOCSITES_INDEX_SIZE 256 /* i.e. 
up to 256 objects with allocsite metadata */ 86 | extern struct allocsites_vectors_by_base_id_entry 87 | allocsites_vectors_by_base_id[ALLOCSITES_INDEX_SIZE] __attribute__((visibility("hidden"))); 88 | extern unsigned short 89 | allocsites_id_entry_slot_next_free __attribute__((visibility("hidden"))); 90 | 91 | void 92 | init_allocsites_info(struct allocs_file_metadata *file) __attribute__((visibility("hidden"))); 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /include/fake-libunwind.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLOCS_FAKE_LIBUNWIND_H_ 2 | #define ALLOCS_FAKE_LIBUNWIND_H_ 3 | 4 | #include /* for size_t */ 5 | 6 | #if !defined(__i386__) && !defined(__x86__) && !defined(__x86_64__) && !defined(X86_64) 7 | #error "Unsupported architecture for fake libunwind." 8 | #endif 9 | 10 | #if defined(__cplusplus) || defined(c_plusplus) 11 | extern "C" { 12 | #endif 13 | 14 | #if defined(__i386__) || defined(__x86__) 15 | #define UNW_TARGET_X86 16 | #elif defined(__x86_64__) || defined(X86_64) 17 | #define UNW_TARGET_X86_64 18 | #endif 19 | typedef unsigned long unw_word_t; 20 | typedef void *unw_addr_space_t; 21 | extern long local_addr_space; 22 | extern unw_addr_space_t unw_local_addr_space __asm__("__liballocs_unw_local_addr_space"); 23 | struct accessors 24 | { 25 | int (*access_mem) (unw_addr_space_t as, unw_word_t addr, unw_word_t *data, int dir, void *priv); 26 | }; 27 | typedef struct accessors unw_accessors_t; 28 | 29 | extern struct accessors local_accessors; 30 | inline struct accessors *unw_get_accessors(unw_addr_space_t as) 31 | { 32 | return &local_accessors; 33 | } 34 | 35 | #if defined(__cplusplus) || defined(c_plusplus) 36 | enum unw_error_t 37 | #else 38 | typedef enum 39 | #endif 40 | { 41 | UNW_ESUCCESS = 0, 42 | UNW_EUNSPEC, 43 | UNW_ENOMEM, 44 | UNW_EBADREG, 45 | UNW_EREADONLYREG, 46 | UNW_ESTOPUNWIND, 47 | UNW_EINVALIDIP, 48 | 
UNW_EBADFRAME, 49 | UNW_EINVAL, 50 | UNW_EBADVERSION, 51 | UNW_ENOINFO 52 | #if defined(__cplusplus) || defined(c_plusplus) 53 | }; 54 | #else 55 | } unw_error_t; 56 | #endif 57 | 58 | /* core register numbers from libunwind-x86.h */ 59 | #if defined(__cplusplus) || defined(c_plusplus) 60 | enum x86_regnum_t 61 | #else 62 | typedef enum 63 | #endif 64 | { 65 | UNW_X86_EAX, 66 | UNW_X86_EDX, 67 | UNW_X86_ECX, 68 | UNW_X86_EBX, 69 | UNW_X86_ESI, 70 | UNW_X86_EDI, 71 | UNW_X86_EBP, 72 | UNW_X86_ESP, 73 | UNW_X86_EIP, 74 | UNW_X86_EFLAGS, 75 | UNW_X86_TRAPNO, 76 | #if defined(__cplusplus) || defined(c_plusplus) 77 | }; 78 | #else 79 | } x86_regnum_t; 80 | #endif 81 | /* core register numbers from libunwind-x86_64.h */ 82 | #if defined(__cplusplus) || defined(c_plusplus) 83 | enum x86_64_regnum_t 84 | #else 85 | typedef enum 86 | #endif 87 | { 88 | UNW_X86_64_RAX, 89 | UNW_X86_64_RDX, 90 | UNW_X86_64_RCX, 91 | UNW_X86_64_RBX, 92 | UNW_X86_64_RSI, 93 | UNW_X86_64_RDI, 94 | UNW_X86_64_RBP, 95 | UNW_X86_64_RSP, 96 | UNW_X86_64_R8, 97 | UNW_X86_64_R9, 98 | UNW_X86_64_R10, 99 | UNW_X86_64_R11, 100 | UNW_X86_64_R12, 101 | UNW_X86_64_R13, 102 | UNW_X86_64_R14, 103 | UNW_X86_64_R15, 104 | UNW_X86_64_RIP 105 | #if defined(__cplusplus) || defined(c_plusplus) 106 | }; 107 | #else 108 | } x86_64_regnum_t; 109 | #endif 110 | 111 | #if defined(__i386__) || defined(__x86__) 112 | #define UNW_REG_IP UNW_X86_EIP 113 | #define UNW_REG_SP UNW_X86_ESP 114 | #define UNW_REG_BP UNW_X86_EBP 115 | #define UNW_TDEP_BP UNW_X86_EBP 116 | #elif defined(__x86_64__) || defined(X86_64) 117 | #define UNW_REG_IP UNW_X86_64_RIP 118 | #define UNW_REG_SP UNW_X86_64_RSP 119 | #define UNW_REG_BP UNW_X86_64_RBP 120 | #define UNW_TDEP_BP UNW_X86_64_RBP 121 | #endif 122 | 123 | #if defined(__cplusplus) || defined(c_plusplus) 124 | struct unw_cursor_t 125 | #else 126 | typedef struct 127 | #endif 128 | { 129 | unw_word_t frame_sp; 130 | unw_word_t frame_bp; 131 | unw_word_t frame_ip; 132 | #if 
defined(__cplusplus) || defined(c_plusplus) 133 | }; 134 | #else 135 | } unw_cursor_t; 136 | #endif 137 | typedef unw_cursor_t unw_context_t; 138 | 139 | 140 | /* These are defined in fake-libunwind.c. They have protected visibility 141 | * to allow them to be inlined at link time, and a symbol prefix 142 | * to avoid their replacing the non-fake libunwind in other parts of 143 | * the program. */ 144 | int unw_get_reg(unw_cursor_t *cursor, int reg, unw_word_t *dest) __asm__("__liballocs_unw_get_reg"); 145 | int unw_init_local(unw_cursor_t *cursor, unw_context_t *context) __asm__("__liballocs_unw_init_local"); 146 | int unw_get_proc_name(unw_cursor_t *p_cursor, char *buf, size_t n, unw_word_t *offp) __asm__("__liballocs_unw_get_proc_name"); 147 | int unw_getcontext(unw_context_t *ucp) __asm__("__liballocs_unw_getcontext"); 148 | int unw_step(unw_cursor_t *cp) __asm__("__liballocs_unw_step"); 149 | 150 | #if defined(__cplusplus) || defined(c_plusplus) 151 | } 152 | #endif 153 | 154 | #endif 155 | -------------------------------------------------------------------------------- /include/liballocs_config.h.in: -------------------------------------------------------------------------------- 1 | /* Defined to 1 if we must use our own libunwind */ 2 | #undef USE_FAKE_LIBUNWIND 3 | 4 | /* If lifetime extension is available this macro expands to the number of 5 | * available lifetime policies */ 6 | #undef LIFETIME_POLICIES 7 | 8 | /* Expands to the type of lifetime inserts */ 9 | #undef LIFETIME_INSERT_TYPE 10 | 11 | /* If defined, liballocs needs to return the precise requested size on size 12 | * queries */ 13 | #undef PRECISE_REQUESTED_ALLOCSIZE 14 | -------------------------------------------------------------------------------- /include/liballocs_ext.h: -------------------------------------------------------------------------------- 1 | #ifndef LIBALLOCS_EXT_H_ 2 | #define LIBALLOCS_EXT_H_ 3 | 4 | /* This file is for declarations that might be needed 5 | * in code
external to the liballocs DSO, 6 | * but that calls into usually private parts of the 7 | * liballocs implementation. 8 | * 9 | * Examples include allocator stubs (linked in to a built exe) 10 | * and possibly 'extensions' residing in other DSOs (like the 11 | * ELF file allocator in examples/). */ 12 | 13 | void *__liballocs_private_malloc(size_t); 14 | void *__liballocs_private_realloc(void*, size_t); 15 | void __liballocs_private_free(void *); 16 | 17 | void __liballocs_free_arena_bitmap_and_info(void *info /* really struct arena_bitmap_info * */); 18 | 19 | /* All the above are created as global aliases (would ideally 20 | * be protected). */ 21 | 22 | void __notify_copy(void *dest, const void *src, unsigned long n); 23 | void __notify_free(void *dest); 24 | // FIXME: seems wrong that this is declared only in the CIL inlines? 25 | void __liballocs_uncache_all(const void *allocptr, unsigned long size); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /include/linear_malloc_index.h: -------------------------------------------------------------------------------- 1 | #ifndef _LINEAR_MALLOC_INDEX_H 2 | #define _LINEAR_MALLOC_INDEX_H 3 | 4 | /* Note: you have to be _GNU_SOURCE to use this file. */ 5 | #ifndef _GNU_SOURCE /* ensure we get PTHREAD_MUTEX_RECURSIVE_NP */ 6 | #error "Not _GNU_SOURCE!" 7 | #endif 8 | 9 | #include 10 | #include "liballocs_config.h" 11 | #include "liballocs.h" 12 | #include "liballocs_ext.h" 13 | #include "pageindex.h" 14 | #include "malloc-meta.h" 15 | 16 | struct linear_malloc_rec { 17 | void *addr; 18 | unsigned caller_requested_size; // 32 bits should be enough 19 | unsigned char padding_to_caller_usable_size; 20 | // the insert lives at userptr + caller_usable_size 21 | }; 22 | 23 | struct linear_malloc_index_instance { 24 | /* We have to chain a bigalloc-creating shim onto these once liballocs 25 | * starts up.
Otherwise we won't be able to find the linear malloc arenas 26 | * starting from a liballocs query, which always proceeds via the pageindex. 27 | * So we slurp the addresses of our *pointers* to the original malloc-family 28 | * functions, and chain our handler after we've done an initial scan of 29 | * the chunks indexed so far. */ 30 | void *(**p_orig_malloc)(size_t); 31 | void *(**p_orig_calloc)(size_t, size_t); 32 | void *(**p_orig_realloc)(void *, size_t); 33 | void (**p_orig_free)(void*); 34 | struct linear_malloc_rec *recs; 35 | unsigned nrecs; 36 | unsigned nrecs_used; 37 | }; 38 | 39 | 40 | #ifndef MAX_LINEAR_MALLOCS 41 | #define space_left_in_one_page (\ 42 | (4096 - \ 43 | sizeof (struct linear_malloc_index_instance)) \ 44 | & ~((_Alignof (struct linear_malloc_rec)) - 1) \ 45 | ) 46 | #define MAX_LINEAR_MALLOCS \ 47 | ( (space_left_in_one_page) / (sizeof (struct linear_malloc_rec)) ) 48 | #endif 49 | 50 | static inline int compare_linear_mallocs(const void *arg1, const void *arg2) 51 | { 52 | /* Mostly we just compare the addresses. But we want null 53 | * addresses to float to the end, to handle deletions properly. 54 | * So... */ 55 | void *addr1 = ((struct linear_malloc_rec *) arg1)->addr; 56 | void *addr2 = ((struct linear_malloc_rec *) arg2)->addr; 57 | if (!addr1 && !addr2) return 0; 58 | if (!addr1) /* first argument is greater */ return 1; 59 | if (!addr2) /* second argument is greater */ return -1; 60 | return ((uintptr_t) addr1 > (uintptr_t) addr2) - ((uintptr_t) addr1 < (uintptr_t) addr2); /* sign only: the raw uintptr_t difference does not fit in int */ 61 | } 62 | 63 | static inline 64 | struct linear_malloc_rec *find_linear_malloc_rec(void* addr, struct linear_malloc_rec *recs, 65 | unsigned nrecs, unsigned nrecs_used) 66 | { 67 | #define proj(r) ((uintptr_t)(r)->addr) 68 | struct linear_malloc_rec *found = bsearch_leq_generic(struct linear_malloc_rec, 69 | (uintptr_t) addr, 70 | recs, 71 | nrecs_used, 72 | proj); 73 | #undef proj 74 | /* Does 'found' span the address we're looking for?
*/ 75 | if (found && 76 | (uintptr_t) addr < ((uintptr_t) found->addr + found->caller_requested_size) 77 | ) return found; else return NULL; 78 | } 79 | 80 | static inline size_t linear_malloc_usable_size(void *arg, struct linear_malloc_rec *recs, 81 | unsigned nrecs, unsigned nrecs_used) 82 | { 83 | struct linear_malloc_rec *found 84 | = find_linear_malloc_rec(arg, recs, nrecs, nrecs_used); 85 | if (found && found->addr == arg) 86 | { 87 | return found->caller_requested_size + found->padding_to_caller_usable_size; 88 | } 89 | return (size_t) -1; 90 | } 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /include/uniqtype-bfs.h: -------------------------------------------------------------------------------- 1 | #ifndef UNIQTYPE_BFS_H_ 2 | #define UNIQTYPE_BFS_H_ 3 | 4 | // our representation of nodes in the graph 5 | typedef struct __uniqtype_node_rec_s 6 | { 7 | void* obj; 8 | struct uniqtype *t; 9 | void *info; 10 | void (*free)(void *); 11 | struct __uniqtype_node_rec_s *next; 12 | } __uniqtype_node_rec; 13 | 14 | static inline void __uniqtype_node_queue_push_tail(__uniqtype_node_rec **q_head, __uniqtype_node_rec **q_tail, __uniqtype_node_rec *to_enqueue) 15 | { 16 | __uniqtype_node_rec *old_head_node = *q_head; 17 | __uniqtype_node_rec *old_tail_node = *q_tail; 18 | assert(!to_enqueue->next); 19 | *q_tail = to_enqueue; 20 | if (old_tail_node) old_tail_node->next = to_enqueue; 21 | else 22 | { 23 | assert(!old_head_node); 24 | /* If we just went from 0 elements to 1, update the head */ 25 | *q_head = to_enqueue; 26 | } 27 | } 28 | 29 | static inline __uniqtype_node_rec *__uniqtype_node_queue_pop_head(__uniqtype_node_rec **q_head, __uniqtype_node_rec **q_tail) 30 | { 31 | __uniqtype_node_rec *old_head_node = *q_head; 32 | if (old_head_node) 33 | { 34 | *q_head = old_head_node->next; 35 | /* If we just went from 1 element to 0, clear the tail. 
*/ 36 | if (!*q_head) *q_tail = NULL; 37 | /* Clear the "next" pointer, since it's not in the queue any more. */ 38 | old_head_node->next = NULL; 39 | } 40 | return old_head_node; 41 | } 42 | 43 | static inline _Bool __uniqtype_node_queue_empty(void *q_head) 44 | { 45 | return !q_head; 46 | } 47 | 48 | typedef void follow_ptr_fn(void**, struct uniqtype**, void *); 49 | typedef void on_blacken_fn(void *obj, struct uniqtype *t, void *); 50 | 51 | void __uniqtype_default_follow_ptr(void**, struct uniqtype**, void *); 52 | 53 | void __uniqtype_walk_bfs_from_object( 54 | void *object, struct uniqtype *t, 55 | follow_ptr_fn *follow_ptr, void *fp_arg, 56 | on_blacken_fn *on_blacken, void *ob_arg); 57 | 58 | void __uniqtype_process_bfs_queue( 59 | __uniqtype_node_rec **p_q_head, 60 | __uniqtype_node_rec **p_q_tail, 61 | follow_ptr_fn *follow_ptr, void *fp_arg, 62 | on_blacken_fn *on_blacken, void *ob_arg); 63 | 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | THIS_MAKEFILE := $(lastword $(MAKEFILE_LIST)) 2 | SRCROOT := $(realpath $(dir $(THIS_MAKEFILE))/..) 3 | 4 | default: all 5 | 6 | SYMLINKS := liballocs_preload.a liballocs_preload.so \ 7 | liballocs_dummyweaks.o liballocs_dummyweaks.so liballocs.a \ 8 | liballocs.so allocsld.so 9 | 10 | # Now that we build in '$(SRCROOT)/build/', where might 11 | # be 'opt' or 'debug', we have a problem -- which one do we symlink? 12 | # The answer is that we symlink the outdir, then make our symlinks point 13 | # under there. 14 | ifneq ($(TEST),) 15 | # we never link a 'test' build here... 
16 | # but if run with non-empty TEST, 'libbuild' will force *both* a debug *and* a test build 17 | outdir := debug 18 | libbuild_extra := DEBUG=1 19 | else 20 | ifneq ($(DEBUG),) 21 | outdir := debug 22 | libbuild_extra := DEBUG=1 23 | else 24 | outdir := opt 25 | libbuild_extra := 26 | endif 27 | endif 28 | 29 | all: outdir $(SYMLINKS) liballocs_systrap.o 30 | 31 | # For simplicity, we want to support being invoked before the relevant build 32 | # has been done. In particular, tests/lib-test relies on being able to trigger a build. 33 | target_files := $(filter-out liballocs.so allocsld.so,$(SYMLINKS)) 34 | $(foreach f,$(target_files) systrap.o,$(SRCROOT)/build/$(outdir)/$(f)): libbuild 35 | .PHONY: libbuild 36 | libbuild: 37 | $(MAKE) -C .. SUBDIRS=src $(libbuild_extra) 38 | ifneq ($(TEST),) 39 | $(MAKE) -C .. SUBDIRS=src TEST="" $(libbuild_extra) 40 | endif 41 | 42 | ifneq ($(notdir $(shell readlink outdir)),$(outdir)) 43 | .PHONY: outdir 44 | endif 45 | #$(foreach f,$(target_files) systrap.o,$(SRCROOT)/build/$(outdir)/$(f)) 46 | outdir: libbuild 47 | rm -f outdir 48 | ln -sf ../build/$(outdir) $@ 49 | 50 | $(target_files): %: $(SRCROOT)/build/$(outdir)/% 51 | ln -sf outdir/$(notdir $<) "$@" 52 | 53 | # the liballocs.so file lives in the srcdir 54 | liballocs.so: 55 | ln -sf "$(SRCROOT)"/src/liballocs.so $@ 56 | 57 | # allocsld.so lives in the allocsld dir 58 | allocsld.so: 59 | ln -sf "$(SRCROOT)"/allocsld/$@ $@ 60 | 61 | # the systrap one has a different name 62 | # We export this separately because libcrunch uses it, even in its 63 | # stubs (dummyweaks) objects, which wrap __liballocs_nudge_mmap. 64 | # I think a better fix would be to have libcrunch pick up that 65 | # function from liballocs.a, but that may be fiddly (FIXME), e.g. 66 | # requiring one-function-per-object or --gc-sections. 
67 | liballocs_systrap.o: $(SRCROOT)/build/$(outdir)/systrap.o 68 | ln -sf outdir/$(notdir $<) "$@" 69 | 70 | clean: 71 | rm -f outdir $(SYMLINKS) liballocs_systrap.o liballocs.so 72 | -------------------------------------------------------------------------------- /liballocs.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | pkgincludedir=${includedir}/@PACKAGE@ 6 | pkglibdir=@pkglibdir@ 7 | datarootdir=@datarootdir@ 8 | datadir=@datadir@ 9 | 10 | Name: liballocs 11 | Description: Runtime and toolchain for whole-program monitoring of allocations and their data types 12 | Version: 0.1 13 | Requires: libsrk31cxx libcxxfileno libdwarfpp libcxxgen dwarfidl 14 | Cflags: -I${includedir}/liballocs 15 | Libs: -L${libdir} -lallocs 16 | -------------------------------------------------------------------------------- /src/addrlist.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "liballocs_private.h" 9 | 10 | #include 11 | 12 | int __liballocs_addrlist_contains(struct addrlist *l, void *addr) __attribute__((visibility("protected"))); 13 | int __liballocs_addrlist_contains(struct addrlist *l, void *addr) 14 | { 15 | for (unsigned i = 0; i < l->count; ++i) 16 | { 17 | if (l->addrs[i] == addr) return 1 + i; 18 | } 19 | return 0; 20 | } 21 | void __liballocs_addrlist_add(struct addrlist *l, void *addr) __attribute__((visibility("protected"))); 22 | void __liballocs_addrlist_add(struct addrlist *l, void *addr) 23 | { 24 | if (l->count == l->allocsz) 25 | { 26 | ++(l->allocsz); 27 | l->allocsz *= 2; 28 | l->addrs = __private_realloc( 29 | l->addrs, 30 | l->allocsz * sizeof (void*)); 31 | } 32 | l->addrs[l->count++] = addr; 33 | } 34 | 
-------------------------------------------------------------------------------- /src/allocators/default-lib-malloc.c: -------------------------------------------------------------------------------- 1 | /* This file uses GNU C extensions */ 2 | #define _GNU_SOURCE 3 | 4 | /* We need to create global hooks, not hidden. 5 | * Must match how we pull in libmallochooks source files in src/Makefile. */ 6 | #define ALLOC_EVENT_ATTRIBUTES 7 | #define ALLOC_EVENT(s) __liballocs_malloc_ ## s 8 | 9 | #include 10 | size_t malloc_usable_size(void *ptr); 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "liballocs_private.h" 21 | #include "relf.h" 22 | #include "pageindex.h" 23 | #include "generic_malloc_index.h" 24 | #include "malloc-meta.h" 25 | 26 | /* Stuff we need to generate glue goes in here. */ 27 | #include "../tools/stubgen.h" /* this pathname is a HACK */ 28 | 29 | /* To be the "default lib malloc" means the one that any preloads 30 | * in this, preloaded library, will override. */ 31 | // FIXME: this indirect call is potentially slow. Could we instead use an ifunc? 32 | static size_t __default_lib_malloc_usable_size(void *ptr) 33 | { 34 | static size_t (*real_malloc_usable_size)(void *); 35 | if (!real_malloc_usable_size) 36 | { 37 | real_malloc_usable_size = fake_dlsym(RTLD_NEXT, "malloc_usable_size"); 38 | } 39 | return real_malloc_usable_size(ptr); 40 | } 41 | ALLOC_EVENT_INDEXING_DEFS(__default_lib_malloc, __default_lib_malloc_usable_size) 42 | 43 | /* By default, the 'malloc' first in libraries' link order, i.e. the one */ 44 | /* our preload sits in front of, is deemed the global malloc. But if the */ 45 | /* executable has one too, it should override this. 
*/ 46 | extern struct allocator __global_malloc_allocator 47 | __attribute__((weak,alias("__default_lib_malloc_allocator"))); 48 | -------------------------------------------------------------------------------- /src/cache.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include "liballocs_cil_inlines.h" 3 | #include "liballocs.h" 4 | #include "pageindex.h" 5 | 6 | // FIXME: this should be thread-local but my gdb can't grok that 7 | struct __liballocs_memrange_cache /* __thread */ __liballocs_ool_cache = { 8 | .size_plus_one = 1 + LIBALLOCS_MEMRANGE_CACHE_MAX_SIZE, 9 | .next_victim = 1 10 | }; 11 | 12 | /* FIXME: rewrite these */ 13 | void __liballocs_uncache_all(const void *allocptr, unsigned long size) 14 | { 15 | assert((__liballocs_check_cache_sanity(&__liballocs_ool_cache), 1)); 16 | for (unsigned i = 1; i < __liballocs_ool_cache.size_plus_one; ++i) 17 | { 18 | if (__liballocs_ool_cache.validity & (1u << (i-1))) 19 | { 20 | assert((__liballocs_check_cache_sanity(&__liballocs_ool_cache), 1)); 21 | /* Uncache any object beginning anywhere within the passed-in range. 
*/ 22 | if ((char*) __liballocs_ool_cache.entries[i].obj_base >= (char*) allocptr 23 | && (char*) __liballocs_ool_cache.entries[i].obj_base < (char*) allocptr + size) 24 | { 25 | // unset validity and make this the next victim 26 | __liballocs_cache_unlink(&__liballocs_ool_cache, i); 27 | __liballocs_ool_cache.next_victim = i; 28 | } 29 | assert((__liballocs_check_cache_sanity(&__liballocs_ool_cache), 1)); 30 | } 31 | } 32 | assert((__liballocs_check_cache_sanity(&__liballocs_ool_cache), 1)); 33 | } 34 | -------------------------------------------------------------------------------- /src/counters.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | 3 | #include 4 | #include "liballocs.h" 5 | #include "liballocs_private.h" 6 | 7 | struct addrlist __liballocs_unrecognised_heap_alloc_sites = { 0, 0, NULL }; 8 | 9 | struct liballocs_err __liballocs_err_stack_walk_step_failure 10 | = { "stack walk step failure" }; 11 | struct liballocs_err __liballocs_err_stack_walk_reached_higher_frame 12 | = { "stack walk reached higher frame" }; 13 | struct liballocs_err __liballocs_err_stack_walk_reached_top_of_stack 14 | = { "stack walk reached top-of-stack" }; 15 | struct liballocs_err __liballocs_err_unknown_stack_walk_problem 16 | = { "unknown stack walk problem" }; 17 | struct liballocs_err __liballocs_err_unindexed_heap_object 18 | = { "unindexed heap object" }; 19 | struct liballocs_err __liballocs_err_unindexed_alloca_object 20 | = { "unindexed alloca object" }; 21 | struct liballocs_err __liballocs_err_unrecognised_alloc_site 22 | = { "unrecognised alloc site" }; 23 | struct liballocs_err __liballocs_err_unrecognised_static_object 24 | = { "unrecognised static object" }; 25 | struct liballocs_err __liballocs_err_object_of_unknown_storage 26 | = { "object of unknown storage" }; 27 | 28 | /* Counters -- these are mostly liballocs-internal and therefore hidden, 29 | * but the ones to do with heap allocation
might get ref'd from other 30 | * DSOs. Also we may find that we want to inline some paths 31 | * into clients, in which case others may have to become more visible. */ 32 | unsigned long __liballocs_aborted_stack __attribute__((visibility("hidden")));; 33 | unsigned long __liballocs_aborted_static __attribute__((visibility("hidden")));; 34 | unsigned long __liballocs_aborted_unknown_storage __attribute__((visibility("hidden")));; 35 | unsigned long __liballocs_hit_heap_case __attribute__((visibility("protected"))); 36 | unsigned long __liballocs_hit_alloca_case __attribute__((visibility("hidden")));; 37 | unsigned long __liballocs_hit_stack_case __attribute__((visibility("hidden")));; 38 | unsigned long __liballocs_hit_static_case __attribute__((visibility("hidden")));; 39 | unsigned long __liballocs_aborted_unindexed_heap __attribute__((visibility("protected")));; 40 | unsigned long __liballocs_aborted_unindexed_alloca __attribute__((visibility("hidden")));; 41 | unsigned long __liballocs_aborted_unrecognised_allocsite __attribute__((visibility("protected")));; 42 | 43 | __attribute__((visibility("hidden"))) 44 | void print_exit_summary(void) 45 | { 46 | if (__liballocs_aborted_unknown_storage + __liballocs_hit_static_case + __liballocs_hit_stack_case 47 | + __liballocs_hit_heap_case + __liballocs_hit_alloca_case > 0) 48 | { 49 | fprintf(get_stream_err(), "====================================================\n"); 50 | fprintf(get_stream_err(), "liballocs summary: \n"); 51 | fprintf(get_stream_err(), "----------------------------------------------------\n"); 52 | fprintf(get_stream_err(), "queries aborted for unknown storage: % 9ld\n", __liballocs_aborted_unknown_storage); 53 | fprintf(get_stream_err(), "queries handled by static case: % 9ld\n", __liballocs_hit_static_case); 54 | fprintf(get_stream_err(), "queries handled by stack case: % 9ld\n", __liballocs_hit_stack_case); 55 | fprintf(get_stream_err(), "queries handled by heap case: % 9ld\n", 
__liballocs_hit_heap_case); 56 | fprintf(get_stream_err(), "queries handled by alloca case: % 9ld\n", __liballocs_hit_alloca_case); 57 | fprintf(get_stream_err(), "----------------------------------------------------\n"); 58 | fprintf(get_stream_err(), "queries aborted for unindexed heap: % 9ld\n", __liballocs_aborted_unindexed_heap); 59 | fprintf(get_stream_err(), "queries aborted for unknown heap allocsite:% 9ld\n", __liballocs_aborted_unrecognised_allocsite); 60 | fprintf(get_stream_err(), "queries aborted for unindexed alloca: % 9ld\n", __liballocs_aborted_unindexed_alloca); 61 | fprintf(get_stream_err(), "queries aborted for unknown stackframes: % 9ld\n", __liballocs_aborted_stack); 62 | fprintf(get_stream_err(), "queries aborted for unknown static obj: % 9ld\n", __liballocs_aborted_static); 63 | fprintf(get_stream_err(), "====================================================\n"); 64 | for (unsigned i = 0; i < __liballocs_unrecognised_heap_alloc_sites.count; ++i) 65 | { 66 | if (i == 0) 67 | { 68 | fprintf(get_stream_err(), "Saw the following unrecognised heap alloc sites: \n"); 69 | } 70 | fprintf(get_stream_err(), "%p (%s)\n", __liballocs_unrecognised_heap_alloc_sites.addrs[i], 71 | format_symbolic_address(__liballocs_unrecognised_heap_alloc_sites.addrs[i])); 72 | } 73 | } 74 | 75 | if (getenv("LIBALLOCS_DUMP_SMAPS_AT_EXIT")) 76 | { 77 | char buffer[4096]; 78 | size_t bytes; 79 | FILE *smaps = fopen("/proc/self/smaps", "r"); 80 | if (smaps) 81 | { 82 | while (0 < (bytes = fread(buffer, 1, sizeof(buffer), smaps))) 83 | { 84 | fwrite(buffer, 1, bytes, get_stream_err()); 85 | } 86 | fclose(smaps); /* done copying: release the stream we opened above */ 87 | } 88 | else fprintf(get_stream_err(), "Couldn't read from smaps!\n"); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/dlmalloc-includes.h: -------------------------------------------------------------------------------- 1 | #include 2 | void *emulated_sbrk(intptr_t); 3 |
-------------------------------------------------------------------------------- /src/dummy_thread_locals.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int __liballocs_global_init(void); 4 | 5 | __thread void *__current_allocsite; 6 | __thread void *__current_allocfn; 7 | __thread size_t __current_allocsz; 8 | __thread int __currently_freeing; 9 | 10 | void (__attribute__((constructor)) init)(void) 11 | { 12 | __liballocs_global_init(); 13 | } 14 | -------------------------------------------------------------------------------- /src/err.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | 4 | /* HACK to avoid too much librunt dependency in this allocsld-borrowed code. */ 5 | #ifndef IN_LIBALLOCS_DSO 6 | #define get_exe_command_basename(...) "(no name)" 7 | #endif 8 | 9 | /* If we are linking librunt, as we usually are, we will get this from there. 10 | * But otherwise don't! E.g. from allocsld, don't pull in librunt. */ 11 | FILE *stream_err __attribute__((weak)); 12 | #include "liballocs_private.h" 13 | 14 | int __liballocs_debug_level; 15 | /* Return the stream for liballocs diagnostics: the file named by LIBALLOCS_ERR if it can be opened for writing, otherwise stderr. Never returns NULL. */ 16 | __attribute__((visibility("hidden"))) 17 | FILE *get_stream_err(void) 18 | { 19 | /* Resolve the destination only once: reopening LIBALLOCS_ERR with "w" on 20 | * every call would truncate earlier output and leak a FILE object. */ 21 | static _Bool resolved; 22 | if (resolved && stream_err) return stream_err; 23 | resolved = 1; 24 | // figure out where our output goes 25 | const char *errvar = getenv("LIBALLOCS_ERR"); 26 | if (errvar) 27 | { 28 | // try opening it 29 | stream_err = fopen(errvar, "w"); 30 | if (!stream_err) 31 | { 32 | stream_err = stderr; 33 | debug_printf(0, "could not open %s for writing\n", errvar); 34 | } 35 | } else stream_err = stderr; 36 | assert(stream_err); 37 | return stream_err; 38 | } 39 | 40 | const char *__liballocs_errstring(struct liballocs_err *err) 41 | { 42 | return err->message; 43 | } 44 | -------------------------------------------------------------------------------- /src/ifunc.c: -------------------------------------------------------------------------------- 1 | /* Helpers for liballocs ifuncs. 
2 | * How should this work? 3 | * 4 | * We need a helper to detect when liballocs is in preload position. 5 | * If it is, our ifuncs bind to the real definitions. 6 | * If it isn't, they bind to dummy definitions *if they exist*. 7 | * -- for data, it may not be necessary or desirable to have dummies 8 | * -- let's only use IFUNC symbols if a dummy exists 9 | */ 10 | // A minimal IFUNC example looks like the following (from linksem). 11 | #if 0 12 | /* dynamic linker does roughly this */ 13 | void **site = (void**) p_r->r_offset; 14 | void *(*resolver)(void) = (void*) p_r->r_addend; 15 | *site = resolver(); 16 | #endif 17 | #define _GNU_SOURCE /* for basename() */ 18 | #include 19 | #include 20 | #include "librunt.h" 21 | #include "relf.h" 22 | #include "raw-syscalls-defs.h" 23 | 24 | #ifdef _LIBGEN_H 25 | #error "ifunc.c needs GNU basename() so must not include libgen.h" 26 | #endif 27 | 28 | static _Bool checked_position; 29 | static _Bool is_in_head_preload_position; 30 | static _Bool check_head_preload_position(void) 31 | { 32 | if (checked_position) return is_in_head_preload_position; 33 | // else we need to check position in the link map 34 | struct r_debug *r = find_r_debug(); 35 | /* FIXME: need proper soname. */ 36 | /* FIXME: can we just use "our link map" and forget the soname check? 37 | * We know whether we're linked into the real liballocs or not. */ 38 | struct link_map *l; 39 | for (l = r->r_map; l; l = l->l_next) 40 | { 41 | /* Skip over the executable. */ 42 | if (0 == strcmp(l->l_name, "")) continue; 43 | /* Skip over the VDSO. 
*/ // FIXME: this is sysdep 44 | #define IS_VDSO(n) (0 == strcmp((n), "linux-vdso.so.1")) 45 | if (IS_VDSO(l->l_name)) continue; 46 | break; 47 | } 48 | if (!l) { /* We didn't find ourselves.*/ abort(); } 49 | #if 0 50 | write_string("First non-skippable link map entry name: "); 51 | raw_write(2, l->l_name, strlen(l->l_name)); 52 | write_string("\n"); 53 | #endif 54 | if (l->l_next) 55 | { 56 | #if 0 57 | write_string("Second non-skippable link map entry name: "); 58 | raw_write(2, l->l_next->l_name, strlen(l->l_next->l_name)); 59 | write_string("\n"); 60 | #endif 61 | } 62 | #ifndef LIBALLOCS_SONAME 63 | #warning "IFUNC test code assuming liballocs soname is liballocs_preload.so (but we should change to liballocs.so)" 64 | #define LIBALLOCS_SONAME "liballocs_preload.so" 65 | #endif 66 | _Bool result = (l && l->l_name && 0 == strcmp(basename(l->l_name), LIBALLOCS_SONAME)); 67 | checked_position = 1; 68 | is_in_head_preload_position = result; 69 | return result; 70 | } 71 | 72 | /* From liballocs.a, what's our plan to generate these? 73 | * We can probably use libcxxgen. 74 | * Only liballocs's 'public' symbols need to be considered. 75 | * 76 | * - version script for liballocs's non-hidden global text symbols 77 | * - build-time error if we find an unversioned non-hidden global text symbol 78 | * - replace this file with a generated ifunc.c using libcxxgen... 79 | * - ... containing for each symbol (1) an ifunc, and (2) a _nopreload stub 80 | * - build liballocs.so from liballocs.a, symbol-prefix'd (via .objcopy-opts file) 81 | * - for 'inherited' symbols, either vendor or use --exclude-libs to hide 82 | * - API cleanup/minimisation 83 | * - make it a link-time error to link statically with liballocs.a 84 | * (how? why did we previously allow linking statically? 85 | * am I sure it needs to be ruled out? 86 | * problem is that the result still needs to be preloaded on, 87 | * esp if it gets linked into a DSO... 
maybe only error-out on that, 88 | * and support as-if-preloading linking into an exe? Can we 89 | * ensure we interpose on the things we want to interpose on? 90 | * maybe the static-linking case should supply liballocs.o not 91 | * liballocs.a? or liballocs.a could be a linker script separating out 92 | * the must-have from the optional archivey stuff?) 93 | * */ 94 | 95 | __attribute__((visibility("hidden"))) 96 | int __liballocs_test_ifunc_nopreload(void) { return 41; } 97 | __attribute__((visibility("hidden"))) 98 | int __liballocs_test_ifunc_preload(void) { return 42; } 99 | 100 | typedef uintptr_t (*func_ptr_t)(); 101 | func_ptr_t __liballocs_test_ifunc(void) 102 | { 103 | if (check_head_preload_position()) return (func_ptr_t) __liballocs_test_ifunc_preload; 104 | return (func_ptr_t) __liballocs_test_ifunc_nopreload; 105 | } 106 | __asm__(".type __liballocs_test_ifunc,%gnu_indirect_function"); 107 | 108 | /* So the code literally binds against an ifunc as if it were 109 | * the function being called. But the defining symbol is actually 110 | * of type %gnu_indirect_function. XXX: why does the dynamic linker 111 | * receive the resolver as the addend of the reloc record? Why is it 112 | * not the resolution of the symbol? 
113 | * */ 114 | 115 | -------------------------------------------------------------------------------- /src/liballocs.so: -------------------------------------------------------------------------------- 1 | OUTPUT_FORMAT(elf64-x86-64) 2 | GROUP ( liballocs_dummyweaks.so ) 3 | -------------------------------------------------------------------------------- /src/nonshared_hook_wrappers.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephenrkell/liballocs/dd9497e422bfcbcefb46abfac0ba42aa8ea028fb/src/nonshared_hook_wrappers.c -------------------------------------------------------------------------------- /src/private-libc.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include "liballocs_private.h" 4 | 5 | /* __private_malloc is defined by our Makefile as __wrap_dlmalloc. 6 | * Since dlmalloc does not include a strdup, we need to define 7 | * that explicitly. */ 8 | char *__liballocs_private_strdup(const char *s) 9 | { 10 | size_t len = strlen(s) + 1; 11 | char *mem = __private_malloc(len); 12 | if (!mem) return NULL; 13 | return memcpy(mem, s, len); 14 | } 15 | char *__private_strdup(const char *s) __attribute__((alias("__liballocs_private_strdup"))); 16 | /* Like strndup(): copy at most n bytes of s into fresh __private_malloc'd 17 | * memory and always NUL-terminate the copy. Returns NULL if allocation fails. */ 18 | char *__liballocs_private_strndup(const char *s, size_t n) 19 | { 20 | size_t len = strnlen(s, n); /* never reads past the first n bytes of s */ 21 | char *mem = __private_malloc(len + 1); 22 | if (!mem) return NULL; 23 | mem[len] = '\0'; /* s[len] is not a NUL when the copy is truncated */ 24 | return memcpy(mem, s, len); 25 | } 26 | char *__private_strndup(const char *s, size_t n) __attribute__((alias("__liballocs_private_strndup"))); 27 | -------------------------------------------------------------------------------- /src/private-malloc-plain.lds: -------------------------------------------------------------------------------- 1 | # FIXME: really want these to be protected not hidden. 
2 | HIDDEN(__private_malloc = dlmalloc); 3 | HIDDEN(__private_calloc = dlcalloc); 4 | HIDDEN(__private_free = dlfree); 5 | HIDDEN(__private_realloc = dlrealloc); 6 | HIDDEN(__private_memalign = dlmemalign); 7 | HIDDEN(__private_posix_memalign = dlposix_memalign); 8 | __liballocs_private_malloc = __private_malloc; 9 | __liballocs_private_realloc = __private_realloc; 10 | __liballocs_private_free = __private_free; 11 | -------------------------------------------------------------------------------- /src/private-malloc-wrapped.lds: -------------------------------------------------------------------------------- 1 | # FIXME: really want these to be protected not hidden. 2 | HIDDEN(__private_malloc =__wrap_dlmalloc); 3 | HIDDEN(__private_calloc = __wrap_dlcalloc); 4 | HIDDEN(__private_free = __wrap_dlfree); 5 | HIDDEN(__private_realloc = __wrap_dlrealloc); 6 | HIDDEN(__private_memalign = __wrap_dlmemalign); 7 | HIDDEN(__private_posix_memalign = __wrap_dlposix_memalign); 8 | __liballocs_private_malloc = __private_malloc; 9 | __liballocs_private_realloc = __private_realloc; 10 | __liballocs_private_free = __private_free; 11 | -------------------------------------------------------------------------------- /src/stubs.c.example: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* This is now just an EXAMPLE stubs file. We autogenerate these files 4 | * in allocscc, based on LIBALLOCS_ALLOC_FNS. 
*/ 5 | 6 | #include "stubgen.h" 7 | 8 | // xcalloc(zZ)p 9 | #define arglist_xcalloc(make_arg) make_arg(0, z), make_arg(1, Z) 10 | make_wrapper(xcalloc, p) 11 | 12 | // xmalloc(Z)p 13 | #define arglist_xmalloc(make_arg) make_arg(0, Z) 14 | make_wrapper(xmalloc, p) 15 | 16 | // xrealloc(pZ)p 17 | #define arglist_xrealloc(make_arg) make_arg(0, p), make_arg(1, Z) 18 | make_wrapper(xrealloc, p) 19 | 20 | // xmallocz(Z) 21 | #define arglist_xmallocz(make_arg) make_arg(0, Z) 22 | make_wrapper(xmallocz, p) 23 | -------------------------------------------------------------------------------- /src/systrap_noop.c: -------------------------------------------------------------------------------- 1 | void __liballocs_systrap_init(void) {} 2 | 3 | _Bool __liballocs_systrap_is_initialized; /* globally visible, so that it gets overridden. */ 4 | -------------------------------------------------------------------------------- /src/util.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include "librunt.h" 4 | #include "liballocs_private.h" 5 | 6 | const char *format_symbolic_address(const void *addr) __attribute__((visibility("hidden"))); 7 | const char *format_symbolic_address(const void *addr) 8 | { 9 | Dl_info info = dladdr_with_cache(addr); 10 | 11 | static __thread char buf[8192]; 12 | 13 | snprintf(buf, sizeof buf, "%s`%s+%p", 14 | info.dli_fname ? basename(info.dli_fname) : "unknown", 15 | info.dli_sname ? info.dli_sname : "unknown", 16 | info.dli_saddr 17 | ? 
(void*)((char*) addr - (char*) info.dli_saddr) 18 | : NULL); 19 | 20 | buf[sizeof buf - 1] = '\0'; 21 | 22 | return buf; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/workarounds.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE /* get the right basename() */ 2 | #include 3 | #include 4 | #include "relf.h" 5 | #if 0 6 | #include "donald.h" 7 | #endif 8 | #include "liballocs_private.h" 9 | 10 | #ifdef _LIBGEN_H 11 | #error "liballocs.c needs GNU basename() so must not include libgen.h" 12 | #endif 13 | 14 | static void validate_or_fix_link_map(void *obj, struct uniqtype *u) 15 | { 16 | #if 0 17 | /* The bug we care about is where r_nlist does not match 18 | * l_initfini's length. */ 19 | 20 | #define FIELD_OFFSET(name) \ 21 | for (unsigned i = 0 22 | #define GET_FIELD(name, localtype) \ 23 | ({ localtype buf; memcpy(&buf, (char*)(obj) + FIELD_OFFSET(name), sizeof buf); buf }) 24 | #endif 25 | } 26 | 27 | /* This gets called from __liballocs_global_init. */ 28 | __attribute__((visibility("hidden"))) 29 | void workaround_glibc_bugs(void) 30 | { 31 | /* We need the ld.so malloc allocator to be working. */ 32 | __ld_so_malloc_allocator_init(); 33 | 34 | /* One of glibc's link maps is allocated statically, so 35 | * we can get the uniqtype even without the heap information. */ 36 | struct uniqtype *link_map_type = NULL; 37 | for (struct link_map *l = _r_debug.r_map; l; l = l->l_next) 38 | { 39 | struct uniqtype *u = alloc_get_type(l); 40 | if (u) link_map_type = u; 41 | } 42 | if (!link_map_type) 43 | { 44 | debug_printf(0, "warning: no type information for ld.so link maps, so can't apply workarounds\n"); 45 | goto out; 46 | } 47 | else 48 | { 49 | 50 | /* Do we have glibc? Quick check: libc.so.6 soname and 51 | * anything else? 
ideally symbol versions, but skip for now */ 52 | _Bool warned = 0; 53 | _Bool seen_glibc = 0; 54 | for (struct link_map *l = _r_debug.r_map; l; l = l->l_next) 55 | { 56 | /* Do we have type information for link map entries? */ 57 | struct allocator *a = alloc_get_allocator(l); 58 | size_t sz = alloc_get_size(l); 59 | const void *site = alloc_get_site(l); 60 | struct uniqtype *u = alloc_get_type(l); 61 | void *base = alloc_get_base(l); 62 | assert(a); 63 | 64 | if (l == _r_debug.r_map) 65 | { 66 | debug_printf(0, "first link map is at %p, a %s-allocated object of size %u, type %s, " 67 | "site %p, base %p\n", 68 | l, a->name, (unsigned) sz, UNIQTYPE_NAME(u), site, base); 69 | } 70 | 71 | if (!site && a != &__static_symbol_allocator && !warned) 72 | { 73 | debug_printf(0, "warning: incomplete metadata for ld.so link maps\n"); 74 | warned = 1; 75 | } 76 | 77 | if (l->l_name && 0 == strcmp(basename(l->l_name), "libc.so.6")) 78 | { 79 | /* OK, seems to be glibc. FIXME: do more checks */ 80 | seen_glibc = 1; 81 | } 82 | } 83 | if (!seen_glibc) 84 | { 85 | debug_printf(0, "not glibc so no workarounds applied\n"); 86 | goto out; 87 | } 88 | for (struct link_map *l = _r_debug.r_map; l; l = l->l_next) 89 | { 90 | validate_or_fix_link_map(l, link_map_type); 91 | } 92 | } 93 | out: 94 | /* We may have done a bunch of liballocs queries in the above, and some 95 | * of them may have failed. So re-zero our counters... this is so that 96 | * we don't break test cases that pass based on zero failures. 97 | * This is a HACK. */ 98 | __liballocs_hit_heap_case = 0; 99 | __liballocs_hit_static_case = 0; 100 | __liballocs_aborted_unindexed_heap = 0; 101 | __liballocs_aborted_unrecognised_allocsite = 0; 102 | __liballocs_aborted_static = 0; 103 | } 104 | 105 | /* To instrument the allocation functions 106 | * in the ld.so, 107 | * we need to do a pass to figure out what's there. 108 | * E.g. 
we can't assume that instrumenting 'malloc' is all that's needed, 109 | * because 110 | * 111 | * We could take our lead from LIBALLOCS_ALLOC_FNS, which is already 112 | * our mechanism for telling us about this. It seems clunky that correct 113 | * operation would depend on this env var being set, though, in the case of 114 | * the ld.so. That variable is more of a toolchain-side thing (and perhaps, 115 | * FIXME, we should solidify that by putting it into a .note section or similar?). 116 | * 117 | * So assuming we should do something else for the ld.so, what should it be? 118 | * The relevant symbols in my own system's ld.so, that would make things work 119 | * for me Right Now, are: well, they're not! They're not in any symtab. However 120 | * they are in .extrasyms. 121 | * 122 | * 123 | */ 124 | -------------------------------------------------------------------------------- /tests/abort-alloca-clang/abort-alloca-clang.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | extern __thread void *__current_allocsite __attribute__((weak)); 9 | 10 | int main(void) 11 | { 12 | void *a = alloca(42 * sizeof (int)); 13 | struct uniqtype *got_type = __liballocs_get_alloc_type(a); 14 | struct uniqtype *int_type = dlsym(RTLD_NEXT, "__uniqtype__int"); 15 | assert(int_type); 16 | assert(got_type); 17 | assert(got_type == int_type); 18 | 19 | return 0; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /tests/abort-alloca-clang/mk.inc: -------------------------------------------------------------------------------- 1 | CC := $(LIBALLOCS_LIB)/contrib/llvm/build/bin/clang -fsanitize=allocs 2 | LDLIBS += -lallocs -ldl 3 | -------------------------------------------------------------------------------- /tests/addrtaken-allocator/addrtaken-allocator.c: 
-------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | 6 | /* Here we test several tricky things. The allocator function 7 | * is not only static, but called indirectly. This is a bit like 8 | * how bzip2 does its allocation (default_bzalloc).*/ 9 | 10 | static void *myalloc(size_t size) 11 | { 12 | return malloc(size); 13 | } 14 | 15 | typedef void*(*alloc_fp)(size_t); 16 | alloc_fp get_allocator(void) 17 | { 18 | return myalloc; 19 | } 20 | 21 | int main(void) 22 | { 23 | int *is = get_allocator()(42 * sizeof(int)); 24 | assert(is); 25 | for (int i = 0; i < 42; ++i) is[i] = i; 26 | struct uniqtype *t = __liballocs_get_alloc_type(is); 27 | assert(t); 28 | printf("Type is: %s\n", NAME_FOR_UNIQTYPE(t)); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /tests/addrtaken-allocator/mk.inc: -------------------------------------------------------------------------------- 1 | export LIBALLOCS_ALLOC_FNS := myalloc(Z)p 2 | -------------------------------------------------------------------------------- /tests/alloca/alloca.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | extern __thread void *__current_allocsite __attribute__((weak)); 9 | 10 | int main(void) 11 | { 12 | void *o = alloca(42 * sizeof (int)); 13 | printf("alloca'd chunk at %p\n", o); 14 | struct big_allocation *b; 15 | struct allocator *a = __liballocs_leaf_allocator_for(o, &b); 16 | assert(a == &__alloca_allocator); 17 | struct uniqtype *got_type = __liballocs_get_alloc_type(o); 18 | struct uniqtype *int_type = dlsym(RTLD_DEFAULT, "__uniqtype__int"); 19 | assert(int_type); 20 | assert(got_type); 21 | assert(UNIQTYPE_IS_ARRAY_TYPE(got_type)); 22 | assert(UNIQTYPE_ARRAY_ELEMENT_TYPE(got_type) == int_type); 23 | 24 | void *c = 
alloca(69105); 25 | printf("alloca'd chunk at %p\n", c); 26 | // can we still get the type of o (not b)? 27 | struct uniqtype *got_type_again = __liballocs_get_alloc_type(o); 28 | assert(got_type_again); 29 | assert(got_type_again == got_type); 30 | return 0; 31 | } 32 | 33 | -------------------------------------------------------------------------------- /tests/alloca/mk.inc: -------------------------------------------------------------------------------- 1 | LDLIBS += -lallocs -ldl 2 | -------------------------------------------------------------------------------- /tests/allocsite-id/allocsite-id.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "liballocs.h" 5 | 6 | extern int end; 7 | 8 | int main(void) 9 | { 10 | void *mem = malloc(42); 11 | assert(mem); 12 | printf("main is at %p\n", main); 13 | void *allocsite = __liballocs_get_alloc_site(mem); 14 | assert(allocsite); 15 | printf("Got allocsite: %p\n", allocsite); 16 | assert((char*) allocsite >= (char*) main && 17 | (char*) allocsite < (char*) &end); 18 | allocsite_id_t id = __liballocs_allocsite_id(allocsite); 19 | printf("Our allocsite id is %u\n", (unsigned) id); 20 | void *retrieved_allocsite = __liballocs_allocsite_by_id(id); 21 | printf("Retrieved allocsite with id is %p\n", retrieved_allocsite); 22 | assert(retrieved_allocsite == allocsite); 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /tests/anon-aliases/anon-aliases.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "liballocs.h" 3 | 4 | extern void x1; 5 | extern void x2; 6 | extern void x3; 7 | 8 | int main(void) 9 | { 10 | typedef long int long_int; // this used to cause us trouble 11 | long_int i = 0l; 12 | 13 | /* Each of x1, x2 and x3 is an object (it is declared as void, not pointer-to-void!) 14 | * in another compilation unit. 
We use liballocs to get each's type, and assert what 15 | * equalities we think should hold between them. 16 | * Since the introduction of "associated names" in libdwarfpp which cover 17 | * the case of 'typedef struct { ... } name;', we expect like-typedef'd like-definition 18 | * identical structures to have the same types. Previously the filename in which 19 | * the struct was declared would be factored into the type. */ 20 | assert(alloc_get_type(&x1)); 21 | assert(alloc_get_type(&x2)); 22 | assert(alloc_get_type(&x3)); 23 | 24 | /* The following reflect our somewhat arbitrary choices around 25 | * type identity. We erase the directory name, so the same header 26 | * name, even if in different dirs, can yield the identity-equal 27 | * type, whereas if we symlink a header under a new name, it can 28 | * generate identity-distinct types even for identical definitions. */ 29 | 30 | // assert that x1 and x2 have same type (both 'header.h', different paths) 31 | assert(alloc_get_type(&x1) == alloc_get_type(&x2)); 32 | // assert that x2 and x3 have same type ('header.h' vs 'sameheader.h' but we no longer care) 33 | assert(alloc_get_type(&x2) == alloc_get_type(&x3)); 34 | 35 | return (int) i; 36 | } 37 | -------------------------------------------------------------------------------- /tests/anon-aliases/header.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | unsigned long x; 3 | } mystr; 4 | 5 | -------------------------------------------------------------------------------- /tests/anon-aliases/lib1.c: -------------------------------------------------------------------------------- 1 | #include "subdir/header.h" 2 | 3 | mystr x1; 4 | -------------------------------------------------------------------------------- /tests/anon-aliases/lib2.c: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | mystr x2; 3 | 4 | 
-------------------------------------------------------------------------------- /tests/anon-aliases/lib3.c: -------------------------------------------------------------------------------- 1 | #include "sameheader.h" 2 | mystr x3; 3 | 4 | -------------------------------------------------------------------------------- /tests/anon-aliases/mk.inc: -------------------------------------------------------------------------------- 1 | anon-aliases: lib1.o lib2.o lib3.o 2 | 3 | lib1.o: CC := cc 4 | lib2.o: CC := cc 5 | lib3.o: CC := cc 6 | -------------------------------------------------------------------------------- /tests/anon-aliases/sameheader.h: -------------------------------------------------------------------------------- 1 | header.h -------------------------------------------------------------------------------- /tests/anon-aliases/subdir: -------------------------------------------------------------------------------- 1 | . -------------------------------------------------------------------------------- /tests/bit-fields/bit-fields.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | 5 | struct bits 6 | { 7 | unsigned long x:62; 8 | int y; 9 | char z; 10 | signed awkward:7; 11 | }; 12 | 13 | int main(void) 14 | { 15 | struct bits b = { 1, 2, 3, 4 }; 16 | 17 | printf("b at %p has %d %d %d %d\n", &b, (int) b.x, (int) b.y, (int) b.z, (int) b.awkward); 18 | printf("structure size is %d\n", (int) sizeof b); 19 | 20 | struct uniqtype *u = __liballocs_get_inner_type(&b, 1); 21 | assert(u); 22 | assert(UNIQTYPE_IS_COMPOSITE_TYPE(u)); 23 | for (struct uniqtype_rel_info *memb = &u->related[0]; 24 | memb < &u->related[u->un.composite.nmemb]; 25 | ++memb) 26 | { 27 | if (UNIQTYPE_IS_BASE_TYPE(memb->un.memb.ptr)) 28 | { 29 | printf("Saw a member of bit size %d, bit offset %d, byte offset within structure: %d\n", 30 | (int) UNIQTYPE_BASE_TYPE_BIT_SIZE(memb->un.memb.ptr), 31 | (int) 
UNIQTYPE_BASE_TYPE_BIT_OFFSET(memb->un.memb.ptr), 32 | (int) memb->un.memb.off 33 | ); 34 | } 35 | } 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /tests/char-allocs/char-allocs.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(void) 8 | { 9 | char *a1 = malloc(42); 10 | char *a2 = malloc(42 * sizeof (char)); 11 | char *a3 = malloc(42 * sizeof (unsigned char)); 12 | 13 | struct uniqtype *u1 = __liballocs_get_alloc_type(a1); 14 | assert(u1); 15 | struct uniqtype *u2 = __liballocs_get_alloc_type(a2); 16 | assert(u2); 17 | struct uniqtype *u3 = __liballocs_get_alloc_type(a3); 18 | assert(u3); 19 | 20 | printf("Got a1 as %s\n", NAME_FOR_UNIQTYPE(u1)); 21 | printf("Got a2 as %s\n", NAME_FOR_UNIQTYPE(u2)); 22 | printf("Got a3 as %s\n", NAME_FOR_UNIQTYPE(u3)); 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /tests/hello-via-wrapper/hello-via-wrapper.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* This is a temporary test case for the transition from 5 | * allocscc/allocscompilerwrapper.py to the new toolsub-based 6 | * approach of minimalist wrapping + linker plugin. */ 7 | 8 | int main(void) 9 | { 10 | void *m = malloc(42); 11 | printf("Hello, via wrapper! Got %p\n", m); 12 | free(m); 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /tests/hello-via-wrapper/mk.inc: -------------------------------------------------------------------------------- 1 | # Instead of allocscc/allocscompilerwrapper.py, use the new toolsub-based 2 | # minimalist wrapping + linker plugin. 
3 | 4 | CC := cc `$(LIBALLOCS)/tools/allocs-cflags` `$(LIBALLOCS)/tools/allocs-ldflags` 5 | export LIBALLOCS # the linker plugin needs this 6 | 7 | # HACKy attempt to forestall __tls_get_addr and "DSO missing from command line" problems 8 | #LDLIBS += /lib64/ld-linux-x86-64.so.2 9 | -------------------------------------------------------------------------------- /tests/lib-test/lib-test.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | /* FIXME: giant hack! see allocators/elf-file.c. 9 | * This is our way of ensuring that uniqtypes used by the ELF file 10 | * allocator are always available when we run the tests on it. 11 | * However, it would be better if we could write this into the 12 | * liballocs source somehow. As we know, preloading uniqtypes breaks 13 | * global uniqueness. We could generate them from the UNDs in 14 | * liballocs_preload.so and put them in liballocs_static.a, but 15 | * that would not work when just preloading liballocs into a 16 | * binary that was not linked -lallocs. Probably we need to use 17 | * allocsld. The rule is that any uniqtypes required by liballocs 18 | * that are NOT present in the executable should be somehow injected. 19 | * Can we use the dlbind library as the place to put them? No 20 | * because liballocs generates that. 21 | */ 22 | Elf64_Ehdr ehdr; 23 | Elf64_Shdr shdr[1]; 24 | Elf64_Phdr phdr[1]; 25 | struct Elf64_Nhdr_with_data { 26 | Elf64_Nhdr nhdr[1]; 27 | char data[]; 28 | } __attribute__((packed)) nhdr_with_data[1]; 29 | Elf64_Sym sym[1]; 30 | Elf64_Rela rela[1]; 31 | Elf64_Rel rel[1]; 32 | Elf64_Dyn dyn[1]; 33 | void (*fp)(void); 34 | 35 | int main(void) 36 | { 37 | /* The liballocs source code includes some unit tests. 38 | * These are run as constructors from liballocs_test.so, 39 | * so dlopening that will run them. 
*/ 40 | assert(getenv("LIBALLOCS_BUILD")); 41 | char *path = getenv("LIBALLOCS_BUILD"); 42 | void *handle = dlopen(path, RTLD_NOW); 43 | assert(handle); 44 | printf("Successfully constructed %s\n", path); 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /tests/lib-test/mk.inc: -------------------------------------------------------------------------------- 1 | THIS_MAKEFILE := $(lastword $(MAKEFILE_LIST)) 2 | # lib-test is a plain old C program, not allocscc'd, and it 3 | # dlopens liballocs (test build) to run its self-test constructors. 4 | export CC := cc 5 | export CFLAGS := -fPIC -O0 -g3 -DTEST 6 | srcroot := $(realpath $(dir $(realpath $(THIS_MAKEFILE)))../..) 7 | # One exception: we must avoid the ld.so hole, so use allocsld for that. 8 | export LDFLAGS := -Wl,--dynamic-linker,$(srcroot)/allocsld/allocsld.so 9 | export LDLIBS := $(srcroot)/lib/interp-pad.o -ldl 10 | # Another exception: we may want to load a meta-object. 11 | real_obj := $(dir $(realpath $(THIS_MAKEFILE)))lib-test 12 | meta_obj := $(META_BASE)$(dir $(realpath $(THIS_MAKEFILE)))lib-test-meta.so 13 | META_CC := cc 14 | export META_CC 15 | _onlyrun-lib-test _onlygdbrun-lib-test: $(meta_obj) 16 | $(info goals $(MAKECMDGOALS)) 17 | export LIBALLOCS_BUILD := $(srcroot)/build/test/liballocs_preload.so 18 | export LD_PRELOAD := # empty 19 | export PRELOAD := # empty 20 | export LIBALLOCS_USE_PRELOAD := no 21 | 22 | lib-test: $(LIBALLOCS_BUILD) 23 | 24 | $(LIBALLOCS_BUILD): 25 | $(MAKE) -C $(srcroot) TEST=1 26 | 27 | $(meta_obj): $(real_obj) 28 | $(MAKE) -f $(srcroot)/tools/Makefile.meta $@ 29 | -------------------------------------------------------------------------------- /tests/malloc-from-dso/dso.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int *dso_malloc_caller(size_t sz) 4 | { 5 | int *chunk = malloc(sz); 6 | return chunk; 7 | } 8 | 
-------------------------------------------------------------------------------- /tests/malloc-from-dso/malloc-from-dso.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "liballocs.h" 3 | 4 | int *dso_malloc_caller(size_t); 5 | 6 | int main(void) 7 | { 8 | int *chunk = dso_malloc_caller(42 * sizeof (int)); 9 | struct uniqtype *u = __liballocs_get_alloc_type(chunk); 10 | assert(u); 11 | chunk[0] = 42; 12 | return chunk[1]; 13 | } 14 | -------------------------------------------------------------------------------- /tests/malloc-from-dso/mk.inc: -------------------------------------------------------------------------------- 1 | malloc-from-dso: libdso.so 2 | malloc-from-dso: LDLIBS += -Wl,-rpath,$(shell pwd) libdso.so 3 | libdso.so: LDLIBS := 4 | libdso.so: dso.c 5 | $(CC) -shared -o $@ $+ $(CFLAGS) $(LDFLAGS) $(LDLIBS) 6 | 7 | LIBALLOCS_ALLOC_FNS := dso_malloc_caller(Z)p 8 | export LIBALLOCS_ALLOC_FNS 9 | -------------------------------------------------------------------------------- /tests/malloc-in-exe/malloc-in-exe.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include "liballocs.h" 3 | #include 4 | #include 5 | 6 | struct uniqtype; 7 | extern struct uniqtype __uniqtype__int; 8 | 9 | int main(void) 10 | { 11 | printf("about to do user malloc()\n"); 12 | void *a = malloc(sizeof (int)); 13 | printf("addr is %p\n", a); 14 | 15 | struct uniqtype *t = __liballocs_get_alloc_type(a); 16 | printf("type is %p\n", t); 17 | // it's actually an array of 1 int 18 | assert(UNIQTYPE_IS_ARRAY_TYPE(t)); 19 | struct uniqtype *el_t = UNIQTYPE_ARRAY_ELEMENT_TYPE(t); 20 | assert(el_t == &__uniqtype__int); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /tests/malloc-in-exe/mk.inc: -------------------------------------------------------------------------------- 1 | malloc-in-exe: malloc-in-exe.o 
dlmalloc.o 2 | 3 | dlmalloc.o: CFLAGS += -std=gnu99 -D_DEFAULT_SOURCE 4 | 5 | malloc-in-exe: LDLIBS += -lallocs 6 | #-Wl,test.lds \ 7 | -------------------------------------------------------------------------------- /tests/metavec-layout/metavec-layout.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "allocmeta-defs.h" 5 | 6 | union 7 | { 8 | union sym_or_reloc_rec fields; 9 | uint64_t word; 10 | } u[] = { 11 | { fields: { sym: { .kind = 7, .uniqtype_ptr_bits_no_lowbits = 1, .idx = 9 } } }, 12 | { word: SYM_ONLY_REC_WORD(7ul, 9ul, /* ptr_as_integer_incl_lowbits */ 8ul) } 13 | }; 14 | 15 | int main(void) 16 | { 17 | assert(sizeof (union sym_or_reloc_rec) == 8); 18 | assert(u[0].word == u[1].word); 19 | return 0; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /tests/no-unbind-self/mk.inc: -------------------------------------------------------------------------------- 1 | LIBALLOCS_ALLOC_FNS="myalloc(Z)p" 2 | -------------------------------------------------------------------------------- /tests/no-unbind-self/no-unbind-self.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void *myalloc_myalloc_addr; 8 | void *myalloc(size_t sz) 9 | { 10 | printf("myalloc: I think my addr is %p\n", myalloc); 11 | myalloc_myalloc_addr = myalloc; 12 | return malloc(sz); 13 | } 14 | 15 | /* This is testing that self-references within an allocation function 16 | * are *not* diverted into the wrapper, even when unbinding. 17 | * FIXME: is this definitely the behaviour that we want? 
18 | */ 19 | 20 | int main(void) 21 | { 22 | void *myalloc_addr = dlsym(NULL, "myalloc"); 23 | assert(myalloc_addr); 24 | void *p = myalloc(42); 25 | strcpy((char*) p, "Hello!"); 26 | printf("main: I think myalloc's addr is %p\n", myalloc_addr); 27 | assert(myalloc_myalloc_addr == myalloc_addr); 28 | 29 | printf("%s\n", (char*) p); 30 | 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /tests/nopreload-alloca/nopreload-alloca.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | extern __thread void *__current_allocsite __attribute__((weak)); 9 | 10 | int main(void) 11 | { 12 | int *a = alloca(42 * sizeof (int)); 13 | a[41] = 0; 14 | printf("Saw address %p holding %d\n", &a[41], a[41]); 15 | return a[41]; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /tests/offsetof/mk.inc: -------------------------------------------------------------------------------- 1 | LDLIBS += -lallocs 2 | -------------------------------------------------------------------------------- /tests/offsetof/offsetof.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | struct blah 8 | { 9 | int x; 10 | float y; 11 | char z[1]; 12 | }; 13 | 14 | struct baz 15 | { 16 | void *a; 17 | struct blah b[1]; 18 | }; 19 | 20 | int main(void) 21 | { 22 | void *b = calloc(1, offsetof(struct blah, z) + 10); 23 | 24 | // assert that the alloc is a blah 25 | struct uniqtype *got_type = __liballocs_get_alloc_type(b); 26 | struct uniqtype *blah_type = dlsym(RTLD_NEXT, "__uniqtype__blah"); 27 | assert(blah_type); 28 | assert(got_type); 29 | assert(got_type == blah_type); 30 | 31 | printf("It says: %f\n", ((struct blah *) b)->y); 32 | 33 | void *bz = calloc(1, 
offsetof(struct baz, b) + 20 * sizeof (struct blah)); 34 | 35 | // assert that the alloc is the composite 36 | struct uniqtype *got_comp_type = __liballocs_get_alloc_type(bz); 37 | struct uniqtype *baz_type = dlsym(RTLD_NEXT, "__uniqtype__baz"); 38 | assert(baz_type); 39 | assert(got_comp_type); 40 | assert(got_comp_type->un.info.kind == COMPOSITE); 41 | assert(got_comp_type->related[0].un.memb.ptr == baz_type); 42 | assert(got_comp_type->related[1].un.memb.ptr->un.array.is_array); 43 | assert(got_comp_type->related[1].un.memb.ptr->related[0].un.memb.ptr == blah_type); 44 | 45 | return 0; 46 | } 47 | -------------------------------------------------------------------------------- /tests/packed-seq-walk/packed-seq-walk.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "liballocs.h" 5 | #include "allocmeta.h" 6 | #include "pageindex.h" 7 | 8 | 9 | static int n = 0; 10 | static int saw_string_cb(struct big_allocation *maybe_the_allocation, 11 | void *obj, struct uniqtype *t, const void *allocsite, 12 | struct alloc_containment_ctxt *cont, void *arg) 13 | { 14 | // printf("Saw a string at %p (%06d): %s\n", obj, n, (char*)obj); 15 | ++n; 16 | return 0; 17 | } 18 | 19 | int main(void) 20 | { 21 | // let's malloc a thing and then declare it (somehow) 22 | // a packed sequence, by promoting it and then 23 | // - clearing its type info (?) 24 | // - setting it as suballocated by the relevant packed seq 25 | void *chunk = calloc(1, 131072); 26 | assert(pageindex[PAGENUM(chunk)]); 27 | 28 | struct big_allocation *seq_b = __lookup_bigalloc_from_root(chunk, 29 | &__default_lib_malloc_allocator, NULL); 30 | assert(seq_b->allocated_by == &__default_lib_malloc_allocator); 31 | 32 | seq_b->suballocator = &__packed_seq_allocator; 33 | seq_b->suballocator_private = malloc(sizeof (struct packed_sequence)); 34 | seq_b->suballocator_private_free = __packed_seq_free; 35 | // FIXME: clear type info? 
do we need to? 36 | __default_lib_malloc_allocator.set_type(seq_b, chunk, NULL); 37 | if (!seq_b->suballocator_private) abort(); 38 | *(struct packed_sequence *) seq_b->suballocator_private = (struct packed_sequence) { 39 | .fam = &__string8_nulterm_packed_sequence, 40 | .enumerate_fn_arg = NULL, 41 | .name_fn_arg = NULL, 42 | .un = { .metavector_any = NULL }, 43 | .metavector_nused = 0, 44 | .metavector_size = 0, 45 | .starts_bitmap = NULL, 46 | .starts_bitmap_nwords = 0, 47 | .offset_cached_up_to = 0 48 | }; 49 | struct alloc_tree_pos pos = { 50 | .base = chunk, 51 | .bigalloc_or_uniqtype = (uintptr_t) seq_b 52 | }; 53 | __packed_seq_allocator.walk_allocations(&pos, saw_string_cb, NULL, NULL, NULL); 54 | assert(n == 131072); 55 | n = 0; 56 | alloc_walk_allocations(&pos, saw_string_cb, NULL, NULL, NULL); 57 | assert(n == 131072); 58 | n = 0; 59 | __liballocs_walk_allocations_df(&pos, saw_string_cb, NULL); 60 | // HMM. This is visiting each string twice. And that's correct! 61 | // After visiting the array, we also visit the individual char that it 62 | // contains, because we're depth-first. 63 | assert(n == 262144); 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /tests/plugin-hello/mk.inc: -------------------------------------------------------------------------------- 1 | # only for srk's hacky ld.both script 2 | export USE_LD := ld.gold 3 | 4 | # for normal people... 
but note that --push-state isn't supported 5 | LDFLAGS += -fuse-ld=gold 6 | 7 | LIBALLOCS_ALLOC_FNS := fail_alloc(Z)p fail_realloc(pZ)p fail_calloc(zZ)p 8 | # HACK: export doesn't work 9 | export LIBALLOCS_ALLOC_FNS 10 | 11 | # GAH, commas interact with "-Wl," 12 | 13 | LDFLAGS += $(shell env LIBALLOCS_ALLOC_FNS="$(LIBALLOCS_ALLOC_FNS)" $(LIBALLOCS)/tools/allocs-ldflags) 14 | $(info adding LDFLAGS $(LDFLAGS)) 15 | 16 | # HACK: use cc directly (setting plugin-hello: CC := doesn't seem to work) 17 | plugin-hello: plugin-hello.o 18 | /usr/bin/cc -o "$@" $+ $(LDFLAGS) $(LDLIBS) 19 | -------------------------------------------------------------------------------- /tests/plugin-hello/plugin-hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void *fail_alloc(size_t sz) 5 | { 6 | return NULL; 7 | } 8 | void *fail_calloc(size_t sz, size_t nmemb) 9 | { 10 | return NULL; 11 | } 12 | void *fail_realloc(void *p, size_t sz) 13 | { 14 | return NULL; 15 | } 16 | 17 | static void *static_fail_alloc(size_t sz) 18 | { 19 | return NULL; 20 | } 21 | 22 | struct uniqtype; 23 | extern struct uniqtype __uniqtype__int; /* same tag as the forward declaration above */ 24 | 25 | int main(void) 26 | { 27 | printf("Hello, world (%p, %p)!\n", static_fail_alloc, &__uniqtype__int); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /tests/ptr-to-opaque/mk.inc: -------------------------------------------------------------------------------- 1 | ptr-to-opaque: other-cu.o 2 | -------------------------------------------------------------------------------- /tests/ptr-to-opaque/other-cu.c: -------------------------------------------------------------------------------- 1 | struct T 2 | { 3 | int field; 4 | } *other_t; 5 | -------------------------------------------------------------------------------- /tests/ptr-to-opaque/ptr-to-opaque.c: -------------------------------------------------------------------------------- 1 | 
#include "allocs.h" 2 | 3 | struct T; 4 | struct T *my_t; 5 | extern struct T *other_t; 6 | int main(void) 7 | { 8 | /* Bug that prompted this regression test: 9 | * "we get two distinct pointer types (should be the same), and 10 | * __uniqtype__T remains undefined (should be an alias of __uniqtype_05502024_T)". 11 | * 12 | * Elaborating on that: 13 | * 14 | * What should happen: 15 | * this CU uses codeless __PTR_T 16 | * but this gets aliased to the codeful one. 17 | * 18 | * So let's assert that my_t and other_t have the same type. 19 | */ 20 | struct uniqtype *u1 = alloc_get_type(&my_t); 21 | struct uniqtype *u2 = alloc_get_type(&other_t); 22 | assert(u1); 23 | assert(u2); 24 | assert(u1 == u2); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /tests/realloc-multi-union/realloc-multi-union.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | 6 | extern struct uniqtype __uniqtype__short$20unsigned$20int; 7 | extern struct uniqtype __uniqtype__short$20int; 8 | 9 | /* Here we test the __liballocs_add_type_to_block call and related helpers. 10 | * This is a convenience for code like that in bzip2, which uses C's 11 | * "effective type" rules' allowance for a type-changing update to memory. 12 | * E.g. we can first use some memory as an int32_t array, and then re-use it 13 | * as an array of twice as many int16_t simply by writing int16_t values 14 | * into it. 15 | * 16 | * Code doing that defeats our tracking of the allocation type, so 17 | * as a workaround, we provide a manual helper __liballocs_add_type_to_block 18 | * (e.g. we patch bzip2 to make a call to this... sad times). 19 | * 20 | * This helper replaces the malloc chunk's recorded type with a union 21 | * of the old type and the given type. 
We go through the type-update 22 | * manually to check that the set_type and get_or_create_union_type 23 | * helpers work, and then go around again using the top-level 24 | * add-type-to-block helper and check the outcome is the same. */ 25 | 26 | int main(void) 27 | { 28 | int *p = malloc(2 * sizeof(int)); 29 | assert(p); 30 | struct uniqtype *old_type = __liballocs_get_alloc_type(p); 31 | assert(old_type); 32 | if (old_type->make_precise) old_type = old_type->make_precise(old_type, 33 | NULL, 0, p, p, __liballocs_get_alloc_size(p), __builtin_return_address(0), NULL); 34 | assert(old_type); 35 | short *sp = realloc(p, 4 * sizeof(short)); 36 | struct uniqtype *new_type = __liballocs_get_alloc_type(sp); /* p is stale after realloc; query the live pointer */ 37 | assert(new_type); 38 | if (new_type->make_precise) new_type = new_type->make_precise(new_type, 39 | NULL, 0, sp, sp, __liballocs_get_alloc_size(sp), __builtin_return_address(0), NULL); 40 | assert(new_type); 41 | struct uniqtype *union_type = __liballocs_get_or_create_union_type(2, 42 | old_type, 43 | new_type 44 | ); 45 | struct allocator *a = __liballocs_leaf_allocator_for(sp, NULL); 46 | liballocs_err_t err = a->set_type(NULL, sp, union_type); 47 | assert(!err); 48 | struct uniqtype *got_t = __liballocs_get_alloc_type(sp); 49 | assert(got_t == union_type); 50 | printf("The type is now: %s\n", NAME_FOR_UNIQTYPE(got_t)); 51 | 52 | /* Now do the same again but with the "add_type_to_block" convenience. 
*/ 53 | int *p2 = malloc(2 * sizeof(int)); 54 | int ret = __liballocs_add_type_to_block(p2, &__uniqtype__short$20int); 55 | struct uniqtype *got_t2 = __liballocs_get_alloc_type(p2); 56 | assert(got_t2 == union_type); 57 | printf("And again: the type is now: %s\n", NAME_FOR_UNIQTYPE(got_t2)); 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /tests/section-group/Makefile: -------------------------------------------------------------------------------- 1 | LDLIBS += -l1 -l2 2 | LDFLAGS += -L$(shell pwd) -Wl,-R$(shell pwd) 3 | CFLAGS += -fPIC -I$(realpath ../../include) 4 | 5 | section-group: lib1.so lib2.so 6 | 7 | %.so: %.c %a.c 8 | $(CC) $(CFLAGS) -shared -o "$@" $^ $(LDFLAGS) 9 | -------------------------------------------------------------------------------- /tests/section-group/lib1.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include "liballocs.h" 5 | 6 | struct s1 7 | { 8 | int x; 9 | } s1; 10 | 11 | extern struct uniqtype *l1a(void); 12 | 13 | void *l1(int arg) 14 | { 15 | /* Get our __uniqtype__s1. */ 16 | struct uniqtype *resolved = dlsym(__liballocs_my_metaobj(), "__uniqtype__s1"); 17 | struct uniqtype *int32 = resolved->related[0].un.memb.ptr; 18 | 19 | /* Check that we're using the same "__uniqtype_int$32" as l1a is. */ 20 | assert(l1a() == int32); 21 | 22 | /* Pass our pointer up to main(), so it can test globally. */ 23 | return int32; 24 | } 25 | -------------------------------------------------------------------------------- /tests/section-group/lib1a.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include "liballocs.h" 4 | 5 | struct s1a 6 | { 7 | int x; 8 | } s1a; 9 | 10 | void *l1a(void) 11 | { 12 | /* Get our __uniqtype__s1a. 
*/ 13 | struct uniqtype *resolved = dlsym(__liballocs_my_metaobj(), "__uniqtype__s1a"); 14 | /* Return our __uniqtype__int$$32. */ 15 | return resolved->related[0].un.memb.ptr; 16 | } 17 | -------------------------------------------------------------------------------- /tests/section-group/lib2.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include "liballocs.h" 5 | 6 | extern struct uniqtype *l2a(void); 7 | 8 | struct s2 9 | { 10 | int x; 11 | } s2; 12 | 13 | void *l2(int arg) 14 | { 15 | /* Get our __uniqtype__s2. */ 16 | struct uniqtype *resolved = dlsym(__liballocs_my_metaobj(), "__uniqtype__s2"); 17 | struct uniqtype *int32 = resolved->related[0].un.memb.ptr; 18 | 19 | /* Check that we're using the same "__uniqtype_int$32" as l2a is. */ 20 | assert(l2a() == int32); 21 | 22 | /* Pass our pointer up to main(), so it can test globally. */ 23 | return int32; 24 | } 25 | -------------------------------------------------------------------------------- /tests/section-group/lib2a.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include "liballocs.h" 4 | 5 | struct s2a 6 | { 7 | int x; 8 | } s2a; 9 | 10 | void *l2a(void) 11 | { 12 | /* Get our __uniqtype__s2a. */ 13 | struct uniqtype *resolved = dlsym(__liballocs_my_metaobj(), "__uniqtype__s2a"); 14 | /* Return our __uniqtype__int$$32. 
*/ 15 | return resolved->related[0].un.memb.ptr; 16 | } 17 | -------------------------------------------------------------------------------- /tests/section-group/section-group.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void *l1(int arg); 6 | void *l2(int arg); 7 | 8 | int main(void) 9 | { 10 | /* Here we're testing two things: 11 | * 12 | * - that section groups within a dynamic (type)object have worked as expected, 13 | * 14 | * meaning that multiple definitions of the same type have been collapsed; 15 | * 16 | * - that global linkage *across dynamic objects* have worked as expected, 17 | * 18 | * meaning that a single global definition of the same type is used across 19 | * all referencing objects. 20 | */ 21 | void *addr1 = l1(0); /* prototypes now match the (int arg) definitions in lib1.c/lib2.c */ 22 | void *addr2 = l2(0); 23 | assert(addr1 == addr2); 24 | assert(addr1 != NULL); 25 | assert(addr2 != NULL); 26 | printf("Link-time uniquing seems to be working:\n" 27 | "got %p for both l1.so and l2.so's __uniqtype__int$32\n", addr1); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /tests/simple-client/mk.inc: -------------------------------------------------------------------------------- 1 | # Thanks to an inline function in the liballocs headers, 2 | # building non-PIC gets us a copy relocation against __liballocs_aborted* in the dummyweaks library. 3 | # These symbols are protected, so that is clearly not going to work. 4 | # I don't think copy-relocating them is a good idea. 5 | # Just ensure we build PIC? 
6 | export CFLAGS += -pie -fPIC 7 | export LDLIBS += -lallocs 8 | -------------------------------------------------------------------------------- /tests/simple-client/simple-client.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | extern __thread void *__current_allocsite __attribute__((weak)); 8 | 9 | int main(void) 10 | { 11 | void *handle = dlopen(NULL, RTLD_NOW); 12 | printf("Handle is %p\n", handle); 13 | assert(handle); 14 | 15 | struct allocator *a; 16 | const void *alloc_start; 17 | unsigned long alloc_size; 18 | const void *alloc_uniqtype; 19 | const void *alloc_site; 20 | struct liballocs_err *err = __liballocs_get_alloc_info(handle, 21 | &a, &alloc_start, &alloc_size, &alloc_uniqtype, &alloc_site); 22 | 23 | printf("Saw allocator %p, start %p, size %lu, uniqtype %p, alloc site %p\n", 24 | a, alloc_start, alloc_size, alloc_uniqtype, alloc_site); 25 | 26 | /* Check that referencing built-in uniqtypes works. 
*/ 27 | printf("__uniqtype__void is at %p\n", &__uniqtype__void); 28 | 29 | return 0; 30 | } 31 | 32 | -------------------------------------------------------------------------------- /tests/simple-multi-alloc/mk.inc: -------------------------------------------------------------------------------- 1 | CFLAGS += $(shell pkg-config --cflags glib-2.0) 2 | LDLIBS += -Wl,--no-as-needed $(shell pkg-config --libs glib-2.0) -Wl,--as-needed 3 | 4 | # FIXME: don't make the user specify these 5 | export LIBALLOCS_ALLOC_FNS := xmalloc(Z)p xcalloc(zZ)p xrealloc(pZ)p 6 | export LIBALLOCS_FREE_FNS := xfree(P) 7 | export LIBALLOCS_SUBALLOC_FNS := g_slice_alloc(Z)p g_slice_alloc0(Z)p 8 | export LIBALLOCS_SUBFREE_FNS := g_slice_free1(zP)->g_slice_alloc 9 | -------------------------------------------------------------------------------- /tests/simple-multi-alloc/simple-multi-alloc.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | static _Bool initialized; 11 | 12 | extern _Bool __liballocs_is_initialized __attribute__((weak)); 13 | 14 | static void (__attribute__((constructor)) init)(void) 15 | { 16 | initialized = 1; 17 | } 18 | 19 | _Bool __thread doing_deep_call = 0; 20 | 21 | void *xmalloc(size_t size) 22 | { 23 | uintptr_t *ret = g_slice_alloc(size + sizeof (uintptr_t)); 24 | *ret = size; 25 | return ret + 1; 26 | } 27 | void *xcalloc(size_t nmemb, size_t size) 28 | { 29 | uintptr_t *ret = g_slice_alloc0(size * nmemb + sizeof (uintptr_t)); 30 | *ret = size * nmemb; /* header records the payload size in bytes, as in xmalloc */ 31 | return ret + 1; 32 | } 33 | 34 | void xfree(void *ptr) 35 | { 36 | if (!ptr) return; 37 | uintptr_t *p = ptr; 38 | g_slice_free1(*(p-1) + sizeof (uintptr_t), p-1); /* block size = payload size + header, matching what was allocated */ 39 | } 40 | 41 | void *xrealloc(void *ptr, size_t size) 42 | { 43 | // use gslice 44 | // -- is the old region big enough? 
45 | uintptr_t *p = ptr; /* may be NULL, in which case we just allocate */ 46 | if (p && *(p-1) >= size) return p; 47 | else 48 | { 49 | // copy and reallocate 50 | uintptr_t *new = g_slice_alloc(size + sizeof (uintptr_t)); *new = size; /* record the payload size, as xmalloc does */ 51 | if (p) memcpy(new + 1, p, *(p-1)); /* old payload is smaller than the new one here */ 52 | if (p) g_slice_free1(*(p-1) + sizeof (uintptr_t), p-1); /* block size includes the header */ 53 | return new + 1; /* hand back the payload, not the header */ 54 | } 55 | } 56 | 57 | int main(void) 58 | { 59 | int *blah = (int *) xmalloc(200 * sizeof (int)); 60 | for (int i = 0; i < 200; ++i) blah[i] = 42; 61 | 62 | void *fake = blah; 63 | assert(__liballocs_get_alloc_type(fake)); 64 | 65 | printf("It says: %d\n", ((int*)fake)[0]); 66 | 67 | xfree(blah); 68 | 69 | return 0; 70 | } 71 | -------------------------------------------------------------------------------- /tests/sizeof-subtract/sizeof-subtract.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | 6 | struct s1 7 | { 8 | float blah; 9 | unsigned int ns[1]; // mismatch on signedness, just to mess... 10 | }; 11 | 12 | 13 | int main(void) 14 | { 15 | void *obj = malloc(sizeof (struct s1) - sizeof (int) + 24); 16 | struct uniqtype *u = __liballocs_get_alloc_type(obj); 17 | assert(UNIQTYPE_IS_COMPOSITE_TYPE(u)); 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /tests/sloppy-dumptypes/sloppy-dumptypes.c: -------------------------------------------------------------------------------- 1 | /* We don't include anything, so that we can control which 2 | * base types appear in our object code. We have to define 3 | * dlsym() and RTLD_DEFAULT ourselves. */ 4 | 5 | extern void *dlsym (void *handle, const char *name); 6 | extern void __assert_fail (const char *__assertion, const char *__file, 7 | unsigned int __line, const char *__function); 8 | #define _GNU_SOURCE 9 | #include 10 | 11 | int main(void) 12 | { 13 | /* We avoid using "signed short", and see if it's present 14 | * in our types object, given that we *do* use unsigned short. 
*/ 15 | __liballocs_ensure_init(); 16 | short unsigned int sshort_uniqtype_truncated = (short unsigned) (unsigned long) dlsym( 17 | 0 /* RTLD_DEFAULT */, "__uniqtype__short$20int"); 18 | if (!sshort_uniqtype_truncated) __assert_fail("sshort_uniqtype_truncated", __FILE__, __LINE__, __func__); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /tests/stack-types/stack-types.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "liballocs.h" 3 | 4 | extern struct uniqtype __uniqtype__int; 5 | extern struct uniqtype __uniqtype__long$20int; 6 | 7 | int f(int arg1, int arg2, int arg3) 8 | { 9 | struct { long q1; long q2; long q3; } blah = { (long) &arg1, (long) &arg2, (long) &arg3 }; 10 | /* Interestingly, with gcc 8.3, arg2 has no location information 11 | * in the DWARF, despite being address-taken here. Unsurprisingly, 12 | * that makes this test case fail. Comment out for now, but this 13 | * is a great example of why the compiler can't currently be 14 | * trusted. If we had marked __liballocs_get_alloc_type as pure, 15 | * then I could understand the compiler's behaviour, but we 16 | * haven't. 
*/ 17 | // struct uniqtype *u1 = __liballocs_get_inner_type(&arg2, 0); 18 | struct uniqtype *u2 = __liballocs_get_inner_type(&blah.q2, 0); 19 | // assert(u1 == &__uniqtype__int); 20 | assert(u2 == &__uniqtype__long$20int); 21 | return arg2 + 1; 22 | } 23 | 24 | 25 | int main(void) 26 | { 27 | return f(42, 42, 42) - 43; 28 | } 29 | -------------------------------------------------------------------------------- /tests/stack-walk/mk.inc: -------------------------------------------------------------------------------- 1 | CFLAGS += -fno-omit-frame-pointer 2 | 3 | LDLIBS += -lallocs 4 | -------------------------------------------------------------------------------- /tests/stack-walk/stack-walk.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include "liballocs.h" 4 | #include "relf.h" 5 | 6 | static int cb(void *ip, void *sp, void *bp, void *arg) 7 | { 8 | const char *sname = ip ? "(unknown)" : "(no active function)"; 9 | int ret = ip ? 
fake_dladdr(ip, NULL, NULL, &sname, NULL) : 0; 10 | printf("%s\n", sname); 11 | return 0; // keep going 12 | } 13 | 14 | int (__attribute__((optimize("O0"))) h)(void) 15 | { 16 | __liballocs_walk_stack(cb, NULL); 17 | return 0; 18 | } 19 | 20 | int (__attribute__((optimize("O0"))) g)(void) 21 | { 22 | return h(); 23 | } 24 | 25 | 26 | int (__attribute__((optimize("O0"))) f)(void) 27 | { 28 | return g(); 29 | } 30 | 31 | 32 | int main(void) 33 | { 34 | return f(); 35 | } 36 | -------------------------------------------------------------------------------- /tests/string-lit/mk.inc: -------------------------------------------------------------------------------- 1 | LDFLAGS += -Wl,-q 2 | -------------------------------------------------------------------------------- /tests/string-lit/string-lit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "liballocs.h" 3 | 4 | int main(void) 5 | { 6 | #define THE_STRING "Hello, world!\n" 7 | const char *s = THE_STRING; 8 | puts(s); 9 | /* We don't know the type of string-literal data. 10 | * We could perhaps infer it, from the relocation site 11 | * if we know *its* type. Here we would need some data- 12 | * -flow analysis to figure that out. Anyway, for now 13 | * just assert that we can get its base and size. 
*/ 14 | void *base = __liballocs_get_alloc_base(s); 15 | assert(base); 16 | size_t size = __liballocs_get_alloc_size(s); 17 | printf("The string literal's size is %u\n", (unsigned) size); 18 | assert(size >= sizeof THE_STRING); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /tests/uniqtype-make-precise/uniqtype-make-precise.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "liballocs.h" 3 | #include "uniqtype.h" 4 | 5 | struct xyzzy 6 | { 7 | int z; 8 | } x; 9 | extern struct uniqtype __uniqtype__xyzzy __attribute__((weak)); 10 | 11 | int main(void) 12 | { 13 | /* Bare-bones test of the make-precise thing. 14 | * We ask the function to make a precise version of itself 15 | * for a 1-element array. */ 16 | struct xyzzy *z = malloc(sizeof (struct xyzzy)); 17 | struct uniqtype *imprecise = __liballocs_get_alloc_type(z); 18 | assert(imprecise->make_precise); 19 | 20 | struct uniqtype *precise = imprecise->make_precise( 21 | imprecise, NULL, 0, 22 | &x, &x, sizeof x, NULL, NULL); 23 | printf("We got back: %s\n", UNIQTYPE_NAME(precise)); 24 | 25 | /* Now call it again: we should get the same pointer, not another 26 | * copy of the uniqtype. */ 27 | struct uniqtype *precise2 = imprecise->make_precise( 28 | imprecise, NULL, 0, 29 | &x, &x, sizeof x, NULL, NULL); 30 | 31 | assert(precise == precise2); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /tests/uniqtype-walk/mk.inc: -------------------------------------------------------------------------------- 1 | # see note in simple-client/mk.inc... 
for clients we need to be PIC 2 | # to avoid copy reloc problems 3 | export CFLAGS += -pie -fPIC 4 | export LDLIBS += -lallocs 5 | -------------------------------------------------------------------------------- /tests/uniqtype-walk/uniqtype-walk.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "uniqtype-bfs.h" 7 | 8 | struct list_node 9 | { 10 | struct stuff 11 | { 12 | void *payload; 13 | void *containing_node; /* just to mess with ya */ 14 | } content; 15 | struct list_node *next; 16 | }; 17 | 18 | static int blackened_count; 19 | static void on_blacken(void *obj, struct uniqtype *t, void *arg) 20 | { 21 | ++blackened_count; 22 | fprintf(stderr, "Blackened an object %p, seen as having type %s\n", 23 | obj, UNIQTYPE_NAME(t)); 24 | } 25 | 26 | int main(void) 27 | { 28 | /* Build a list of length n */ 29 | const int n = 10; 30 | const int integers[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 31 | struct list_node *head = NULL; 32 | for (int i = 0; i < n; ++i) 33 | { 34 | struct list_node *new_node = calloc(1, sizeof (struct list_node)); 35 | assert(new_node); 36 | new_node->next = head; 37 | new_node->content.payload = &integers[i]; 38 | new_node->content.containing_node = new_node; 39 | head = new_node; 40 | } 41 | 42 | // to do make-precise we need the size. 
FIXME: get_outermost_type should do this 43 | //struct uniqtype *list_node_t = __liballocs_get_outermost_type(head); 44 | 45 | struct allocator *a; 46 | const void *alloc_start; 47 | unsigned long alloc_size_bytes; 48 | struct uniqtype *list_node_t = NULL; 49 | struct liballocs_err *err = __liballocs_get_alloc_info(head, &a, 50 | &alloc_start, &alloc_size_bytes, &list_node_t, /* alloc site */NULL); 51 | assert(list_node_t); 52 | if (list_node_t->make_precise) 53 | { 54 | // HACK: make_precise is sanity-checking that we get a multiple of 55 | // list_node's size, even though we don't because we get some malloc 56 | // padding. it should know that we only allocated 1. 57 | // Kludge the range length for now. 58 | list_node_t = list_node_t->make_precise(list_node_t, NULL, 0, head, alloc_start, 59 | /*alloc_size_bytes*/ sizeof (struct list_node), NULL, NULL); 60 | assert(!list_node_t->make_precise); 61 | } 62 | 63 | /* Use our uniqtype bfs walker to walk the list. */ 64 | __uniqtype_walk_bfs_from_object(head, list_node_t, 65 | __uniqtype_default_follow_ptr, NULL, 66 | on_blacken, NULL); 67 | assert(blackened_count == 20); /* 10 nodes, 10 integers -- HMM. 68 | Nodes in the graph should really be pairs, 69 | to avoid the ambiguity of unadorned pointers. 70 | But then each subobject becomes a logically distinct object! 71 | Is that what we want? 72 | I suppose it is the logical extension. 73 | It raises problems when we have ambiguous views of an object: 74 | - is an array[20] also an array[0]? 75 | - is the address of the second element in an array[20] also an array[19]? 76 | - what about the hypothetical "null-terminated char array" uniqtype, 77 | that dynamically refines itself into a known-length type? 78 | Perhaps the answer is to mark as special ("ground") uniqtypes 79 | the ones which don't generate redundant views of memory. 80 | Then when we want to iterate over a minimal covering set of 81 | precise views of all memory, we only use ground uniqtypes. 
82 | It's still unclear, with the null-terminated char array case, 83 | how to decompose the memory into ground instances. Perhaps 84 | the null-terminated portion as one char[], then the remaining tail 85 | as another? Or iteratively decomposed into null-term'd char[]s? 86 | Or just see the tail as allocation padding, like the spare bytes 87 | at the end of a malloc()'d chunk? 88 | In reality it depends: an ELF strtab is a sequence of char[]s, 89 | while a single null-term'd array with some trailing bytes is 90 | one array with padding. So there is some framing intent that 91 | we need to capture at the allocation level. 92 | */ 93 | 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /tests/unit-tests/Makefile: -------------------------------------------------------------------------------- 1 | cases ?= $(basename $(wildcard [a-z]*.[ch])) 2 | 3 | THIS_MAKEFILE := $(lastword $(MAKEFILE_LIST)) 4 | LIBALLOCS_BASE := $(realpath $(dir $(THIS_MAKEFILE))/../..) 5 | -include $(LIBALLOCS_BASE)/config.mk 6 | 7 | # FIXME: this is repetitive w.r.t. liballocs core build flags 8 | CFLAGS += -I$(LIBALLOCS_BASE)/include -I$(LIBRUNT)/include -I$(LIBSYSTRAP) 9 | 10 | CFLAGS += -g -std=c11 -D_GNU_SOURCE -save-temps 11 | # for metavec's test case to work, it needs to be able to find "main" in dynsym 12 | LDFLAGS += -Wl,--export-dynamic 13 | 14 | # FIXME: this is fine for a single-file unit test, but what about unit 15 | # tests that need other stuff from liballocs to work properly? Maybe 16 | # they're not unit tests, by definition. Still, it may make sense to unify 17 | # this with the existing approach to testing the mmap allocator. That 18 | # uses the liballocs_test.so: test cases run in its constructor. 
19 | 20 | .PHONY: build-% run-% cleanbuild-% gdbrun-% gdbcleanrun-% clean-% 21 | build-%: % 22 | %: %.c 23 | $(CC) $(CFLAGS) $(CPPFLAGS) -DUNIT_TEST -o $@ $< $(LDFLAGS) $(LDLIBS) 24 | %: %.h 25 | $(CC) $(CFLAGS) $(CPPFLAGS) -DUNIT_TEST -o $@ -x c $< -x none $(LDFLAGS) $(LDLIBS) 26 | 27 | default: 28 | for case in $(cases); do \ 29 | $(MAKE) run-$$case; \ 30 | done 31 | 32 | run-%: % 33 | ./$* 34 | 35 | clean-%: 36 | rm -f $* $*.o $*.s 37 | 38 | cleanrun-%: 39 | $(MAKE) clean-$* 40 | $(MAKE) run-$* 41 | -------------------------------------------------------------------------------- /tools/alias-linker-opts-for-base-types.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -z "$EQUIVS" ]]; then 4 | EQUIVS=$( readlink -f `dirname $0`/../frontend/c/base-type-equivs.txt ) 5 | if [[ -z "$EQUIVS" ]]; then 6 | echo "Error: no equivs file" 1>&2 7 | fi 8 | fi 9 | 10 | # we are reading the output of nm -fposix on a -uniqtypes.o file 11 | uniqtypes="$( tr -s '[:blank:]' '\t' | cut -f1 | grep '^__uniqtype_' )" 12 | 13 | # build a big regexp for each equivalence class, to filter out only 14 | # the ones that are actually used 15 | while read equivclass; do 16 | # grep uniqtypes for any member of this equivalence class 17 | big_regexp='^__uniqtype_([0-9a-f]{7,8})?_('"$( echo "$equivclass" | sed 's/, */|/g' | tr ' ' '_' )"')$' 18 | matches="$( echo "$uniqtypes" | egrep "$big_regexp" )" 19 | # We expect at most one typecode-qualified line for each equivclass -- 20 | # i.e. a single DWARF name is used consistently in the debug info. 21 | if [[ $( echo "$matches" | grep '^__uniqtype_[0-9a-f]{7,8}_' | wc -l ) -gt 1 ]]; then 22 | echo "Error: expected at most one matching uniqtype for ${big_regexp}; got " $matches 1>&2 23 | # If this fails, it probably means that we have multiple CUs, and some of them 24 | # use different DWARF names for a given base type than others do. 
25 | # We can be more clever about tolerating this, but it gets subtle because 26 | # we should really check that each CU's version of a given apparently-synonymous base type 27 | # is actually the same (w.r.t. size, bit-size, encoding) as the others. 28 | # Otherwise we can claim it's a type-incorrect link, although that might be a bit 29 | # conservative. 30 | exit 1 31 | fi 32 | echo "$matches" | sed '/^$/ d' | while read matching_typesym; do 33 | # grep-delete the matching uniq type from the equivalence class, then 34 | # output a linker alias option for all the others 35 | matching_type="$( echo "$matching_typesym" | sed -r 's/^__uniqtype_([0-9a-f]{7,8})?_//' )" 36 | echo "$equivclass" | sed 's/, */\n/g' | tr ' ' '_' | grep -v "^${matching_type}"'$' | while read equiv; do 37 | synonym_typesym="$( echo "$matching_typesym" | sed -r "s/^(__uniqtype_([0-9a-f]{7,8})?_).*/\1${equiv}/" )" 38 | echo -Wl,--defsym,${synonym_typesym}=${matching_typesym} 39 | done 40 | done 41 | done < "$EQUIVS" 42 | -------------------------------------------------------------------------------- /tools/alloc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void some_other_function(void); 6 | 7 | int main(void) 8 | { 9 | void *alloc1 = malloc(sizeof (struct stat)); 10 | void *alloc2 = malloc(sizeof (void *)); 11 | void *alloc3 = malloc(42); 12 | some_other_function(); 13 | 14 | return errno; 15 | } 16 | -------------------------------------------------------------------------------- /tools/allocs-cflags: -------------------------------------------------------------------------------- 1 | allocs-wrapper -------------------------------------------------------------------------------- /tools/allocs-install: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Look for binaries under $1 with a recent ctime, and search for matching 4 | # binaries under the 
cwd. Copy the allocsites info if an unambiguous match 5 | # is found. 6 | 7 | prefix="$( readlink -f $1 )" 8 | 9 | escape_regexp () { 10 | sed -r 's/(\*|\.|\[|\^|\$|\[|\]|\\)/\\\1/g' 11 | } 12 | 13 | META_BASE="${META_BASE:-/usr/lib/meta}" 14 | 15 | find "$prefix" -type f -cmin -30 \( -name '*.so*' -o -perm +001 \) | sed "s^$prefix^^" | while read suffix; do 16 | size="$( stat -c%s "$prefix"/"$suffix" )" && 17 | found="$( find . -type f -name "$( basename "$suffix" )" -size "$size"c )" && 18 | if [[ -n "$found" ]]; then 19 | # echo "We think file $prefix/$suffix came from $found" 1>&2 20 | stem="${META_BASE}/$( readlink -f "$found" )" 21 | dest_stem="${META_BASE}/$prefix/$suffix" 22 | for fname in "$stem".* "$stem"-*; do 23 | ext="$( echo "$fname" | sed "s#^$( echo "$stem" | escape_regexp )##" )" 24 | mkdir -p "$( dirname "$dest_stem" )" && 25 | cp -p "$fname" "${dest_stem}${ext}" 26 | done 27 | else 28 | echo "Couldn't find where $prefix/$suffix came from" 1>&2 29 | fi 30 | done 31 | -------------------------------------------------------------------------------- /tools/allocs-ldflags: -------------------------------------------------------------------------------- 1 | allocs-wrapper -------------------------------------------------------------------------------- /tools/allocsites.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | // #include // broken in GNU libstdc++! 
13 | //#include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "allocsites-info.hpp" 19 | 20 | using std::cin; 21 | using std::cout; 22 | using std::cerr; 23 | using std::endl; 24 | using std::map; 25 | using std::multimap; 26 | using std::ios; 27 | using std::ifstream; 28 | using std::unique_ptr; 29 | using std::pair; 30 | using std::make_pair; 31 | using std::vector; 32 | using boost::optional; 33 | using std::ostringstream; 34 | 35 | // regex usings 36 | using boost::regex; 37 | using boost::regex_match; 38 | using boost::smatch; 39 | using boost::regex_constants::egrep; 40 | using boost::match_default; 41 | using boost::format_all; 42 | 43 | using namespace allocs::tool; 44 | 45 | int main(int argc, char **argv) 46 | { 47 | /* We read from stdin lines such as those output by dumpallocs, 48 | * prefixed by their filename. Actually they will have been 49 | * stored in .allocs files. */ 50 | std::shared_ptr p_in; 51 | if (argc > 1) 52 | { 53 | p_in = std::make_shared(argv[1]); 54 | if (!*p_in) 55 | { 56 | cerr << "Could not open file " << argv[1] << endl; 57 | return 1; 58 | } 59 | } 60 | std::istream& in = p_in ? *p_in : cin; 61 | vector allocsites = read_allocsites(in); 62 | cerr << "Found " << allocsites.size() << " allocation sites" << endl; 63 | if (allocsites.size() == 0) return 0; 64 | /* HACK: get the objname from the first entry; we assume it's the same for all entries. 
*/ 65 | string seen_objname = allocsites.begin()->objname; 66 | auto p_objfile = std::unique_ptr(new std::ifstream(seen_objname)); 67 | if (!*p_objfile) { cerr << "Could not open "<< seen_objname << std::endl; return 1; } 68 | unique_ptr p_root = std::unique_ptr(new root_die(fileno(*p_objfile))); 69 | assert(p_root); 70 | /* rewrite the allocsites we were passed */ 71 | vector> types_created 72 | = ensure_needed_types_and_assign_to_allocsites(*p_root, allocsites); 73 | std::sort(allocsites.begin(), allocsites.end(), [](const allocsite& a1, const allocsite& a2) { 74 | return make_pair(a1.objname, a1.file_addr) < make_pair(a2.objname, a2.file_addr); 75 | }); 76 | cout << "#include \"allocmeta-defs.h\"\n\n"; 77 | // extern-declare the uniqtypes as weak! we might still want typeless alloc site info 78 | for (auto i_a = allocsites.begin(); i_a != allocsites.end(); ++i_a) 79 | { 80 | emit_extern_declaration(cout, codeful_name(i_a->found_type), true); 81 | } 82 | cout << "struct allocsite_entry allocsites[] = {" << endl; 83 | for (auto i_a = allocsites.begin(); i_a != allocsites.end(); ++i_a) 84 | { 85 | if (i_a != allocsites.begin()) cout << ","; 86 | 87 | cout << "\n\t/* allocsite info for " << i_a->objname << "+" 88 | << std::hex << "0x" << i_a->file_addr << std::dec << " */"; 89 | cout << "\n\t{ 0x" << std::hex << i_a->file_addr << std::dec << "UL, " 90 | << "&" << mangle_typename(codeful_name(i_a->found_type)); 91 | cout << " }"; 92 | } 93 | // close the list 94 | cout << "\n};\n"; 95 | return 0; 96 | } 97 | -------------------------------------------------------------------------------- /tools/callsites.cpp: -------------------------------------------------------------------------------- 1 | /* This is like a generalised version of objdumpallocs. 2 | * 3 | * It walks all call sites in the binary, optionally outputting metadata. 4 | * We can also merge with allocsites info at this point. 5 | * We also identify syscalls. 
6 | * 7 | * FIXME: where does the symbolic execution begin? The idea here is to 8 | * do some simple static analysis on the binary to propagate certain 9 | * information between instructions. There are two known use cases 10 | * at present: 11 | * 12 | * - for Linux's type-erased system call entry points, we want to 13 | * execute forwards just far enough that we connect with the 14 | * DWARF info describing the static / inlined body of the call. 15 | * 16 | * - for system calls, we want to identify cases where the syscall 17 | * being made is statically knowable (this is usually the case). 18 | * We do this by executing forwards from an entry point and, if 19 | * we reach a syscall, checking whether the syscall being made is statically known at that point. 20 | * 21 | *** See the thing I mailed to Guillaume 22 | * 23 | * Might this also help with stuff like allocation calls whose size 24 | * argument is statically knowable but not described with sizeofness? 25 | * 26 | * In general, what we're doing is static analysis around entry points and 27 | * call sites, deriving a bunch of "facts". Of what form are facts? 28 | * We start with an entry point in symbolic form. 29 | * Then facts are symbolic statements about 30 | * registers (machine-level) 31 | * or call positions (ABI-level) 32 | * at a given site or location. 33 | * Correlating that with the static DWARF info, in the kernel case, 34 | * requires an extra step: back-propagating from the first "location" 35 | * reached inside the static function's DIE range, 36 | * to the entry point itself. 
37 | * 38 | * So we sym-execute forwards from entry points, and care about reaching 39 | * - call sites 40 | * - syscall sites 41 | * - addresses within other DIEs 42 | * */ 43 | -------------------------------------------------------------------------------- /tools/cufiles.cpp: -------------------------------------------------------------------------------- 1 | /* This is a simple dwarfpp program which generates a C file 2 | * recording data on a uniqued set of data types allocated in a given executable. 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | using std::cin; 19 | using std::cout; 20 | using std::cerr; 21 | using std::map; 22 | using std::string; 23 | using std::ifstream; 24 | using std::ostringstream; 25 | 26 | using namespace dwarf; 27 | using dwarf::core::root_die; 28 | using dwarf::core::iterator_sibs; 29 | using dwarf::core::compile_unit_die; 30 | 31 | int debug_level; 32 | int main(int argc, char **argv) 33 | { 34 | if (argc <= 1) 35 | { 36 | cerr << "Please name an input file." << endl; 37 | exit(1); 38 | } 39 | std::ifstream infstream(argv[1]); 40 | if (!infstream) 41 | { 42 | cerr << "Could not open file " << argv[1] << endl; 43 | exit(1); 44 | } 45 | 46 | if (getenv("CUFILES_DEBUG_LEVEL")) 47 | { 48 | debug_level = atoi(getenv("CUFILES_DEBUG_LEVEL")); 49 | } 50 | 51 | using core::root_die; 52 | int fd = fileno(infstream); 53 | dwarf::core::root_die root(fd); 54 | 55 | auto cus = root.begin().children(); 56 | for (iterator_sibs i_cu = cus.first; 57 | i_cu != cus.second; ++i_cu) 58 | { 59 | if (i_cu->get_name() && i_cu->get_comp_dir()) 60 | { 61 | auto cu_die_name = *i_cu->get_name(); 62 | auto cu_comp_dir = *i_cu->get_comp_dir(); 63 | 64 | for (unsigned i_srcfile = 1; i_srcfile <= i_cu->source_file_count(); i_srcfile++) 65 | { 66 | /* Does this source file have a matching name? 
*/ 67 | string current_sourcepath; 68 | string cu_srcfile_mayberelative = i_cu->source_file_name(i_srcfile); 69 | //if (!path(cu_srcfile_mayberelative).has_root_directory()) 70 | if (cu_srcfile_mayberelative.length() > 0 && cu_srcfile_mayberelative.at(0) != '/') 71 | { 72 | current_sourcepath = cu_comp_dir + '/' + cu_srcfile_mayberelative; 73 | } 74 | else current_sourcepath = /*path(*/cu_srcfile_mayberelative/*)*/; 75 | 76 | cerr << "CU " << *i_cu->get_name() << " sourcefile " << i_srcfile << " is " << 77 | cu_srcfile_mayberelative 78 | << ", sourcepath " 79 | << current_sourcepath 80 | << endl; 81 | } 82 | } 83 | } // end for each CU 84 | 85 | /* 86 | HMM. 87 | So we can easily have things in our source files, e.g. printf, 88 | that are not described in the DWARF 89 | and whose defining header is also not referenced in the DWARF. 90 | 91 | Can we view this as a pathfinding exercise in a graph of #include files? 92 | 93 | Not really, because we don't know where to start looking. 94 | E.g. if printf came from blah/blah/blah.h, thanks to -Iblah/blah, 95 | we'd have no reference to that. In general we can't bootstrap the search 96 | using standard directories; the information on the command line is essential. 97 | So I think this is something that needs fixing in DWARF, 98 | maybe by not omitting entries for referenced things 99 | maybe by describing the #include paths explicitly, 100 | maybe by including a source file entry for all embodied headers, 101 | even if nothing in the info section uses them. 102 | BUT MAYBE we can hack around the lack of things, 103 | like printf, by some kind of type inference? i.e. analysis of the reference site? 104 | 105 | OR is there another way of looking at this? 106 | What information do we actually need? 107 | Allocation sites, sizeof, any macro definitions they depend on. 108 | sizeof is often used inside macros, so we really do need to 109 | do the preprocessing early. 
110 | 111 | -fno-eliminate-unused-debug-types will include any header that defines a type. 112 | So it's not perfect but might be useful. 113 | 114 | Can we also infer the -I paths by looking at how the original 115 | #include directives were qualified? 116 | E.g. if we see #include <blah/blah.h> and we see 117 | /path/to/blah/blah.h 118 | embodied in the output, it seems likely that -I/path/to was on the command line. 119 | 120 | */ 121 | 122 | 123 | return 0; 124 | } 125 | -------------------------------------------------------------------------------- /tools/debian-print-srcpkgs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ldd "$1" | grep ' => ' | grep -v ld-linux | \ 4 | sed 's/.* =>[[:blank:]]*//' | sed 's/ *(0x.*//' | sed '/^$/ d' | while read lib; do 5 | dpkg -S "$lib" | sed 's/: .*//' 6 | done | sort | uniq 7 | -------------------------------------------------------------------------------- /tools/dump-symlinks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ $# -lt 1 ]; then echo "Please specify a filename" 1>&2; exit 1; fi 4 | 5 | # We want to print one line of output 6 | # for every symlink that needs to be resolved 7 | # when calculating the canonical ("realpath", "readlink -f") path 8 | # for a given input pathname. 9 | # Ideally, the existing tools would give us this, but they don't. 
10 | 11 | resolve_one_and_maybe_print () { 12 | local already_resolved_d="$1" 13 | local name="$2" 14 | if [ "$already_resolved_d" != "$(readlink -f "$already_resolved_d")" ]; then 15 | echo "Error: resolve_one_and_maybe_print received not-fully-resolved first argument ($1)" 1>&2; exit 1 16 | fi 17 | # "name" is not allowed to include '/' 18 | case "$name" in 19 | (*/*) echo "Error: we should not ask ourselves to resolve multi-hop relative paths" 1>&2; exit 1 ;; 20 | (*) true ;; 21 | esac 22 | readlink_out="$( cd "$already_resolved_d" && readlink "$name" )" 23 | if [ $? -eq 0 ]; then 24 | # a symlink was involved 25 | case "$already_resolved_d" in 26 | ('/') lhs="/$name"; d_prefix='/' ;; 27 | (*'/') echo "Internal error: non-root already_resolved_d must not end in a slash" 1>&2; exit 1 ;; 28 | (*) lhs="$already_resolved_d/$name"; d_prefix="$already_resolved_d"/ ;; 29 | esac 30 | case "$readlink_out" in 31 | ('') # hmm 32 | echo "Error: readlink succeeded but no output?" 1>&2; exit 1 33 | ;; 34 | (/*) 35 | # PROBLEM: if readlink's output itself names a symlink, we're not finished yet. 36 | # We may need to recursively resolve_all. Might this cause an infinite recursion? 37 | # YES, if we have a symlink cycle, of course. 38 | # FIXME: keep a global array of what we've been asked to resolve. If the same 39 | # thing comes up twice, abort with the equivalent of ELOOP. 40 | printf "%s\\tabsolute\\t%s\\n" "$lhs" "$readlink_out" 41 | resolve_all "$readlink_out" 42 | ;; 43 | (*) 44 | printf "%s\\trelative from %s\\t%s\\n" "$lhs" "${d_prefix}" "${readlink_out}" 45 | # We don't have to resolve_all, but we still recurse 46 | # FIXME: what if "readlink_out" has a chain of components? 47 | # We will hit the error above: multi-hop relative paths. 
48 | # So we may need an iterative rather than our recursive approach below 49 | if [ -h "${d_prefix}${readlink_out}" ]; then 50 | resolve_one_and_maybe_print "$already_resolved_d" "$readlink_out" 51 | fi 52 | ;; 53 | esac 54 | fi 55 | } 56 | 57 | resolve_all () { 58 | # recursively resolve the dirname and basename... 59 | # our base case is if the dirname is '/' 60 | local d="$( dirname "$1" )" 61 | local b="$( basename "$1" )" 62 | case "$d" in 63 | ('.') echo "Error: got relative path but expected absolute" 1>&2; exit 1 64 | ;; 65 | ('/') # OK, no symlinks in the directory part so our recursion has bottomed out 66 | resolve_one_and_maybe_print "/" "$b" 67 | # ... then return to the caller so it can append its "$b"'s resolution, etc. 68 | ;; 69 | (*) resolve_all "$d"; resolve_one_and_maybe_print "$( readlink -f "$d" )" "$b" 70 | ;; 71 | esac 72 | } 73 | 74 | case "$1" in 75 | ('') echo "Empty filename!" 1>&2; exit 1 ;; 76 | (/*) # it's already absolute 77 | resolve_all "$1" ;; 78 | (*) resolve_all "`pwd`"/"$1" ;; 79 | esac 80 | -------------------------------------------------------------------------------- /tools/dwarftypes.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "stickyroot.hpp" 20 | #include "uniqtypes.hpp" 21 | 22 | using std::cin; 23 | using std::cout; 24 | using std::cerr; 25 | using std::map; 26 | using std::make_shared; 27 | using std::ios; 28 | using std::ifstream; 29 | using std::dynamic_pointer_cast; 30 | using boost::optional; 31 | using std::ostringstream; 32 | using std::set; 33 | using namespace dwarf; 34 | //using boost::filesystem::path; 35 | using dwarf::core::iterator_base; 36 | using dwarf::core::iterator_df; 37 | using dwarf::core::iterator_sibs; 38 | 
using dwarf::core::type_die; 39 | using dwarf::core::subprogram_die; 40 | using dwarf::core::compile_unit_die; 41 | using dwarf::core::member_die; 42 | using dwarf::core::with_data_members_die; 43 | using dwarf::core::variable_die; 44 | using dwarf::core::program_element_die; 45 | using dwarf::core::with_dynamic_location_die; 46 | using dwarf::core::address_holding_type_die; 47 | using dwarf::core::array_type_die; 48 | using dwarf::core::type_chain_die; 49 | 50 | using namespace dwarf::lib; 51 | 52 | // regex usings 53 | using boost::regex; 54 | using boost::regex_match; 55 | using boost::smatch; 56 | using boost::regex_constants::egrep; 57 | using boost::match_default; 58 | using boost::format_all; 59 | 60 | static int debug_out = 1; 61 | 62 | using dwarf::lib::Dwarf_Off; 63 | using dwarf::lib::Dwarf_Addr; 64 | using dwarf::lib::Dwarf_Signed; 65 | using dwarf::lib::Dwarf_Unsigned; 66 | 67 | using namespace allocs::tool; 68 | 69 | /* At a high level, what we do is simply output a uniqtype for every 70 | * DWARF type we find. However, it's not actually quite that simple. 71 | * We sometimes emit types that are not really in the DWARF, such as 72 | * signedness complements. 73 | * And we sometimes want to refer to types that we won't emit, 74 | * for example, the type of void. 75 | * We rely on these being included in the -roottypes.o that is 76 | * also linked in. 77 | * To avoid complexity from tracking whether we have or haven't 78 | * emitted a thing we depend on, 79 | * what order things are emitted, 80 | * and also to avoid hard prevention of emitting something twice, 81 | * we do the following. 82 | * 83 | * 1. anything that we reference, extern-declare before we reference 84 | * it. 85 | * 2. anything we define, #ifdef-protect it so that if it's already 86 | * defined, we don't define it again. 87 | * 88 | * So far, so standard. 
For signedness complements, we use a quick 89 | * check of which complements we reference, and then when we get to 90 | * the end, output any we haven't output yet. 91 | * For codeless aliases, we run a shell script over the combined metadata 92 | * output afterwards, and ld -r --defsym the aliases into existence. 93 | * That's all done in Makefile.meta. */ 94 | 95 | int main(int argc, char **argv) 96 | { 97 | /* We open the file named by argv[1] and dump its DWARF types. */ 98 | if (argc <= 1) 99 | { 100 | cerr << "Please name an input file." << endl; 101 | exit(1); 102 | } 103 | std::ifstream infstream(argv[1]); 104 | if (!infstream) 105 | { 106 | cerr << "Could not open file " << argv[1] << endl; 107 | exit(1); 108 | } 109 | 110 | if (getenv("DWARFTYPES_DEBUG")) 111 | { 112 | debug_out = atoi(getenv("DWARFTYPES_DEBUG")); 113 | } 114 | 115 | using core::root_die; 116 | int fd = fileno(infstream); 117 | shared_ptr p_root = sticky_root_die::create(fd); 118 | if (!p_root) { std::cerr << "Error opening file" << std::endl; return 1; } 119 | sticky_root_die& root = *p_root; 120 | assert(&root.get_frame_section()); 121 | master_relation_t master_relation; 122 | make_exhaustive_master_relation(master_relation, root.begin(), root.end()); 123 | cerr << "Master relation contains " << master_relation.size() << " data types." << endl; 124 | // write a forward declaration for every uniqtype we need 125 | set names_emitted; 126 | map > > types_by_name; 127 | cout << "#include \"uniqtype-defs.h\"\n\n"; 128 | write_master_relation(master_relation, cout, cerr, 129 | names_emitted, types_by_name, /* emit_codeless_alises */ true); 130 | // HACK: we emit codeless aliases here, but better if it were a wrapper shell 131 | // script on the -meta.so afterwards, because types don't only come from dwarftypes. 132 | 133 | // success! 
134 | return 0; 135 | } 136 | -------------------------------------------------------------------------------- /tools/find-allocated-type-size.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | // #include // broken in GNU libstdc++! 11 | //#include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "uniqtypes.hpp" 17 | 18 | using std::cin; 19 | using std::cout; 20 | using std::cerr; 21 | using std::map; 22 | using std::multimap; 23 | using std::ios; 24 | using std::ifstream; 25 | using boost::optional; 26 | using std::ostringstream; 27 | using namespace dwarf; 28 | //using boost::filesystem::path; 29 | using dwarf::core::root_die; 30 | using dwarf::core::iterator_base; 31 | using dwarf::core::iterator_df; 32 | using dwarf::core::iterator_sibs; 33 | using dwarf::core::type_die; 34 | using dwarf::core::subprogram_die; 35 | using dwarf::core::compile_unit_die; 36 | using dwarf::core::pointer_type_die; 37 | 38 | using namespace allocs::tool; 39 | 40 | int main(int argc, char **argv) 41 | { 42 | /* We read from stdin lines such as those output by dumpallocs, 43 | * prefixed by their filename. Actually they will have been 44 | * stored in .allocsites files. */ 45 | 46 | using std::unique_ptr; 47 | unique_ptr p_objfile; 48 | unique_ptr p_root; 49 | 50 | std::shared_ptr p_in; 51 | 52 | string fnname; 53 | if (argc < 2) 54 | { 55 | cerr << "Please specify an allocation function name." 
<< endl; 56 | return 1; 57 | } 58 | 59 | fnname = argv[1]; 60 | 61 | for (int i = 2; i < argc; ++i) 62 | { 63 | // open this file's dwarf info 64 | auto p_in = new ifstream(argv[i]); 65 | if (!*p_in) 66 | { 67 | cerr << "Could not open file " << argv[i] << endl; 68 | } 69 | else 70 | { 71 | root_die *p_root = new root_die(fileno(*p_in)); 72 | 73 | // search the root for a function named fnname 74 | auto cus_seq = p_root->begin().children().subseq_of(); 75 | for (auto i_cu = cus_seq.first; i_cu != cus_seq.second; ++i_cu) 76 | { 77 | auto found = i_cu->named_child(fnname); 78 | if (found && found.is_a()) 79 | { 80 | auto return_type = found.as_a()->get_type(); 81 | 82 | if (return_type) 83 | { 84 | auto concrete_t = return_type->get_concrete_type(); 85 | if (concrete_t && concrete_t.is_a()) 86 | { 87 | auto target_t = concrete_t.as_a()->get_type(); 88 | if (target_t && target_t->get_concrete_type()) 89 | { 90 | auto opt_byte_sz = target_t->get_concrete_type() 91 | ->calculate_byte_size(); 92 | if (!opt_byte_sz) 93 | { 94 | cerr << "Warning: could not compute byte size of " 95 | << target_t << endl; 96 | } 97 | else 98 | { 99 | cout << *opt_byte_sz << "\t from " << found.as_a()->summary() << endl; 100 | } 101 | } 102 | } 103 | else cerr << "Warning: return type of " << found << " is not a pointer type " 104 | << "(" << concrete_t << ")" << endl; 105 | } 106 | } 107 | } 108 | 109 | /* HACK: don't delete right now, to work around bug in libdwarfpp. 
*/ 110 | // delete p_root; 111 | } 112 | 113 | // delete p_in; 114 | } 115 | 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /tools/gather-srcallocs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # turn tab-separated lines into one line per field, with \f for original newlines 4 | fields_as_lines () { 5 | tr '\n' '\f' | \ 6 | sed 's/\f/\f\n/g' | \ 7 | tr '\t' '\n' 8 | # now a line break is \f\n and a field break is \n 9 | } 10 | 11 | recover_lines () { 12 | tr '\n' '\t' | \ 13 | sed 's/\f\t/\n/g' 14 | } 15 | 16 | pad_numbers () { 17 | # be careful not to swallow a trailing \f 18 | fields_as_lines | \ 19 | sed -r "s/^([[:space:]]*[0-9]+[[:space:]]*)(\\f|\$)/printf '%06d' '\\1'; printf '\\2'/e" | \ 20 | recover_lines 21 | # this gawk replacement doesn't work 22 | #gawk '/^[[:space:]]*[0-9]+[[:space:]]*$/ { printf "%06d%s\n", $0, gensub(/.*[^[:space:]]*([[:space:]]*)$/, "\\1", ""); next }; /.*/ { printf "%s", $0; }' | \ 23 | } 24 | 25 | use_src_realpaths () { 26 | while IFS=$'\t' read -r alloc_sourcefile alloc_sourceline alloc_fun alloc_rest; do 27 | echo "Saw alloc_rest: $alloc_rest" 1>&2 28 | echo "$( readlink -f $alloc_sourcefile)"$'\t'"$alloc_sourceline"$'\t'"$alloc_fun"$'\t'"$alloc_rest" 29 | done 30 | } 31 | 32 | # for readelf_debug 33 | . 
$(dirname $0)/debug-funcs.sh 34 | 35 | our_name="$(basename "$0")" 36 | our_name_rewritten="$( echo "$our_name" | sed 's/gather-\(.*\)\.sh/gather-\1/' )" 37 | 38 | all_obj_allocs_file="$1" 39 | 40 | # echo Hello 1>&2 41 | 42 | # Do a per-CU loop over the file and dispatch to a per-language allocs-gatherer 43 | 44 | cat "$all_obj_allocs_file" | cut -f1 | sort | uniq | while read obj rest; do 45 | echo "Saw line $obj $rest" 1>&2 46 | all_cus_info="$( get_cu_info "$obj" )" 47 | 48 | echo "$all_cus_info" | while read_cu_info; do 49 | case "$cu_language_num" in 50 | (1|2|12) # DW_LANG_C89, DW_LANG_C, DW_LANG_C99 51 | $(dirname "$0")/lang/c/bin/c-"$our_name_rewritten" "$cu_sourcepath" "$obj" "$cu_fname" "$cu_compdir" 52 | ;; 53 | (*) # unknown 54 | echo "Warning: could not gather source-level allocs for unknown language: $cu_language_fullstr ($cu_language_num, $( echo -n "$cu_language_fullstr" | hd ))" 1>&2 55 | ;; 56 | esac 57 | done 58 | done | pad_numbers | sort -t$'\t' -k1 -k2 | uniq #use_src_realpaths | 59 | -------------------------------------------------------------------------------- /tools/gather-srcmemacc.sh: -------------------------------------------------------------------------------- 1 | gather-srcallocs.sh -------------------------------------------------------------------------------- /tools/guess-allocsite-types.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # "Historical interest only" 4 | # -- a pleasingly primitive version of dumpallocs! 5 | 6 | escape_eregexp () { 7 | # filter which reads a string on input, and yields a plain grep-style regexp 8 | # which matches the string literally, by escaping the metacharacters 9 | sed -r 's/(\*|\.|\[|\^|\$|\[|\]|\||\{|\}|\?|\+|\(|\)|\\)/\\\1/g' 10 | } 11 | 12 | escapefn_eregexp () { 13 | echo "$1" | escape_eregexp 14 | } 15 | 16 | # Build a list of allocation sites with the power of objdump. 
17 | # We record them as the string inside the < > 18 | # i.e. , without the < >. 19 | line_regexp='^[0-9a-f]* <([^>]*)>' 20 | alloc_site_regexp="${line_regexp}.*"'call.*alloc' 21 | allocation_sites="$( objdump --prefix-addresses -d "$1" | \ 22 | egrep "$alloc_site_regexp" | \ 23 | sed -r "s/${alloc_site_regexp}.*/\1/" )" #" 24 | 25 | # read the whole objdump in, to avoid re-disassembling 26 | objdump_output="$( objdump --prefix-addresses -dS "$1" )" 27 | 28 | while read sym offset; do 29 | # Now use the power of objdump -S to get the source line for that alloc. 30 | # 1, Build a regexp that will re-locate the current alloc site. 31 | regexp="<$( escapefn_eregexp "$sym" )\+$( escapefn_eregexp "$offset")>" 32 | echo "regexp: $regexp" 1>&2 33 | # 2. Grab that and 200 lines of pre-context, 34 | # which hopefully will include the allocating source line 35 | context="$( echo "$objdump_output" | egrep -B200 "$regexp" )" #" 36 | echo "context: $context" 1>&2 37 | # 3. Filter out non-source lines, and collapse to a single line 38 | source="$( echo "$context" | egrep -v "$line_regexp" | tr '\n' ' ' )" #" 39 | #echo "source: $source" 1>&2 40 | # 4. Get the first ident following the last occurrence of "new" or "sizeof" 41 | token="$( echo "$source" | egrep '(new|sizeof)([^0-9a-z_]|$)' | sed -r 's/.*(new|sizeof)([^0-9a-zA-Z_]|$)/\2/' | sed -r 's/[^0-9a-zA-Z_]*([a-zA-Z0-9_ \*]+).*/\1/' )" #" 42 | echo "token: $token" 1>&2 43 | # 5. 
Print the token and the site 44 | echo "Guessed that site <${sym}+${offset}> allocated: ${token}" 45 | done <<<"$( echo "$allocation_sites" | sed -r 's/\+(0x[0-9a-f]*)$/ \1/' )" #" 46 | -------------------------------------------------------------------------------- /tools/interp-pad.S: -------------------------------------------------------------------------------- 1 | #if defined(__linux__) && defined(__ELF__) 2 | .section .note.GNU-stack,"",%progbits 3 | #endif 4 | .section .interp, "aw" 5 | -------------------------------------------------------------------------------- /tools/lang/Makefile: -------------------------------------------------------------------------------- 1 | default: all 2 | all: c 3 | install: ; # nothing 4 | 5 | .PHONY: c 6 | c: 7 | $(MAKE) -C c 8 | 9 | .PHONY: clean 10 | clean: 11 | $(MAKE) -C c clean 12 | -------------------------------------------------------------------------------- /tools/lang/c++/bin/allocsc++: -------------------------------------------------------------------------------- 1 | ../lib/allocscxx.py -------------------------------------------------------------------------------- /tools/lang/c++/bin/link-used-types: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | objfile="$1" 4 | 5 | test -n "$objfile" || (echo "Please specify an object file"; false) || exit 1 6 | 7 | . $(dirname "$0")/../../../used-types-funcs.sh 8 | 9 | usedtypes_src=$( mktemp ) 10 | usedtypes_obj=$( mktemp ) 11 | 12 | # For C++, we have to do the template-to-C-style translation 13 | # and then the usual base types translation. 
chomp_idents () {
	# Read a run of length-prefixed identifiers (as found in mangled C++
	# names, e.g. "4mainE4blahE") from stdin and print one identifier per line.
	#
	# Any characters between identifiers that are not digits are skipped.
	# Returns 0 once no further length prefix can be found.
	local rest skipped num len
	rest="$(cat)"
	while true; do
		# strip any leading non-numbers, then chomp a num
		skipped="${rest%%[0-9]*}"
		rest="${rest#"$skipped"}"
		if [ -z "$rest" ]; then return 0; fi
		num="${rest%%[!0-9]*}"
		rest="${rest#"$num"}"
		echo "num_and_remainder is ${num}"$'\n'"${rest}" 1>&2
		# force base 10 so that a prefix such as "08" is not read as octal
		len=$(( 10#$num ))
		# then chomp that many chars from the remainder
		echo "${rest:0:len}"
		# Continue *after* the identifier. Resuming on its last character
		# goes wrong when that character is a digit, because it would be
		# taken as part of the next length prefix.
		rest="${rest:len}"
	done
}
objcopy_and_redefine_cxx_names () {
	# Rewrite, in place, the undefined uniqtype<...>::obj references of the
	# object file "$1" to the names that translate_symnames prints for them.
	#
	# $1: path of the object file to patch with ${OBJCOPY}.
	# Returns the status of the ${OBJCOPY} step, or 0 if nothing needed renaming.
	our_objfile="$1"

	candidate_symnames="$( nm -fposix -u "$our_objfile" | \
		sed -r 's/[[:blank:]]*[Uw][[:blank:]]*$//' | grep '^_ZN8uniqtype' )"

	# What do we want to rewrite?
	# The template name gives us a C++-style typename which we have to turn into
	# an allocs-style typestr. How do we encode namespace separation in allocs typestrs?

	# we need to push candidates into the filter and see what comes out;
	# if it changes, add a --redefine-sym option to the list

	# Start from an empty list, so that a value inherited from the caller or
	# the environment cannot leak into the objcopy command line.
	first_redefinition_args=""
	# Candidates arrive on stdin and their translations on fd 3, line for line.
	# -r keeps read from interpreting backslashes.
	while read -r cand && read -r canon <&3; do
		if [[ "$cand" != "$canon" ]]; then
			first_redefinition_args="${first_redefinition_args} --redefine-sym ${cand}=${canon}"
		fi
	done<<<"${candidate_symnames}" 3<<<"$( echo "${candidate_symnames}" | translate_symnames "$our_objfile" )"

	if [[ -n "${first_redefinition_args}" ]]; then
		# first, lower all undef typenames to canonical ones (but staying codeless)
		echo ${OBJCOPY} $first_redefinition_args "$our_objfile" 1>&2 && \
		${OBJCOPY} $first_redefinition_args "$our_objfile" && \
		echo "objcopy renamed symbols in $our_objfile according to $first_redefinition_args" 1>&2
	else
		echo "No need to replace any C++ typenames" 1>&2
	fi
}
echo ${USEDTYPES} "$objfile" 1>&2 # for debugging 104 | ( objcopy_and_redefine_cxx_names "$objfile" && \ 105 | echo "Successfully did objcopy_and_redefine_cxx_names" 1>&2 && \ 106 | echo ${USEDTYPES} "$objfile" 1>&2 && \ 107 | ${USEDTYPES} "$objfile" > "$usedtypes_src" && \ 108 | echo "Successfully did usedtypes" 1>&2 && \ 109 | compile "$usedtypes_src" "$usedtypes_obj" && \ 110 | echo "Successfully did compile" 1>&2 && \ 111 | objcopy_and_redefine_codeless_names "$objfile" "$usedtypes_obj" && \ 112 | echo "Successfully did objcopy_and_redefine_codeless_names" 1>&2 && \ 113 | link_defining_aliases "$objfile" "$usedtypes_obj" && \ 114 | echo "Successfully did link_defining_aliases" 1>&2 ) || \ 115 | (echo NOT REALLY rm -f "$objfile" 1>&2 ; exit 1) 116 | 117 | # rm -f "$usedtypes_src" "$usedtypes_obj" 118 | -------------------------------------------------------------------------------- /tools/lang/c++/lib/allocscxx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # C++ compiler wrapper for liballocs. 
import os, sys
# HACK: find the liballocs tree relative to this file, so that the shared
# compiler-wrapper module can be imported straight from the source tree.
liballocs_base = os.path.realpath(os.path.dirname(__file__) + "/../../../..")
sys.path.append(os.path.join(liballocs_base, "tools"))
# os.path.join supplies the separator after liballocs_base; plain string
# concatenation here would yield a path that does not exist.
sys.path.append(os.path.join(liballocs_base, "tools", "lang", "c++", "lib"))
from allocscompilerwrapper import *

class AllocsCxx(AllocsCompilerWrapper):
    """C++ compiler wrapper for liballocs, driving the system ``c++``."""

    # FIXME: also new, delete et al
    def defaultL1AllocFns(self):
        """Return the signature strings of the default allocation functions."""
        return ["malloc(Z)p", "calloc(zZ)p", "realloc(pZ)p", "memalign(zZ)p"]

    def defaultFreeFns(self):
        """Return the signature strings of the default deallocation functions."""
        return ["free(P)"]

    def makeObjectFileName(self, sourceFile):
        """Return the object file name to use for ``sourceFile``.

        A recognised C++ extension (.cpp, .cc, .C) is replaced by ".o";
        for any other name ".o" is appended to the whole file name.
        """
        nameStem, nameExtension = os.path.splitext(sourceFile)
        if nameExtension in (".cpp", ".cc", ".C"):
            outputFilename = nameStem + ".o"
            self.debugMsg("Making a secret output file (from C++ source) " + outputFilename + "\n")
        else:
            outputFilename = sourceFile + ".o"
            self.debugMsg("Making a secret output file (from unknown source) " + outputFilename + "\n")
        return outputFilename

    def getUnderlyingCompilerCommand(self, sourceFiles):
        """Return the argv prefix of the real compiler; ``sourceFiles`` is unused."""
        return ["c++"]

if __name__ == '__main__':
    wrapper = AllocsCxx()
    ret = wrapper.main()
    # sys.exit, not the interactive helper exit(), which the site module
    # installs and which is absent under "python -S".
    sys.exit(ret)
# Build configuration for the CIL-based C tools.
include $(srcroot)/config.mk
OCAMLFIND ?= ocamlfind

# CIL_INSTALL is mandatory: cilly and the CIL library directory are both
# located relative to it below.
ifeq ($(CIL_INSTALL),)
$(error "Expected CIL_INSTALL to be set")
endif
CILLY ?= $(CIL_INSTALL)/../bin/cilly
# One directory per tool; each holds <tool>/<tool>.ml.
CIL_TOOLS ?= cilallocs dumpallocs monalloca dumpmemacc trapptrwrites
OCAMLFLAGS += -package findlib
OCAMLFLAGS += -I $(CIL_INSTALL)/cil -I $(dir $(THIS_MAKEFILE))/cilallocs

# All OCaml sources found under the tool directories and under lib/.
CIL_TOOLS_SRC := $(shell find $(CIL_TOOLS) $(realpath $(dir $(THIS_MAKEFILE)))/lib -name '*.ml')

# Default goal: the src and bin targets, plus a native plugin (.cmxs) and a
# bytecode archive (.cma) for every tool.
default: src bin $(foreach t,$(CIL_TOOLS),$(t)/$(t).cmxs $(t)/$(t).cma)

# One ocamldep-generated .d file per .ml source.
OCAML_DEPS := $(patsubst %.ml,%.d,$(CIL_TOOLS_SRC))

$(OCAML_DEPS): %.d: %.ml
	$(OCAMLFIND) ocamldep $(filter-out -g,$(OCAMLOPTFLAGS)) $(OCAMLFLAGS) "$<" > "$@"

DEPS += $(OCAML_DEPS)
-include $(DEPS)

# Native code: .ml -> .cmx -> .cmxs. Bytecode: .ml -> .cmo -> .cma.
# NOTE(review): both compile rules list %.cmi as a target -- confirm this is
# safe under parallel make.
%.cmxs: %.cmx
	$(OCAMLFIND) ocamlopt -shared -o "$@" $(OCAMLOPTFLAGS) $(OCAMLFLAGS) $+
%.cmx %.cmi: %.ml
	$(OCAMLFIND) ocamlopt -o "$@" $(OCAMLOPTFLAGS) $(OCAMLFLAGS) -c "$<"
%.cmo %.cmi: %.ml
	$(OCAMLFIND) ocamlc -o "$@" $(OCAMLFLAGS) -c "$<"
%.cma: %.cmo
	$(OCAMLFIND) ocamlc -o "$@" $(OCAMLFLAGS) -a $+

# build cilallocs first
dumpallocs/dumpallocs.cmx: cilallocs/cilallocs.cmx
dumpmemacc/dumpmemacc.cmx: cilallocs/cilallocs.cmx
monalloca/monalloca.cmx: cilallocs/cilallocs.cmx
dumpallocs/dumpallocs.cmo: cilallocs/cilallocs.cmo
dumpmemacc/dumpmemacc.cmo: cilallocs/cilallocs.cmo
monalloca/monalloca.cmo: cilallocs/cilallocs.cmo

# for testing
# Runs cilly inside the source's own directory. The flag passed is "--do"
# followed by that directory's name with the slashes removed.
%.cil.o: %.c
	cd "$(dir $<)" && $(CILLY) --do$$( echo $(dir $<) | tr -d '/' ) --save-temps -c -o "$(notdir $@)" "$(notdir $<)"

# Removes build products from every tool directory, then cleans src/.
.PHONY: clean
clean:
	for dir in $(CIL_TOOLS); do (cd $$dir && rm -f *.o *.cmo *.cma *.cmi *.cmx *.cmxa *.cmxs *.cil.c *.i ); done
	$(MAKE) -C src clean

.PHONY: src
src:
	$(MAKE) -C src

# Exposes the base-types-translation binary built in src/ through bin/.
.PHONY: bin
bin: src
	cd bin && ln -sf ../src/base-types-translation .
59 | -------------------------------------------------------------------------------- /tools/lang/c/base-type-equivs.txt: -------------------------------------------------------------------------------- 1 | signed char, char, char signed 2 | unsigned char, char unsigned 3 | short int, short, int short 4 | short unsigned int, unsigned short, short unsigned, unsigned short int, int unsigned short, int short unsigned, unsigned int short, short int unsigned 5 | int, signed, signed int, int signed 6 | unsigned int, unsigned, int unsigned 7 | long int, long, int long, signed long int, int signed long, int long signed, long signed int, signed int long, long signed, signed long 8 | unsigned long int, int unsigned long, int long unsigned, long unsigned int, unsigned int long, long unsigned, unsigned long 9 | long long int, long long, long int long, int long long, long long signed, long signed long, signed long long, long long int signed, long long signed int, long signed long int, signed long long int, long int long signed, long int signed long,long signed int long,signed long int long, int long long signed, int long signed long, int signed long long, signed int long long 10 | long long unsigned int, long long unsigned, long unsigned long, unsigned long long, long long int unsigned, long unsigned long int, unsigned long long int, long int long unsigned, long int unsigned long, long unsigned int long, unsigned long int long, int long long unsigned, int long unsigned long, int unsigned long long, unsigned int long long 11 | float 12 | double 13 | long double, double long 14 | bool 15 | wchar_t 16 | -------------------------------------------------------------------------------- /tools/lang/c/bin/c-gather-srcallocs: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . 
. "$(dirname "$0")"/../lib/symname-funcs.sh

# Print the per-source records written for one compilation unit, with relative
# file names made absolute and type names passed through translate_symnames.
#
# args: <cu_sourcepath> <obj> <cu_fname> <cu_compdir>
# The same script serves two names: *srcallocs reads "<stem>.i.allocs" files
# and *srcmemacc reads "<stem>.i.memacc" files.
cu_sourcepath="$1"
obj="$2"
cu_fname="$3"
cu_compdir="$4"

our_name="$(basename "$0")"
case "$our_name" in
	(*srcallocs)
		extension="allocs"
	;;
	(*srcmemacc)
		extension="memacc"
	;;
	(*)
		echo "Did not recognise basename '$our_name'" 1>&2; exit 1
	;;
esac

test -n "$cu_compdir" || (echo "args: <cu_sourcepath> <obj> <cu_fname> <cu_compdir>" 1>&2; false) || exit 1

# Both substitutions are anchored at the end of the path. Unanchored, sed
# rewrites the first ".c"/".i" it meets, which may be inside a directory name
# (e.g. "my.code/foo.c").
cu_allocspath="$( echo "$cu_sourcepath" | grep '\.cil\.[ci]$' | sed "s/\.cil\.[ci]\$/.i.$extension/" )"

# If we're using clang instead of CIL then the source path will just be '%.c',
# not '%.cil.c'.
if [[ -z "$cu_allocspath" ]]; then
	cu_allocspath="$( echo "$cu_sourcepath" | grep '\.c$' | sed "s/\.c\$/.i.$extension/" )"
fi

echo "Warning: cu_allocspath is $cu_allocspath" 1>&2

if [[ ! -r "$cu_allocspath" ]]; then
	echo "Warning: missing expected allocs file ($cu_allocspath) for source file: $cu_sourcepath" 1>&2
else
	# we need to sed its symnames
	# -r stops read from eating backslashes in the records
	cat "$cu_allocspath" | \
	while read -r fname rest; do
		# if the filename is relative, it's relative to the compilation directory.
		# prepend the full path *as the compiler saw it*, i.e. cu_compdir.
		case "$fname" in
			('/'*)
				echo "$fname"$'\t'"$rest"
			;;
			(*)
				echo "$cu_compdir"/"$fname"$'\t'"$rest"
			;;
		esac
	done | translate_symnames "$obj" "$cu_fname" "$cu_compdir"
fi
$(dirname "${BASH_SOURCE[0]}")/../../../used-types-funcs.sh # sets LIBALLOCS 4 | 5 | objcopy_and_redefine_c_names () { 6 | our_objfile="$1" 7 | 8 | candidate_symnames="$( nm -fposix -u "$our_objfile" | \ 9 | sed -r 's/[[:blank:]]*[Uw][[:blank:]]*$//' | grep __uniqtype__ )" 10 | 11 | # we need to push candidates into the filter and see what comes out; 12 | # if it changes, add a --redefine-sym option to the list 13 | 14 | . ${LIBALLOCS}/tools/lang/c/lib/symname-funcs.sh 15 | 16 | # if any substitutions took effect, issue a redefine-sym command 17 | first_redefinition_args="" 18 | while read cand && read canon <&3; do 19 | if [[ "$cand" != "$canon" ]]; then 20 | first_redefinition_args="${first_redefinition_args} --redefine-sym ${cand}=${canon}" 21 | fi 22 | done<<<"${candidate_symnames}" 3<<<"$( echo "${candidate_symnames}" | translate_symnames "$our_objfile" )" 23 | 24 | if [[ -n "${first_redefinition_args}" ]]; then 25 | # first, lower all undef typenames to canonical ones (but staying codeless) 26 | echo ${OBJCOPY} $first_redefinition_args "$our_objfile" 1>&2 && \ 27 | ${OBJCOPY} $first_redefinition_args "$our_objfile" && \ 28 | echo "objcopy renamed symbols in $our_objfile according to $first_redefinition_args" 1>&2 29 | 30 | else 31 | echo "No need to replace any C typenames" 1>&2 32 | fi 33 | } 34 | 35 | # only carry out the main logic if we're run, not sourced 36 | case "$0" in 37 | (*link-used-types) 38 | 39 | objfile="$1" 40 | test -n "$objfile" || (echo "Please specify an object file"; false) || exit 1 41 | 42 | usedtypes_src=$( mktemp --suffix=.c ) 43 | usedtypes_obj=$( mktemp --suffix=.o ) 44 | 45 | echo ${USEDTYPES} "$objfile" 1>&2 # for debugging 46 | ( objcopy_and_redefine_c_names "$objfile" && \ 47 | echo "Successfully did objcopy_and_redefine_c_names" 1>&2 && \ 48 | echo ${USEDTYPES} "$objfile" 1>&2 && \ 49 | ${USEDTYPES} "$objfile" > "$usedtypes_src" && \ 50 | echo "Successfully did usedtypes" 1>&2 && \ 51 | compile "$usedtypes_src" 
#!/bin/bash

# Collect any --alloc-fn <name> options, then start gdb on the remaining
# command line.

# -o '' : without an explicit short-option string, getopt uses the first
#         non-option argument (the program to debug) as that string.
# alloc-fn: : the trailing colon declares that the option takes an argument.
# getopt's exit status is deliberately not checked (and -q silences it), as
# before.
TEMP="$( getopt -q -o '' -l alloc-fn: -n "$(basename "$0")" -- "$@" )"
eval set -- "$TEMP"

# NOTE(review): the names are collected but nothing consumes them yet.
declare -a alloc_fns

while [ $# -gt 0 ]; do case "$1" in
	(--alloc-fn)
		alloc_fns+=("$2")
		# drop the option *and* its argument
		shift 2
	;;
	(--)
		# end of options: what follows is the program and its arguments
		shift
		break
	;;
	(*)
		break
	;;
esac; done


# load the program with gdb

gdb --args "$@"
# horrible HACK
this_filename () {
	# Print the path of the file in which this function is defined.
	# Inside a function, BASH_SOURCE[0] names the file that defined it, and the
	# running function is always FUNCNAME[0], so no search is needed. The
	# expansion is quoted so that paths containing blanks survive, and the
	# function returns rather than exits, so a direct call cannot end the
	# caller's shell.
	if [[ "${FUNCNAME[0]}" != "this_filename" ]]; then
		echo "Error: couldn't find this_filename" 1>&2
		return 1
	fi
	echo "${BASH_SOURCE[0]}"
}

translate_symnames() {
	# Filter stdin to stdout, replacing each C base-type fragment inside
	# uniqtype symbol names by the name that ${BASE_TYPES_TRANSLATION} pairs it
	# with.
	#
	# $1: object file handed to ${BASE_TYPES_TRANSLATION}
	# $2, $3: optional CU file name and compilation directory, passed through
	objfile="$1"

	# We want to select out only a certain CU, if the caller asks.
	# That's because we might be being run on a whole binary
	# (from c-gather-srcallocs)
	# or on a single relocatable file (link-used-types).
	cu_fname="$2"
	cu_compdir="$3"

	BASE_TYPES_TRANSLATION=${BASE_TYPES_TRANSLATION:-$( dirname "$(this_filename)" )/../src/base-types-translation}

	# A base-type fragment is only rewritten where it sits between one of
	# these markers (or the __uniqtype__ prefix) and the next marker, the end
	# of the line, or a tab.
	signpost_frag_regexp="__ARG[0-9]+_|__PTR_|__REF_|__RR_|__ARR[0-9]*_|__FUN_FROM_|__FUN_TO_|__VA_"
	type_pred_regexp="__uniqtype__|${signpost_frag_regexp}"
	type_succ_regexp="${signpost_frag_regexp}|\$|"$'\t'

	# FIXME: we should really use our hard-coded table of base type equivalences here,
	# to save me the pain of remembering it's "short unsigned int" and not
	# "unsigned short int", say.

	# join the substitutions into a big sed program
	sed_program=""
	echo running ${BASE_TYPES_TRANSLATION} "$objfile" "$cu_fname" "$cu_compdir" 1>&2
	while read -r c_base canon_base; do
		# An empty translation still yields one blank here-string line; skip
		# it, and skip identity pairs, so that "nothing to do" really takes
		# the cheap cat branch below.
		if [[ -z "$c_base" || "$c_base" == "$canon_base" ]]; then continue; fi
		# escape *every* '$' of the C name, since it is used as a regexp
		sed_program="${sed_program:+${sed_program}; }s/(${type_pred_regexp})$(echo "${c_base}" | sed 's/\$/\\$/g')(${type_succ_regexp})/\1${canon_base}\2/g"
	done<<<"$( ${BASE_TYPES_TRANSLATION} "$objfile" "$cu_fname" "$cu_compdir" )"

	echo "sed program is $sed_program" 1>&2
	if [[ -n "$( echo "$sed_program" | tr -d '[:blank:]' )" ]]; then

		# apply the substitutions to this symname, "til a fixed point"
		# (one pass is not enough: a match consumes the marker that follows
		# it, and that marker is also the prefix of the next fragment)
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program" | \
		sed -r "$sed_program"
	else
		cat
	fi
}
CXXFLAGS += -Wno-deprecated-declarations # while we're using old libdwarfpp -- FIXME

# use the pkg-config vars
CXXFLAGS += $(LIBCXXFILENO_CXXFLAGS) \
  $(LIBSRK31CXX_CXXFLAGS) \
  $(LIBDWARFPP_CXXFLAGS) \
  $(LIBCXXGEN_CXXFLAGS) \
  $(DWARFIDL_CXXFLAGS) \
  $(LIBANTLR3C_CXXFLAGS) \
  $(LIBALLOCSTOOL_CXXFLAGS)
# NOTE(review): every other entry below is a *_LIBS variable; the last one is
# LIBALLOCSTOOL_DIR -- confirm that is the intended variable.
LDFLAGS += $(LIBCXXFILENO_LIBS) \
  $(LIBSRK31CXX_LIBS) \
  $(LIBDWARFPP_LIBS) \
  $(LIBCXXGEN_LIBS) \
  $(DWARFIDL_LIBS) \
  $(LIBANTLR3C_LIBS) \
  $(LIBALLOCSTOOL_DIR)

CXXFLAGS += -I$(srcroot) # for config.h

# Search the build directory for libraries at link time and at run time.
LDFLAGS += -L`pwd` -Wl,-R`pwd`
LDLIBS += -lantlr3c -ldwarfidl -lcxxgen -ldwarfpp \
    -lboost_regex -lboost_filesystem -lboost_system -lboost_serialization \
    -lc++fileno -lsrk31c++ -lelf -lstdc++ -lz

CPP_SRC := $(wildcard *.cpp)

# Only base-types-translation is built by default; lower-typenames is
# commented out of the default goal.
default: base-types-translation #lower-typenames

.PHONY: clean
clean:
	rm -f *.o .*.d lower-typenames base-types-translation

# One hidden dependency file (.<name>.d) per C++ source, produced by $(CXX) -MM.
CPP_DEPS := $(patsubst %.cpp,.%.d,$(CPP_SRC))
DEPS := $(CPP_DEPS)

$(CPP_DEPS): .%.d : %.cpp
	$(CXX) -MM $(CXXFLAGS) "$<" > "$@"

# Not included for "make clean", so that cleaning does not first regenerate
# the dependency files.
ifneq ($(MAKECMDGOALS),clean)
-include $(DEPS)
endif

# GAH... for some reason GNU Make needs this rule to avoid trying
# to link the .o using $(CC) instead of $(CXX).
52 | %: %.cpp 53 | $(CXX) $(LDFLAGS) $(CXXFLAGS) -o "$@" "$<" $(LDLIBS) 54 | 55 | lower-typenames: lower-typenames.cpp 56 | base-types-translation: base-types-translation.cpp 57 | -------------------------------------------------------------------------------- /tools/lang/c/src/base-types-translation.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | //#include 20 | #include "uniqtypes.hpp" 21 | 22 | using std::cin; 23 | using std::cout; 24 | using std::cerr; 25 | using std::map; 26 | using std::ios; 27 | using std::ifstream; 28 | using std::dynamic_pointer_cast; 29 | using boost::optional; 30 | using std::ostringstream; 31 | using std::set; 32 | using std::pair; 33 | using std::make_pair; 34 | using std::multimap; 35 | using namespace dwarf; 36 | //using boost::filesystem::path; 37 | using dwarf::core::iterator_base; 38 | using dwarf::core::iterator_df; 39 | using dwarf::core::iterator_sibs; 40 | using dwarf::core::type_die; 41 | using dwarf::core::subprogram_die; 42 | using dwarf::core::compile_unit_die; 43 | using dwarf::core::member_die; 44 | using dwarf::core::with_data_members_die; 45 | using dwarf::core::variable_die; 46 | using dwarf::core::with_dynamic_location_die; 47 | using dwarf::core::address_holding_type_die; 48 | using dwarf::core::base_type_die; 49 | using dwarf::core::array_type_die; 50 | using dwarf::core::type_chain_die; 51 | using dwarf::core::subroutine_type_die; 52 | using dwarf::core::formal_parameter_die; 53 | 54 | using dwarf::lib::Dwarf_Off; 55 | using dwarf::tool::abstract_c_compiler; 56 | 57 | using boost::regex; 58 | using boost::regex_match; 59 | using boost::smatch; 60 | using boost::regex_constants::egrep; 61 | using boost::match_default; 62 | using 
boost::format_all; 63 | 64 | 65 | int main(int argc, char **argv) 66 | { 67 | optional cu_name; 68 | optional cu_comp_dir; 69 | if (argc <= 1) 70 | { 71 | cerr << "Please name an input file." << endl; 72 | exit(1); 73 | } 74 | if (argc > 2 && strlen(argv[2]) > 0) 75 | { 76 | cu_name = argv[2]; 77 | } 78 | if (argc > 3 && strlen(argv[3]) > 0) 79 | { 80 | cu_comp_dir = argv[3]; 81 | } 82 | std::ifstream infstream(argv[1]); 83 | assert(infstream); 84 | using core::root_die; 85 | root_die r(fileno(infstream)); 86 | 87 | /* We read things like __uniqtype__signed_char, __uniqtype____PTR_signed_char 88 | * 89 | * and rewrite them in language-independent form. 90 | * 91 | * Expanding all possible names of complex types (like function pointers with 92 | * many args) becomes intractable. So instead, we focus on the fragments: 93 | * 94 | * for each base type in the DWARF, print out a pair 95 | * 96 | * 97 | * and use this to grep for symbol names containing the C name. 98 | */ 99 | 100 | auto cu_seq = r.begin().children_here().subseq_of(); 101 | for (auto i_cu = cu_seq.first; i_cu != cu_seq.second; ++i_cu) 102 | { 103 | // if we were passed a cu_name or comp_dir and they don't match, skip it 104 | if (cu_name && *i_cu->get_name() != *cu_name) continue; 105 | if (cu_comp_dir && i_cu->get_comp_dir() && *i_cu->get_comp_dir() != *cu_comp_dir) continue; 106 | 107 | auto i_next_cu = i_cu; ++i_next_cu; 108 | 109 | for (iterator_df<> i = i_cu; i != i_next_cu; ++i) 110 | { 111 | if (i.is_a() && i.name_here()) 112 | { 113 | const char **equiv = abstract_c_compiler::get_equivalence_class_ptr(i.name_here()->c_str()); 114 | 115 | if (equiv) 116 | { 117 | cout << allocs::tool::mangle_string(equiv[0]) 118 | << "\t" 119 | << allocs::tool::mangle_string(i.as_a()->get_canonical_name()) 120 | << endl; 121 | } 122 | 123 | } 124 | } 125 | } 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /tools/lang/c/trapptrwrites/.merlin: 
# Builds the small test programs with cilly, loading the trapptrwrites plugin.
CIL_INSTALL ?= $(LIBALLOCS)/contrib/cil/lib
CC := $(CIL_INSTALL)/../bin/cilly
CFLAGS := --load=../trapptrwrites.cmxs --dotrap-ptr-writes --save-temps -g
TESTS := writer arrays closure

build: $(TESTS)

# Everything is rebuilt when the plugin itself changes.
%.o: %.c ../trapptrwrites.cmxs
	$(CC) $(CFLAGS) -c $< -o $@

# Every test is linked against log_ptr_writes.o.
%: %.o log_ptr_writes.o ../trapptrwrites.cmxs
	$(CC) $(CFLAGS) $< log_ptr_writes.o -o $@

# Disable default rules (why do they exist ?!)
%: %.c
%: %.o
%.o: %.c

# -f: do not fail when some (or all) of the outputs were never built.
clean:
	rm -f $(TESTS) *.o *.i *.cil.c

.PHONY: build clean
/* Log one pointer store to stderr: the value `val` written to location `dest`. */
void __notify_ptr_write(const void **dest, const void *val)
{
	/* %p expects a pointer to void, so the pointer-to-pointer is converted */
	fprintf(stderr, "Write %p to %p\n", val, (const void *) dest);
}

/* Log one block copy to stderr: `count` bytes from `src` to `dest`. */
void __notify_copy(void *dest, const void *src, size_t count)
{
	/* size_t must be printed with %zu; %d reads an int and is undefined here */
	fprintf(stderr, "Copy %zu bytes at %p to %p\n", count, src, dest);
}
import os, sys
# HACK: make the shared wrapper module (three directories up from this file)
# importable without installing it.
sys.path.append(os.path.realpath(os.path.dirname(__file__) + "/../../.."))
from allocscompilerwrapper import *

class AllocsFC(AllocsCompilerWrapper):
    """Fortran compiler wrapper for liballocs, driving ``gfortran``."""

    # FIXME: also new, delete et al
    # NOTE(review): that FIXME looks copied from the C++ wrapper; presumably
    # the Fortran equivalents are ALLOCATE/DEALLOCATE -- confirm.
    def defaultL1AllocFns(self):
        """Return the signature strings of the default allocation functions."""
        return ["malloc(Z)p", "calloc(zZ)p", "realloc(pZ)p", "memalign(zZ)p"]

    def defaultFreeFns(self):
        """Return the signature strings of the default deallocation functions."""
        return ["free(P)"]

    def makeObjectFileName(self, sourceFile):
        """Return the object file name to use for ``sourceFile``.

        A recognised Fortran extension (.f90, .f, .f77) is replaced by ".o";
        for any other name ".o" is appended to the whole file name.
        """
        nameStem, nameExtension = os.path.splitext(sourceFile)
        if nameExtension in (".f90", ".f", ".f77"):
            outputFilename = nameStem + ".o"
            self.debugMsg("Making a secret output file (from Fortran source) " + outputFilename + "\n")
        else:
            outputFilename = sourceFile + ".o"
            self.debugMsg("Making a secret output file (from unknown source) " + outputFilename + "\n")
        return outputFilename

    def getUnderlyingCompilerCommand(self, fnames):
        """Return the argv prefix of the real compiler; ``fnames`` is unused."""
        return ["gfortran"]

if __name__ == '__main__':
    wrapper = AllocsFC()
    ret = wrapper.main()
    # sys.exit, not the interactive helper exit(), which the site module
    # installs and which is absent under "python -S".
    sys.exit(ret)
# Not necessary for bash, but necessary for sh
# Print the decimal value of the hexadecimal number "$1", which may or may not
# carry a 0x/0X prefix. An empty argument prints 0.
hex_to_dec () {
	# Decide on the prefix itself. A test on "first character is not 0 and
	# second is not x" misses un-prefixed values such as "0a" and any
	# one-digit value.
	case "$1" in
		('')
			printf "%d" 0
		;;
		(0[xX]*)
			printf "%d" "$1"
		;;
		(*)
			printf "%d" "0x$1"
		;;
	esac
}
# Report a failed step and terminate the script.
# $1: the symbol that was being processed
# $2: the exit status of the failed subcommand, which becomes our own
bail () {
	local sym="$1"
	local status="$2"
	local v
	# Show the object files involved. These are plain shell variables, never
	# exported, so they have to be printed directly: printenv lists only the
	# exported environment and would show none of them.
	for v in obj curobj newobj undobj refobj; do
		echo "${v}=${!v}" 1>&2
	done
	echo "processing $sym, subcommand returned $status" 1>&2
	exit "$status"
}
39 | objdump -t "$refobj" 1>&2 40 | curobj="$refobj" 41 | done 42 | 43 | mv "$curobj" "$obj" 44 | rm -rf "$ourdir" 45 | -------------------------------------------------------------------------------- /tools/objcopy-unbind-syms.sh: -------------------------------------------------------------------------------- 1 | objcopy-unbind-syms-naive.sh -------------------------------------------------------------------------------- /tools/objdeps: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | INPUTFILE="$1" 4 | 5 | test -n "$INPUTFILE" && test -x "$INPUTFILE" || ( \ 6 | echo "You must specify an executable file as input." 1>&2; false) || exit 1 7 | 8 | ( file "$INPUTFILE" | grep 'ELF.*exec.*dynamic' >/dev/null ) || \ 9 | ( echo "Not a dynamically linked executable." 1>&2; exit 0) 10 | 11 | source "$(dirname $0)"/ldd-funcs.sh 12 | 13 | obj_load_addrs "$1" 14 | -------------------------------------------------------------------------------- /tools/objdumpmemacc: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Build a list of allocation sites using the power of objdump. 4 | line_regexp='([0-9a-f]+)[[:blank:]]*<([^-\+]+)([-\+](0x[0-9a-f]+))?>.*' 5 | # Treat callq of monalloca labels as a possible alloc site, as that's what 6 | # our alloca instrumentation inserts. 7 | meta_instr_regexp="${line_regexp}.*(([a-z0-9]+).*(.*[\*\(]))" 8 | # HACK: if liballocs_alloc_fns is unset, use "malloc" a second time 9 | # (leaving this out will make the regexp end "|)" so match all calls) 10 | 11 | echo "meta_instr_regexp is: $meta_instr_regexp" 1>&2 12 | 13 | . 
"$(dirname "$0")"/objdumpmeta 14 | 15 | format_output () { 16 | while read next_instr_sym next_instr_addr next_instr_offset address filename line_number line_number_end token source_oneline; do 17 | case "$outputstyle" in 18 | (tab) 19 | echo "${next_instr_sym}"$'\t'"${address}"$'\t'"${filename}"$'\t'"${line_number}"$'\t'"${line_number_end}" 20 | ;; 21 | (punc) 22 | echo "<${next_instr_sym}+${next_instr_offset}-1instr> @${filename}:${line_number}"$'\t'"${token}" 23 | ;; 24 | (*) 25 | echo "<${next_instr_sym}+${next_instr_offset}-1instr> @${filename}:${line_number}"$'\t'"${token}" 26 | ;; 27 | esac 28 | done 29 | } 30 | 31 | output_lines="$( generate_output_lines )" 32 | 33 | echo "$output_lines" | format_output 34 | -------------------------------------------------------------------------------- /tools/objdumpmeta: -------------------------------------------------------------------------------- 1 | objdumpmeta.sh -------------------------------------------------------------------------------- /tools/pubsyms.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # We use this tool to dump the ABI of liballocs_preload.a 4 | # and it provides a symbol list as input to 'noopgen' for generating 5 | # the noop version of each public symbol. That gets linked into the 6 | # _preload.so. 7 | 8 | READELF=${READELF:-readelf} 9 | file=$1 10 | 11 | # First check whether we have a dynsym. 12 | grep '^[[:blank:]]*\[[[:blank:]0-9]\+\][[:blank:]]\+\.dynsym' >/dev/null 2>&1 <<<"$( "$READELF" -WS "$file" )" 13 | has_dynsym=$? 
14 | 15 | do_readelf () { 16 | if [[ $has_dynsym -eq 0 ]]; then 17 | ${READELF} -D "$@" 18 | else 19 | ${READELF} "$@" 20 | fi 21 | } 22 | # Here we dump symbols that are FUNC, GLOBAL and not HIDDEN 23 | do_readelf -Ws "$file" | sed 's/^[[:blank:]]*//' | tr -s '[:blank:]' '\t' | \ 24 | sed '/^[[:blank:]]*$/ d' | egrep '^File|^[0-9]+' | \ 25 | awk 'BEGIN { pat="^File:[[:blank:]]*"; file=""; } $0 ~ pat { file=gensub(pat, "", 1); next; } { print file "\t" $0; }' | \ 26 | grep 'FUNC[[:blank:]]GLOBAL[[:blank:]][^H]' | cut -f1,3,5,6,7,9 | sort -k6 27 | -------------------------------------------------------------------------------- /tools/strip-non-dynamic-relocs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ctr=0 4 | declare -a sects 5 | while read sect; do 6 | args[$ctr]="-R" 7 | ctr=$(( $ctr + 1 )) 8 | args[$ctr]="$sect" 9 | ctr=$(( $ctr + 1 )) 10 | done <<< "$( 11 | readelf -WS "$1" 2>/dev/null | grep '\[[^\]*\]' | grep -v '\[Nr\]' | sed 's/\[[^\]*\]//' | while read name type address off size es flg lk inf al; do 12 | case "${type}_${flg}" in 13 | (RELA_*A*|REL_*A*) # we don't want allocatable reloc sections 14 | ;; 15 | (RELA_*|REL_*) # we do want other reloc sections 16 | echo "$name" ;; 17 | (*) ;; # we don't want other sections 18 | esac 19 | done )" 20 | 21 | strip "${args[@]}" "$1" 22 | -------------------------------------------------------------------------------- /tools/to-globalize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | obj="$1" 4 | shift 5 | 6 | for sym in "$@"; do 7 | eregexp="${eregexp:+${eregexp}|}"${sym} 8 | done 9 | nm -fposix "$obj" | egrep "^(${eregexp}) t " | sed 's/[[:blank:]].*//' 10 | -------------------------------------------------------------------------------- /tools/used-types-funcs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | this_filename () { 4 
| # look through the defined functions 5 | ctr=0 6 | while true; do 7 | if [[ -z "${FUNCNAME[$ctr]}" ]]; then 8 | echo "Error: couldn't find this_filename" 1>&2 9 | exit 1 10 | fi 11 | if [[ "${FUNCNAME[$ctr]}" == "this_filename" ]]; then 12 | echo ${BASH_SOURCE[$ctr]} 13 | exit 0 14 | fi 15 | ctr=$(( $ctr + 1 )) 16 | done 17 | } 18 | 19 | LIBALLOCS="${LIBALLOCS:-$( dirname "$(this_filename)" )/..}" 20 | LIBALLOCSTOOL="${LIBALLOCSTOOL:-$( dirname "$(this_filename)" )/../contrib/liballocstool}" 21 | USEDTYPES=${USEDTYPES:-${LIBALLOCS}/tools/usedtypes} 22 | BASE_TYPES_TRANSLATION=${BASE_TYPES_TRANSLATION:-${LIBALLOCS}/tools/lang/c/bin/base-types-translation} 23 | CC=${CC:-$(which cc)} 24 | LD=${LD:-$(which ld)} 25 | OBJCOPY=${OBJCOPY:-$(which objcopy)} 26 | 27 | # HACK: Seems that clang cannot compile generated files, so let the user choose 28 | # another compiler for these with an environnement variable 29 | META_CC=${META_CC:-${CC}} 30 | 31 | compile () { 32 | src="$1" 33 | dest="$2" 34 | asm="$( mktemp --suffix=.s )" 35 | ${META_CC} -I"${LIBALLOCSTOOL}"/include -S -o "$asm" -x c "$src" && \ 36 | ${META_CC} -c -o "$dest" "$asm" && \ 37 | echo "Compiler generated $dest" 1>&2 38 | } 39 | 40 | link_defining_aliases () { 41 | our_objfile="$1" 42 | our_usedtypes_obj="$2" 43 | temporary_out=$( mktemp ) 44 | # NOTE: we used to add aliases here... 
45 | # `nm -fposix "${our_usedtypes_obj}" | $(dirname ${USEDTYPES})/alias-linker-opts-for-base-types.sh | sed -r 's/-Wl,--defsym,/--defsym /g'` 46 | # but this seems wrong (and, at least, will create "multiple definition" errors at link time) 47 | #cp "$our_objfile" "$our_objfile".orig.o 48 | #echo ${LD} -o "$temporary_out" -r "$our_objfile" "$our_usedtypes_obj" "$LIBALLOCS"/tools/libroottypes.a && \ 49 | ${LD} -o "$temporary_out" -r "$our_objfile" "$our_usedtypes_obj" "$LIBALLOCS"/tools/libroottypes.a && \ 50 | echo "Linker generated ${temporary_out}, moving to ${our_objfile}" 1>&2 && \ 51 | mv "$temporary_out" "$our_objfile" 52 | } 53 | 54 | symbol_redefinitions () { 55 | f="$1" 56 | # Here we are renaming codeless symnames with codeful ones, for the codeful 57 | # ones that are defined in our temporary (usedtypes) object file. 58 | # PROBLEM: with bitfields, we can get multiple entries with the same trailing name 59 | # but different codes. We should *either* avoid defining any alias in those cases, 60 | # *or* prevent usedtypes from generating the "__uniqtype_01234567_int" alias. 61 | # We take the former approach here. The 21- and 20-character options to sort and 62 | # uniq refer to the length of the prefix "__uniqtype_........_". 
63 | nm -fposix --defined-only "$f" | tr -s '[:blank:]' '\t' | cut -f1 | \ 64 | egrep '__uniqtype_([0-9a-f]{8})_' | grep -v '_subobj_names$' | \ 65 | sort -k1.21 | uniq -s20 -c | while read count sym; do \ 66 | case "$count" in (1) echo "$sym";; (*);; esac; done | \ 67 | sed -r 's/__uniqtype_([0-9a-f]{8})_(.*)/--redefine-sym __uniqtype__\2=__uniqtype_\1_\2/' 68 | } 69 | 70 | objcopy_and_redefine_codeless_names () { 71 | our_objfile="$1" 72 | our_usedtypes_obj="$2" 73 | 74 | # now, fill in the codeful names for codeless ones 75 | second_redefinition_args="$( symbol_redefinitions "$our_usedtypes_obj" )" && \ 76 | echo ${OBJCOPY} $second_redefinition_args "$our_objfile" 1>&2 && \ 77 | ${OBJCOPY} $second_redefinition_args "$our_objfile" && \ 78 | echo "objcopy renamed symbols in $our_objfile according to $second_redefinition_args" 1>&2 79 | } 80 | -------------------------------------------------------------------------------- /tools/usedtypes.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "uniqtypes.hpp" 5 | 6 | using std::cout; 7 | using std::cerr; 8 | using std::vector; 9 | using std::string; 10 | using namespace allocs::tool; 11 | 12 | static int debug_out = 1; 13 | 14 | int main(int argc, char **argv) 15 | { 16 | unsigned nfiles = argc - 1; 17 | if (nfiles < 1) 18 | { 19 | cerr << "Please name an input file." << endl; 20 | exit(1); 21 | } 22 | 23 | vector fnames; 24 | for (unsigned i = 0; i < nfiles; ++i) 25 | { 26 | string fname = argv[1+i]; 27 | fnames.push_back(fname); 28 | } 29 | 30 | cout << "#include \"uniqtype-defs.h\"\n\n"; 31 | return dump_usedtypes(fnames, cout, cerr); 32 | } 33 | --------------------------------------------------------------------------------