├── .gitignore ├── COPYING ├── Defines.mk ├── Makefile ├── README.md ├── bin └── .gitignore ├── docs ├── .gitignore ├── Makefile ├── _config.yml ├── btrfs-kernel.md ├── btrfs-other.md ├── config.md ├── event-counters.md ├── gotchas.md ├── how-it-works.md ├── index.md ├── install.md ├── missing.md ├── options.md ├── running.md └── wrong.md ├── include └── crucible │ ├── backtrace.h │ ├── bencode.h │ ├── btrfs-tree.h │ ├── btrfs.h │ ├── bytevector.h │ ├── cache.h │ ├── chatter.h │ ├── city.h │ ├── cleanup.h │ ├── crc64.h │ ├── endian.h │ ├── error.h │ ├── extentwalker.h │ ├── fd.h │ ├── fs.h │ ├── hexdump.h │ ├── limits.h │ ├── lockset.h │ ├── multilock.h │ ├── namedptr.h │ ├── ntoa.h │ ├── openat2.h │ ├── path.h │ ├── pool.h │ ├── process.h │ ├── progress.h │ ├── resource.h │ ├── seeker.h │ ├── string.h │ ├── table.h │ ├── task.h │ ├── time.h │ ├── uname.h │ └── version.h ├── lib ├── .gitignore ├── Makefile ├── btrfs-tree.cc ├── bytevector.cc ├── chatter.cc ├── city.cc ├── cleanup.cc ├── configure.h.in ├── crc64.cc ├── error.cc ├── extentwalker.cc ├── fd.cc ├── fs.cc ├── multilock.cc ├── ntoa.cc ├── openat2.cc ├── path.cc ├── process.cc ├── string.cc ├── table.cc ├── task.cc ├── time.cc └── uname.cc ├── makeflags ├── scripts ├── beesd.conf.sample ├── beesd.in └── beesd@.service.in ├── src ├── .gitignore ├── Makefile ├── bees-context.cc ├── bees-hash.cc ├── bees-resolve.cc ├── bees-roots.cc ├── bees-thread.cc ├── bees-trace.cc ├── bees-types.cc ├── bees-usage.txt ├── bees.cc └── bees.h └── test ├── .gitignore ├── Makefile ├── chatter.cc ├── crc64.cc ├── fd.cc ├── limits.cc ├── namedptr.cc ├── path.cc ├── process.cc ├── progress.cc ├── seeker.cc ├── table.cc ├── task.cc ├── tests.h └── tmp └── .gitignore /.gitignore: -------------------------------------------------------------------------------- 1 | *.[ao] 2 | *.bak 3 | *.dep 4 | *.new 5 | *.tmp 6 | *.so* 7 | Doxyfile 8 | README.html 9 | depends.mk 10 | doxygen_* 11 | html/ 12 | latex/ 13 | make.log 14 | make.log.new 
15 | localconf 16 | lib/configure.h 17 | scripts/beesd 18 | scripts/beesd@.service 19 | -------------------------------------------------------------------------------- /Defines.mk: -------------------------------------------------------------------------------- 1 | MAKE += PREFIX=$(PREFIX) LIBEXEC_PREFIX=$(LIBEXEC_PREFIX) ETC_PREFIX=$(ETC_PREFIX) 2 | 3 | define TEMPLATE_COMPILER = 4 | sed $< >$@ \ 5 | -e's#@DESTDIR@#$(DESTDIR)#' \ 6 | -e's#@PREFIX@#$(PREFIX)#' \ 7 | -e's#@ETC_PREFIX@#$(ETC_PREFIX)#' \ 8 | -e's#@LIBEXEC_PREFIX@#$(LIBEXEC_PREFIX)#' 9 | endef 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PREFIX ?= /usr 2 | ETC_PREFIX ?= /etc 3 | LIBDIR ?= lib 4 | 5 | LIB_PREFIX ?= $(PREFIX)/$(LIBDIR) 6 | LIBEXEC_PREFIX ?= $(LIB_PREFIX)/bees 7 | 8 | SYSTEMD_SYSTEM_UNIT_DIR ?= $(shell pkg-config systemd --variable=systemdsystemunitdir) 9 | 10 | BEES_VERSION ?= $(shell git describe --always --dirty || echo UNKNOWN) 11 | 12 | # allow local configuration to override above variables 13 | -include localconf 14 | 15 | DEFAULT_MAKE_TARGET ?= reallyall 16 | 17 | ifeq ($(DEFAULT_MAKE_TARGET),reallyall) 18 | RUN_INSTALL_TESTS = test 19 | endif 20 | 21 | include Defines.mk 22 | 23 | default: $(DEFAULT_MAKE_TARGET) 24 | 25 | all: lib src scripts 26 | reallyall: all doc test 27 | 28 | clean: ## Cleanup 29 | git clean -dfx -e localconf 30 | 31 | .PHONY: lib src test doc 32 | 33 | lib: ## Build libs 34 | +$(MAKE) TAG="$(BEES_VERSION)" -C lib 35 | 36 | src: ## Build bins 37 | src: lib 38 | +$(MAKE) BEES_VERSION="$(BEES_VERSION)" -C src 39 | 40 | test: ## Run tests 41 | test: lib src 42 | +$(MAKE) -C test 43 | 44 | doc: ## Build docs 45 | +$(MAKE) -C docs 46 | 47 | scripts/%: scripts/%.in 48 | $(TEMPLATE_COMPILER) 49 | 50 | scripts: scripts/beesd scripts/beesd@.service 51 | 52 | install_bees: ## Install bees + libs 53 | install_bees: src 
$(RUN_INSTALL_TESTS) 54 | install -Dm755 bin/bees $(DESTDIR)$(LIBEXEC_PREFIX)/bees 55 | 56 | install_scripts: ## Install scripts 57 | install_scripts: scripts 58 | install -Dm755 scripts/beesd $(DESTDIR)$(PREFIX)/sbin/beesd 59 | install -Dm644 scripts/beesd.conf.sample $(DESTDIR)$(ETC_PREFIX)/bees/beesd.conf.sample 60 | ifneq ($(SYSTEMD_SYSTEM_UNIT_DIR),) 61 | install -Dm644 scripts/beesd@.service $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/beesd@.service 62 | endif 63 | 64 | install: ## Install distribution 65 | install: install_bees install_scripts 66 | 67 | help: ## Show help 68 | @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/\t/' 69 | 70 | bees: reallyall 71 | fly: install 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BEES 2 | ==== 3 | 4 | Best-Effort Extent-Same, a btrfs deduplication agent. 5 | 6 | About bees 7 | ---------- 8 | 9 | bees is a block-oriented userspace deduplication agent designed to scale 10 | up to large btrfs filesystems. It is an offline dedupe combined with 11 | an incremental data scan capability to minimize time data spends on disk 12 | from write to dedupe. 
13 | 14 | Strengths 15 | --------- 16 | 17 | * Space-efficient hash table - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB) 18 | * Daemon mode - incrementally dedupes new data as it appears 19 | * Largest extents first - recover more free space during fixed maintenance windows 20 | * Works with btrfs compression - dedupe any combination of compressed and uncompressed files 21 | * Whole-filesystem dedupe - scans data only once, even with snapshots and reflinks 22 | * Persistent hash table for rapid restart after shutdown 23 | * Constant hash table size - no increased RAM usage if data set becomes larger 24 | * Works on live data - no scheduled downtime required 25 | * Automatic self-throttling - reduces system load 26 | * btrfs support - recovers more free space from btrfs than naive dedupers 27 | 28 | Weaknesses 29 | ---------- 30 | 31 | * Whole-filesystem dedupe - has no include/exclude filters, does not accept file lists 32 | * Requires root privilege (`CAP_SYS_ADMIN` plus the usual filesystem read/modify caps) 33 | * [First run may increase metadata space usage if many snapshots exist](docs/gotchas.md) 34 | * Constant hash table size - no decreased RAM usage if data set becomes smaller 35 | * btrfs only 36 | 37 | Installation and Usage 38 | ---------------------- 39 | 40 | * [Installation](docs/install.md) 41 | * [Configuration](docs/config.md) 42 | * [Running](docs/running.md) 43 | * [Command Line Options](docs/options.md) 44 | 45 | Recommended Reading 46 | ------------------- 47 | 48 | * [bees Gotchas](docs/gotchas.md) 49 | * [btrfs kernel bugs](docs/btrfs-kernel.md) - especially DATA CORRUPTION WARNING for old kernels 50 | * [bees vs. 
other btrfs features](docs/btrfs-other.md) 51 | * [What to do when something goes wrong](docs/wrong.md) 52 | 53 | More Information 54 | ---------------- 55 | 56 | * [How bees works](docs/how-it-works.md) 57 | * [Missing bees features](docs/missing.md) 58 | * [Event counter descriptions](docs/event-counters.md) 59 | 60 | Bug Reports and Contributions 61 | ----------------------------- 62 | 63 | Email bug reports and patches to Zygo Blaxell . 64 | 65 | You can also use Github: 66 | 67 | https://github.com/Zygo/bees 68 | 69 | Copyright & License 70 | ------------------- 71 | 72 | Copyright 2015-2025 Zygo Blaxell . 73 | 74 | GPL (version 3 or later). 75 | -------------------------------------------------------------------------------- /bin/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | MARKDOWN := $(firstword $(shell command -v cmark-gfm redcarpet markdown2 markdown markdown_py 2>/dev/null || echo markdown)) 2 | 3 | # If you have cmark-gfm, you get Github-style tables; otherwise, you don't. 
4 | ifeq ($(notdir $(MARKDOWN)),cmark-gfm) 5 | MARKDOWN += -e table 6 | endif 7 | 8 | .PHONY: docs 9 | 10 | docs: $(subst .md,.html,$(wildcard *.md)) index.html ../README.html 11 | 12 | %.html: %.md Makefile 13 | $(MARKDOWN) $< | sed -e 's/\.md/\.html/g' > $@.new 14 | mv -f $@.new $@ 15 | 16 | index.md: ../README.md 17 | sed -e 's:docs/::g' < ../README.md > index.md.new 18 | mv -f index.md.new index.md 19 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman 2 | -------------------------------------------------------------------------------- /docs/btrfs-other.md: -------------------------------------------------------------------------------- 1 | Good Btrfs Feature Interactions 2 | ------------------------------- 3 | 4 | bees has been tested in combination with the following: 5 | 6 | * btrfs compression (zlib, lzo, zstd) 7 | * PREALLOC extents (unconditionally replaced with holes) 8 | * HOLE extents and btrfs no-holes feature 9 | * Other deduplicators (`duperemove`, `jdupes`) 10 | * Reflink copies (modern coreutils `cp` and `mv`) 11 | * Concurrent file modification (e.g. 
PostgreSQL and sqlite databases, VMs, build daemons) 12 | * All btrfs RAID profiles: single, dup, raid0, raid1, raid10, raid1c3, raid1c4, raid5, raid6 13 | * IO errors during dedupe (affected extents are skipped) 14 | * 4K filesystem data block size / clone alignment 15 | * 64-bit and 32-bit LE host CPUs (amd64, x86, arm) 16 | * Large files (kernel 5.4 or later strongly recommended) 17 | * Filesystem data sizes up to 100T+ bytes, 1000M+ files 18 | * `open(O_DIRECT)` (seems to work as well--or as poorly--with bees as with any other btrfs feature) 19 | * btrfs-convert from ext2/3/4 20 | * btrfs `autodefrag` mount option 21 | * btrfs balance (data balances cause rescan of relocated data) 22 | * btrfs block-group-tree 23 | * btrfs `flushoncommit` and `noflushoncommit` mount options 24 | * btrfs mixed block groups 25 | * btrfs `nodatacow`/`nodatasum` inode attribute or mount option (bees skips all nodatasum files) 26 | * btrfs qgroups and quota support (_not_ squotas) 27 | * btrfs receive 28 | * btrfs scrub 29 | * btrfs send (dedupe pauses automatically, kernel 5.4 or later required) 30 | * btrfs snapshot, non-snapshot subvols (RW and RO), snapshot delete 31 | 32 | **Note:** some btrfs features have minimum kernel versions which are 33 | higher than the minimum kernel version for bees. 34 | 35 | Untested Btrfs Feature Interactions 36 | ----------------------------------- 37 | 38 | bees has not been tested with the following, and undesirable interactions may occur: 39 | 40 | * Non-4K filesystem data block size (should work if recompiled) 41 | * Non-equal hash (SUM) and filesystem data block (CLONE) sizes (need to fix that eventually) 42 | * btrfs seed filesystems, raid-stripe-tree, squotas (no particular reason these wouldn't work, but no one has reported trying) 43 | * btrfs out-of-tree kernel patches (e.g. encryption, extent tree v2) 44 | * Host CPUs with exotic page sizes, alignment requirements, or endianness (ppc, alpha, sparc, strongarm, s390, mips, m68k...) 
45 | -------------------------------------------------------------------------------- /docs/how-it-works.md: -------------------------------------------------------------------------------- 1 | How bees Works 2 | -------------- 3 | 4 | bees is a daemon designed to run continuously and maintain its state 5 | across crashes and reboots. 6 | 7 | bees uses checkpoints for persistence to eliminate the IO overhead of a 8 | transactional data store. On restart, bees will dedupe any data that 9 | was added to the filesystem since the last checkpoint. Checkpoints 10 | occur every 15 minutes for scan progress, stored in `beescrawl.dat`. 11 | The hash table trickle-writes to disk at 128KiB/s to `beeshash.dat`, 12 | but will flush immediately if bees is terminated by SIGTERM. 13 | 14 | There are no special requirements for bees hash table storage--`.beeshome` 15 | could be stored on a different btrfs filesystem, ext4, or even CIFS (but 16 | not MS-DOS--beeshome does need filenames longer than 8.3). 17 | 18 | bees uses a persistent dedupe hash table with a fixed size configured 19 | by the user. Any size of hash table can be dedicated to dedupe. If a 20 | fast dedupe with low hit rate is desired, bees can use a hash table as 21 | small as 128KB. 22 | 23 | The bees hash table is loaded into RAM at startup and `mlock`ed so it 24 | will not be swapped out by the kernel (if swap is permitted, performance 25 | degrades to nearly zero, for both bees and the swap device). 26 | 27 | bees scans the filesystem in a single pass which removes duplicate 28 | extents immediately after they are detected. There are no distinct 29 | scanning and dedupe phases, so bees can start recovering free space 30 | immediately after startup. 31 | 32 | Once a filesystem scan has been completed, bees uses the `min_transid` 33 | parameter of the `TREE_SEARCH_V2` ioctl to avoid rescanning old data 34 | on future scans and quickly scan new data. 
An incremental data scan 35 | can complete in less than a millisecond on an idle filesystem. 36 | 37 | Once a duplicate data block is identified, bees examines the nearby 38 | blocks in the files where the matched block appears. This allows bees 39 | to find long runs of adjacent duplicate block pairs if it has an entry 40 | for any one of the blocks in its hash table. On typical data sets, 41 | this means most of the blocks in the hash table are redundant and can 42 | be discarded without significant impact on dedupe hit rate. 43 | 44 | Hash table entries are grouped together into LRU lists. As each block 45 | is scanned, its hash table entry is inserted into the LRU list at a 46 | random position. If the LRU list is full, the entry at the end of the 47 | list is deleted. If a hash table entry is used to discover duplicate 48 | blocks, the entry is moved to the beginning of the list. This makes bees 49 | unable to detect a small number of duplicates, but it dramatically 50 | improves efficiency on filesystems with many small files. 51 | 52 | Once the hash table fills up, old entries are evicted by new entries. 53 | This means that the optimum hash table size is determined by the 54 | distance between duplicate blocks on the filesystem rather than the 55 | filesystem unique data size. Even if the hash table is too small 56 | to find all duplicates, it may still find _most_ of them, especially 57 | during incremental scans where the data in many workloads tends to be 58 | more similar. 59 | 60 | When a duplicate block pair is found in two btrfs extents, bees will 61 | attempt to match all other blocks in the newer extent with blocks in 62 | the older extent (i.e. the goal is to keep the extent referenced in the 63 | hash table and remove the most recently scanned extent). If this is 64 | possible, then the new extent will be replaced with a reference to the 65 | old extent. 
If this is not possible, then bees will create a temporary 66 | copy of the unmatched data in the new extent so that the entire new 67 | extent can be removed by deduplication. This must be done because btrfs 68 | cannot partially overwrite extents--the _entire_ extent must be replaced. 69 | The temporary copy is then scanned during the next pass bees makes over 70 | the filesystem for potential duplication of other extents. 71 | 72 | When a block containing all-zero bytes is found, bees dedupes the extent 73 | against a temporary file containing a hole, possibly creating temporary 74 | copies of any non-zero data in the extent for later deduplication as 75 | described above. If the extent is compressed, bees avoids splitting 76 | the extent in the middle as this generally has a negative impact on 77 | compression ratio (and also triggers a [kernel bug](btrfs-kernel.md)). 78 | 79 | bees does not store any information about filesystem structure, so 80 | its performance is linear in the number or size of files. The hash 81 | table stores physical block numbers which are converted into paths 82 | and FDs on demand through btrfs `SEARCH_V2` and `LOGICAL_INO` ioctls. 83 | This eliminates the storage required to maintain the equivalents 84 | of these functions in userspace, at the expense of encountering [some 85 | kernel bugs in `LOGICAL_INO` performance](btrfs-kernel.md). 86 | 87 | bees uses only the data-safe `FILE_EXTENT_SAME` (aka `FIDEDUPERANGE`) 88 | kernel ioctl to manipulate user data, so it can dedupe live data 89 | (e.g. build servers, sqlite databases, VM disk images). bees does not 90 | modify file attributes or timestamps in deduplicated files. 91 | 92 | When bees has scanned all of the data, bees will pause until a new 93 | transaction has completed in the btrfs filesystem. bees tracks 94 | the current btrfs transaction ID over time so that it polls less often 95 | on quiescent filesystems and more often on busy filesystems. 
96 | 97 | Scanning and deduplication work is performed by worker threads. If the 98 | [`--loadavg-target` option](options.md) is used, bees adjusts the number 99 | of worker threads up or down as required to have a user-specified load 100 | impact on the system. The maximum and minimum number of threads is 101 | configurable. If the system load is too high then bees will stop until 102 | the load falls to acceptable levels. 103 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | BEES 2 | ==== 3 | 4 | Best-Effort Extent-Same, a btrfs deduplication agent. 5 | 6 | About bees 7 | ---------- 8 | 9 | bees is a block-oriented userspace deduplication agent designed to scale 10 | up to large btrfs filesystems. It is an offline dedupe combined with 11 | an incremental data scan capability to minimize time data spends on disk 12 | from write to dedupe. 13 | 14 | Strengths 15 | --------- 16 | 17 | * Space-efficient hash table - can use as little as 1 GB hash table per 10 TB unique data (0.1GB/TB) 18 | * Daemon mode - incrementally dedupes new data as it appears 19 | * Largest extents first - recover more free space during fixed maintenance windows 20 | * Works with btrfs compression - dedupe any combination of compressed and uncompressed files 21 | * Whole-filesystem dedupe - scans data only once, even with snapshots and reflinks 22 | * Persistent hash table for rapid restart after shutdown 23 | * Constant hash table size - no increased RAM usage if data set becomes larger 24 | * Works on live data - no scheduled downtime required 25 | * Automatic self-throttling - reduces system load 26 | * btrfs support - recovers more free space from btrfs than naive dedupers 27 | 28 | Weaknesses 29 | ---------- 30 | 31 | * Whole-filesystem dedupe - has no include/exclude filters, does not accept file lists 32 | * Requires root privilege (`CAP_SYS_ADMIN` plus 
the usual filesystem read/modify caps) 33 | * [First run may increase metadata space usage if many snapshots exist](gotchas.md) 34 | * Constant hash table size - no decreased RAM usage if data set becomes smaller 35 | * btrfs only 36 | 37 | Installation and Usage 38 | ---------------------- 39 | 40 | * [Installation](install.md) 41 | * [Configuration](config.md) 42 | * [Running](running.md) 43 | * [Command Line Options](options.md) 44 | 45 | Recommended Reading 46 | ------------------- 47 | 48 | * [bees Gotchas](gotchas.md) 49 | * [btrfs kernel bugs](btrfs-kernel.md) - especially DATA CORRUPTION WARNING for old kernels 50 | * [bees vs. other btrfs features](btrfs-other.md) 51 | * [What to do when something goes wrong](wrong.md) 52 | 53 | More Information 54 | ---------------- 55 | 56 | * [How bees works](how-it-works.md) 57 | * [Missing bees features](missing.md) 58 | * [Event counter descriptions](event-counters.md) 59 | 60 | Bug Reports and Contributions 61 | ----------------------------- 62 | 63 | Email bug reports and patches to Zygo Blaxell . 64 | 65 | You can also use Github: 66 | 67 | https://github.com/Zygo/bees 68 | 69 | Copyright & License 70 | ------------------- 71 | 72 | Copyright 2015-2025 Zygo Blaxell . 73 | 74 | GPL (version 3 or later). 75 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | Building bees 2 | ============= 3 | 4 | Dependencies 5 | ------------ 6 | 7 | * C++11 compiler (tested with GCC 8.1.0, 12.2.0) 8 | 9 | Sorry. I really like closures and shared_ptr, so support 10 | for earlier compiler versions is unlikely. 11 | 12 | Note that the C++ standard--and GCC's implementation of it--is evolving. 13 | There may be problems when building with newer compiler versions. 14 | Build failure reports welcome! 15 | 16 | * btrfs-progs 17 | 18 | Needed at runtime by the service wrapper script. 
19 | 20 | * [Linux kernel version](btrfs-kernel.md) gets its own page. 21 | 22 | * markdown to build the documentation 23 | 24 | * util-linux version that provides `blkid` command for the helper 25 | script `scripts/beesd` to work 26 | 27 | Installation 28 | ============ 29 | 30 | bees can be installed by following one of these instructions: 31 | 32 | Arch package 33 | ------------ 34 | 35 | bees is available for Arch Linux in the community repository. Install with: 36 | 37 | `$ pacman -S bees` 38 | 39 | or build a live version from git master using AUR: 40 | 41 | `$ git clone https://aur.archlinux.org/bees-git.git && cd bees-git && makepkg -si` 42 | 43 | Gentoo package 44 | -------------- 45 | 46 | bees is officially available in Gentoo Portage. Just emerge a stable 47 | version: 48 | 49 | `$ emerge --ask bees` 50 | 51 | or build a live version from git master: 52 | 53 | `$ emerge --ask =bees-9999` 54 | 55 | You can opt out of building the support tools with 56 | 57 | `USE="-tools" emerge ...` 58 | 59 | If you want to start hacking on bees and contribute changes, just emerge 60 | the live version which automatically pulls in all required development 61 | packages. 62 | 63 | Build from source 64 | ----------------- 65 | 66 | Build with `make`. The build produces `bin/bees`, which must be copied 67 | to somewhere in `$PATH` on the target system. 68 | 69 | It will also generate `scripts/beesd@.service` for systemd users. This 70 | service makes use of a helper script `scripts/beesd` to boot the service. 71 | Both of the latter use the filesystem UUID to mount the root subvolume 72 | within a temporary runtime directory. 73 | 74 | ### Ubuntu 16.04 - 17.04: 75 | `$ apt -y install build-essential btrfs-tools markdown && make` 76 | 77 | ### Ubuntu 18.10: 78 | `$ apt -y install build-essential btrfs-progs markdown && make` 79 | 80 | Packaging 81 | --------- 82 | 83 | See 'Dependencies' above. 
Package maintainers can pick ideas for building and 84 | configuring the source package from the Gentoo ebuild: 85 | 86 | 87 | 88 | You can configure some build options by creating a file `localconf` and 89 | adjust settings for your distribution environment there. 90 | 91 | Please also review the Makefile for additional hints. 92 | -------------------------------------------------------------------------------- /docs/missing.md: -------------------------------------------------------------------------------- 1 | Features You Might Expect That bees Doesn't Have 2 | ------------------------------------------------ 3 | 4 | * There's no configuration file (patches welcome!). There are 5 | some tunables hardcoded in the source (`src/bees.h`) that could eventually 6 | become configuration options. There's also an incomplete option parser 7 | (patches welcome!). 8 | 9 | * The bees process doesn't fork and writes its log to stdout/stderr. 10 | A shell wrapper is required to make it behave more like a daemon. 11 | 12 | * There's no facility to exclude any part of a filesystem or focus on 13 | specific files (patches welcome). 14 | 15 | * PREALLOC extents and extents containing blocks filled with zeros will 16 | be replaced by holes. There is no way to turn this off. 17 | 18 | * The fundamental unit of deduplication is the extent _reference_, when 19 | it should be the _extent_ itself. This is an architectural limitation 20 | that results in excess reads of extent data, even in the Extent scan mode. 21 | 22 | * Block reads are currently more allocation- and CPU-intensive than they 23 | should be, especially for filesystems on SSD where the IO overhead is 24 | much smaller. This is a problem for CPU-power-constrained environments 25 | (e.g. laptops running from battery, or ARM devices with slow CPU). 26 | 27 | * bees can currently fragment extents when required to remove duplicate 28 | blocks, but has no defragmentation capability yet. 
When possible, bees 29 | will attempt to work with existing extent boundaries and choose the 30 | largest fragments available, but it will not aggregate blocks together 31 | from multiple extents to create larger ones. 32 | 33 | * When bees fragments an extent, the copied data is compressed. There 34 | is currently no way (other than by modifying the source) to select a 35 | compression method or not compress the data (patches welcome!). 36 | 37 | * It is theoretically possible to resize the hash table without starting 38 | over with a new full-filesystem scan; however, this feature has not been 39 | implemented yet. 40 | 41 | * btrfs maintains csums of data blocks which bees could use to improve 42 | scan speeds, but bees doesn't use them yet. 43 | -------------------------------------------------------------------------------- /docs/options.md: -------------------------------------------------------------------------------- 1 | # bees Command Line Options 2 | 3 | ## Load management options 4 | 5 | * `--thread-count COUNT` or `-c` 6 | 7 | Specify maximum number of worker threads. Overrides `--thread-factor` 8 | (`-C`), default/autodetected values, and the hardcoded thread limit. 9 | 10 | * `--thread-factor FACTOR` or `-C` 11 | 12 | Specify ratio of worker threads to detected CPU cores. Overridden by 13 | `--thread-count` (`-c`). 14 | 15 | Default is 1.0, i.e. 1 worker thread per detected CPU. Use values 16 | below 1.0 to leave some cores idle, or above 1.0 if there are more 17 | disks than CPUs in the filesystem. 18 | 19 | * `--loadavg-target LOADAVG` or `-g` 20 | 21 | Specify load average target for dynamic worker threads. Default is 22 | to run the maximum number of worker threads all the time. 23 | 24 | Worker threads will be started or stopped subject to the upper limit 25 | imposed by `--thread-factor`, `--thread-min` and `--thread-count` 26 | until the load average is within +/- 0.5 of `LOADAVG`. 
27 | 28 | * `--thread-min COUNT` or `-G` 29 | 30 | Specify minimum number of dynamic worker threads. This can be used 31 | to force a minimum number of threads to continue running while using 32 | `--loadavg-target` to manage load. 33 | 34 | Default is 0, i.e. all bees worker threads will stop when the system 35 | load exceeds the target. 36 | 37 | Has no effect unless `--loadavg-target` is used to specify a target load. 38 | 39 | * `--throttle-factor FACTOR` 40 | 41 | In order to avoid saturating btrfs deferred work queues, bees tracks 42 | the time that operations with delayed effect (dedupe and tmpfile copy) 43 | and operations with long run times (`LOGICAL_INO`) run. If an operation 44 | finishes before the average run time for that operation, bees will 45 | sleep for the remainder of the average run time, so that operations 46 | are submitted to btrfs at a rate similar to the rate that btrfs can 47 | complete them. 48 | 49 | The `FACTOR` is multiplied by the average run time for each operation 50 | to calculate the target delay time. 51 | 52 | `FACTOR` 0 is the default, which adds no delays. bees will attempt 53 | to saturate btrfs delayed work queues as quickly as possible, which 54 | may impact other processes on the same filesystem, or even slow down 55 | bees itself. 56 | 57 | `FACTOR` 1.0 will attempt to keep btrfs delayed work queues filled at 58 | a steady average rate. 59 | 60 | `FACTOR` more than 1.0 will add delays longer than the average 61 | run time (e.g. 10.0 will delay all operations that take less than 10x 62 | the average run time). High values of `FACTOR` may be desirable when 63 | using bees with other applications on the same filesystem. 64 | 65 | The maximum delay per operation is 60 seconds. 66 | 67 | ## Filesystem tree traversal options 68 | 69 | * `--scan-mode MODE` or `-m` 70 | 71 | Specify extent scanning algorithm. 72 | **EXPERIMENTAL** feature that may go away. 
73 | 74 | * Mode 0: lockstep 75 | * Mode 1: independent 76 | * Mode 2: sequential 77 | * Mode 3: recent 78 | * Mode 4: extent 79 | 80 | For details of the different scanning modes and the default value of 81 | this option, see [bees configuration](config.md). 82 | 83 | ## Workarounds 84 | 85 | * `--workaround-btrfs-send` or `-a` 86 | 87 | _This option is obsolete and should not be used any more._ 88 | 89 | Pretend that read-only snapshots are empty and silently discard any 90 | request to dedupe files referenced through them. This is a workaround 91 | for [problems with old kernels running `btrfs send` and `btrfs send 92 | -p`](btrfs-kernel.md) which make these btrfs features unusable with bees. 93 | 94 | This option was used to avoid breaking `btrfs send` on old kernels. 95 | The affected kernels are now too old to be recommended for use with bees. 96 | 97 | bees now waits for `btrfs send` to finish. There is no need for an 98 | option to enable this. 99 | 100 | **Note:** There is a _significant_ space tradeoff when using this option: 101 | it is likely no space will be recovered--and possibly significant extra 102 | space used--until the read-only snapshots are deleted. 103 | 104 | ## Logging options 105 | 106 | * `--timestamps` or `-t` 107 | 108 | Enable timestamps in log output. 109 | 110 | * `--no-timestamps` or `-T` 111 | 112 | Disable timestamps in log output. 113 | 114 | * `--absolute-paths` or `-p` 115 | 116 | Paths in log output will be absolute. 117 | 118 | * `--strip-paths` or `-P` 119 | 120 | Paths in log output will have the working directory at bees startup stripped. 121 | 122 | * `--verbose` or `-v` 123 | 124 | Set log verbosity (0 = no output, 8 = all output, default 8). 
125 | -------------------------------------------------------------------------------- /docs/running.md: -------------------------------------------------------------------------------- 1 | Running bees 2 | ============ 3 | 4 | Setup 5 | ----- 6 | 7 | If you don't want to use the helper script `scripts/beesd` to set up and 8 | configure bees, here's how to set up bees manually. 9 | 10 | Create a directory for bees state files: 11 | 12 | export BEESHOME=/some/path 13 | mkdir -p "$BEESHOME" 14 | 15 | Create an empty hash table ([your choice of size](config.md), but it 16 | must be a multiple of 128KB). This example creates a 1GB hash table: 17 | 18 | truncate -s 1g "$BEESHOME/beeshash.dat" 19 | chmod 700 "$BEESHOME/beeshash.dat" 20 | 21 | bees can _only_ process the root subvol of a btrfs with nothing mounted 22 | over top. If the bees argument is not the root subvol directory, bees 23 | will just throw an exception and stop. 24 | 25 | Use a separate mount point, and let only bees access it: 26 | 27 | UUID=3399e413-695a-4b0b-9384-1b0ef8f6c4cd 28 | mkdir -p /var/lib/bees/$UUID 29 | mount /dev/disk/by-uuid/$UUID /var/lib/bees/$UUID -osubvol=/ 30 | 31 | If you don't set BEESHOME, the path "`.beeshome`" will be used relative 32 | to the root subvol of the filesystem. For example: 33 | 34 | btrfs sub create /var/lib/bees/$UUID/.beeshome 35 | truncate -s 1g /var/lib/bees/$UUID/.beeshome/beeshash.dat 36 | chmod 700 /var/lib/bees/$UUID/.beeshome/beeshash.dat 37 | 38 | You can use any relative path in `BEESHOME`. 
The path will be taken 39 | relative to the root of the deduped filesystem (in other words it can 40 | be the name of a subvol): 41 | 42 | export BEESHOME=@my-beeshome 43 | btrfs sub create /var/lib/bees/$UUID/$BEESHOME 44 | truncate -s 1g /var/lib/bees/$UUID/$BEESHOME/beeshash.dat 45 | chmod 700 /var/lib/bees/$UUID/$BEESHOME/beeshash.dat 46 | 47 | Configuration 48 | ------------- 49 | 50 | There are some runtime configurable options using environment variables: 51 | 52 | * BEESHOME: Directory containing bees state files: 53 | * beeshash.dat | persistent hash table. Must be a multiple of 128KB, and must be created before bees starts. 54 | * beescrawl.dat | state of SEARCH_V2 crawlers. ASCII text. bees will create this. 55 | * beesstats.txt | statistics and performance counters. ASCII text. bees will create this. 56 | * BEESSTATUS: File containing a snapshot of current bees state: performance 57 | counters and current status of each thread. The file is meant to be 58 | human readable, but understanding it probably requires reading the source. 59 | You can watch bees run in realtime with a command like: 60 | 61 | watch -n1 cat $BEESSTATUS 62 | 63 | Other options (e.g. interval between filesystem crawls) can be configured 64 | in `src/bees.h` or [on the command line](options.md). 65 | 66 | Running 67 | ------- 68 | 69 | Reduce CPU and IO priority to be kinder to other applications sharing 70 | this host (or raise them for more aggressive disk space recovery). If you 71 | use cgroups, put `bees` in its own cgroup, then reduce the `blkio.weight` 72 | and `cpu.shares` parameters. You can also use `schedtool` and `ionice` 73 | in the shell script that launches `bees`: 74 | 75 | schedtool -D -n20 $$ 76 | ionice -c3 -p $$ 77 | 78 | You can also use the [load management options](options.md) to further 79 | control the impact of bees on the rest of the system. 
80 | 81 | Let the bees fly: 82 | 83 | for fs in /var/lib/bees/*-*-*-*-*/; do 84 | bees "$fs" >> "$fs/.beeshome/bees.log" 2>&1 & 85 | done 86 | 87 | You'll probably want to arrange for each log file written by the loop above 88 | (`$fs/.beeshome/bees.log`) to be rotated periodically. You may also want to set umask to 077 to prevent disclosure 89 | of information about the contents of the filesystem through the log file. 90 | 91 | There are also some shell wrappers in the `scripts/` directory. 92 | -------------------------------------------------------------------------------- /docs/wrong.md: -------------------------------------------------------------------------------- 1 | What to do when something goes wrong with bees 2 | ============================================== 3 | 4 | Hangs and excessive slowness 5 | ---------------------------- 6 | 7 | ### Use load-throttling options 8 | 9 | If bees is just more aggressive than you would like, consider using 10 | [load throttling options](options.md). These are usually more effective 11 | than `ionice`, `schedtool`, and the `blkio` cgroup (though you can 12 | certainly use those too) because they limit work that bees queues up 13 | for later execution inside btrfs. 14 | 15 | ### Check `$BEESSTATUS` 16 | 17 | If bees or the filesystem seems to be stuck, check the contents of 18 | `$BEESSTATUS`. bees describes what it is doing (and how long it has 19 | been trying to do it) through this file. 20 | 21 | Sample: 22 | 23 |
 24 | THREADS (work queue 68 tasks):
 25 | 	tid 20939: crawl_5986: dedup BeesRangePair: 512K src[0x9933f000..0x993bf000] dst[0x9933f000..0x993bf000]
 26 | src = 147 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data/home/builder/linux/.git/objects/pack/pack-09f06f8759ac7fd163df320b7f7671f06ac2a747.pack
 27 | dst = 15 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data.new/home/builder/linux/.git/objects/pack/pack-09f06f8759ac7fd163df320b7f7671f06ac2a747.pack
 28 | 	tid 20940: crawl_5986: dedup BeesRangePair: 512K src[0x992bf000..0x9933f000] dst[0x992bf000..0x9933f000]
 29 | src = 147 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data/home/builder/linux/.git/objects/pack/pack-09f06f8759ac7fd163df320b7f7671f06ac2a747.pack
 30 | dst = 15 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data.new/home/builder/linux/.git/objects/pack/pack-09f06f8759ac7fd163df320b7f7671f06ac2a747.pack
 31 | 	tid 21177: crawl_5986: dedup BeesRangePair: 512K src[0x9923f000..0x992bf000] dst[0x9923f000..0x992bf000]
 32 | src = 147 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data/home/builder/linux/.git/objects/pack/pack-09f06f8759ac7fd163df320b7f7671f06ac2a747.pack
 33 | dst = 15 /run/bees/ede84fbd-cb59-0c60-9ea7-376fa4984887/data.new/home/builder/linux/.git/objects/pack/pack-09f06f8759ac7fd163df320b7f7671f06ac2a747.pack
 34 | 	tid 21677: bees: [68493.1s] main
 35 | 	tid 21689: crawl_transid: [236.508s] waiting 332.575s for next 10 transid RateEstimator { count = 87179, raw = 969.066 / 32229.2, ratio = 969.066 / 32465.7, rate = 0.0298489, duration(1) = 33.5021, seconds_for(1) = 1 }
 36 | 	tid 21690: status: writing status to file '/run/bees.status'
 37 | 	tid 21691: crawl_writeback: [203.456s] idle, dirty
 38 | 	tid 21692: hash_writeback: [12.466s] flush rate limited after extent #17 of 64 extents
 39 | 	tid 21693: hash_prefetch: [2896.61s] idle 3600s
 40 | 
41 | 42 | The time in square brackets indicates how long the thread has been 43 | executing the current task (if this time is below 5 seconds then it 44 | is omitted). We can see here that the main thread (and therefore the 45 | bees process as a whole) has been running for 68493.1 seconds, the 46 | last hash table write was 12.5 seconds ago, and the last transid poll 47 | was 236.5 seconds ago. Three worker threads are currently performing 48 | dedupe on extents. 49 | 50 | Thread names of note: 51 | 52 | * `bees`: main thread (doesn't do anything after startup, but its task execution time is that of the whole bees process) 53 | * `crawl_master`: task that finds new extents in the filesystem and populates the work queue 54 | * `crawl_transid`: btrfs transid (generation number) tracker and polling thread 55 | * `status`: the thread that writes the status reports to `$BEESSTATUS` 56 | * `crawl_writeback`: writes the scanner progress to `beescrawl.dat` 57 | * `hash_writeback`: trickle-writes the hash table back to `beeshash.dat` 58 | * `hash_prefetch`: prefetches the hash table at startup and updates `beesstats.txt` hourly 59 | 60 | Most other threads have names that are derived from the current dedupe 61 | task that they are executing: 62 | 63 | * `ref_205ad76b1000_24K_50`: extent scan performing dedupe of btrfs extent bytenr `205ad76b1000`, which is 24 KiB long and has 50 references 64 | * `extent_250_32M_16E`: extent scan searching for extents between 32 MiB + 1 and 16 EiB bytes long, tracking scan position in virtual subvol `250`. 65 | * `crawl_378_18916`: subvol scan searching for extent refs in subvol `378`, inode `18916`. 66 | 67 | ### Dump kernel stacks of hung processes 68 | 69 | Check the kernel stacks of all blocked kernel processes: 70 | 71 | ps xar | while read -r x y; do ps "$x"; head -50 --verbose /proc/"$x"/task/*/stack; done | tee lockup-stacks.txt 72 | 73 | Submit the above information in your bug report. 
74 | 75 | ### Check dmesg for btrfs stack dumps 76 | 77 | Sometimes these are relevant too. 78 | 79 | 80 | bees Crashes 81 | ------------ 82 | 83 | * If you have a core dump, run these commands in gdb and include 84 | the output in your report (you may need to post it as a compressed 85 | attachment, as it can be quite large): 86 | 87 | (gdb) set pagination off 88 | (gdb) info shared 89 | (gdb) bt 90 | (gdb) thread apply all bt 91 | (gdb) thread apply all bt full 92 | 93 | The last line generates megabytes of output and will often crash gdb. 94 | Submit whatever output gdb can produce. 95 | 96 | **Note that this output may include filenames or data from your 97 | filesystem.** 98 | 99 | * If you have `systemd-coredump` installed, you can use `coredumpctl`: 100 | 101 | (echo set pagination off; 102 | echo info shared; 103 | echo bt; 104 | echo thread apply all bt; 105 | echo thread apply all bt full) | coredumpctl gdb bees 106 | 107 | * If the crash happens often (or you don't want to use coredumpctl), 108 | you can automate the gdb data collection with this wrapper script: 109 | 110 |
111 | #!/bin/sh
112 | set -x  # trace every command on stderr so the log shows what the wrapper did
113 | # gdb-wrapper: run the command given in "$@" with core dumps enabled, then write a gdb report for each core file found in the current directory.
114 | # Move aside core files left by earlier runs (renamed with a timestamp) so the report loop below only sees cores from this run
115 | for x in core*; do
116 | 	if [ -e "$x" ]; then  # skip the literal pattern "core*" left behind by an unmatched glob
117 | 		mv -vf "$x" "old-$x.$(date +%Y-%m-%d-%H-%M-%S)"
118 | 	fi
119 | done
120 | 
121 | # Delete moved-aside core files that are more than a week old (backgrounded with '&', so the wrapper does not wait for the cleanup)
122 | find old-core* -type f -mtime +7 -exec rm -vf {} + &
123 | 
124 | # Turn on the cores (FIXME: may need to change other system parameters
125 | # that capture or redirect core files, e.g. kernel.core_pattern on Linux)
126 | ulimit -c unlimited
127 | 
128 | # Run the wrapped command with its arguments exactly as passed to this script
129 | "$@"
130 | rv="$?"  # save the command's exit status before later commands overwrite $?
131 | 
132 | # Don't clobber our core when gdb crashes: disable core dumps for the rest of the script
133 | ulimit -c 0
134 | 
135 | # If there were core files, generate reports for them (gdb output is appended to "<core file>.txt" and echoed by tee)
136 | for x in core*; do
137 | 	if [ -e "$x" ]; then
138 | 		gdb --core="$x" \
139 | 		--eval-command='set pagination off' \
140 | 		--eval-command='info shared' \
141 | 		--eval-command='bt' \
142 | 		--eval-command='thread apply all bt' \
143 | 		--eval-command='thread apply all bt full' \
144 | 		--eval-command='quit' \
145 | 		--args "$@" 2>&1 | tee -a "$x.txt"
146 | 	fi
147 | done
148 | 
149 | # Return the wrapped command's exit status (saved in rv above) to the caller
150 | exit "$rv"
151 | 
152 | 153 | To use the wrapper script, insert it just before the `bees` command, 154 | as in: 155 | 156 | gdb-wrapper bees /path/to/fs/ 157 | 158 | 159 | Kernel crashes, corruption, and filesystem damage 160 | ------------------------------------------------- 161 | 162 | bees doesn't do anything that _should_ cause corruption or data loss; 163 | however, [btrfs has kernel bugs](btrfs-kernel.md), so corruption is 164 | not impossible. 165 | 166 | Issues with the btrfs filesystem kernel code or other block device layers 167 | should be reported to their respective maintainers. 168 | -------------------------------------------------------------------------------- /include/crucible/backtrace.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_BACKTRACE_H 2 | #define CRUCIBLE_BACKTRACE_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | namespace crucible { 10 | using namespace std; 11 | 12 | class Backtrace { 13 | vector m_buffer; 14 | mutable vector m_result_stringvec; 15 | mutable char **m_result_cpp; 16 | int m_result_size; 17 | int m_desired_size; 18 | public: 19 | Backtrace(int size = 99); 20 | ~Backtrace(); 21 | const vector &strings() const; 22 | const vector &voids() const; 23 | void symbols_fd(int fd) const; 24 | bool overflowed() const; 25 | }; 26 | 27 | } 28 | 29 | #endif // CRUCIBLE_BACKTRACE_H 30 | -------------------------------------------------------------------------------- /include/crucible/bencode.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_BENCODE_H 2 | #define CRUCIBLE_BENCODE_H 3 | 4 | #include "crucible/error.h" 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace crucible { 16 | using namespace std; 17 | 18 | // So...much...forward declaration... 
19 | struct bencode_variant; 20 | typedef shared_ptr bencode_variant_ptr; 21 | 22 | struct bencode_variant { 23 | virtual ~bencode_variant(); 24 | virtual ostream& print(ostream &os, const string &parent = "") const = 0; 25 | virtual bencode_variant_ptr at(size_t i) const; 26 | virtual bencode_variant_ptr at(const string &s) const; 27 | virtual operator string() const; 28 | }; 29 | 30 | ostream& operator<<(ostream &os, const bencode_variant_ptr &p); 31 | 32 | // integer: i<decimal digits>e (e.g. i42e) 33 | struct bencode_int : public bencode_variant { 34 | ~bencode_int(); 35 | bencode_int(int64_t i); 36 | ostream & print(ostream &os, const string &parent = "") const override; 37 | private: 38 | int64_t m_i; 39 | }; 40 | 41 | // string: <length>:<contents> (e.g. 4:spam) 42 | struct bencode_string : public bencode_variant { 43 | ~bencode_string(); 44 | bencode_string(string s); 45 | ostream & print(ostream &os, const string &parent = "") const override; 46 | operator string() const override; 47 | private: 48 | string m_s; 49 | }; 50 | 51 | // list: l<contents>e (e.g. l4:spami42ee) 52 | struct bencode_list : public bencode_variant { 53 | ~bencode_list(); 54 | bencode_list(const vector &l); 55 | ostream & print(ostream &os, const string &parent = "") const override; 56 | using bencode_variant::at; 57 | bencode_variant_ptr at(size_t i) const override; 58 | private: 59 | vector m_l; 60 | }; 61 | 62 | // dictionary: d<contents>e (lexicographically sorted pairs of <key, value>, key is a string) 63 | struct bencode_dict : public bencode_variant { 64 | ~bencode_dict(); 65 | bencode_dict(const map &m); 66 | ostream& print(ostream &os, const string &parent = "") const override; 67 | using bencode_variant::at; 68 | bencode_variant_ptr at(const string &key) const override; 69 | private: 70 | map m_m; 71 | }; 72 | 73 | bencode_variant_ptr bencode_decode_stream(istream &is); 74 | }; 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /include/crucible/btrfs-tree.h: -------------------------------------------------------------------------------- 1 | #ifndef
CRUCIBLE_BTRFS_TREE_H 2 | #define CRUCIBLE_BTRFS_TREE_H 3 | 4 | #include "crucible/fd.h" 5 | #include "crucible/fs.h" 6 | #include "crucible/bytevector.h" 7 | 8 | namespace crucible { 9 | using namespace std; 10 | 11 | class BtrfsTreeItem { 12 | uint64_t m_objectid = 0; 13 | uint64_t m_offset = 0; 14 | uint64_t m_transid = 0; 15 | ByteVector m_data; 16 | uint8_t m_type = 0; 17 | public: 18 | uint64_t objectid() const { return m_objectid; } 19 | uint64_t offset() const { return m_offset; } 20 | uint64_t transid() const { return m_transid; } 21 | uint8_t type() const { return m_type; } 22 | const ByteVector data() const { return m_data; } 23 | BtrfsTreeItem() = default; 24 | BtrfsTreeItem(const BtrfsIoctlSearchHeader &bish); 25 | BtrfsTreeItem& operator=(const BtrfsIoctlSearchHeader &bish); 26 | bool operator!() const; 27 | 28 | /// Member access methods. Invoking a method on the 29 | /// wrong type of item will throw an exception. 30 | 31 | /// @{ Block group items 32 | uint64_t block_group_flags() const; 33 | uint64_t block_group_used() const; 34 | /// @} 35 | 36 | /// @{ Chunk items 37 | uint64_t chunk_length() const; 38 | uint64_t chunk_type() const; 39 | /// @} 40 | 41 | /// @{ Dev extent items (physical byte ranges) 42 | uint64_t dev_extent_chunk_offset() const; 43 | uint64_t dev_extent_length() const; 44 | /// @} 45 | 46 | /// @{ Dev items (devices) 47 | uint64_t dev_item_total_bytes() const; 48 | uint64_t dev_item_bytes_used() const; 49 | /// @} 50 | 51 | /// @{ Inode items 52 | uint64_t inode_size() const; 53 | /// @} 54 | 55 | /// @{ Extent refs (EXTENT_DATA) 56 | uint64_t file_extent_logical_bytes() const; 57 | uint64_t file_extent_generation() const; 58 | uint64_t file_extent_offset() const; 59 | uint64_t file_extent_bytenr() const; 60 | uint8_t file_extent_type() const; 61 | btrfs_compression_type file_extent_compression() const; 62 | /// @} 63 | 64 | /// @{ Extent items (EXTENT_ITEM) 65 | uint64_t extent_begin() const; 66 | uint64_t extent_end() const; 
67 | uint64_t extent_flags() const; 68 | uint64_t extent_generation() const; 69 | /// @} 70 | 71 | /// @{ Root items 72 | uint64_t root_flags() const; 73 | uint64_t root_refs() const; 74 | /// @} 75 | 76 | /// @{ Root backref items. 77 | uint64_t root_ref_dirid() const; 78 | string root_ref_name() const; 79 | uint64_t root_ref_parent_rootid() const; 80 | /// @} 81 | }; 82 | 83 | ostream &operator<<(ostream &os, const BtrfsTreeItem &bti); 84 | 85 | class BtrfsTreeFetcher { 86 | protected: 87 | Fd m_fd; 88 | BtrfsIoctlSearchKey m_sk; 89 | uint64_t m_tree = 0; 90 | uint64_t m_min_transid = 0; 91 | uint64_t m_max_transid = numeric_limits::max(); 92 | uint64_t m_block_size = 0; 93 | uint64_t m_lookbehind_size = 0; 94 | uint64_t m_scale_size = 0; 95 | uint8_t m_type = 0; 96 | 97 | uint64_t scale_logical(uint64_t logical) const; 98 | uint64_t unscale_logical(uint64_t logical) const; 99 | const static uint64_t s_max_logical = numeric_limits::max(); 100 | uint64_t scaled_max_logical() const; 101 | 102 | virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t object); 103 | virtual void next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr); 104 | virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) = 0; 105 | virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) = 0; 106 | virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) = 0; 107 | Fd fd() const; 108 | void fd(Fd fd); 109 | public: 110 | virtual ~BtrfsTreeFetcher() = default; 111 | BtrfsTreeFetcher(Fd new_fd); 112 | void type(uint8_t type); 113 | uint8_t type(); 114 | void tree(uint64_t tree); 115 | uint64_t tree(); 116 | void transid(uint64_t min_transid, uint64_t max_transid = numeric_limits::max()); 117 | /// Block size (sectorsize) of filesystem 118 | uint64_t block_size() const; 119 | /// Fetch last object < logical, null if not found 120 | BtrfsTreeItem prev(uint64_t logical); 121 | /// Fetch first object > logical, null if not found 122 | BtrfsTreeItem next(uint64_t logical); 123 
| /// Fetch object at exactly logical, null if not found 124 | BtrfsTreeItem at(uint64_t); 125 | /// Fetch first object >= logical 126 | BtrfsTreeItem lower_bound(uint64_t logical); 127 | /// Fetch last object <= logical 128 | BtrfsTreeItem rlower_bound(uint64_t logical); 129 | 130 | /// Estimated distance between objects 131 | virtual uint64_t lookbehind_size() const; 132 | virtual void lookbehind_size(uint64_t); 133 | 134 | /// Scale size (normally block size but must be set to 1 for fs trees) 135 | uint64_t scale_size() const; 136 | void scale_size(uint64_t); 137 | }; 138 | 139 | class BtrfsTreeObjectFetcher : public BtrfsTreeFetcher { 140 | protected: 141 | virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t logical) override; 142 | virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) override; 143 | virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) override; 144 | virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) override; 145 | public: 146 | using BtrfsTreeFetcher::BtrfsTreeFetcher; 147 | }; 148 | 149 | class BtrfsTreeOffsetFetcher : public BtrfsTreeFetcher { 150 | protected: 151 | uint64_t m_objectid = 0; 152 | virtual void fill_sk(BtrfsIoctlSearchKey &key, uint64_t offset) override; 153 | virtual uint64_t hdr_logical(const BtrfsIoctlSearchHeader &hdr) override; 154 | virtual bool hdr_match(const BtrfsIoctlSearchHeader &hdr) override; 155 | virtual bool hdr_stop(const BtrfsIoctlSearchHeader &hdr) override; 156 | public: 157 | using BtrfsTreeFetcher::BtrfsTreeFetcher; 158 | void objectid(uint64_t objectid); 159 | uint64_t objectid() const; 160 | }; 161 | 162 | class BtrfsCsumTreeFetcher : public BtrfsTreeOffsetFetcher { 163 | public: 164 | const uint32_t BTRFS_CSUM_TYPE_UNKNOWN = uint32_t(1) << 16; 165 | private: 166 | size_t m_sum_size = 0; 167 | uint32_t m_sum_type = BTRFS_CSUM_TYPE_UNKNOWN; 168 | public: 169 | BtrfsCsumTreeFetcher(const Fd &fd); 170 | 171 | uint32_t sum_type() const; 172 | size_t sum_size() const; 173 | void 
get_sums(uint64_t logical, size_t count, function output); 174 | }; 175 | 176 | /// Fetch extent items from extent tree. 177 | /// Does not filter out metadata! See BtrfsDataExtentTreeFetcher for that. 178 | class BtrfsExtentItemFetcher : public BtrfsTreeObjectFetcher { 179 | public: 180 | BtrfsExtentItemFetcher(const Fd &fd); 181 | }; 182 | 183 | /// Fetch extent refs from an inode. Caller must set the tree and objectid. 184 | class BtrfsExtentDataFetcher : public BtrfsTreeOffsetFetcher { 185 | public: 186 | BtrfsExtentDataFetcher(const Fd &fd); 187 | }; 188 | 189 | /// Fetch raw inode items 190 | class BtrfsInodeFetcher : public BtrfsTreeObjectFetcher { 191 | public: 192 | BtrfsInodeFetcher(const Fd &fd); 193 | BtrfsTreeItem stat(uint64_t subvol, uint64_t inode); 194 | }; 195 | 196 | /// Fetch a root (subvol) item 197 | class BtrfsRootFetcher : public BtrfsTreeObjectFetcher { 198 | public: 199 | BtrfsRootFetcher(const Fd &fd); 200 | BtrfsTreeItem root(uint64_t subvol); 201 | BtrfsTreeItem root_backref(uint64_t subvol); 202 | }; 203 | 204 | /// Fetch data extent items from extent tree, skipping metadata-only block groups 205 | class BtrfsDataExtentTreeFetcher : public BtrfsExtentItemFetcher { 206 | BtrfsTreeItem m_current_bg; 207 | BtrfsTreeOffsetFetcher m_chunk_tree; 208 | protected: 209 | virtual void next_sk(BtrfsIoctlSearchKey &key, const BtrfsIoctlSearchHeader &hdr) override; 210 | public: 211 | BtrfsDataExtentTreeFetcher(const Fd &fd); 212 | }; 213 | 214 | } 215 | 216 | #endif 217 | -------------------------------------------------------------------------------- /include/crucible/bytevector.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRUCIBLE_BYTEVECTOR_H_ 2 | #define _CRUCIBLE_BYTEVECTOR_H_ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace crucible { 14 | using namespace std; 15 | // new[] is a little slower than malloc 16 | // shared_ptr is 
about 2x slower than unique_ptr 17 | // vector is ~160x slower 18 | // so we won't bother with unique_ptr because we can't do shared copies with it 19 | 20 | class ByteVector { 21 | public: 22 | using Pointer = shared_ptr; 23 | using value_type = Pointer::element_type; 24 | using iterator = value_type*; 25 | 26 | ByteVector() = default; 27 | ByteVector(const ByteVector &that); 28 | ByteVector& operator=(const ByteVector &that); 29 | ByteVector(size_t size); 30 | ByteVector(const ByteVector &that, size_t start, size_t length); 31 | ByteVector(iterator begin, iterator end, size_t min_size = 0); 32 | 33 | ByteVector at(size_t start, size_t length) const; 34 | 35 | value_type& at(size_t) const; 36 | iterator begin() const; 37 | void clear(); 38 | value_type* data() const; 39 | bool empty() const; 40 | iterator end() const; 41 | value_type& operator[](size_t) const; 42 | size_t size() const; 43 | bool operator==(const ByteVector &that) const; 44 | 45 | // this version of erase only works at the beginning or end of the buffer, else throws exception 46 | void erase(iterator first); 47 | void erase(iterator first, iterator last); 48 | 49 | // An important use case is ioctls that have a fixed-size header struct 50 | // followed by a buffer for further arguments. These templates avoid 51 | // doing reinterpret_casts every time. 
52 | template ByteVector(const T& object, size_t min_size); 53 | template T* get() const; 54 | private: 55 | Pointer m_ptr; 56 | size_t m_size = 0; 57 | mutable mutex m_mutex; 58 | }; 59 | 60 | template 61 | ByteVector::ByteVector(const T& object, size_t min_size) 62 | { 63 | const auto size = max(min_size, sizeof(T)); 64 | m_ptr = Pointer(static_cast(malloc(size)), free); 65 | memcpy(m_ptr.get(), &object, sizeof(T)); 66 | m_size = size; 67 | } 68 | 69 | template 70 | T* 71 | ByteVector::get() const 72 | { 73 | THROW_CHECK2(out_of_range, size(), sizeof(T), size() >= sizeof(T)); 74 | return reinterpret_cast(data()); 75 | } 76 | 77 | ostream& operator<<(ostream &os, const ByteVector &bv); 78 | } 79 | 80 | #endif // _CRUCIBLE_BYTEVECTOR_H_ 81 | -------------------------------------------------------------------------------- /include/crucible/cache.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_CACHE_H 2 | #define CRUCIBLE_CACHE_H 3 | 4 | #include "crucible/lockset.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace crucible { 13 | using namespace std; 14 | 15 | template 16 | class LRUCache { 17 | public: 18 | using Key = tuple; 19 | using Func = function; 20 | private: 21 | struct Value { 22 | Key key; 23 | Return ret; 24 | }; 25 | 26 | using ListIter = typename list::iterator; 27 | 28 | Func m_fn; 29 | list m_list; 30 | map m_map; 31 | LockSet m_lockset; 32 | size_t m_max_size; 33 | mutable mutex m_mutex; 34 | 35 | void check_overflow(); 36 | void recent_use(ListIter vp); 37 | void erase_item(ListIter vp); 38 | void erase_key(const Key &k); 39 | Return insert_item(Func fn, Arguments... args); 40 | public: 41 | LRUCache(Func f = Func(), size_t max_size = 100); 42 | 43 | void func(Func f); 44 | void max_size(size_t new_max_size); 45 | 46 | Return operator()(Arguments... args); 47 | Return refresh(Arguments... args); 48 | void expire(Arguments... 
args); 49 | void insert(const Return &r, Arguments... args); 50 | void clear(); 51 | size_t size() const; 52 | }; 53 | 54 | template 55 | LRUCache::LRUCache(Func f, size_t max_size) : 56 | m_fn(f), 57 | m_max_size(max_size) 58 | { 59 | } 60 | 61 | template 62 | Return 63 | LRUCache::insert_item(Func fn, Arguments... args) 64 | { 65 | Key k(args...); 66 | 67 | // Do we have it cached? 68 | unique_lock lock(m_mutex); 69 | auto found = m_map.find(k); 70 | if (found == m_map.end()) { 71 | // No, release cache lock and acquire key lock 72 | lock.unlock(); 73 | auto key_lock = m_lockset.make_lock(k); 74 | 75 | // Did item appear in cache while we were waiting for key? 76 | lock.lock(); 77 | found = m_map.find(k); 78 | if (found == m_map.end()) { 79 | 80 | // No, we now hold key and cache locks, but item not in cache. 81 | // Release cache lock and call the function 82 | lock.unlock(); 83 | 84 | // Create new value 85 | Value v { 86 | .key = k, 87 | .ret = fn(args...), 88 | }; 89 | 90 | // Reacquire cache lock 91 | lock.lock(); 92 | 93 | // Make room 94 | check_overflow(); 95 | 96 | // Insert return value at back of LRU list (hot end) 97 | auto new_item = m_list.insert(m_list.end(), v); 98 | 99 | // Insert return value in map 100 | bool inserted = false; 101 | tie(found, inserted) = m_map.insert(make_pair(v.key, new_item)); 102 | 103 | // We (should be) holding a lock on this key so we are the ones to insert it 104 | THROW_CHECK0(runtime_error, inserted); 105 | } 106 | 107 | // Item should be in cache now 108 | THROW_CHECK0(runtime_error, found != m_map.end()); 109 | } else { 110 | // Move to end of LRU 111 | recent_use(found->second); 112 | } 113 | 114 | // Return cached object 115 | return found->second->ret; 116 | } 117 | 118 | template 119 | void 120 | LRUCache::erase_item(ListIter vp) 121 | { 122 | if (vp != m_list.end()) { 123 | m_map.erase(vp->key); 124 | m_list.erase(vp); 125 | } 126 | } 127 | 128 | template 129 | void 130 | LRUCache::erase_key(const Key &k) 131 | 
{ 132 | auto map_item = m_map.find(k); 133 | if (map_item != m_map.end()) { 134 | auto list_item = map_item->second; 135 | m_map.erase(map_item); 136 | m_list.erase(list_item); 137 | } 138 | } 139 | 140 | template 141 | void 142 | LRUCache::check_overflow() 143 | { 144 | // Erase items at front of LRU list (cold end) until max size reached or list empty 145 | while (m_map.size() >= m_max_size && !m_list.empty()) { 146 | erase_item(m_list.begin()); 147 | } 148 | } 149 | 150 | template 151 | void 152 | LRUCache::recent_use(ListIter vp) 153 | { 154 | // Splice existing items at back of LRU list (hot end) 155 | auto next_vp = vp; 156 | ++next_vp; 157 | m_list.splice(m_list.end(), m_list, vp, next_vp); 158 | } 159 | 160 | template 161 | void 162 | LRUCache::max_size(size_t new_max_size) 163 | { 164 | unique_lock lock(m_mutex); 165 | m_max_size = new_max_size; 166 | // FIXME: this really reduces the cache size to new_max_size - 1 167 | // because every other time we call this method, it is immediately 168 | // followed by insert. 169 | check_overflow(); 170 | } 171 | 172 | template 173 | void 174 | LRUCache::func(Func func) 175 | { 176 | unique_lock lock(m_mutex); 177 | m_fn = func; 178 | } 179 | 180 | template 181 | void 182 | LRUCache::clear() 183 | { 184 | // Move the map and list onto the stack, then destroy it after we've released the lock 185 | // so that we don't block other threads if the list's destructors are expensive 186 | decltype(m_list) new_list; 187 | decltype(m_map) new_map; 188 | unique_lock lock(m_mutex); 189 | m_list.swap(new_list); 190 | m_map.swap(new_map); 191 | lock.unlock(); 192 | } 193 | 194 | template 195 | size_t 196 | LRUCache::size() const 197 | { 198 | unique_lock lock(m_mutex); 199 | return m_map.size(); 200 | } 201 | 202 | template 203 | Return 204 | LRUCache::operator()(Arguments... args) 205 | { 206 | return insert_item(m_fn, args...); 207 | } 208 | 209 | template 210 | void 211 | LRUCache::expire(Arguments... 
args) 212 | { 213 | unique_lock lock(m_mutex); 214 | erase_key(Key(args...)); 215 | } 216 | 217 | template 218 | Return 219 | LRUCache::refresh(Arguments... args) 220 | { 221 | expire(args...); 222 | return operator()(args...); 223 | } 224 | 225 | template 226 | void 227 | LRUCache::insert(const Return &r, Arguments... args) 228 | { 229 | insert_item([&](Arguments...) -> Return { return r; }, args...); 230 | } 231 | } 232 | 233 | #endif // CRUCIBLE_CACHE_H 234 | -------------------------------------------------------------------------------- /include/crucible/chatter.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_CHATTER_H 2 | #define CRUCIBLE_CHATTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | /** \brief Chatter wraps a std::ostream reference with a destructor that 14 | writes a newline, and inserts timestamp, pid, and tid prefixes on output. 15 | 16 | Typical usage is expressions like the following: 17 | 18 | int six = 6, nine = 9; \n 19 | Chatter() << "What you get when you multiply" << six 20 | << "by" << nine << '?'; \n 21 | Chatter() << "forty two!"; 22 | 23 | which results in output like the following: 24 | 25 | What you get when you multiply 6 by 9 ?\n 26 | forty-two! 27 | 28 | Note that newlines and timestamps are injected automatically in 29 | the output by the Chatter destructor. You can also use std::endl 30 | explicitly, although it will not have the effect of flushing the 31 | buffer. 
32 | */ 33 | 34 | namespace crucible { 35 | using namespace std; 36 | 37 | class Chatter { 38 | int m_loglevel; 39 | string m_name; 40 | ostream &m_os; 41 | ostringstream m_oss; 42 | 43 | public: 44 | Chatter(int loglevel, string name, ostream &os = cerr); 45 | Chatter(Chatter &&c); 46 | ostream &get_os() { return m_oss; } 47 | 48 | template Chatter &operator<<(const T& arg); 49 | 50 | ~Chatter(); 51 | 52 | static void enable_timestamp(bool prefix_timestamp); 53 | static void enable_level(bool prefix_level); 54 | }; 55 | 56 | template 57 | struct ChatterTraits { 58 | Chatter &operator()(Chatter &c, const Argument &arg) 59 | { 60 | c.get_os() << arg; 61 | return c; 62 | } 63 | }; 64 | 65 | template 66 | Chatter & 67 | Chatter::operator<<(const T& arg) 68 | { 69 | return ChatterTraits()(*this, arg); 70 | } 71 | 72 | template 73 | struct ChatterTraits { 74 | Chatter &operator()(Chatter &c, const Argument *arg) 75 | { 76 | if (arg) { 77 | c.get_os() << "(pointer to " << typeid(*arg).name() << ")(" << reinterpret_cast(arg) << ")"; 78 | } else { 79 | c.get_os() << "(NULL pointer to " << typeid(arg).name() << ')'; 80 | } 81 | return c; 82 | } 83 | }; 84 | 85 | template <> 86 | struct ChatterTraits { 87 | Chatter & 88 | operator()(Chatter &c, const char *arg) 89 | { 90 | c.get_os() << arg; 91 | return c; 92 | } 93 | }; 94 | 95 | class ChatterBox { 96 | string m_file; 97 | int m_line; 98 | string m_pretty_function; 99 | bool m_enabled; 100 | ostream& m_os; 101 | 102 | static set s_boxes; 103 | 104 | public: 105 | ChatterBox(string file, int line, string pretty_function, ostream &os = cerr); 106 | ~ChatterBox(); 107 | 108 | template Chatter operator<<(const T &t) 109 | { 110 | Chatter c(LOG_NOTICE, m_pretty_function, m_os); 111 | c << t; 112 | return c; 113 | } 114 | 115 | bool enabled() const { return m_enabled; } 116 | void set_enable(bool en); 117 | 118 | static set& all_boxes(); 119 | }; 120 | 121 | class ChatterUnwinder { 122 | function m_func; 123 | public: 124 | 
ChatterUnwinder(function f); 125 | ~ChatterUnwinder(); 126 | }; 127 | }; 128 | 129 | /* CHATTER(x): declares one static ChatterBox per expansion site (file, line, function) and streams x into it only when that box reports enabled(). */ #define CHATTER(x) do { \ 130 | using namespace crucible; \ 131 | static ChatterBox crucible_chatterbox_cb(__FILE__, __LINE__, __func__); \ 132 | if (crucible_chatterbox_cb.enabled()) { \ 133 | crucible_chatterbox_cb << x; \ 134 | } \ 135 | } while (0) 136 | 137 | /* CHATTER_TRACE(x): same as CHATTER, but the message is prefixed with the file name and line number of the expansion site. */ #define CHATTER_TRACE(x) do { \ 138 | using namespace crucible; \ 139 | static ChatterBox crucible_chatterbox_cb(__FILE__, __LINE__, __func__); \ 140 | if (crucible_chatterbox_cb.enabled()) { \ 141 | crucible_chatterbox_cb << __FILE__ << ":" << __LINE__ << ": " << x; \ 142 | } \ 143 | } while (0) 144 | 145 | /* Two-level token paste: SRSLY_WTF_C expands its arguments (such as __LINE__) before WTF_C joins them with ##. */ #define WTF_C(x, y) x##y 146 | #define SRSLY_WTF_C(x, y) WTF_C(x, y) 147 | /* CHATTER_UNWIND(x): declares a ChatterUnwinder named chatterUnwinder_ plus the current line number, holding a lambda that runs CHATTER_TRACE(x). NOTE(review): when the lambda runs is decided by ChatterUnwinder, whose implementation is not in this header. */ #define CHATTER_UNWIND(x) \ 148 | crucible::ChatterUnwinder SRSLY_WTF_C(chatterUnwinder_, __LINE__) ([&]() { \ 149 | CHATTER_TRACE(x); \ 150 | }) 151 | 152 | #endif // CRUCIBLE_CHATTER_H 153 | -------------------------------------------------------------------------------- /include/crucible/city.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 Google, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | // 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala 22 | // 23 | // http://code.google.com/p/cityhash/ 24 | // 25 | // This file provides a few functions for hashing strings. All of them are 26 | // high-quality functions in the sense that they pass standard tests such 27 | // as Austin Appleby's SMHasher. They are also fast. 28 | // 29 | // For 64-bit x86 code, on short strings, we don't know of anything faster than 30 | // CityHash64 that is of comparable quality. We believe our nearest competitor 31 | // is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash 32 | // tables and most other hashing (excluding cryptography). 33 | // 34 | // For 64-bit x86 code, on long strings, the picture is more complicated. 35 | // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., 36 | // CityHashCrc128 appears to be faster than all competitors of comparable 37 | // quality. CityHash128 is also good but not quite as fast. We believe our 38 | // nearest competitor is Bob Jenkins' Spooky. We don't have great data for 39 | // other 64-bit CPUs, but for long strings we know that Spooky is slightly 40 | // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. 41 | // Note that CityHashCrc128 is declared in citycrc.h [which has been removed 42 | // for bees]. 43 | // 44 | // For 32-bit x86 code, we don't know of anything faster than CityHash32 that 45 | // is of comparable quality. 
We believe our nearest competitor is Murmur3A. 46 | // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) 47 | // 48 | // Functions in the CityHash family are not suitable for cryptography. 49 | // 50 | // Please see CityHash's README file for more details on our performance 51 | // measurements and so on. 52 | // 53 | // WARNING: This code has been only lightly tested on big-endian platforms! 54 | // It is known to work well on little-endian platforms that have a small penalty 55 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. 56 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads; 57 | // bug reports are welcome. 58 | // 59 | // By the way, for some hash functions, given strings a and b, the hash 60 | // of a+b is easily derived from the hashes of a and b. This property 61 | // doesn't hold for any hash functions in this file. 62 | 63 | #ifndef CITY_HASH_H_ 64 | #define CITY_HASH_H_ 65 | 66 | #include // for size_t. 67 | #include 68 | #include 69 | 70 | typedef uint8_t uint8; 71 | typedef uint32_t uint32; 72 | typedef uint64_t uint64; 73 | /* uint128 is a std::pair: Uint128Low64 reads .first and Uint128High64 reads .second. */ typedef std::pair uint128; 74 | 75 | inline uint64 Uint128Low64(const uint128& x) { return x.first; } 76 | inline uint64 Uint128High64(const uint128& x) { return x.second; } 77 | 78 | // Hash function for a byte array. 79 | uint64 CityHash64(const char *buf, size_t len); 80 | 81 | // Hash function for a byte array. For convenience, a 64-bit seed is also 82 | // hashed into the result. 83 | uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); 84 | 85 | // Hash function for a byte array. For convenience, two seeds are also 86 | // hashed into the result. 87 | uint64 CityHash64WithSeeds(const char *buf, size_t len, 88 | uint64 seed0, uint64 seed1); 89 | 90 | // Hash function for a byte array. 91 | uint128 CityHash128(const char *s, size_t len); 92 | 93 | // Hash function for a byte array. 
For convenience, a 128-bit seed is also 94 | // hashed into the result. 95 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed); 96 | 97 | // Hash function for a byte array. Most useful in 32-bit binaries. 98 | uint32 CityHash32(const char *buf, size_t len); 99 | 100 | // Hash 128 input bits down to 64 bits of output. 101 | // This is intended to be a reasonably good hash function. 102 | inline uint64 Hash128to64(const uint128& x) { 103 | // Murmur-inspired hashing: xor the two halves and multiply by kMul, fold with >> 47, mix in the high half and multiply again, fold with >> 47, then one final multiply. 104 | const uint64 kMul = 0x9ddfea08eb382d69ULL; 105 | uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 106 | a ^= (a >> 47); 107 | uint64 b = (Uint128High64(x) ^ a) * kMul; 108 | b ^= (b >> 47); 109 | b *= kMul; 110 | return b; 111 | } 112 | 113 | #endif // CITY_HASH_H_ 114 | -------------------------------------------------------------------------------- /include/crucible/cleanup.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_CLEANUP_H 2 | #define CRUCIBLE_CLEANUP_H 3 | 4 | #include 5 | 6 | namespace crucible { 7 | using namespace std; 8 | 9 | /* Cleanup stores a function object in m_cleaner. NOTE(review): presumably the destructor invokes it as a scope-exit action; the destructor body is not in this header, confirm. */ class Cleanup { 10 | function m_cleaner; 11 | public: 12 | Cleanup(function func); 13 | ~Cleanup(); 14 | }; 15 | 16 | } 17 | 18 | #endif // CRUCIBLE_CLEANUP_H 19 | -------------------------------------------------------------------------------- /include/crucible/crc64.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_CRC64_H 2 | #define CRUCIBLE_CRC64_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace crucible { 9 | namespace Digest { 10 | namespace CRC { 11 | /* crc64 returns a uint64_t computed from the len bytes starting at p; the CRC parameters are not visible in this header. */ uint64_t crc64(const void *p, size_t len); 12 | }; 13 | }; 14 | }; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /include/crucible/endian.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_ENDIAN_H 2 | #define CRUCIBLE_ENDIAN_H 3 | 4 | #include 5 | 6 
| #include 7 | 8 | namespace crucible { 9 | 10 | /* le_to_cpu_helper: function object chosen by integer type. The primary template only declares operator(); the specializations below call le64toh, le32toh or le16toh, or return the value unchanged for uint8_t. */ template 11 | struct le_to_cpu_helper { 12 | T operator()(const T v); 13 | }; 14 | 15 | template<> struct le_to_cpu_helper { 16 | uint64_t operator()(const uint64_t v) { return le64toh(v); } 17 | }; 18 | 19 | #if __SIZEOF_LONG__ == 8 20 | // uint64_t is unsigned long on LP64 platforms 21 | template<> struct le_to_cpu_helper { 22 | unsigned long long operator()(const unsigned long long v) { return le64toh(v); } 23 | }; 24 | #endif 25 | 26 | template<> struct le_to_cpu_helper { 27 | uint32_t operator()(const uint32_t v) { return le32toh(v); } 28 | }; 29 | 30 | template<> struct le_to_cpu_helper { 31 | uint16_t operator()(const uint16_t v) { return le16toh(v); } 32 | }; 33 | 34 | template<> struct le_to_cpu_helper { 35 | uint8_t operator()(const uint8_t v) { return v; } 36 | }; 37 | 38 | /* le_to_cpu: converts v by calling a le_to_cpu_helper function object. */ template 39 | T 40 | le_to_cpu(const T v) 41 | { 42 | return le_to_cpu_helper()(v); 43 | } 44 | 45 | /* get_unaligned: reinterprets p as a pointer to a local struct that holds one T and is declared with __attribute__((packed)), then returns that member. */ template 46 | T 47 | get_unaligned(const void *const p) 48 | { 49 | struct not_aligned { 50 | T v; 51 | } __attribute__((packed)); 52 | const not_aligned *const nap = reinterpret_cast(p); 53 | return nap->v; 54 | } 55 | 56 | } 57 | 58 | #endif // CRUCIBLE_ENDIAN_H 59 | -------------------------------------------------------------------------------- /include/crucible/error.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_ERROR_H 2 | #define CRUCIBLE_ERROR_H 3 | 4 | // Common error-handling idioms for C library calls 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | namespace crucible { 16 | using namespace std; 17 | 18 | // Common error-handling idioms for C library calls 19 | 20 | /* Throws system_error with error code -rv and message expr when rv < 0; otherwise returns rv. */ template T die_if_minus_errno(const char *expr, T rv) 21 | { 22 | if (rv < 0) { 23 | throw system_error(error_code(-rv, system_category()), expr); 24 | } 25 | return rv; 26 | } 27 | 28 | /* Throws system_error built from errno, with message expr, when rv == -1; otherwise returns rv. */ template T die_if_minus_one(const char *expr, T rv) 29 | 
{ 30 | if (rv == -1) { 31 | throw system_error(error_code(errno, system_category()), expr); 32 | } 33 | return rv; 34 | } 35 | 36 | /* Throws system_error built from errno, with message expr, when rv == 0; otherwise returns rv. */ template T die_if_zero(const char *expr, T rv) 37 | { 38 | if (rv == 0) { 39 | throw system_error(error_code(errno, system_category()), expr); 40 | } 41 | return rv; 42 | } 43 | 44 | /* Throws system_error built from errno (not from rv), with message expr, when rv != 0; otherwise returns rv. */ template T die_if_non_zero(const char *expr, T rv) 45 | { 46 | if (rv != 0) { 47 | throw system_error(error_code(errno, system_category()), expr); 48 | } 49 | return rv; 50 | } 51 | 52 | // Usage: catch_all([&]() { /* insert body here */ } ); 53 | // Executes body with exceptions caught and reported to cerr. 54 | // Returns: 55 | // 0 if f() returns 56 | // non-zero if f() throws an exception 57 | // -1 for unknown exception 58 | // 1 for std::exception or class derived thereof 59 | 60 | void set_catch_explainer(function f); 61 | void default_catch_explainer(string s); 62 | int catch_all(const function &f, const function &explainer = default_catch_explainer); 63 | 64 | // catch_and_explain traps the exception, calls the explainer, then rethrows the original exception 65 | void catch_and_explain(const function &f, const function &explainer = default_catch_explainer); 66 | }; 67 | 68 | // 0 on success, -errno on error. 69 | // Covers most pthread functions. NOTE(review): pthread functions are specified to return a positive error number on failure, which the rv < 0 test in die_if_minus_errno would not catch; confirm which callers this is meant for. 70 | #define DIE_IF_MINUS_ERRNO(expr) crucible::die_if_minus_errno(#expr, expr) 71 | 72 | // -1 on error, all other values mean success. 73 | #define DIE_IF_MINUS_ONE(expr) crucible::die_if_minus_one(#expr, expr) 74 | 75 | // 0 (or NULL) on error, all other values mean success. 76 | #define DIE_IF_ZERO(expr) crucible::die_if_zero(#expr, expr) 77 | 78 | // 0 (or NULL) on success, all other values mean error. 
79 | #define DIE_IF_NON_ZERO(expr) crucible::die_if_non_zero(#expr, expr) 80 | 81 | // macro for throwing an error: streams expr into an ostringstream, appends the source file and line, and throws type constructed from that string 82 | #define THROW_ERROR(type, expr) do { \ 83 | std::ostringstream _te_oss; \ 84 | _te_oss << expr << " at " << __FILE__ << ":" << __LINE__; \ 85 | throw type(_te_oss.str()); \ 86 | } while (0) 87 | 88 | // macro for throwing a system_error with errno (message built the same way as THROW_ERROR) 89 | #define THROW_ERRNO(expr) do { \ 90 | std::ostringstream _te_oss; \ 91 | _te_oss << expr << " at " << __FILE__ << ":" << __LINE__; \ 92 | throw std::system_error(std::error_code(errno, std::system_category()), _te_oss.str()); \ 93 | } while (0) 94 | 95 | // macro for throwing a system_error with some other variable (value is used as the error code instead of errno) 96 | #define THROW_ERRNO_VALUE(value, expr) do { \ 97 | std::ostringstream _te_oss; \ 98 | _te_oss << expr << " at " << __FILE__ << ":" << __LINE__; \ 99 | throw std::system_error(std::error_code((value), std::system_category()), _te_oss.str()); \ 100 | } while (0) 101 | 102 | // macros for checking a constraint: each throws via THROW_ERROR when expr is false; the numbered variants also print the named values (text and current value) in the message 103 | #define THROW_CHECK0(type, expr) do { \ 104 | if (!(expr)) { \ 105 | THROW_ERROR(type, "failed constraint check (" << #expr << ")"); \ 106 | } \ 107 | } while(0) 108 | 109 | #define THROW_CHECK1(type, value, expr) do { \ 110 | if (!(expr)) { \ 111 | THROW_ERROR(type, #value << " = " << (value) << " failed constraint check (" << #expr << ")"); \ 112 | } \ 113 | } while(0) 114 | 115 | #define THROW_CHECK2(type, value1, value2, expr) do { \ 116 | if (!(expr)) { \ 117 | THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) \ 118 | << " failed constraint check (" << #expr << ")"); \ 119 | } \ 120 | } while(0) 121 | 122 | #define THROW_CHECK3(type, value1, value2, value3, expr) do { \ 123 | if (!(expr)) { \ 124 | THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) << ", " #value3 << " = " << (value3) \ 125 | << " failed constraint check (" << #expr << ")"); \ 126 | } \ 127 | } while(0) 128 | 129 | #define 
THROW_CHECK4(type, value1, value2, value3, value4, expr) do { \ 130 | if (!(expr)) { \ 131 | THROW_ERROR(type, #value1 << " = " << (value1) << ", " #value2 << " = " << (value2) << ", " #value3 << " = " << (value3) << ", " #value4 << " = " << (value4) \ 132 | << " failed constraint check (" << #expr << ")"); \ 133 | } \ 134 | } while(0) 135 | 136 | /* THROW_CHECK_BIN_OP: throws unless (value1) op (value2) holds; the message shows the text and the value of both operands. The operands are evaluated again when the message is built. */ #define THROW_CHECK_BIN_OP(type, value1, op, value2) do { \ 137 | if (!((value1) op (value2))) { \ 138 | THROW_ERROR(type, "failed constraint check " << #value1 << " (" << (value1) << ") " << #op << " " << #value2 << " (" << (value2) << ")"); \ 139 | } \ 140 | } while(0) 141 | 142 | /* THROW_CHECK_PREFIX_OP: throws unless op (value1) is true, for example a logical-not or a predicate name used as op. */ #define THROW_CHECK_PREFIX_OP(type, op, value1) do { \ 143 | if (!(op (value1))) { \ 144 | THROW_ERROR(type, "failed constraint check " << #op << " " << #value1 << " (" << (value1) << ")"); \ 145 | } \ 146 | } while(0) 147 | 148 | /* THROW_CHECK_RANGE: closed interval; throws unless value_min <= value_test <= value_max, using only operator< on the operands. */ #define THROW_CHECK_RANGE(type, value_min, value_test, value_max) do { \ 149 | if ((value_test) < (value_min) || (value_max) < (value_test)) { \ 150 | THROW_ERROR(type, "failed constraint check " << #value_min << " (" << (value_min) << ") <= " #value_test << " (" << (value_test) \ 151 | << ") <= " << #value_max << " (" << (value_max) << ")"); \ 152 | } \ 153 | } while(0) 154 | 155 | /* THROW_CHECK_ARRAY_RANGE: half-open interval; throws unless value_min <= value_test < value_max, using only operator< on the operands. */ #define THROW_CHECK_ARRAY_RANGE(type, value_min, value_test, value_max) do { \ 156 | if ((value_test) < (value_min) || !((value_test) < (value_max))) { \ 157 | THROW_ERROR(type, "failed constraint check " << #value_min << " (" << (value_min) << ") <= " #value_test << " (" << (value_test) \ 158 | << ") < " << #value_max << " (" << (value_max) << ")"); \ 159 | } \ 160 | } while(0) 161 | 162 | #endif // CRUCIBLE_ERROR_H 163 | -------------------------------------------------------------------------------- /include/crucible/extentwalker.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_EXTENTWALKER_H 2 | #define CRUCIBLE_EXTENTWALKER_H 3 | 4 | #include "crucible/fd.h" 5 | 6 | namespace 
crucible { 7 | using namespace std; 8 | 9 | // FIXME: ExtentCursor is probably a better name 10 | struct Extent { 11 | off_t m_begin = 0; 12 | off_t m_end = 0; 13 | uint64_t m_physical = 0; 14 | uint64_t m_flags = 0; 15 | 16 | // Btrfs extent reference details 17 | off_t m_physical_len = 0; 18 | off_t m_logical_len = 0; 19 | off_t m_offset = 0; 20 | 21 | // fiemap flags are uint32_t, so bits 32..63 are OK for us (the three flags below use bits 32, 33 and 34) 22 | 23 | // no extent here 24 | static const uint64_t HOLE = (1ULL << 32); 25 | 26 | // extent is physical space full of zeros 27 | static const uint64_t PREALLOC = (1ULL << 33); 28 | 29 | // extent's physical (RAM) size does not match logical (can we know this?) 30 | static const uint64_t OBSCURED = (1ULL << 34); 31 | 32 | operator bool() const; 33 | off_t size() const; 34 | off_t begin() const { return m_begin; } 35 | off_t end() const { return m_end; } 36 | uint64_t flags() const { return m_flags; } 37 | uint64_t physical() const { return m_physical; } 38 | off_t physical_len() const { return m_physical_len; } 39 | off_t logical_len() const { return m_logical_len; } 40 | off_t offset() const { return m_offset; } 41 | bool compressed() const; 42 | uint64_t bytenr() const; 43 | bool operator==(const Extent &that) const; 44 | bool operator!=(const Extent &that) const { return !(*this == that); } 45 | }; 46 | 47 | /* ExtentWalker: holds an Fd and a Stat plus a Vec (m_extents) with an iterator into it (m_current). get_extent_map is virtual and protected, so derived classes can replace it. */ class ExtentWalker { 48 | public: 49 | using Vec = vector; 50 | using Itr = Vec::iterator; 51 | 52 | protected: 53 | Fd m_fd; 54 | Stat m_stat; 55 | 56 | virtual Vec get_extent_map(off_t pos); 57 | 58 | private: 59 | Vec m_extents; 60 | Itr m_current; 61 | 62 | Itr find_in_cache(off_t pos); 63 | void run_fiemap(off_t pos); 64 | 65 | #ifdef EXTENTWALKER_DEBUG 66 | ostringstream m_log; 67 | #endif 68 | 69 | public: 70 | ExtentWalker(Fd fd = Fd()); 71 | ExtentWalker(Fd fd, off_t initial_pos); 72 | virtual ~ExtentWalker(); 73 | 74 | void reset(); 75 | Extent current(); 76 | bool next(); 77 | bool prev(); 78 | void seek(off_t new_pos); 79 | 80 | friend 
ostream & operator<<(ostream &os, const ExtentWalker &ew); 81 | }; 82 | 83 | /* BtrfsExtentWalker: ExtentWalker that overrides get_extent_map and additionally keeps a tree id and a root Fd; the root Fd can be passed to the three-argument constructor or set later with set_root_fd. */ class BtrfsExtentWalker : public ExtentWalker { 84 | uint64_t m_tree_id; 85 | Fd m_root_fd; 86 | 87 | protected: 88 | Vec get_extent_map(off_t pos) override; 89 | 90 | public: 91 | BtrfsExtentWalker(Fd fd); 92 | BtrfsExtentWalker(Fd fd, off_t initial_pos); 93 | BtrfsExtentWalker(Fd fd, off_t initial_pos, Fd root_fd); 94 | void set_root_fd(Fd fd); 95 | }; 96 | 97 | ostream &operator<<(ostream &os, const Extent &e); 98 | }; 99 | 100 | #endif // CRUCIBLE_EXTENTWALKER_H 101 | -------------------------------------------------------------------------------- /include/crucible/fd.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_FD_H 2 | #define CRUCIBLE_FD_H 3 | 4 | #include "crucible/bytevector.h" 5 | #include "crucible/namedptr.h" 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | // open 13 | #include 14 | #include 15 | #include 16 | 17 | // ioctl 18 | #include 19 | #include 20 | 21 | // socket 22 | #include 23 | 24 | // pread/pwrite 25 | #include 26 | 27 | namespace crucible { 28 | using namespace std; 29 | 30 | /// File descriptor owner object. It closes the descriptor when destroyed. 31 | /// Most of the functions here don't use it because these functions don't own FDs. 32 | /// All good names for such objects are taken. Copy and move construction and assignment are all deleted. 33 | class IOHandle { 34 | IOHandle(const IOHandle &) = delete; 35 | IOHandle(IOHandle &&) = delete; 36 | IOHandle& operator=(IOHandle &&) = delete; 37 | IOHandle& operator=(const IOHandle &) = delete; 38 | int m_fd; 39 | void close(); 40 | public: 41 | virtual ~IOHandle(); 42 | IOHandle(int fd = -1); 43 | int get_fd() const; 44 | }; 45 | 46 | /// Copyable file descriptor. 
47 | class Fd { 48 | static NamedPtr s_named_ptr; 49 | shared_ptr m_handle; 50 | public: 51 | using resource_type = IOHandle; 52 | Fd(); 53 | Fd(int fd); 54 | Fd &operator=(int fd); 55 | Fd &operator=(const shared_ptr &); 56 | operator int() const; 57 | bool operator!() const; 58 | shared_ptr operator->() const; 59 | }; 60 | 61 | void set_relative_path(string path); 62 | string relative_path(); 63 | 64 | // Functions named "foo_or_die" throw exceptions on failure. 65 | 66 | /// Attempt to open the file with the given flags and mode, throw exception on failure. 67 | int open_or_die(const string &file, int flags = O_RDONLY, mode_t mode = 0777); 68 | /// Attempt to open the file relative to dir_fd with the given flags and mode, throw exception on failure. 69 | int openat_or_die(int dir_fd, const string &file, int flags = O_RDONLY, mode_t mode = 0777); 70 | 71 | /// Decode open flags 72 | string o_flags_ntoa(int flags); 73 | /// Decode open mode 74 | string o_mode_ntoa(mode_t mode); 75 | 76 | /// mmap with its one weird error case 77 | void *mmap_or_die(void *addr, size_t length, int prot, int flags, int fd, off_t offset); 78 | /// Decode mmap prot 79 | string mmap_prot_ntoa(int prot); 80 | /// Decode mmap flags 81 | string mmap_flags_ntoa(int flags); 82 | 83 | /// Rename, throw exception on failure. 84 | void rename_or_die(const string &from, const string &to); 85 | /// Rename with paths taken relative to the directory FDs fromfd and tofd, throw exception on failure. 86 | void renameat_or_die(int fromfd, const string &frompath, int tofd, const string &topath); 87 | 88 | /// Truncate, throw exception on failure. 89 | void ftruncate_or_die(int fd, off_t size); 90 | 91 | // Read or write structs: 92 | // There is a template specialization to read or write strings 93 | // Three-arg version of read_or_die/write_or_die throws an error on incomplete read/writes 94 | // Four-arg version (read_partial_or_die/write_partial_or_die) returns number of bytes read/written through reference arg 95 | 96 | /// Attempt read by pointer and length, throw exception on IO error or short read. 
97 | void read_or_die(int fd, void *buf, size_t size); 98 | /// Attempt read of a POD struct, throw exception on IO error or short read. 99 | template void read_or_die(int fd, T& buf) 100 | { 101 | return read_or_die(fd, static_cast(&buf), sizeof(buf)); 102 | } 103 | 104 | /// Attempt read by pointer and length, throw exception on IO error but not short read. 105 | void read_partial_or_die(int fd, void *buf, size_t size_wanted, size_t &size_read); 106 | /// Attempt read of a POD struct, throw exception on IO error but not short read. 107 | template void read_partial_or_die(int fd, T& buf, size_t &size_read) 108 | { 109 | return read_partial_or_die(fd, static_cast(&buf), sizeof(buf), size_read); 110 | } 111 | 112 | /// Attempt read at position by pointer and length, throw exception on IO error but not short read. NOTE(review): unlike read_partial_or_die there is no size_read output here, so a caller cannot observe a short read; confirm the short-read behavior in the implementation. 113 | void pread_or_die(int fd, void *buf, size_t size, off_t offset); 114 | /// Attempt read at position of a POD struct, throw exception on IO error but not short read. 115 | template void pread_or_die(int fd, T& buf, off_t offset) 116 | { 117 | return pread_or_die(fd, static_cast(&buf), sizeof(buf), offset); 118 | } 119 | 120 | /** Attempt write by pointer and length; per the note on three-arg versions above, throws on incomplete write. */ void write_or_die(int fd, const void *buf, size_t size); 121 | /** Write the bytes of buf itself: passes the address of buf and sizeof(buf) to the pointer-and-length overload. */ template void write_or_die(int fd, const T& buf) 122 | { 123 | return write_or_die(fd, static_cast(&buf), sizeof(buf)); 124 | } 125 | 126 | /** Attempt write by pointer and length, reporting the number of bytes written through size_written. */ void write_partial_or_die(int fd, const void *buf, size_t size_wanted, size_t &size_written); 127 | /** Same, for the bytes of buf itself (address of buf, sizeof(buf)). */ template void write_partial_or_die(int fd, const T& buf, size_t &size_written) 128 | { 129 | return write_partial_or_die(fd, static_cast(&buf), sizeof(buf), size_written); 130 | } 131 | 132 | /** Attempt write at position offset by pointer and length. */ void pwrite_or_die(int fd, const void *buf, size_t size, off_t offset); 133 | /** Same, for the bytes of buf itself (address of buf, sizeof(buf)). */ template void pwrite_or_die(int fd, const T& buf, off_t offset) 134 | { 135 | return pwrite_or_die(fd, static_cast(&buf), sizeof(buf), offset); 136 | } 137 | 138 | // Specialization for strings which reads/writes the string content, not the struct string 139 | 
template<> void write_or_die(int fd, const string& str); 140 | template<> void pread_or_die(int fd, string& str, off_t offset); 141 | template<> void pwrite_or_die(int fd, const string& str, off_t offset); 142 | template<> void pread_or_die(int fd, ByteVector& str, off_t offset); 143 | template<> void pwrite_or_die(int fd, const ByteVector& str, off_t offset); 144 | // Deprecated 145 | template<> void pread_or_die>(int fd, vector& str, off_t offset) = delete; 146 | template<> void pwrite_or_die>(int fd, const vector& str, off_t offset) = delete; 147 | template<> void pread_or_die>(int fd, vector& str, off_t offset) = delete; 148 | template<> void pwrite_or_die>(int fd, const vector& str, off_t offset) = delete; 149 | 150 | /// Read a simple string. 151 | string read_string(int fd, size_t size); 152 | 153 | /// A lot of Unix API wants you to initialize a struct and call 154 | /// one function to fill it, another function to throw it away, 155 | /// and has some unknown third thing you have to do when there's 156 | /// an error. That's also a C++ object with an exception-throwing 157 | /// constructor. 158 | struct Stat : public stat { 159 | Stat(); 160 | Stat(int f); 161 | Stat(const string &filename); 162 | Stat &fstat(int fd); 163 | Stat &lstat(const string &filename); 164 | }; 165 | 166 | int ioctl_iflags_get(int fd); 167 | void ioctl_iflags_set(int fd, int attr); 168 | 169 | string st_mode_ntoa(mode_t mode); 170 | 171 | /// Because it's not trivial to do correctly 172 | string readlink_or_die(const string &path); 173 | 174 | /// Determine the name of a FD by readlink through /proc/self/fd/ 175 | string name_fd(int fd); 176 | 177 | /// Returns Fd objects because it does own them. 178 | pair socketpair_or_die(int domain = AF_UNIX, int type = SOCK_STREAM, int protocol = 0); 179 | 180 | /// like unique_lock but for flock instead of mutexes...and not trying 181 | /// to hide the many and subtle differences between those two things *at all*. 
182 | class Flock { 183 | int m_fd; 184 | bool m_locked; 185 | Flock(const Flock &) = delete; 186 | Flock(Flock &&) = delete; 187 | Flock &operator=(const Flock &) = delete; 188 | Flock &operator=(Flock &&) = delete; 189 | public: 190 | Flock(); 191 | Flock(int fd); 192 | Flock(int fd, bool init_locked_state); 193 | ~Flock(); 194 | void lock(); 195 | void try_lock(); 196 | void unlock(); 197 | bool owns_lock(); 198 | operator bool(); 199 | int fd(); 200 | }; 201 | 202 | /// Doesn't use Fd objects because it's usually just used to replace stdin/stdout/stderr. 203 | void dup2_or_die(int fd_in, int fd_out); 204 | 205 | } 206 | 207 | #endif // CRUCIBLE_FD_H 208 | -------------------------------------------------------------------------------- /include/crucible/fs.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_FS_H 2 | #define CRUCIBLE_FS_H 3 | 4 | #include "crucible/bytevector.h" 5 | #include "crucible/endian.h" 6 | #include "crucible/error.h" 7 | 8 | // Terribly Linux-specific FS-wrangling functions 9 | 10 | // BTRFS 11 | #include "crucible/btrfs.h" 12 | 13 | // FIEMAP_* structs and flags 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | namespace crucible { 25 | using namespace std; 26 | 27 | // wrapper around fallocate(...FALLOC_FL_PUNCH_HOLE...) 
28 | void punch_hole(int fd, off_t offset, off_t len); 29 | 30 | struct BtrfsExtentSame { 31 | virtual ~BtrfsExtentSame(); 32 | BtrfsExtentSame(int src_fd, off_t src_offset, off_t src_length); 33 | void add(int fd, uint64_t offset); 34 | virtual void do_ioctl(); 35 | 36 | uint64_t m_logical_offset = 0; 37 | uint64_t m_length = 0; 38 | int m_fd; 39 | vector m_info; 40 | }; 41 | 42 | ostream & operator<<(ostream &os, const btrfs_ioctl_same_extent_info *info); 43 | ostream & operator<<(ostream &os, const btrfs_ioctl_same_args *info); 44 | ostream & operator<<(ostream &os, const BtrfsExtentSame &bes); 45 | 46 | struct BtrfsInodeOffsetRoot { 47 | uint64_t m_inum; 48 | uint64_t m_offset; 49 | uint64_t m_root; 50 | }; 51 | 52 | ostream & operator<<(ostream &os, const BtrfsInodeOffsetRoot &p); 53 | 54 | struct BtrfsDataContainer { 55 | BtrfsDataContainer(size_t size = 64 * 1024); 56 | void *prepare(size_t size); 57 | 58 | size_t get_size() const; 59 | decltype(btrfs_data_container::bytes_left) get_bytes_left() const; 60 | decltype(btrfs_data_container::bytes_missing) get_bytes_missing() const; 61 | decltype(btrfs_data_container::elem_cnt) get_elem_cnt() const; 62 | decltype(btrfs_data_container::elem_missed) get_elem_missed() const; 63 | 64 | ByteVector m_data; 65 | }; 66 | 67 | struct BtrfsIoctlLogicalInoArgs { 68 | BtrfsIoctlLogicalInoArgs(uint64_t logical, size_t buf_size = 16 * 1024 * 1024); 69 | 70 | uint64_t get_flags() const; 71 | void set_flags(uint64_t new_flags); 72 | void set_logical(uint64_t new_logical); 73 | void set_size(uint64_t new_size); 74 | 75 | void do_ioctl(int fd); 76 | bool do_ioctl_nothrow(int fd); 77 | 78 | struct BtrfsInodeOffsetRootSpan { 79 | using iterator = BtrfsInodeOffsetRoot*; 80 | using const_iterator = const BtrfsInodeOffsetRoot*; 81 | size_t size() const; 82 | iterator begin() const; 83 | iterator end() const; 84 | const_iterator cbegin() const; 85 | const_iterator cend() const; 86 | iterator data() const; 87 | void clear(); 88 | 
private: 89 | iterator m_begin = nullptr; 90 | iterator m_end = nullptr; 91 | friend struct BtrfsIoctlLogicalInoArgs; 92 | } m_iors; 93 | private: 94 | size_t m_container_size; 95 | BtrfsDataContainer m_container; 96 | uint64_t m_logical; 97 | uint64_t m_flags = 0; 98 | friend ostream & operator<<(ostream &os, const BtrfsIoctlLogicalInoArgs *p); 99 | }; 100 | 101 | ostream & operator<<(ostream &os, const BtrfsIoctlLogicalInoArgs &p); 102 | 103 | struct BtrfsIoctlInoPathArgs : public btrfs_ioctl_ino_path_args { 104 | BtrfsIoctlInoPathArgs(uint64_t inode, size_t buf_size = 64 * 1024); 105 | virtual void do_ioctl(int fd); 106 | virtual bool do_ioctl_nothrow(int fd); 107 | 108 | size_t m_container_size; 109 | vector m_paths; 110 | }; 111 | 112 | ostream & operator<<(ostream &os, const BtrfsIoctlInoPathArgs &p); 113 | 114 | struct BtrfsIoctlInoLookupArgs : public btrfs_ioctl_ino_lookup_args { 115 | BtrfsIoctlInoLookupArgs(uint64_t objectid); 116 | virtual void do_ioctl(int fd); 117 | virtual bool do_ioctl_nothrow(int fd); 118 | // use objectid = BTRFS_FIRST_FREE_OBJECTID 119 | // this->treeid is the rootid for the path (we get the path too) 120 | }; 121 | 122 | struct BtrfsIoctlDefragRangeArgs : public btrfs_ioctl_defrag_range_args { 123 | BtrfsIoctlDefragRangeArgs(); 124 | virtual void do_ioctl(int fd); 125 | virtual bool do_ioctl_nothrow(int fd); 126 | }; 127 | 128 | ostream & operator<<(ostream &os, const BtrfsIoctlDefragRangeArgs *p); 129 | 130 | struct FiemapExtent : public fiemap_extent { 131 | FiemapExtent(); 132 | FiemapExtent(const fiemap_extent &that); 133 | operator bool() const; 134 | off_t begin() const; 135 | off_t end() const; 136 | }; 137 | 138 | struct Fiemap { 139 | 140 | // because fiemap.h insists on giving FIEMAP_MAX_OFFSET 141 | // a different type from the struct fiemap members 142 | static const uint64_t s_fiemap_max_offset = FIEMAP_MAX_OFFSET; 143 | 144 | // Get entire file 145 | Fiemap(uint64_t start = 0, uint64_t length = s_fiemap_max_offset); 
146 | 147 | void do_ioctl(int fd); 148 | 149 | vector m_extents; 150 | decltype(fiemap::fm_extent_count) m_min_count = (4096 - sizeof(fiemap)) / sizeof(fiemap_extent); 151 | decltype(fiemap::fm_extent_count) m_max_count = 16 * 1024 * 1024 / sizeof(fiemap_extent); 152 | uint64_t m_start; 153 | uint64_t m_length; 154 | // FIEMAP is slow and full of lies. 155 | // This makes FIEMAP even slower, but reduces the lies a little. 156 | decltype(fiemap::fm_flags) m_flags = FIEMAP_FLAG_SYNC; 157 | friend ostream &operator<<(ostream &, const Fiemap &); 158 | }; 159 | 160 | ostream & operator<<(ostream &os, const fiemap_extent *info); 161 | ostream & operator<<(ostream &os, const FiemapExtent &info); 162 | ostream & operator<<(ostream &os, const fiemap *info); 163 | ostream & operator<<(ostream &os, const Fiemap &info); 164 | 165 | string fiemap_extent_flags_ntoa(unsigned long flags); 166 | 167 | // Helper functions 168 | void btrfs_clone_range(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset); 169 | bool btrfs_extent_same(int src_fd, off_t src_offset, off_t src_length, int dst_fd, off_t dst_offset); 170 | 171 | struct BtrfsIoctlSearchHeader : public btrfs_ioctl_search_header { 172 | BtrfsIoctlSearchHeader(); 173 | ByteVector m_data; 174 | size_t set_data(const ByteVector &v, size_t offset); 175 | bool operator<(const BtrfsIoctlSearchHeader &that) const; 176 | }; 177 | 178 | // Perf blames this function for a few percent overhead; move it here so it can be inline 179 | inline bool BtrfsIoctlSearchHeader::operator<(const BtrfsIoctlSearchHeader &that) const 180 | { 181 | return tie(objectid, type, offset, len, transid) < tie(that.objectid, that.type, that.offset, that.len, that.transid); 182 | } 183 | 184 | ostream & operator<<(ostream &os, const btrfs_ioctl_search_header &hdr); 185 | ostream & operator<<(ostream &os, const BtrfsIoctlSearchHeader &hdr); 186 | 187 | struct BtrfsIoctlSearchKey : public btrfs_ioctl_search_key { 188 | 
BtrfsIoctlSearchKey(size_t buf_size = 1024); 189 | bool do_ioctl_nothrow(int fd); 190 | void do_ioctl(int fd); 191 | 192 | // Copy objectid/type/offset so we move forward 193 | void next_min(const BtrfsIoctlSearchHeader& ref); 194 | 195 | // move forward to next object of a single type 196 | void next_min(const BtrfsIoctlSearchHeader& ref, const uint8_t type); 197 | 198 | size_t m_buf_size; 199 | set m_result; 200 | 201 | static thread_local size_t s_calls; 202 | static thread_local size_t s_loops; 203 | static thread_local size_t s_loops_empty; 204 | static thread_local shared_ptr s_debug_ostream; 205 | }; 206 | 207 | ostream & operator<<(ostream &os, const btrfs_ioctl_search_key &key); 208 | ostream & operator<<(ostream &os, const BtrfsIoctlSearchKey &key); 209 | 210 | string btrfs_chunk_type_ntoa(uint64_t type); 211 | string btrfs_search_type_ntoa(unsigned type); 212 | string btrfs_search_objectid_ntoa(uint64_t objectid); 213 | string btrfs_compress_type_ntoa(uint8_t type); 214 | 215 | uint64_t btrfs_get_root_id(int fd); 216 | uint64_t btrfs_get_root_transid(int fd); 217 | 218 | template 219 | const T* 220 | get_struct_ptr(const V &v, size_t offset = 0) 221 | { 222 | THROW_CHECK2(out_of_range, v.size(), offset + sizeof(T), offset + sizeof(T) <= v.size()); 223 | const uint8_t *const data_ptr = v.data(); 224 | return reinterpret_cast(data_ptr + offset); 225 | } 226 | 227 | template 228 | T 229 | btrfs_get_member(T S::* member, V &v, size_t offset = 0) 230 | { 231 | const S *const sp = nullptr; 232 | const T *const spm = &(sp->*member); 233 | const auto member_offset = reinterpret_cast(spm) - reinterpret_cast(sp); 234 | const void *struct_ptr = get_struct_ptr(v, offset + member_offset); 235 | const T unaligned_t = get_unaligned(struct_ptr); 236 | return le_to_cpu(unaligned_t); 237 | } 238 | 239 | struct Statvfs : public statvfs { 240 | Statvfs(); 241 | Statvfs(string path); 242 | Statvfs(int fd); 243 | unsigned long size() const; 244 | unsigned long free() const; 
245 | unsigned long available() const; 246 | }; 247 | 248 | struct BtrfsIoctlFsInfoArgs : public btrfs_ioctl_fs_info_args_v3 { 249 | BtrfsIoctlFsInfoArgs(); 250 | void do_ioctl(int fd); 251 | bool do_ioctl_nothrow(int fd); 252 | uint16_t csum_type() const; 253 | uint16_t csum_size() const; 254 | uint64_t generation() const; 255 | vector fsid() const; 256 | }; 257 | 258 | ostream & operator<<(ostream &os, const BtrfsIoctlFsInfoArgs &a); 259 | }; 260 | 261 | #endif // CRUCIBLE_FS_H 262 | -------------------------------------------------------------------------------- /include/crucible/hexdump.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_HEXDUMP_H 2 | #define CRUCIBLE_HEXDUMP_H 3 | 4 | #include "crucible/string.h" 5 | 6 | #include 7 | 8 | namespace crucible { 9 | using namespace std; 10 | 11 | template 12 | ostream & 13 | hexdump(ostream &os, const V &v) 14 | { 15 | const auto v_size = v.size(); 16 | const uint8_t* const v_data = reinterpret_cast(v.data()); 17 | os << "V { size = " << v_size << ", data:\n"; 18 | for (size_t i = 0; i < v_size; i += 8) { 19 | string hex, ascii; 20 | for (size_t j = i; j < i + 8; ++j) { 21 | if (j < v_size) { 22 | const uint8_t c = v_data[j]; 23 | char buf[8]; 24 | sprintf(buf, "%02x ", c); 25 | hex += buf; 26 | ascii += (c < 32 || c > 126) ? '.' 
: c; 27 | } else { 28 | hex += " "; 29 | ascii += ' '; 30 | } 31 | } 32 | os << astringprintf("\t%08x %s %s\n", i, hex.c_str(), ascii.c_str()); 33 | } 34 | return os << "}"; 35 | } 36 | }; 37 | 38 | #endif // CRUCIBLE_HEXDUMP_H 39 | -------------------------------------------------------------------------------- /include/crucible/limits.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_LIMITS_H 2 | #define CRUCIBLE_LIMITS_H 3 | 4 | #include "crucible/error.h" 5 | 6 | #include 7 | #include 8 | 9 | namespace crucible { 10 | using namespace std; 11 | 12 | template 13 | To 14 | ranged_cast(From f) 15 | { 16 | if (typeid(From) == typeid(To)) { 17 | return f; 18 | } 19 | 20 | To t; 21 | static string f_info = typeid(f).name(); 22 | static string t_info = typeid(t).name(); 23 | 24 | if (numeric_limits::max() > numeric_limits::max() && numeric_limits::max() < numeric_limits::max()) { 25 | THROW_ERROR(out_of_range, 26 | "ranged_cast: can't compare limits of types " << f_info << " and " << t_info << ", template specialization required"); 27 | } 28 | 29 | if (numeric_limits::max() > numeric_limits::max() && f > static_cast(numeric_limits::max())) { 30 | THROW_ERROR(out_of_range, 31 | "ranged_cast: " << f_info << "(" << f << ") out of range of target type " << t_info); 32 | } 33 | 34 | if (!numeric_limits::is_signed && numeric_limits::is_signed && f < 0) { 35 | THROW_ERROR(out_of_range, 36 | "ranged_cast: " << f_info << "(" << f << ") out of range of unsigned target type " << t_info); 37 | } 38 | 39 | t = static_cast(f); 40 | 41 | From f2 = static_cast(t); 42 | if (f2 != f) { 43 | THROW_ERROR(out_of_range, 44 | "ranged_cast: " << f_info << "(" << f << ") -> " << t_info << " failed: result value " << f2); 45 | } 46 | 47 | return t; 48 | } 49 | }; 50 | 51 | #endif // CRUCIBLE_LIMITS_H 52 | -------------------------------------------------------------------------------- /include/crucible/lockset.h: 
-------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_LOCKSET_H 2 | #define CRUCIBLE_LOCKSET_H 3 | 4 | #include "crucible/error.h" 5 | #include "crucible/process.h" 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace crucible { 17 | using namespace std; 18 | 19 | template 20 | class LockSet { 21 | 22 | public: 23 | using set_type = map; 24 | using key_type = typename set_type::key_type; 25 | 26 | private: 27 | 28 | set_type m_set; 29 | mutex m_mutex; 30 | condition_variable m_condvar; 31 | size_t m_max_size = numeric_limits::max(); 32 | 33 | bool full(); 34 | bool locked(const key_type &name); 35 | 36 | class Lock { 37 | LockSet &m_lockset; 38 | key_type m_name; 39 | bool m_locked; 40 | 41 | Lock() = delete; 42 | Lock(const Lock &) = delete; 43 | Lock& operator=(const Lock &) = delete; 44 | Lock(Lock &&that) = delete; 45 | Lock& operator=(Lock &&that) = delete; 46 | public: 47 | ~Lock(); 48 | Lock(LockSet &lockset, const key_type &name, bool start_locked = true); 49 | void lock(); 50 | void unlock(); 51 | bool try_lock(); 52 | }; 53 | 54 | public: 55 | ~LockSet(); 56 | LockSet() = default; 57 | 58 | void lock(const key_type &name); 59 | void unlock(const key_type &name); 60 | bool try_lock(const key_type &name); 61 | size_t size(); 62 | bool empty(); 63 | set_type copy(); 64 | 65 | void max_size(size_t max); 66 | 67 | class LockHandle { 68 | shared_ptr m_lock; 69 | 70 | public: 71 | LockHandle(LockSet &lockset, const key_type &name, bool start_locked = true) : 72 | m_lock(make_shared(lockset, name, start_locked)) {} 73 | void lock() { m_lock->lock(); } 74 | void unlock() { m_lock->unlock(); } 75 | bool try_lock() { return m_lock->try_lock(); } 76 | }; 77 | 78 | LockHandle make_lock(const key_type &name, bool start_locked = true); 79 | }; 80 | 81 | template 82 | LockSet::~LockSet() 83 | { 84 | if (!m_set.empty()) { 85 | cerr << "ERROR: " << 
m_set.size() << " locked items still in set at destruction" << endl; 86 | } 87 | // We will crash later. Might as well crash now. 88 | assert(m_set.empty()); 89 | } 90 | 91 | template 92 | bool 93 | LockSet::full() 94 | { 95 | return m_set.size() >= m_max_size; 96 | } 97 | 98 | template 99 | bool 100 | LockSet::locked(const key_type &name) 101 | { 102 | return m_set.count(name); 103 | } 104 | 105 | template 106 | void 107 | LockSet::max_size(size_t s) 108 | { 109 | m_max_size = s; 110 | } 111 | 112 | template 113 | void 114 | LockSet::lock(const key_type &name) 115 | { 116 | unique_lock lock(m_mutex); 117 | while (full() || locked(name)) { 118 | m_condvar.wait(lock); 119 | } 120 | auto rv = m_set.insert(make_pair(name, gettid())); 121 | THROW_CHECK0(runtime_error, rv.second); 122 | } 123 | 124 | template 125 | bool 126 | LockSet::try_lock(const key_type &name) 127 | { 128 | unique_lock lock(m_mutex); 129 | if (full() || locked(name)) { 130 | return false; 131 | } 132 | auto rv = m_set.insert(make_pair(name, gettid())); 133 | THROW_CHECK1(runtime_error, name, rv.second); 134 | return true; 135 | } 136 | 137 | template 138 | void 139 | LockSet::unlock(const key_type &name) 140 | { 141 | unique_lock lock(m_mutex); 142 | auto erase_count = m_set.erase(name); 143 | m_condvar.notify_all(); 144 | THROW_CHECK1(invalid_argument, erase_count, erase_count == 1); 145 | } 146 | 147 | template 148 | size_t 149 | LockSet::size() 150 | { 151 | unique_lock lock(m_mutex); 152 | return m_set.size(); 153 | } 154 | 155 | template 156 | bool 157 | LockSet::empty() 158 | { 159 | unique_lock lock(m_mutex); 160 | return m_set.empty(); 161 | } 162 | 163 | template 164 | typename LockSet::set_type 165 | LockSet::copy() 166 | { 167 | unique_lock lock(m_mutex); 168 | // Make temporary copy of set while protected by mutex 169 | auto rv = m_set; 170 | // Return temporary copy after releasing lock 171 | return rv; 172 | } 173 | 174 | template 175 | void 176 | LockSet::Lock::lock() 177 | { 178 | 
if (m_locked) return; 179 | m_lockset.lock(m_name); 180 | m_locked = true; 181 | } 182 | 183 | template 184 | bool 185 | LockSet::Lock::try_lock() 186 | { 187 | if (m_locked) return true; 188 | m_locked = m_lockset.try_lock(m_name); 189 | return m_locked; 190 | } 191 | 192 | template 193 | void 194 | LockSet::Lock::unlock() 195 | { 196 | if (!m_locked) return; 197 | m_lockset.unlock(m_name); 198 | m_locked = false; 199 | } 200 | 201 | template 202 | LockSet::Lock::~Lock() 203 | { 204 | if (m_locked) { 205 | unlock(); 206 | } 207 | } 208 | 209 | template 210 | LockSet::Lock::Lock(LockSet &lockset, const key_type &name, bool start_locked) : 211 | m_lockset(lockset), 212 | m_name(name), 213 | m_locked(false) 214 | { 215 | if (start_locked) { 216 | lock(); 217 | } 218 | } 219 | 220 | template 221 | typename LockSet::LockHandle 222 | LockSet::make_lock(const key_type &name, bool start_locked) 223 | { 224 | return LockHandle(*this, name, start_locked); 225 | } 226 | 227 | } 228 | 229 | #endif // CRUCIBLE_LOCKSET_H 230 | -------------------------------------------------------------------------------- /include/crucible/multilock.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_MULTILOCK_H 2 | #define CRUCIBLE_MULTILOCK_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace crucible { 11 | using namespace std; 12 | 13 | class MultiLocker { 14 | mutex m_mutex; 15 | condition_variable m_cv; 16 | map m_counters; 17 | bool m_do_locking = true; 18 | 19 | class LockHandle { 20 | const string m_type; 21 | MultiLocker &m_parent; 22 | bool m_locked = false; 23 | void set_locked(bool state); 24 | public: 25 | ~LockHandle(); 26 | LockHandle(const string &type, MultiLocker &parent); 27 | friend class MultiLocker; 28 | }; 29 | 30 | friend class LockHandle; 31 | 32 | bool is_lock_available(const string &type); 33 | void put_lock(const string &type); 34 | shared_ptr get_lock_private(const string 
&type); 35 | public: 36 | static shared_ptr get_lock(const string &type); 37 | static void enable_locking(bool enabled); 38 | }; 39 | 40 | } 41 | 42 | #endif // CRUCIBLE_MULTILOCK_H 43 | -------------------------------------------------------------------------------- /include/crucible/namedptr.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_NAMEDPTR_H 2 | #define CRUCIBLE_NAMEDPTR_H 3 | 4 | #include "crucible/lockset.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace crucible { 13 | using namespace std; 14 | 15 | /// A thread-safe container for RAII of shared resources with unique names. 16 | 17 | template 18 | class NamedPtr { 19 | public: 20 | /// The name in "NamedPtr" 21 | using Key = tuple; 22 | /// A shared pointer to the named object with ownership 23 | /// tracking that erases the object's stored name when 24 | /// the last shared pointer is destroyed. 25 | using Ptr = shared_ptr; 26 | /// A function that translates a name into a shared pointer to an object. 27 | using Func = function; 28 | private: 29 | struct Value; 30 | using WeakPtr = weak_ptr; 31 | using MapType = map; 32 | struct MapRep { 33 | MapType m_map; 34 | mutex m_mutex; 35 | }; 36 | using MapPtr = shared_ptr; 37 | /// Container for Return pointers. Destructor removes entry from map. 38 | struct Value { 39 | Ptr m_ret_ptr; 40 | MapPtr m_map_rep; 41 | Key m_ret_key; 42 | ~Value(); 43 | Value(Ptr&& ret_ptr, const Key &key, const MapPtr &map_rep); 44 | }; 45 | 46 | Func m_fn; 47 | MapPtr m_map_rep = make_shared(); 48 | LockSet m_lockset; 49 | 50 | Ptr lookup_item(const Key &k); 51 | Ptr insert_item(Func fn, Arguments... args); 52 | 53 | public: 54 | NamedPtr(Func f = Func()); 55 | 56 | void func(Func f); 57 | 58 | Ptr operator()(Arguments... args); 59 | 60 | Ptr insert(const Ptr &r, Arguments... args); 61 | }; 62 | 63 | /// Construct NamedPtr map and define a function to turn a name into a pointer. 
64 | template 65 | NamedPtr::NamedPtr(Func f) : 66 | m_fn(f) 67 | { 68 | } 69 | 70 | /// Construct a Value wrapper: the value to store, the argument key to store the value under, 71 | /// and a pointer to the map. Everything needed to remove the key from the map when the 72 | /// last NamedPtr is deleted. NamedPtr then releases its own pointer to the value, which 73 | /// may or may not trigger deletion there. 74 | template 75 | NamedPtr::Value::Value(Ptr&& ret_ptr, const Key &key, const MapPtr &map_rep) : 76 | m_ret_ptr(ret_ptr), 77 | m_map_rep(map_rep), 78 | m_ret_key(key) 79 | { 80 | } 81 | 82 | /// Destroy a Value wrapper: remove a dead Key from the map, then let the member destructors 83 | /// do the rest. The Key might be in the map and not dead, so leave it alone in that case. 84 | template 85 | NamedPtr::Value::~Value() 86 | { 87 | unique_lock lock(m_map_rep->m_mutex); 88 | // We are called from the shared_ptr destructor, so we 89 | // know that the weak_ptr in the map has already expired; 90 | // however, if another thread already noticed that the 91 | // map entry expired while we were waiting for the lock, 92 | // the other thread will have already replaced the map 93 | // entry with a pointer to some other object, and that 94 | // object now owns the map entry. So we do a key lookup 95 | // here instead of storing a map iterator, and only erase 96 | // "our" map entry if it exists and is expired. The other 97 | // thread would have done the same for us if the race had 98 | // a different winner. 99 | const auto found = m_map_rep->m_map.find(m_ret_key); 100 | if (found != m_map_rep->m_map.end() && found->second.expired()) { 101 | m_map_rep->m_map.erase(found); 102 | } 103 | } 104 | 105 | /// Find a Return by key and fetch a strong Return pointer. 106 | /// Ignore Keys that have expired weak pointers. 
107 | template 108 | typename NamedPtr::Ptr 109 | NamedPtr::lookup_item(const Key &k) 110 | { 111 | // Must be called with lock held 112 | const auto found = m_map_rep->m_map.find(k); 113 | if (found != m_map_rep->m_map.end()) { 114 | // Get the strong pointer back 115 | const auto rv = found->second.lock(); 116 | if (rv) { 117 | // Have strong pointer. Return value that shares map entry. 118 | return shared_ptr(rv, rv->m_ret_ptr.get()); 119 | } 120 | // Have expired weak pointer. Another thread is trying to delete it, 121 | // but we got the lock first. Leave the map entry alone here. 122 | // The other thread will erase it, or we will put a different entry 123 | // in the same map entry. 124 | } 125 | return Ptr(); 126 | } 127 | 128 | /// Insert the Return value of calling Func(Arguments...). 129 | /// If the value already exists in the map, return the existing value. 130 | /// If another thread is already running Func(Arguments...) then this thread 131 | /// will block until the other thread finishes inserting the Return in the 132 | /// map, and both threads will return the same Return value. 133 | template 134 | typename NamedPtr::Ptr 135 | NamedPtr::insert_item(Func fn, Arguments... args) 136 | { 137 | Key k(args...); 138 | 139 | // Is it already in the map? 140 | unique_lock lock_lookup(m_map_rep->m_mutex); 141 | auto rv = lookup_item(k); 142 | if (rv) { 143 | return rv; 144 | } 145 | 146 | // Release map lock and acquire key lock 147 | lock_lookup.unlock(); 148 | const auto key_lock = m_lockset.make_lock(k); 149 | 150 | // Did item appear in map while we were waiting for key? 151 | lock_lookup.lock(); 152 | rv = lookup_item(k); 153 | if (rv) { 154 | return rv; 155 | } 156 | 157 | // We now hold key and index locks, but item not in map (or expired). 
158 | // Release map lock so other threads can use the map 159 | lock_lookup.unlock(); 160 | 161 | // Call the function and create a new Value outside of the map 162 | const auto new_value_ptr = make_shared(fn(args...), k, m_map_rep); 163 | 164 | // Function must return a non-null pointer 165 | THROW_CHECK0(runtime_error, new_value_ptr->m_ret_ptr); 166 | 167 | // Reacquire index lock for map insertion. We still hold the key lock. 168 | // Use a different lock object to make exceptions unlock in the right order 169 | unique_lock lock_insert(m_map_rep->m_mutex); 170 | 171 | // Insert return value in map or overwrite existing 172 | // empty or expired weak_ptr value. 173 | WeakPtr &new_item_ref = m_map_rep->m_map[k]; 174 | 175 | // We searched the map while holding both locks and 176 | // found no entry or an expired weak_ptr; therefore, no 177 | // other thread could have inserted a new non-expired 178 | // weak_ptr, and the weak_ptr in the map is expired 179 | // or was default-constructed as a nullptr. So if the 180 | // new_item_ref is not expired, we have a bug we need 181 | // to find and fix. 182 | assert(new_item_ref.expired()); 183 | 184 | // Update the map slot we are sure is empty 185 | new_item_ref = new_value_ptr; 186 | 187 | // Return shared_ptr to Return using strong pointer's reference counter 188 | return shared_ptr(new_value_ptr, new_value_ptr->m_ret_ptr.get()); 189 | 190 | // Release map lock, then key lock 191 | } 192 | 193 | /// (Re)define a function to turn a name into a pointer. 194 | template 195 | void 196 | NamedPtr::func(Func func) 197 | { 198 | unique_lock lock(m_map_rep->m_mutex); 199 | m_fn = func; 200 | } 201 | 202 | /// Convert a name into a pointer using the configured function. 203 | template 204 | typename NamedPtr::Ptr 205 | NamedPtr::operator()(Arguments... args) 206 | { 207 | return insert_item(m_fn, args...); 208 | } 209 | 210 | /// Insert a pointer that has already been created under the 211 | /// given name. 
Useful for inserting a pointer to a derived 212 | /// class when the name doesn't contain all of the information 213 | /// required for the object, or when the Return is already known by 214 | /// some cheaper method than calling the function. 215 | template 216 | typename NamedPtr::Ptr 217 | NamedPtr::insert(const Ptr &r, Arguments... args) 218 | { 219 | THROW_CHECK0(invalid_argument, r); 220 | return insert_item([&](Arguments...) { return r; }, args...); 221 | } 222 | 223 | } 224 | 225 | #endif // CRUCIBLE_NAMEDPTR_H 226 | -------------------------------------------------------------------------------- /include/crucible/ntoa.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_NTOA_H 2 | #define CRUCIBLE_NTOA_H 3 | 4 | #include 5 | 6 | namespace crucible { 7 | using namespace std; 8 | 9 | struct bits_ntoa_table { 10 | unsigned long long n; 11 | unsigned long long mask; 12 | const char *a; 13 | }; 14 | 15 | string bits_ntoa(unsigned long long n, const bits_ntoa_table *a); 16 | 17 | }; 18 | 19 | // Combinations of bits (list multiple-bit entries first) 20 | #define NTOA_TABLE_ENTRY_BITS(x) { .n = (x), .mask = (x), .a = (#x) } 21 | 22 | // Enumerations (entire value matches all bits) 23 | #define NTOA_TABLE_ENTRY_ENUM(x) { .n = (x), .mask = ~0ULL, .a = (#x) } 24 | 25 | // End of table (sorry, C++ didn't get C99's compound literals, so we have to write out all the member names) 26 | #define NTOA_TABLE_ENTRY_END() { .n = 0, .mask = 0, .a = nullptr } 27 | 28 | #endif // CRUCIBLE_NTOA_H 29 | -------------------------------------------------------------------------------- /include/crucible/openat2.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_OPENAT2_H 2 | #define CRUCIBLE_OPENAT2_H 3 | 4 | #include 5 | 6 | // Compatibility for building on old libc for new kernel 7 | #include 8 | 9 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) 10 | 11 | #include 12 | 13 | 
#else 14 | 15 | #include 16 | 17 | #ifndef RESOLVE_NO_XDEV 18 | #define RESOLVE_NO_XDEV 1 19 | 20 | // RESOLVE_NO_XDEV was there from the beginning of openat2, 21 | // so if that's missing, so is open_how 22 | 23 | struct open_how { 24 | __u64 flags; 25 | __u64 mode; 26 | __u64 resolve; 27 | }; 28 | #endif 29 | 30 | #ifndef RESOLVE_NO_MAGICLINKS 31 | #define RESOLVE_NO_MAGICLINKS 2 32 | #endif 33 | #ifndef RESOLVE_NO_SYMLINKS 34 | #define RESOLVE_NO_SYMLINKS 4 35 | #endif 36 | #ifndef RESOLVE_BENEATH 37 | #define RESOLVE_BENEATH 8 38 | #endif 39 | #ifndef RESOLVE_IN_ROOT 40 | #define RESOLVE_IN_ROOT 16 41 | #endif 42 | 43 | #endif // Linux version >= v5.6 44 | 45 | extern "C" { 46 | 47 | /// Weak symbol to support libc with no syscall wrapper 48 | int openat2(int dirfd, const char *pathname, struct open_how *how, size_t size) throw(); 49 | 50 | }; 51 | 52 | #endif // CRUCIBLE_OPENAT2_H 53 | -------------------------------------------------------------------------------- /include/crucible/path.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_PATH_H 2 | #define CRUCIBLE_PATH_H 3 | 4 | #include 5 | 6 | namespace crucible { 7 | using namespace std; 8 | 9 | string basename(string s); 10 | string join(string dir, string base); 11 | }; 12 | 13 | #endif // CRUCIBLE_PATH_H 14 | -------------------------------------------------------------------------------- /include/crucible/pool.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_POOL_H 2 | #define CRUCIBLE_POOL_H 3 | 4 | #include "crucible/error.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace crucible { 12 | using namespace std; 13 | 14 | /// Storage for reusable anonymous objects that are too expensive to create and/or destroy frequently 15 | 16 | template 17 | class Pool { 18 | public: 19 | using Ptr = shared_ptr; 20 | using Generator = function; 21 | using Checker = function; 22 | 
23 | ~Pool(); 24 | Pool(Generator f = Generator(), Checker checkin = Checker(), Checker checkout = Checker()); 25 | 26 | /// Function to create new objects when Pool is empty 27 | void generator(Generator f); 28 | 29 | /// Optional function called when objects exit the pool (user handle is created and returned to user) 30 | void checkout(Checker f); 31 | 32 | /// Optional function called when objects enter the pool (last user handle is destroyed) 33 | void checkin(Checker f); 34 | 35 | /// Pool() returns a handle to an object of type shared_ptr 36 | Ptr operator()(); 37 | 38 | /// Destroy all objects in Pool that are not in use 39 | void clear(); 40 | 41 | private: 42 | struct PoolRep { 43 | list m_list; 44 | mutex m_mutex; 45 | Checker m_checkin; 46 | PoolRep(Checker checkin); 47 | }; 48 | struct Handle { 49 | weak_ptr m_list_rep; 50 | Ptr m_ret_ptr; 51 | Handle(shared_ptr list_rep, Ptr ret_ptr); 52 | ~Handle(); 53 | }; 54 | 55 | Generator m_fn; 56 | Checker m_checkout; 57 | shared_ptr m_list_rep; 58 | }; 59 | 60 | template 61 | Pool::PoolRep::PoolRep(Checker checkin) : 62 | m_checkin(checkin) 63 | { 64 | } 65 | 66 | template 67 | Pool::Pool(Generator f, Checker checkin, Checker checkout) : 68 | m_fn(f), 69 | m_checkout(checkout), 70 | m_list_rep(make_shared(checkin)) 71 | { 72 | } 73 | 74 | template 75 | Pool::~Pool() 76 | { 77 | auto list_rep = m_list_rep; 78 | unique_lock lock(list_rep->m_mutex); 79 | m_list_rep.reset(); 80 | } 81 | 82 | template 83 | Pool::Handle::Handle(shared_ptr list_rep, Ptr ret_ptr) : 84 | m_list_rep(list_rep), 85 | m_ret_ptr(ret_ptr) 86 | { 87 | } 88 | 89 | template 90 | Pool::Handle::~Handle() 91 | { 92 | // Checkin prepares the object for storage and reuse. 93 | // Neither of those will happen if there is no Pool. 94 | // If the Pool was destroyed, just let m_ret_ptr expire. 
95 | auto list_rep = m_list_rep.lock(); 96 | if (!list_rep) { 97 | return; 98 | } 99 | 100 | unique_lock lock(list_rep->m_mutex); 101 | // If a checkin function is defined, call it 102 | auto checkin = list_rep->m_checkin; 103 | if (checkin) { 104 | lock.unlock(); 105 | checkin(m_ret_ptr); 106 | lock.lock(); 107 | } 108 | 109 | // Place object back in pool 110 | list_rep->m_list.push_front(m_ret_ptr); 111 | } 112 | 113 | template 114 | typename Pool::Ptr 115 | Pool::operator()() 116 | { 117 | Ptr rv; 118 | 119 | // Do we have an object in the pool we can return instead? 120 | unique_lock lock(m_list_rep->m_mutex); 121 | if (m_list_rep->m_list.empty()) { 122 | // No, release pool lock and call the generator function 123 | lock.unlock(); 124 | 125 | // Create new value 126 | rv = m_fn(); 127 | } else { 128 | rv = m_list_rep->m_list.front(); 129 | m_list_rep->m_list.pop_front(); 130 | 131 | // Release lock so we don't deadlock with Handle destructor 132 | lock.unlock(); 133 | } 134 | 135 | // rv now points to a T object that is not in the list. 136 | THROW_CHECK0(runtime_error, rv); 137 | 138 | // Construct a shared_ptr for Handle which will refcount the Handle objects 139 | // and reinsert the T into the Pool when the last Handle is destroyed. 140 | auto hv = make_shared(m_list_rep, rv); 141 | 142 | // If a checkout function is defined, call it 143 | if (m_checkout) { 144 | m_checkout(rv); 145 | } 146 | 147 | // Return an alias shared_ptr for the T using Handle's refcount. 
148 | return Ptr(hv, rv.get()); 149 | } 150 | 151 | template 152 | void 153 | Pool::generator(Generator func) 154 | { 155 | unique_lock lock(m_list_rep->m_mutex); 156 | m_fn = func; 157 | } 158 | 159 | template 160 | void 161 | Pool::checkin(Checker func) 162 | { 163 | unique_lock lock(m_list_rep->m_mutex); 164 | m_list_rep->m_checkin = func; 165 | } 166 | 167 | template 168 | void 169 | Pool::checkout(Checker func) 170 | { 171 | unique_lock lock(m_list_rep->m_mutex); 172 | m_checkout = func; 173 | } 174 | 175 | template 176 | void 177 | Pool::clear() 178 | { 179 | unique_lock lock(m_list_rep->m_mutex); 180 | m_list_rep->m_list.clear(); 181 | } 182 | 183 | } 184 | 185 | #endif // CRUCIBLE_POOL_H 186 | -------------------------------------------------------------------------------- /include/crucible/process.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_PROCESS_H 2 | #define CRUCIBLE_PROCESS_H 3 | 4 | #include "crucible/resource.h" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | extern "C" { 14 | pid_t gettid() throw(); 15 | }; 16 | 17 | namespace crucible { 18 | using namespace std; 19 | 20 | // Like thread, but for processes. 21 | // TODO: thread has a few warts for this usage: 22 | // - can't create one from its native_handle, 23 | // - can't destroy one without joining/detaching it first 24 | // - can't implement detach correctly without crossing threshold of insanity 25 | // - WTF is native_handle() not const? 26 | struct Process { 27 | // These parts are for compatibility with std::thread 28 | 29 | using id = ::pid_t; 30 | using native_handle_type = ::pid_t; 31 | 32 | ~Process(); 33 | Process(); 34 | 35 | template 36 | Process(Fn fn, Args... 
args) : 37 | Process() 38 | { 39 | do_fork(function([&]() { return fn(args...); })); 40 | } 41 | 42 | Process(const Process &) = delete; 43 | Process(Process &&move_from); 44 | 45 | bool joinable(); 46 | void detach(); 47 | native_handle_type native_handle(); 48 | id get_id(); 49 | 50 | // Modified thread members for Process 51 | 52 | // join() calls waitpid(), returns status or exception (std::thread returns void) 53 | using status_type = int; 54 | status_type join(); 55 | 56 | // New members for Process 57 | 58 | // kill() terminates a process in the usual Unix way 59 | void kill(int sig = SIGTERM); 60 | 61 | // take over ownership of an already-forked native process handle 62 | Process(id pid); 63 | 64 | private: 65 | id m_pid; 66 | 67 | void do_fork(function); 68 | }; 69 | 70 | template <> 71 | struct ResourceTraits { 72 | Process::id get_key(const Process &res) const { return (const_cast(res)).native_handle(); } 73 | shared_ptr make_resource(const Process::id &id) const { return make_shared(id); } 74 | bool is_null_key(const Process::id &key) const { return !key; } 75 | Process::id get_null_key() const { return 0; } 76 | }; 77 | 78 | typedef ResourceHandle Pid; 79 | 80 | double getloadavg1(); 81 | double getloadavg5(); 82 | double getloadavg15(); 83 | 84 | string signal_ntoa(int sig); 85 | } 86 | #endif // CRUCIBLE_PROCESS_H 87 | -------------------------------------------------------------------------------- /include/crucible/progress.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_PROGRESS_H 2 | #define CRUCIBLE_PROGRESS_H 3 | 4 | #include "crucible/error.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace crucible { 14 | using namespace std; 15 | 16 | /// A class to track progress of multiple workers using only two points: 17 | /// the first and last incomplete state. The first incomplete 18 | /// state can be recorded as a checkpoint to resume later on. 
19 | /// The last completed state is the starting point for workers that 20 | /// need something to do. 21 | template 22 | class ProgressTracker { 23 | struct ProgressTrackerState; 24 | class ProgressHolderState; 25 | public: 26 | using value_type = T; 27 | using ProgressHolder = shared_ptr; 28 | 29 | /// Create ProgressTracker with initial begin and end state 'v'. 30 | ProgressTracker(const value_type &v); 31 | 32 | /// The first incomplete state. This is not "sticky", 33 | /// it will revert to the end state if there are no 34 | /// items in progress. 35 | value_type begin() const; 36 | 37 | /// The last incomplete state. This is "sticky", 38 | /// it can only increase and never decrease. 39 | value_type end() const; 40 | 41 | ProgressHolder hold(const value_type &v); 42 | 43 | friend class ProgressHolderState; 44 | 45 | private: 46 | struct ProgressTrackerState { 47 | using key_type = pair; 48 | mutex m_mutex; 49 | set m_in_progress; 50 | value_type m_begin; 51 | value_type m_end; 52 | }; 53 | 54 | class ProgressHolderState { 55 | shared_ptr m_state; 56 | const value_type m_value; 57 | using key_type = typename ProgressTrackerState::key_type; 58 | public: 59 | ProgressHolderState(shared_ptr state, const value_type &v); 60 | ~ProgressHolderState(); 61 | value_type get() const; 62 | }; 63 | 64 | 65 | shared_ptr m_state; 66 | }; 67 | 68 | template 69 | typename ProgressTracker::value_type 70 | ProgressTracker::begin() const 71 | { 72 | unique_lock lock(m_state->m_mutex); 73 | return m_state->m_begin; 74 | } 75 | 76 | template 77 | typename ProgressTracker::value_type 78 | ProgressTracker::end() const 79 | { 80 | unique_lock lock(m_state->m_mutex); 81 | return m_state->m_end; 82 | } 83 | 84 | template 85 | typename ProgressTracker::value_type 86 | ProgressTracker::ProgressHolderState::get() const 87 | { 88 | return m_value; 89 | } 90 | 91 | template 92 | ProgressTracker::ProgressTracker(const ProgressTracker::value_type &t) : 93 | m_state(make_shared()) 94 | { 95 | 
m_state->m_begin = t; 96 | m_state->m_end = t; 97 | } 98 | 99 | template 100 | ProgressTracker::ProgressHolderState::ProgressHolderState(shared_ptr state, const value_type &v) : 101 | m_state(state), 102 | m_value(v) 103 | { 104 | unique_lock lock(m_state->m_mutex); 105 | const auto rv = m_state->m_in_progress.insert(key_type(m_value, this)); 106 | THROW_CHECK1(runtime_error, m_value, rv.second); 107 | // Set the beginning to the first existing in-progress item 108 | m_state->m_begin = m_state->m_in_progress.begin()->first; 109 | // If this value is past the end, move the end, but don't go backwards 110 | if (m_state->m_end < m_value) { 111 | m_state->m_end = m_value; 112 | } 113 | } 114 | 115 | template 116 | ProgressTracker::ProgressHolderState::~ProgressHolderState() 117 | { 118 | unique_lock lock(m_state->m_mutex); 119 | const auto rv = m_state->m_in_progress.erase(key_type(m_value, this)); 120 | // THROW_CHECK2(runtime_error, m_value, rv, rv == 1); 121 | assert(rv == 1); 122 | if (m_state->m_in_progress.empty()) { 123 | // If we made the list empty, then m_begin == m_end 124 | m_state->m_begin = m_state->m_end; 125 | } else { 126 | // If we deleted the first element, then m_begin = current first element 127 | m_state->m_begin = m_state->m_in_progress.begin()->first; 128 | } 129 | } 130 | 131 | template 132 | shared_ptr::ProgressHolderState> 133 | ProgressTracker::hold(const value_type &v) 134 | { 135 | return make_shared(m_state, v); 136 | } 137 | 138 | } 139 | 140 | #endif // CRUCIBLE_PROGRESS_H 141 | -------------------------------------------------------------------------------- /include/crucible/seeker.h: -------------------------------------------------------------------------------- 1 | #ifndef _CRUCIBLE_SEEKER_H_ 2 | #define _CRUCIBLE_SEEKER_H_ 3 | 4 | #include "crucible/error.h" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #if 0 12 | #include 13 | #include 14 | #define DINIT(__x) __x 15 | #define DLOG(__x) do { logs << __x << std::endl; } 
while (false) 16 | #define DOUT(__err) do { __err << logs.str(); } while (false) 17 | #else 18 | #define DINIT(__x) do {} while (false) 19 | #define DLOG(__x) do {} while (false) 20 | #define DOUT(__x) do {} while (false) 21 | #endif 22 | 23 | namespace crucible { 24 | using namespace std; 25 | 26 | // Requirements for Container Fetch(Pos lower, Pos upper): 27 | // - fetches objects in Pos order, starting from lower (must be >= lower) 28 | // - must return upper if present, may or may not return objects after that 29 | // - returns a container of Pos objects with begin(), end(), rbegin(), rend() 30 | // - container must iterate over objects in Pos order 31 | // - uniqueness of Pos objects not required 32 | // - should store the underlying data as a side effect 33 | // 34 | // Requirements for Pos: 35 | // - should behave like an unsigned integer type 36 | // - must have specializations in numeric_limits for digits, max(), min() 37 | // - must support +, -, -=, and related operators 38 | // - must support <, <=, ==, and related operators 39 | // - must support Pos / 2 (only) 40 | // 41 | // Requirements for seek_backward: 42 | // - calls Fetch to search Pos space near target_pos 43 | // - if no key exists with value <= target_pos, returns the minimum Pos value 44 | // - returns the highest key value <= target_pos 45 | // - returned key value may not be part of most recent Fetch result 46 | // - 1 loop iteration when target_pos exists 47 | 48 | template 49 | Pos 50 | seek_backward(Pos const target_pos, Fetch fetch, Pos min_step = 1, size_t max_loops = numeric_limits::max()) 51 | { 52 | DINIT(ostringstream logs); 53 | try { 54 | static const Pos end_pos = numeric_limits::max(); 55 | // TBH this probably won't work if begin_pos != 0, i.e. any signed type 56 | static const Pos begin_pos = numeric_limits::min(); 57 | // Run a binary search looking for the highest key below target_pos. 58 | // Initial upper bound of the search is target_pos. 
59 | // Find initial lower bound by doubling the size of the range until a key below target_pos 60 | // is found, or the lower bound reaches the beginning of the search space. 61 | // If the lower bound search reaches the beginning of the search space without finding a key, 62 | // return the beginning of the search space; otherwise, perform a binary search between 63 | // the bounds now established. 64 | Pos lower_bound = 0; 65 | Pos upper_bound = target_pos; 66 | bool found_low = false; 67 | Pos probe_pos = target_pos; 68 | // We need one loop for each bit of the search space to find the lower bound, 69 | // one loop for each bit of the search space to find the upper bound, 70 | // and one extra loop to confirm the boundary is correct. 71 | for (size_t loop_count = min(numeric_limits::digits * size_t(2) + 1, max_loops); loop_count; --loop_count) { 72 | DLOG("fetch(probe_pos = " << probe_pos << ", target_pos = " << target_pos << ")"); 73 | auto result = fetch(probe_pos, target_pos); 74 | const Pos low_pos = result.empty() ? end_pos : *result.begin(); 75 | const Pos high_pos = result.empty() ? end_pos : *result.rbegin(); 76 | DLOG(" = " << low_pos << ".." << high_pos); 77 | // check for correct behavior of the fetch function 78 | THROW_CHECK2(out_of_range, high_pos, probe_pos, probe_pos <= high_pos); 79 | THROW_CHECK2(out_of_range, low_pos, probe_pos, probe_pos <= low_pos); 80 | THROW_CHECK2(out_of_range, low_pos, high_pos, low_pos <= high_pos); 81 | if (!found_low) { 82 | // if target_pos == end_pos then we will find it in every empty result set, 83 | // so in that case we force the lower bound to be lower than end_pos 84 | if ((target_pos == end_pos) ? 
(low_pos < target_pos) : (low_pos <= target_pos)) { 85 | // found a lower bound, set the low bound there and switch to binary search 86 | found_low = true; 87 | lower_bound = low_pos; 88 | DLOG("found_low = true, lower_bound = " << lower_bound); 89 | } else { 90 | // still looking for lower bound 91 | // if probe_pos was begin_pos then we can stop with no result 92 | if (probe_pos == begin_pos) { 93 | DLOG("return: probe_pos == begin_pos " << begin_pos); 94 | return begin_pos; 95 | } 96 | // double the range size, or use the distance between objects found so far 97 | THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); 98 | // probe_pos <= upper_bound was just checked, so this subtraction cannot underflow 99 | const Pos want_delta = max(upper_bound - probe_pos, min_step); 100 | // avoid underflowing the beginning of the search space 101 | const Pos have_delta = min(want_delta, probe_pos - begin_pos); 102 | THROW_CHECK2(out_of_range, want_delta, have_delta, have_delta <= want_delta); 103 | // move probe and try again 104 | probe_pos = probe_pos - have_delta; 105 | DLOG("probe_pos " << probe_pos << " = probe_pos - have_delta " << have_delta << " (want_delta " << want_delta << ")"); 106 | continue; 107 | } 108 | } 109 | if (low_pos <= target_pos && target_pos <= high_pos) { 110 | // have keys on either side of target_pos in result 111 | // search from the high end until we find the highest key at or below target 112 | for (auto i = result.rbegin(); i != result.rend(); ++i) { 113 | // more correctness checking for fetch 114 | THROW_CHECK2(out_of_range, *i, probe_pos, probe_pos <= *i); 115 | if (*i <= target_pos) { 116 | DLOG("return: *i " << *i << " <= target_pos " << target_pos); 117 | return *i; 118 | } 119 | } 120 | // if the list is empty then low_pos = high_pos = end_pos 121 | // if target_pos = end_pos also, then we will execute the loop 122 | // above but not find any matching entries. 
123 | THROW_CHECK0(runtime_error, result.empty()); 124 | } 125 | if (target_pos <= low_pos) { 126 | // results are all too high, so probe_pos..low_pos is too high 127 | // lower the high bound to the probe pos 128 | upper_bound = probe_pos; 129 | DLOG("upper_bound = probe_pos " << probe_pos); 130 | } 131 | if (high_pos < target_pos) { 132 | // results are all too low, so probe_pos..high_pos is too low 133 | // raise the low bound to the high_pos 134 | DLOG("lower_bound = high_pos " << high_pos); 135 | lower_bound = high_pos; 136 | } 137 | // compute a new probe pos at the middle of the range and try again 138 | // we can't have a zero-size range here because we would not have set found_low yet 139 | THROW_CHECK2(out_of_range, lower_bound, upper_bound, lower_bound <= upper_bound); 140 | const Pos delta = (upper_bound - lower_bound) / 2; 141 | probe_pos = lower_bound + delta; 142 | if (delta < 1) { 143 | // nothing can exist in the range (lower_bound, upper_bound) 144 | // and an object is known to exist at lower_bound 145 | DLOG("return: probe_pos == lower_bound " << lower_bound); 146 | return lower_bound; 147 | } 148 | THROW_CHECK2(out_of_range, lower_bound, probe_pos, lower_bound <= probe_pos); 149 | THROW_CHECK2(out_of_range, upper_bound, probe_pos, probe_pos <= upper_bound); 150 | DLOG("loop: lower_bound " << lower_bound << ", probe_pos " << probe_pos << ", upper_bound " << upper_bound); 151 | } 152 | THROW_ERROR(runtime_error, "FIXME: should not reach this line: " 153 | "lower_bound..upper_bound " << lower_bound << ".." << upper_bound << ", " 154 | "found_low " << found_low); 155 | } catch (...) 
{ 156 | DOUT(cerr); 157 | throw; 158 | } 159 | } 160 | } 161 | 162 | #endif // _CRUCIBLE_SEEKER_H_ 163 | 164 | -------------------------------------------------------------------------------- /include/crucible/string.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_STRING_H 2 | #define CRUCIBLE_STRING_H 3 | 4 | #include "crucible/error.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace crucible { 12 | using namespace std; 13 | 14 | // int->hex conversion with sprintf 15 | string to_hex(uint64_t i); 16 | 17 | // hex->int conversion with stoull 18 | uint64_t from_hex(const string &s); 19 | 20 | // asprintf with string output and exceptions 21 | template 22 | string 23 | astringprintf(const char *fmt, Args... args) 24 | { 25 | char *rv = NULL; 26 | DIE_IF_MINUS_ONE(asprintf(&rv, fmt, args...)); 27 | string rv_string = rv; 28 | free(rv); 29 | return rv_string; 30 | } 31 | 32 | template 33 | string 34 | astringprintf(const string &fmt, Args... 
args) 35 | { 36 | return astringprintf(fmt.c_str(), args...); 37 | } 38 | 39 | vector split(string delim, string s); 40 | 41 | // Shut up and give me the difference between two pointers 42 | template 43 | ptrdiff_t 44 | pointer_distance(const P1 *a, const P2 *b) 45 | { 46 | return reinterpret_cast(a) - reinterpret_cast(b); 47 | } 48 | }; 49 | 50 | #endif // CRUCIBLE_STRING_H 51 | -------------------------------------------------------------------------------- /include/crucible/table.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_TABLE_H 2 | #define CRUCIBLE_TABLE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace crucible { 14 | namespace Table { 15 | using namespace std; 16 | 17 | using Content = function; 18 | const size_t endpos = numeric_limits::max(); 19 | 20 | Content Fill(const char c); 21 | Content Text(const string& s); 22 | 23 | template 24 | Content Number(const T& num) 25 | { 26 | ostringstream oss; 27 | oss << num; 28 | return Text(oss.str()); 29 | } 30 | 31 | class Cell { 32 | Content m_content; 33 | public: 34 | Cell(const Content &fn = [](size_t, size_t) { return string(); } ); 35 | Cell& operator=(const Content &fn); 36 | string text(size_t width, size_t height) const; 37 | }; 38 | 39 | class Dimension { 40 | size_t m_next_pos = 0; 41 | vector m_elements; 42 | friend class Table; 43 | size_t at(size_t) const; 44 | public: 45 | size_t size() const; 46 | size_t insert(size_t pos); 47 | void erase(size_t pos); 48 | }; 49 | 50 | class Table { 51 | Dimension m_rows, m_cols; 52 | map, Cell> m_cells; 53 | string m_left = "|"; 54 | string m_mid = "|"; 55 | string m_right = "|"; 56 | public: 57 | Dimension &rows(); 58 | const Dimension& rows() const; 59 | Dimension &cols(); 60 | const Dimension& cols() const; 61 | Cell& at(size_t row, size_t col); 62 | const Cell& at(size_t row, size_t col) const; 63 | template void 
insert_row(size_t pos, const T& container); 64 | template void insert_col(size_t pos, const T& container); 65 | void left(const string &s); 66 | void mid(const string &s); 67 | void right(const string &s); 68 | const string& left() const; 69 | const string& mid() const; 70 | const string& right() const; 71 | }; 72 | 73 | ostream& operator<<(ostream &os, const Table &table); 74 | 75 | template 76 | void 77 | Table::insert_row(size_t pos, const T& container) 78 | { 79 | const auto new_pos = m_rows.insert(pos); 80 | size_t col = 0; 81 | for (const auto &i : container) { 82 | if (col >= cols().size()) { 83 | cols().insert(col); 84 | } 85 | at(new_pos, col++) = i; 86 | } 87 | } 88 | 89 | template 90 | void 91 | Table::insert_col(size_t pos, const T& container) 92 | { 93 | const auto new_pos = m_cols.insert(pos); 94 | size_t row = 0; 95 | for (const auto &i : container) { 96 | if (row >= rows().size()) { 97 | rows().insert(row); 98 | } 99 | at(row++, new_pos) = i; 100 | } 101 | } 102 | 103 | } 104 | } 105 | 106 | #endif // CRUCIBLE_TABLE_H 107 | -------------------------------------------------------------------------------- /include/crucible/task.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_TASK_H 2 | #define CRUCIBLE_TASK_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace crucible { 11 | using namespace std; 12 | 13 | class TaskState; 14 | 15 | using TaskId = uint64_t; 16 | 17 | /// A unit of work to be scheduled by TaskMaster. 18 | class Task { 19 | shared_ptr m_task_state; 20 | 21 | Task(shared_ptr pts); 22 | 23 | public: 24 | 25 | /// Create empty Task object. 26 | Task() = default; 27 | 28 | /// Create Task object containing closure and description. 29 | Task(string title, function exec_fn); 30 | 31 | /// Schedule Task for at most one future execution. 32 | /// May run Task in current thread or in other thread. 33 | /// May run Task before or after returning. 
34 | /// Schedules Task at the end of the global execution queue. 35 | /// 36 | /// Only one instance of a Task may execute at a time. 37 | /// If a Task is already scheduled, run() does nothing. 38 | /// If a Task is already running when a new instance reaches 39 | /// the front of the queue, the new instance will execute 40 | /// after the current instance exits. 41 | void run() const; 42 | 43 | /// Schedule task to run when no other Task is available. 44 | void idle() const; 45 | 46 | /// Schedule Task to run after this Task has run or 47 | /// been destroyed. 48 | void append(const Task &task) const; 49 | 50 | /// Schedule Task to run after this Task has run or 51 | /// been destroyed, in Task ID order. 52 | void insert(const Task &task) const; 53 | 54 | /// Describe Task as text. 55 | string title() const; 56 | 57 | /// Returns currently executing task if called from exec_fn. 58 | /// Usually used to reschedule the currently executing Task. 59 | static Task current_task(); 60 | 61 | /// Returns number of currently existing Task objects. 62 | /// Good for spotting leaks. 
63 | static size_t instance_count(); 64 | 65 | /// Ordering operator for containers 66 | bool operator<(const Task &that) const; 67 | 68 | /// Null test 69 | operator bool() const; 70 | 71 | /// Unique non-repeating(ish) ID for task 72 | TaskId id() const; 73 | }; 74 | 75 | ostream &operator<<(ostream &os, const Task &task); 76 | 77 | class TaskMaster { 78 | public: 79 | /// Blocks until the running thread count reaches this number 80 | static void set_thread_count(size_t threads); 81 | 82 | /// Sets minimum thread count when load average tracking enabled 83 | static void set_thread_min_count(size_t min_threads); 84 | 85 | /// Calls set_thread_count with default 86 | static void set_thread_count(); 87 | 88 | /// Creates thread to track load average and adjust thread count dynamically 89 | static void set_loadavg_target(double target); 90 | 91 | /// Writes the current non-executing Task queue 92 | static ostream & print_queue(ostream &); 93 | 94 | /// Writes the current executing Task for each worker 95 | static ostream & print_workers(ostream &); 96 | 97 | /// Gets the current number of queued Tasks 98 | static size_t get_queue_count(); 99 | 100 | /// Gets the current number of active workers 101 | static size_t get_thread_count(); 102 | 103 | /// Gets the current load tracking statistics 104 | struct LoadStats { 105 | /// Current load extracted from last two 5-second load average samples 106 | double current_load; 107 | /// Target thread count computed from previous thread count and current load 108 | double thread_target; 109 | /// Load average for last 60 seconds 110 | double loadavg; 111 | }; 112 | static LoadStats get_current_load(); 113 | 114 | /// Drop the current queue and discard new Tasks without 115 | /// running them. Currently executing tasks are not 116 | /// affected (use set_thread_count(0) to wait for those 117 | /// to complete). 118 | static void cancel(); 119 | 120 | /// Stop running any new Tasks. 
All existing 121 | /// Consumer threads will exit. Does not affect queue. 122 | /// Does not wait for threads to exit. Reversible. 123 | static void pause(bool paused = true); 124 | }; 125 | 126 | class BarrierState; 127 | 128 | /// Barrier delays the execution of one or more Tasks. 129 | /// The Tasks are executed when the last shared reference to the 130 | /// BarrierState is released. Copies of Barrier objects refer 131 | /// to the same Barrier state. 132 | class Barrier { 133 | shared_ptr m_barrier_state; 134 | 135 | public: 136 | Barrier(); 137 | 138 | /// Schedule a task for execution when last Barrier is released. 139 | void insert_task(Task t); 140 | 141 | /// Release this reference to the barrier state. 142 | /// Last released reference executes the task. 143 | /// Barrier can only be released once, after which the 144 | /// object can no longer be used. 145 | void release(); 146 | }; 147 | 148 | class ExclusionLock { 149 | shared_ptr m_owner; 150 | ExclusionLock(shared_ptr owner); 151 | friend class Exclusion; 152 | public: 153 | /// Explicit default constructor because we have other kinds 154 | ExclusionLock() = default; 155 | 156 | /// Release this Lock immediately and permanently 157 | void release(); 158 | 159 | /// Test for locked state 160 | operator bool() const; 161 | }; 162 | 163 | class Exclusion { 164 | mutex m_mutex; 165 | weak_ptr m_owner; 166 | 167 | public: 168 | /// Attempt to obtain a Lock. If successful, current Task 169 | /// owns the Lock until the ExclusionLock is released 170 | /// (it is the ExclusionLock that owns the lock, so it can 171 | /// be passed to other Tasks or threads, but this is not 172 | /// recommended practice). 173 | /// If not successful, the argument Task is appended to the 174 | /// task that currently holds the lock. Current task is 175 | /// expected to immediately release any other ExclusionLock 176 | /// objects it holds, and exit its Task function. 
177 | ExclusionLock try_lock(const Task &task); 178 | 179 | }; 180 | 181 | /// Wrapper around pthread_setname_np which handles length limits 182 | void pthread_setname(const string &name); 183 | 184 | /// Wrapper around pthread_getname_np for symmetry 185 | string pthread_getname(); 186 | } 187 | 188 | #endif // CRUCIBLE_TASK_H 189 | -------------------------------------------------------------------------------- /include/crucible/time.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_TIME_H 2 | #define CRUCIBLE_TIME_H 3 | 4 | #include "crucible/error.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace crucible { 13 | 14 | double nanosleep(double secs); 15 | 16 | class Timer { 17 | chrono::high_resolution_clock::time_point m_start; 18 | 19 | public: 20 | Timer(); 21 | double age() const; 22 | chrono::high_resolution_clock::time_point get() const; 23 | double report(int precision = 1000) const; 24 | void reset(); 25 | double lap(); 26 | bool operator<(double d) const; 27 | bool operator>(double d) const; 28 | }; 29 | 30 | ostream &operator<<(ostream &os, const Timer &t); 31 | 32 | class RateLimiter { 33 | Timer m_timer; 34 | double m_rate; 35 | double m_burst; 36 | double m_tokens = 0.0; 37 | mutable mutex m_mutex; 38 | 39 | void update_tokens(); 40 | RateLimiter() = delete; 41 | public: 42 | RateLimiter(double rate, double burst); 43 | RateLimiter(double rate); 44 | void sleep_for(double cost = 1.0); 45 | double sleep_time(double cost = 1.0); 46 | bool is_ready(); 47 | void borrow(double cost = 1.0); 48 | void rate(double new_rate); 49 | double rate() const; 50 | }; 51 | 52 | class RateEstimator { 53 | mutable mutex m_mutex; 54 | mutable condition_variable m_condvar; 55 | Timer m_timer; 56 | double m_num = 0.0; 57 | double m_den = 0.0; 58 | uint64_t m_last_count = numeric_limits::max(); 59 | Timer m_last_update; 60 | const double m_decay = 0.99; 61 | Timer m_last_decay; 
62 | double m_min_delay; 63 | double m_max_delay; 64 | 65 | chrono::duration duration_unlocked(uint64_t relative_count) const; 66 | chrono::high_resolution_clock::time_point time_point_unlocked(uint64_t absolute_count) const; 67 | double rate_unlocked() const; 68 | pair ratio_unlocked() const; 69 | void update_unlocked(uint64_t new_count); 70 | public: 71 | RateEstimator(double min_delay = 1, double max_delay = 3600); 72 | 73 | // Block until count reached 74 | void wait_for(uint64_t new_count_relative) const; 75 | void wait_until(uint64_t new_count_absolute) const; 76 | 77 | // Computed rates and ratios 78 | double rate() const; 79 | pair ratio() const; 80 | 81 | // Inspect raw num/den 82 | pair raw() const; 83 | 84 | // Write count 85 | void update(uint64_t new_count); 86 | 87 | // Ignore counts that go backwards 88 | void update_monotonic(uint64_t new_count); 89 | 90 | // Read count 91 | uint64_t count() const; 92 | 93 | /// Increment count (like update(count() + more), but atomic) 94 | void increment(uint64_t more = 1); 95 | 96 | // Convert counts to chrono types 97 | chrono::high_resolution_clock::time_point time_point(uint64_t absolute_count) const; 98 | chrono::duration duration(uint64_t relative_count) const; 99 | 100 | // Polling delay until count reached (limited by min/max delay) 101 | double seconds_for(uint64_t new_count_relative) const; 102 | double seconds_until(uint64_t new_count_absolute) const; 103 | }; 104 | 105 | ostream & 106 | operator<<(ostream &os, const RateEstimator &re); 107 | 108 | } 109 | 110 | #endif // CRUCIBLE_TIME_H 111 | -------------------------------------------------------------------------------- /include/crucible/uname.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_UNAME_H 2 | #define CRUCIBLE_UNAME_H 3 | 4 | #include 5 | 6 | namespace crucible { 7 | using namespace std; 8 | 9 | struct Uname : public utsname { 10 | Uname(); 11 | }; 12 | } 13 | 14 | #endif 15 | 
-------------------------------------------------------------------------------- /include/crucible/version.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_VERSION_H 2 | #define CRUCIBLE_VERSION_H 3 | 4 | namespace crucible { 5 | extern const char *VERSION; 6 | } 7 | 8 | #endif CRUCIBLE_VERSION_H 9 | -------------------------------------------------------------------------------- /lib/.gitignore: -------------------------------------------------------------------------------- 1 | .version.* 2 | -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | default: libcrucible.a 2 | %.a: Makefile 3 | 4 | CRUCIBLE_OBJS = \ 5 | bytevector.o \ 6 | btrfs-tree.o \ 7 | chatter.o \ 8 | city.o \ 9 | cleanup.o \ 10 | crc64.o \ 11 | error.o \ 12 | extentwalker.o \ 13 | fd.o \ 14 | fs.o \ 15 | multilock.o \ 16 | ntoa.o \ 17 | openat2.o \ 18 | path.o \ 19 | process.o \ 20 | string.o \ 21 | table.o \ 22 | task.o \ 23 | time.o \ 24 | uname.o \ 25 | 26 | include ../makeflags 27 | -include ../localconf 28 | include ../Defines.mk 29 | 30 | BEES_LDFLAGS = $(LDFLAGS) 31 | 32 | configure.h: configure.h.in 33 | $(TEMPLATE_COMPILER) 34 | 35 | %.dep: %.cc configure.h Makefile 36 | $(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $< 37 | 38 | include $(CRUCIBLE_OBJS:%.o=%.dep) 39 | 40 | %.o: %.cc ../makeflags 41 | $(CXX) $(BEES_CXXFLAGS) -o $@ -c $< 42 | 43 | libcrucible.a: $(CRUCIBLE_OBJS) 44 | $(AR) rcs $@ $^ 45 | -------------------------------------------------------------------------------- /lib/bytevector.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/bytevector.h" 2 | 3 | #include "crucible/error.h" 4 | #include "crucible/hexdump.h" 5 | #include "crucible/string.h" 6 | 7 | #include 8 | 9 | namespace crucible { 10 | using namespace std; 11 | 12 | 
ByteVector::iterator 13 | ByteVector::begin() const 14 | { 15 | unique_lock lock(m_mutex); 16 | return m_ptr.get(); 17 | } 18 | 19 | ByteVector::iterator 20 | ByteVector::end() const 21 | { 22 | unique_lock lock(m_mutex); 23 | return m_ptr.get() + m_size; 24 | } 25 | 26 | size_t 27 | ByteVector::size() const 28 | { 29 | return m_size; 30 | } 31 | 32 | bool 33 | ByteVector::empty() const 34 | { 35 | return !m_ptr || !m_size; 36 | } 37 | 38 | void 39 | ByteVector::clear() 40 | { 41 | unique_lock lock(m_mutex); 42 | m_ptr.reset(); 43 | m_size = 0; 44 | } 45 | 46 | ByteVector::value_type& 47 | ByteVector::operator[](size_t index) const 48 | { 49 | unique_lock lock(m_mutex); 50 | return m_ptr.get()[index]; 51 | } 52 | 53 | ByteVector::ByteVector(const ByteVector &that) 54 | { 55 | unique_lock lock(that.m_mutex); 56 | m_ptr = that.m_ptr; 57 | m_size = that.m_size; 58 | } 59 | 60 | ByteVector& 61 | ByteVector::operator=(const ByteVector &that) 62 | { 63 | // If &that == this, there's no need to do anything, but 64 | // especially don't try to lock the same mutex twice. 
65 | if (&m_mutex != &that.m_mutex) { 66 | unique_lock lock_this(m_mutex, defer_lock); 67 | unique_lock lock_that(that.m_mutex, defer_lock); 68 | lock(lock_this, lock_that); 69 | m_ptr = that.m_ptr; 70 | m_size = that.m_size; 71 | } 72 | return *this; 73 | } 74 | 75 | ByteVector::ByteVector(const ByteVector &that, size_t start, size_t length) 76 | { 77 | THROW_CHECK0(out_of_range, that.m_ptr); 78 | THROW_CHECK2(out_of_range, start, that.m_size, start <= that.m_size); 79 | THROW_CHECK2(out_of_range, start + length, that.m_size + length, start + length <= that.m_size + length); 80 | m_ptr = Pointer(that.m_ptr, that.m_ptr.get() + start); 81 | m_size = length; 82 | } 83 | 84 | ByteVector 85 | ByteVector::at(size_t start, size_t length) const 86 | { 87 | return ByteVector(*this, start, length); 88 | } 89 | 90 | ByteVector::value_type& 91 | ByteVector::at(size_t size) const 92 | { 93 | unique_lock lock(m_mutex); 94 | THROW_CHECK0(out_of_range, m_ptr); 95 | THROW_CHECK2(out_of_range, size, m_size, size < m_size); 96 | return m_ptr.get()[size]; 97 | } 98 | 99 | static 100 | void * 101 | bv_allocate(size_t size) 102 | { 103 | #ifdef BEES_VALGRIND 104 | // XXX: only do this to shut up valgrind 105 | return calloc(1, size); 106 | #else 107 | return malloc(size); 108 | #endif 109 | } 110 | 111 | ByteVector::ByteVector(size_t size) 112 | { 113 | m_ptr = Pointer(static_cast(bv_allocate(size)), free); 114 | // bad_alloc doesn't fit THROW_CHECK's template 115 | THROW_CHECK0(runtime_error, m_ptr); 116 | m_size = size; 117 | } 118 | 119 | ByteVector::ByteVector(iterator begin, iterator end, size_t min_size) 120 | { 121 | const size_t size = end - begin; 122 | const size_t alloc_size = max(size, min_size); 123 | m_ptr = Pointer(static_cast(bv_allocate(alloc_size)), free); 124 | THROW_CHECK0(runtime_error, m_ptr); 125 | m_size = alloc_size; 126 | memcpy(m_ptr.get(), begin, size); 127 | } 128 | 129 | bool 130 | ByteVector::operator==(const ByteVector &that) const 131 | { 132 | 
unique_lock lock_this(m_mutex, defer_lock); 133 | unique_lock lock_that(that.m_mutex, defer_lock); 134 | lock(lock_this, lock_that); 135 | if (!m_ptr) { 136 | return !that.m_ptr; 137 | } 138 | if (!that.m_ptr) { 139 | return false; 140 | } 141 | if (m_size != that.m_size) { 142 | return false; 143 | } 144 | if (m_ptr.get() == that.m_ptr.get()) { 145 | return true; 146 | } 147 | return !memcmp(m_ptr.get(), that.m_ptr.get(), m_size); 148 | } 149 | 150 | void 151 | ByteVector::erase(iterator begin, iterator end) 152 | { 153 | unique_lock lock(m_mutex); 154 | const size_t size = end - begin; 155 | if (!size) return; 156 | THROW_CHECK0(out_of_range, m_ptr); 157 | const iterator my_begin = m_ptr.get(); 158 | const iterator my_end = my_begin + m_size; 159 | THROW_CHECK4(out_of_range, my_begin, begin, my_end, end, my_begin == begin || my_end == end); 160 | if (begin == my_begin) { 161 | if (end == my_end) { 162 | m_size = 0; 163 | m_ptr.reset(); 164 | return; 165 | } 166 | m_ptr = Pointer(m_ptr, end); 167 | } 168 | m_size -= size; 169 | } 170 | 171 | void 172 | ByteVector::erase(iterator begin) 173 | { 174 | erase(begin, begin + 1); 175 | } 176 | 177 | ByteVector::value_type* 178 | ByteVector::data() const 179 | { 180 | unique_lock lock(m_mutex); 181 | return m_ptr.get(); 182 | } 183 | 184 | ostream& 185 | operator<<(ostream &os, const ByteVector &bv) { 186 | hexdump(os, bv); 187 | return os; 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /lib/chatter.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/chatter.h" 2 | #include "crucible/error.h" 3 | #include "crucible/path.h" 4 | #include "crucible/process.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | namespace crucible { 16 | using namespace std; 17 | 18 | static shared_ptr> chatter_names; 19 | static const char *SPACETAB = " \t"; 20 | static 
bool add_prefix_timestamp = true; 21 | static bool add_prefix_level = true; 22 | 23 | static 24 | void 25 | init_chatter_names() 26 | { 27 | if (!chatter_names.get()) { 28 | chatter_names.reset(new set); 29 | const char *sp = ::getenv("CRUCIBLE_CHATTER"); 30 | if (sp) { 31 | cerr << "CRUCIBLE_CHATTER = '" << sp << "'" << endl; 32 | string s(sp); 33 | while (!s.empty()) { 34 | s.erase(0, s.find_first_not_of(SPACETAB)); 35 | if (s.empty()) { 36 | break; 37 | } 38 | size_t last = s.find_first_of(SPACETAB); 39 | string first_word = s.substr(0, last); 40 | cerr << "\t'" << first_word << "'" << endl; 41 | chatter_names->insert(first_word); 42 | s.erase(0, last); 43 | } 44 | } 45 | } 46 | } 47 | 48 | Chatter::Chatter(int loglevel, string name, ostream &os) 49 | : m_loglevel(loglevel), m_name(name), m_os(os) 50 | { 51 | } 52 | 53 | void 54 | Chatter::enable_timestamp(bool prefix_timestamp) 55 | { 56 | add_prefix_timestamp = prefix_timestamp; 57 | } 58 | 59 | void 60 | Chatter::enable_level(bool prefix_level) 61 | { 62 | add_prefix_level = prefix_level; 63 | } 64 | 65 | Chatter::~Chatter() 66 | { 67 | ostringstream header_stream; 68 | 69 | if (add_prefix_timestamp) { 70 | time_t ltime; 71 | DIE_IF_MINUS_ONE(time(<ime)); 72 | struct tm ltm; 73 | DIE_IF_ZERO(localtime_r(<ime, <m)); 74 | 75 | char buf[1024]; 76 | DIE_IF_ZERO(strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", <m)); 77 | 78 | header_stream << buf; 79 | header_stream << " " << getpid() << "." << gettid(); 80 | if (add_prefix_level) { 81 | header_stream << "<" << m_loglevel << ">"; 82 | } 83 | if (!m_name.empty()) { 84 | header_stream << " " << m_name; 85 | } 86 | } else { 87 | if (add_prefix_level) { 88 | header_stream << "<" << m_loglevel << ">"; 89 | } 90 | header_stream << (m_name.empty() ? 
"thread" : m_name); 91 | header_stream << "[" << gettid() << "]"; 92 | } 93 | 94 | header_stream << ": "; 95 | 96 | string out = m_oss.str(); 97 | string header = header_stream.str(); 98 | 99 | string::size_type start = 0; 100 | while (start < out.size()) { 101 | size_t end_line = out.find_first_of("\n", start); 102 | if (end_line != string::npos) { 103 | assert(out[end_line] == '\n'); 104 | size_t end = end_line; 105 | m_os << (header + out.substr(start, end - start) + "\n") << flush; 106 | start = end_line + 1; 107 | } else { 108 | m_os << (header + out.substr(start) + "\n") << flush; 109 | start = out.size(); 110 | } 111 | } 112 | } 113 | 114 | Chatter::Chatter(Chatter &&c) 115 | : m_loglevel(c.m_loglevel), m_name(c.m_name), m_os(c.m_os), m_oss(c.m_oss.str()) 116 | { 117 | c.m_oss.str(""); 118 | } 119 | 120 | set ChatterBox::s_boxes; 121 | 122 | set& ChatterBox::all_boxes() 123 | { 124 | return s_boxes; 125 | } 126 | 127 | ChatterBox::ChatterBox(string file, int line, string pretty_function, ostream &os) 128 | : m_file(basename(file)), m_line(line), m_pretty_function(pretty_function), m_enabled(false), m_os(os) 129 | { 130 | s_boxes.insert(this); 131 | init_chatter_names(); 132 | if (chatter_names->find(m_file) != chatter_names->end()) { 133 | m_enabled = true; 134 | } else if (chatter_names->find(m_pretty_function) != chatter_names->end()) { 135 | m_enabled = true; 136 | } else if (!chatter_names->empty()) { 137 | cerr << "CRUCIBLE_CHATTER does not list '" << m_file << "' or '" << m_pretty_function << "'" << endl; 138 | } 139 | (void)m_line; // not implemented yet 140 | // cerr << "ChatterBox " << reinterpret_cast(this) << " constructed" << endl; 141 | } 142 | 143 | ChatterBox::~ChatterBox() 144 | { 145 | s_boxes.erase(this); 146 | // cerr << "ChatterBox " << reinterpret_cast(this) << " destructed" << endl; 147 | } 148 | 149 | void 150 | ChatterBox::set_enable(bool en) 151 | { 152 | m_enabled = en; 153 | } 154 | 155 | ChatterUnwinder::ChatterUnwinder(function 
f) : 156 | m_func(f) 157 | { 158 | } 159 | 160 | ChatterUnwinder::~ChatterUnwinder() 161 | { 162 | if (current_exception()) { 163 | m_func(); 164 | } 165 | } 166 | 167 | }; 168 | -------------------------------------------------------------------------------- /lib/cleanup.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/cleanup.h" 2 | 3 | namespace crucible { 4 | 5 | Cleanup::Cleanup(function func) : 6 | m_cleaner(func) 7 | { 8 | } 9 | 10 | Cleanup::~Cleanup() 11 | { 12 | if (m_cleaner) { 13 | m_cleaner(); 14 | } 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lib/configure.h.in: -------------------------------------------------------------------------------- 1 | #ifndef _CONFIGURE_H 2 | 3 | #define ETC_PREFIX "@ETC_PREFIX@" 4 | 5 | #define _CONFIGURE_H 6 | #endif 7 | -------------------------------------------------------------------------------- /lib/crc64.cc: -------------------------------------------------------------------------------- 1 | /* crc64.c -- compute CRC-64 2 | * Copyright (C) 2013 Mark Adler 3 | * Version 1.4 16 Dec 2013 Mark Adler 4 | */ 5 | 6 | /* 7 | This software is provided 'as-is', without any express or implied 8 | warranty. In no event will the author be held liable for any damages 9 | arising from the use of this software. 10 | 11 | Permission is granted to anyone to use this software for any purpose, 12 | including commercial applications, and to alter it and redistribute it 13 | freely, subject to the following restrictions: 14 | 15 | 1. The origin of this software must not be misrepresented; you must not 16 | claim that you wrote the original software. If you use this software 17 | in a product, an acknowledgment in the product documentation would be 18 | appreciated but is not required. 19 | 2. Altered source versions must be plainly marked as such, and must not be 20 | misrepresented as being the original software. 
21 | 3. This notice may not be removed or altered from any source distribution. 22 | 23 | Mark Adler 24 | madler@alumni.caltech.edu 25 | */ 26 | 27 | /* Substantially modified by Paul Jones for usage in bees */ 28 | 29 | #include "crucible/crc64.h" 30 | 31 | #define POLY64REV 0xd800000000000000ULL 32 | 33 | namespace crucible { 34 | 35 | static bool init = false; 36 | static uint64_t CRCTable[8][256]; 37 | 38 | static void init_crc64_table() 39 | { 40 | if (!init) { 41 | uint64_t crc; 42 | 43 | // Generate CRCs for all single byte sequences 44 | for (int n = 0; n < 256; n++) { 45 | uint64_t part = n; 46 | for (int j = 0; j < 8; j++) { 47 | if (part & 1) { 48 | part = (part >> 1) ^ POLY64REV; 49 | } else { 50 | part >>= 1; 51 | } 52 | } 53 | CRCTable[0][n] = part; 54 | } 55 | 56 | // Generate nested CRC table for slice-by-8 lookup 57 | for (int n = 0; n < 256; n++) { 58 | crc = CRCTable[0][n]; 59 | for (int k = 1; k < 8; k++) { 60 | crc = CRCTable[0][crc & 0xff] ^ (crc >> 8); 61 | CRCTable[k][n] = crc; 62 | } 63 | } 64 | init = true; 65 | } 66 | } 67 | 68 | uint64_t 69 | Digest::CRC::crc64(const void *p, size_t len) 70 | { 71 | init_crc64_table(); 72 | const unsigned char *next = static_cast(p); 73 | uint64_t crc = 0; 74 | 75 | // Process individual bytes until we reach an 8-byte aligned pointer 76 | while (len && (reinterpret_cast(next) & 7) != 0) { 77 | crc = CRCTable[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); 78 | len--; 79 | } 80 | 81 | // Fast middle processing, 8 bytes (aligned!) 
per loop 82 | while (len >= 8) { 83 | crc ^= *(reinterpret_cast< const uint64_t *>(next)); 84 | crc = CRCTable[7][crc & 0xff] ^ 85 | CRCTable[6][(crc >> 8) & 0xff] ^ 86 | CRCTable[5][(crc >> 16) & 0xff] ^ 87 | CRCTable[4][(crc >> 24) & 0xff] ^ 88 | CRCTable[3][(crc >> 32) & 0xff] ^ 89 | CRCTable[2][(crc >> 40) & 0xff] ^ 90 | CRCTable[1][(crc >> 48) & 0xff] ^ 91 | CRCTable[0][crc >> 56]; 92 | next += 8; 93 | len -= 8; 94 | } 95 | 96 | // Process remaining bytes (can't be larger than 8) 97 | while (len) { 98 | crc = CRCTable[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); 99 | len--; 100 | } 101 | 102 | return crc; 103 | } 104 | 105 | 106 | }; 107 | -------------------------------------------------------------------------------- /lib/error.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/error.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace crucible { 9 | using namespace std; 10 | 11 | static 12 | string 13 | analyze_exception(const exception &e) 14 | { 15 | // Let's ignore all the potential memory allocation exceptions for now, K? 16 | ostringstream oss; 17 | 18 | int status; 19 | char *realname = abi::__cxa_demangle(typeid(e).name(), 0, 0, &status); 20 | oss << "exception type "; 21 | // This is questionable since anything that would cause 22 | // cxa_demangle to fail will probably cause an exception anyway. 
23 | if (realname) { 24 | oss << realname; 25 | free(realname); 26 | } else { 27 | oss << typeid(e).name(); 28 | } 29 | oss << ": " << e.what(); 30 | return oss.str(); 31 | } 32 | 33 | // FIXME: could probably avoid some of these levels of indirection 34 | static 35 | function current_catch_explainer = [](string s) { 36 | cerr << s << endl; 37 | }; 38 | 39 | void 40 | set_catch_explainer(function f) 41 | { 42 | current_catch_explainer = f; 43 | } 44 | 45 | void 46 | default_catch_explainer(string s) 47 | { 48 | current_catch_explainer(s); 49 | } 50 | 51 | int 52 | catch_all(const function &f, const function &explainer) 53 | { 54 | try { 55 | f(); 56 | return 0; 57 | } catch (const exception &e) { 58 | explainer(analyze_exception(e)); 59 | return 1; 60 | } 61 | } 62 | 63 | void 64 | catch_and_explain(const function &f, const function &explainer) 65 | { 66 | try { 67 | f(); 68 | } catch (const exception &e) { 69 | explainer(analyze_exception(e)); 70 | throw; 71 | } 72 | } 73 | 74 | }; 75 | -------------------------------------------------------------------------------- /lib/multilock.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/multilock.h" 2 | 3 | #include "crucible/error.h" 4 | 5 | namespace crucible { 6 | using namespace std; 7 | 8 | MultiLocker::LockHandle::LockHandle(const string &type, MultiLocker &parent) : 9 | m_type(type), 10 | m_parent(parent) 11 | { 12 | } 13 | 14 | void 15 | MultiLocker::LockHandle::set_locked(const bool state) 16 | { 17 | m_locked = state; 18 | } 19 | 20 | MultiLocker::LockHandle::~LockHandle() 21 | { 22 | if (m_locked) { 23 | m_parent.put_lock(m_type); 24 | m_locked = false; 25 | } 26 | } 27 | 28 | bool 29 | MultiLocker::is_lock_available(const string &type) 30 | { 31 | for (const auto &i : m_counters) { 32 | if (i.second != 0 && i.first != type) { 33 | return false; 34 | } 35 | } 36 | return true; 37 | } 38 | 39 | void 40 | MultiLocker::put_lock(const string &type) 41 | { 
42 | unique_lock lock(m_mutex); 43 | auto &counter = m_counters[type]; 44 | THROW_CHECK2(runtime_error, type, counter, counter > 0); 45 | --counter; 46 | if (counter == 0) { 47 | m_cv.notify_all(); 48 | } 49 | } 50 | 51 | shared_ptr 52 | MultiLocker::get_lock_private(const string &type) 53 | { 54 | unique_lock lock(m_mutex); 55 | m_counters.insert(make_pair(type, size_t(0))); 56 | while (!is_lock_available(type)) { 57 | m_cv.wait(lock); 58 | } 59 | const auto rv = make_shared(type, *this); 60 | ++m_counters[type]; 61 | rv->set_locked(true); 62 | return rv; 63 | } 64 | 65 | static MultiLocker s_process_instance; 66 | 67 | shared_ptr 68 | MultiLocker::get_lock(const string &type) 69 | { 70 | if (s_process_instance.m_do_locking) { 71 | return s_process_instance.get_lock_private(type); 72 | } else { 73 | return shared_ptr(); 74 | } 75 | } 76 | 77 | void 78 | MultiLocker::enable_locking(const bool enabled) 79 | { 80 | s_process_instance.m_do_locking = enabled; 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /lib/ntoa.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/ntoa.h" 2 | 3 | #include "crucible/error.h" 4 | #include "crucible/string.h" 5 | 6 | namespace crucible { 7 | using namespace std; 8 | 9 | string bits_ntoa(unsigned long long n, const bits_ntoa_table *table) 10 | { 11 | string out; 12 | while (n && table->a) { 13 | // No bits in n outside of mask 14 | THROW_CHECK2(invalid_argument, table->mask, table->n, ((~table->mask) & table->n) == 0); 15 | if ( (n & table->mask) == table->n) { 16 | if (!out.empty()) { 17 | out += "|"; 18 | } 19 | out += table->a; 20 | n &= ~(table->mask); 21 | } 22 | ++table; 23 | } 24 | if (n) { 25 | if (!out.empty()) { 26 | out += "|"; 27 | } 28 | out += to_hex(n); 29 | } 30 | if (out.empty()) { 31 | out = "0"; 32 | } 33 | return out; 34 | } 35 | 36 | 37 | }; 38 | 
-------------------------------------------------------------------------------- /lib/openat2.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/openat2.h" 2 | 3 | #include 4 | 5 | // Compatibility for building on old libc for new kernel 6 | 7 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) 8 | 9 | // Every arch that defines this uses 437, except Alpha, where 437 is 10 | // mq_getsetattr. 11 | 12 | #ifndef SYS_openat2 13 | #ifdef __alpha__ 14 | #define SYS_openat2 547 15 | #else 16 | #define SYS_openat2 437 17 | #endif 18 | #endif 19 | 20 | #endif // Linux version >= v5.6 21 | 22 | #include 23 | #include 24 | 25 | extern "C" { 26 | 27 | int 28 | __attribute__((weak)) 29 | openat2(int const dirfd, const char *const pathname, struct open_how *const how, size_t const size) 30 | throw() 31 | { 32 | #ifdef SYS_openat2 33 | return syscall(SYS_openat2, dirfd, pathname, how, size); 34 | #else 35 | errno = ENOSYS; 36 | return -1; 37 | #endif 38 | } 39 | 40 | }; 41 | -------------------------------------------------------------------------------- /lib/path.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/path.h" 2 | 3 | #include "crucible/error.h" 4 | 5 | namespace crucible { 6 | using namespace std; 7 | 8 | string 9 | basename(string s) 10 | { 11 | size_t left = s.find_last_of("/"); 12 | size_t right = s.find_last_not_of("/"); 13 | if (left == string::npos) { 14 | return s; 15 | } 16 | return s.substr(left + 1, right); 17 | } 18 | 19 | string 20 | join(string dir, string base) 21 | { 22 | // TODO: a lot of sanity checking, maybe canonicalization 23 | return dir + "/" + base; 24 | } 25 | 26 | }; 27 | -------------------------------------------------------------------------------- /lib/process.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/process.h" 2 | 3 | #include "crucible/chatter.h" 4 | 
#include "crucible/error.h" 5 | #include "crucible/ntoa.h" 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | extern "C" { 14 | pid_t 15 | __attribute__((weak)) 16 | gettid() throw() 17 | { 18 | return syscall(SYS_gettid); 19 | } 20 | }; 21 | 22 | namespace crucible { 23 | using namespace std; 24 | 25 | bool 26 | Process::joinable() 27 | { 28 | return !!m_pid; 29 | } 30 | 31 | Process::~Process() 32 | { 33 | if (joinable()) { 34 | // because it's just not the same without the word "zombie"... 35 | CHATTER("ZOMBIE WARNING: joinable Process pid " << m_pid << " abandoned"); 36 | } 37 | } 38 | 39 | Process::Process() : 40 | m_pid(0) 41 | { 42 | } 43 | 44 | Process::Process(Process &&move_from) : 45 | m_pid(0) 46 | { 47 | swap(m_pid, move_from.m_pid); 48 | } 49 | 50 | void 51 | Process::do_fork(function child_func) 52 | { 53 | int rv = fork(); 54 | if (rv < 0) { 55 | THROW_ERRNO("fork failed"); 56 | } 57 | m_pid = rv; 58 | 59 | if (rv == 0) { 60 | // child 61 | catch_all([&]() { 62 | int rv = child_func(); 63 | exit(rv); 64 | }); 65 | terminate(); 66 | } 67 | } 68 | 69 | Process::status_type 70 | Process::join() 71 | { 72 | if (m_pid == 0) { 73 | THROW_ERROR(invalid_argument, "Process not created"); 74 | } 75 | 76 | int status = 0; 77 | pid_t rv = waitpid(m_pid, &status, 0); 78 | if (rv == -1) { 79 | THROW_ERRNO("waitpid failed, pid = " << m_pid); 80 | } 81 | if (rv != m_pid) { 82 | THROW_ERROR(runtime_error, "waitpid failed, wanted pid = " << m_pid << ", got rv = " << rv << ", status = " << status); 83 | } 84 | m_pid = 0; 85 | return status; 86 | } 87 | 88 | void 89 | Process::detach() 90 | { 91 | m_pid = 0; 92 | } 93 | 94 | Process::native_handle_type 95 | Process::native_handle() 96 | { 97 | return m_pid; 98 | } 99 | 100 | Process::id 101 | Process::get_id() 102 | { 103 | return m_pid; 104 | } 105 | 106 | void 107 | Process::kill(int sig) 108 | { 109 | if (!m_pid) { 110 | THROW_ERROR(invalid_argument, "Process not created"); 111 | } 112 | 113 | 
int rv = ::kill(m_pid, sig); 114 | if (rv) { 115 | THROW_ERRNO("killing process " << m_pid << " with signal " << sig); 116 | } 117 | } 118 | 119 | double 120 | getloadavg1() 121 | { 122 | double loadavg[1]; 123 | const int rv = ::getloadavg(loadavg, 1); 124 | if (rv != 1) { 125 | THROW_ERRNO("getloadavg(..., 1)"); 126 | } 127 | return loadavg[0]; 128 | } 129 | 130 | double 131 | getloadavg5() 132 | { 133 | double loadavg[2]; 134 | const int rv = ::getloadavg(loadavg, 2); 135 | if (rv != 2) { 136 | THROW_ERRNO("getloadavg(..., 2)"); 137 | } 138 | return loadavg[1]; 139 | } 140 | 141 | double 142 | getloadavg15() 143 | { 144 | double loadavg[3]; 145 | const int rv = ::getloadavg(loadavg, 3); 146 | if (rv != 3) { 147 | THROW_ERRNO("getloadavg(..., 3)"); 148 | } 149 | return loadavg[2]; 150 | } 151 | 152 | static const struct bits_ntoa_table signals_table[] = { 153 | 154 | // POSIX.1-1990 155 | NTOA_TABLE_ENTRY_ENUM(SIGHUP), 156 | NTOA_TABLE_ENTRY_ENUM(SIGINT), 157 | NTOA_TABLE_ENTRY_ENUM(SIGQUIT), 158 | NTOA_TABLE_ENTRY_ENUM(SIGILL), 159 | NTOA_TABLE_ENTRY_ENUM(SIGABRT), 160 | NTOA_TABLE_ENTRY_ENUM(SIGFPE), 161 | NTOA_TABLE_ENTRY_ENUM(SIGKILL), 162 | NTOA_TABLE_ENTRY_ENUM(SIGSEGV), 163 | NTOA_TABLE_ENTRY_ENUM(SIGPIPE), 164 | NTOA_TABLE_ENTRY_ENUM(SIGALRM), 165 | NTOA_TABLE_ENTRY_ENUM(SIGTERM), 166 | NTOA_TABLE_ENTRY_ENUM(SIGUSR1), 167 | NTOA_TABLE_ENTRY_ENUM(SIGUSR2), 168 | NTOA_TABLE_ENTRY_ENUM(SIGCHLD), 169 | NTOA_TABLE_ENTRY_ENUM(SIGCONT), 170 | NTOA_TABLE_ENTRY_ENUM(SIGSTOP), 171 | NTOA_TABLE_ENTRY_ENUM(SIGTSTP), 172 | NTOA_TABLE_ENTRY_ENUM(SIGTTIN), 173 | NTOA_TABLE_ENTRY_ENUM(SIGTTOU), 174 | 175 | // SUSv2 and POSIX.1-2001 176 | NTOA_TABLE_ENTRY_ENUM(SIGBUS), 177 | NTOA_TABLE_ENTRY_ENUM(SIGPOLL), 178 | NTOA_TABLE_ENTRY_ENUM(SIGPROF), 179 | NTOA_TABLE_ENTRY_ENUM(SIGSYS), 180 | NTOA_TABLE_ENTRY_ENUM(SIGTRAP), 181 | NTOA_TABLE_ENTRY_ENUM(SIGURG), 182 | NTOA_TABLE_ENTRY_ENUM(SIGVTALRM), 183 | NTOA_TABLE_ENTRY_ENUM(SIGXCPU), 184 | NTOA_TABLE_ENTRY_ENUM(SIGXFSZ), 185 
| 186 | // Other 187 | NTOA_TABLE_ENTRY_ENUM(SIGIOT), 188 | #ifdef SIGEMT 189 | NTOA_TABLE_ENTRY_ENUM(SIGEMT), 190 | #endif 191 | NTOA_TABLE_ENTRY_ENUM(SIGSTKFLT), 192 | NTOA_TABLE_ENTRY_ENUM(SIGIO), 193 | #ifdef SIGCLD 194 | NTOA_TABLE_ENTRY_ENUM(SIGCLD), 195 | #endif 196 | NTOA_TABLE_ENTRY_ENUM(SIGPWR), 197 | #ifdef SIGINFO 198 | NTOA_TABLE_ENTRY_ENUM(SIGINFO), 199 | #endif 200 | #ifdef SIGLOST 201 | NTOA_TABLE_ENTRY_ENUM(SIGLOST), 202 | #endif 203 | NTOA_TABLE_ENTRY_ENUM(SIGWINCH), 204 | #ifdef SIGUNUSED 205 | NTOA_TABLE_ENTRY_ENUM(SIGUNUSED), 206 | #endif 207 | 208 | NTOA_TABLE_ENTRY_END(), 209 | }; 210 | 211 | string 212 | signal_ntoa(int sig) 213 | { 214 | return bits_ntoa(sig, signals_table); 215 | } 216 | 217 | } 218 | -------------------------------------------------------------------------------- /lib/string.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/string.h" 2 | 3 | #include "crucible/error.h" 4 | 5 | #include 6 | 7 | namespace crucible { 8 | using namespace std; 9 | 10 | string 11 | to_hex(uint64_t i) 12 | { 13 | return astringprintf("0x%" PRIx64, i); 14 | } 15 | 16 | uint64_t 17 | from_hex(const string &s) 18 | { 19 | return stoull(s, nullptr, 0); 20 | } 21 | 22 | vector 23 | split(string delim, string s) 24 | { 25 | if (delim.empty()) { 26 | THROW_ERROR(invalid_argument, "delimiter empty when splitting '" << s << "'"); 27 | } 28 | vector rv; 29 | size_t n = 0; 30 | while (n < s.length()) { 31 | size_t f = s.find(delim, n); 32 | if (f == string::npos) { 33 | rv.push_back(s.substr(n)); 34 | break; 35 | } 36 | if (f > n) { 37 | rv.push_back(s.substr(n, f - n)); 38 | } 39 | n = f + delim.length(); 40 | } 41 | return rv; 42 | } 43 | }; 44 | -------------------------------------------------------------------------------- /lib/table.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/table.h" 2 | 3 | #include "crucible/string.h" 4 | 5 | 
namespace crucible { 6 | namespace Table { 7 | using namespace std; 8 | 9 | Content 10 | Fill(const char c) 11 | { 12 | return [=](size_t width, size_t height) -> string { 13 | string rv; 14 | while (height--) { 15 | rv += string(width, c); 16 | if (height) { 17 | rv += "\n"; 18 | } 19 | } 20 | return rv; 21 | }; 22 | } 23 | 24 | Content 25 | Text(const string &s) 26 | { 27 | return [=](size_t width, size_t height) -> string { 28 | const auto lines = split("\n", s); 29 | string rv; 30 | size_t line_count = 0; 31 | for (const auto &i : lines) { 32 | if (line_count++) { 33 | rv += "\n"; 34 | } 35 | if (i.length() < width) { 36 | rv += string(width - i.length(), ' '); 37 | } 38 | rv += i; 39 | } 40 | while (line_count < height) { 41 | if (line_count++) { 42 | rv += "\n"; 43 | } 44 | rv += string(width, ' '); 45 | } 46 | return rv; 47 | }; 48 | } 49 | 50 | Content 51 | Number(const string &s) 52 | { 53 | return [=](size_t width, size_t height) -> string { 54 | const auto lines = split("\n", s); 55 | string rv; 56 | size_t line_count = 0; 57 | for (const auto &i : lines) { 58 | if (line_count++) { 59 | rv += "\n"; 60 | } 61 | if (i.length() < width) { 62 | rv += string(width - i.length(), ' '); 63 | } 64 | rv += i; 65 | } 66 | while (line_count < height) { 67 | if (line_count++) { 68 | rv += "\n"; 69 | } 70 | rv += string(width, ' '); 71 | } 72 | return rv; 73 | }; 74 | } 75 | 76 | Cell::Cell(const Content &fn) : 77 | m_content(fn) 78 | { 79 | } 80 | 81 | Cell& 82 | Cell::operator=(const Content &fn) 83 | { 84 | m_content = fn; 85 | return *this; 86 | } 87 | 88 | string 89 | Cell::text(size_t width, size_t height) const 90 | { 91 | return m_content(width, height); 92 | } 93 | 94 | size_t 95 | Dimension::size() const 96 | { 97 | return m_elements.size(); 98 | } 99 | 100 | size_t 101 | Dimension::insert(size_t pos) 102 | { 103 | ++m_next_pos; 104 | const auto insert_pos = min(m_elements.size(), pos); 105 | const auto it = m_elements.begin() + insert_pos; 106 | 
m_elements.insert(it, m_next_pos); 107 | return insert_pos; 108 | } 109 | 110 | void 111 | Dimension::erase(size_t pos) 112 | { 113 | const auto it = m_elements.begin() + min(m_elements.size(), pos); 114 | m_elements.erase(it); 115 | } 116 | 117 | size_t 118 | Dimension::at(size_t pos) const 119 | { 120 | return m_elements.at(pos); 121 | } 122 | 123 | Dimension& 124 | Table::rows() 125 | { 126 | return m_rows; 127 | }; 128 | 129 | const Dimension& 130 | Table::rows() const 131 | { 132 | return m_rows; 133 | }; 134 | 135 | Dimension& 136 | Table::cols() 137 | { 138 | return m_cols; 139 | }; 140 | 141 | const Dimension& 142 | Table::cols() const 143 | { 144 | return m_cols; 145 | }; 146 | 147 | const Cell& 148 | Table::at(size_t row, size_t col) const 149 | { 150 | const auto row_idx = m_rows.at(row); 151 | const auto col_idx = m_cols.at(col); 152 | const auto found = m_cells.find(make_pair(row_idx, col_idx)); 153 | if (found == m_cells.end()) { 154 | static const Cell s_empty(Fill('.')); 155 | return s_empty; 156 | } 157 | return found->second; 158 | }; 159 | 160 | Cell& 161 | Table::at(size_t row, size_t col) 162 | { 163 | const auto row_idx = m_rows.at(row); 164 | const auto col_idx = m_cols.at(col); 165 | return m_cells[make_pair(row_idx, col_idx)]; 166 | }; 167 | 168 | static 169 | pair 170 | text_size(const string &s) 171 | { 172 | const auto s_split = split("\n", s); 173 | size_t width = 0; 174 | for (const auto &i : s_split) { 175 | width = max(width, i.length()); 176 | } 177 | return make_pair(width, s_split.size()); 178 | } 179 | 180 | ostream& operator<<(ostream &os, const Table &table) 181 | { 182 | const auto rows = table.rows().size(); 183 | const auto cols = table.cols().size(); 184 | vector row_heights(rows, 1); 185 | vector col_widths(cols, 1); 186 | // Get the size of all fixed- and minimum-sized content cells 187 | for (size_t row = 0; row < table.rows().size(); ++row) { 188 | vector col_text; 189 | for (size_t col = 0; col < table.cols().size(); 
++col) { 190 | col_text.push_back(table.at(row, col).text(0, 0)); 191 | const auto tsize = text_size(*col_text.rbegin()); 192 | row_heights[row] = max(row_heights[row], tsize.second); 193 | col_widths[col] = max(col_widths[col], tsize.first); 194 | } 195 | } 196 | // Render the table 197 | for (size_t row = 0; row < table.rows().size(); ++row) { 198 | vector lines(row_heights[row], ""); 199 | for (size_t col = 0; col < table.cols().size(); ++col) { 200 | const auto& table_cell = table.at(row, col); 201 | const auto table_text = table_cell.text(col_widths[col], row_heights[row]); 202 | auto col_lines = split("\n", table_text); 203 | col_lines.resize(row_heights[row], ""); 204 | for (size_t line = 0; line < row_heights[row]; ++line) { 205 | if (col > 0) { 206 | lines[line] += table.mid(); 207 | } 208 | lines[line] += col_lines[line]; 209 | } 210 | } 211 | for (const auto &line : lines) { 212 | os << table.left() << line << table.right() << "\n"; 213 | } 214 | } 215 | return os; 216 | } 217 | 218 | void 219 | Table::left(const string &s) 220 | { 221 | m_left = s; 222 | } 223 | 224 | void 225 | Table::mid(const string &s) 226 | { 227 | m_mid = s; 228 | } 229 | 230 | void 231 | Table::right(const string &s) 232 | { 233 | m_right = s; 234 | } 235 | 236 | const string& 237 | Table::left() const 238 | { 239 | return m_left; 240 | } 241 | 242 | const string& 243 | Table::mid() const 244 | { 245 | return m_mid; 246 | } 247 | 248 | const string& 249 | Table::right() const 250 | { 251 | return m_right; 252 | } 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /lib/uname.cc: -------------------------------------------------------------------------------- 1 | #include "crucible/error.h" 2 | #include "crucible/uname.h" 3 | 4 | namespace crucible { 5 | using namespace std; 6 | 7 | Uname::Uname() 8 | { 9 | DIE_IF_NON_ZERO(uname(static_cast(this))); 10 | } 11 | } 12 | 
-------------------------------------------------------------------------------- /makeflags: -------------------------------------------------------------------------------- 1 | # Default: 2 | CCFLAGS = -Wall -Wextra -Werror -O3 3 | 4 | # Optimized: 5 | # CCFLAGS = -Wall -Wextra -Werror -O3 -march=native 6 | 7 | # Debug: 8 | # CCFLAGS = -Wall -Wextra -Werror -O0 -ggdb 9 | 10 | CCFLAGS += -I../include -D_FILE_OFFSET_BITS=64 11 | 12 | BEES_CFLAGS = $(CCFLAGS) -std=c99 $(CFLAGS) 13 | BEES_CXXFLAGS = $(CCFLAGS) -std=c++11 -Wold-style-cast -Wno-missing-field-initializers $(CXXFLAGS) 14 | -------------------------------------------------------------------------------- /scripts/beesd.conf.sample: -------------------------------------------------------------------------------- 1 | ## Config for Bees: /etc/bees/beesd.conf.sample 2 | ## https://github.com/Zygo/bees 3 | ## These are the default values; change them if needed 4 | 5 | # How to use? 6 | # Copy this file to a new file name and adjust the UUID below 7 | 8 | # Which FS will be used 9 | UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx 10 | 11 | ## System Vars 12 | # Change carefully 13 | # WORK_DIR=/run/bees/ 14 | # MNT_DIR="$WORK_DIR/mnt/$UUID" 15 | # BEESHOME="$MNT_DIR/.beeshome" 16 | # BEESSTATUS="$WORK_DIR/$UUID.status" 17 | 18 | ## Options to apply, see `beesd --help` for details 19 | # OPTIONS="--strip-paths --no-timestamps" 20 | 21 | ## Bees DB size 22 | # Hash Table Sizing 23 | # Hash table entries are 16 bytes each 24 | # (64-bit hash, 52-bit block number, and some metadata bits) 25 | # Each entry represents a minimum of 4K on disk.
26 | # unique data size hash table size average dedupe block size 27 | # 1TB 4GB 4K 28 | # 1TB 1GB 16K 29 | # 1TB 256MB 64K 30 | # 1TB 16MB 1024K 31 | # 64TB 1GB 1024K 32 | # 33 | # Size MUST be multiple of 128KB 34 | # DB_SIZE=$((1024*1024*1024)) # 1G in bytes 35 | -------------------------------------------------------------------------------- /scripts/beesd.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # if not called from systemd try to replicate mount unsharing on ctrl+c 4 | # see: https://github.com/Zygo/bees/issues/281 5 | if [ -z "${SYSTEMD_EXEC_PID}" -a -z "${UNSHARE_DONE}" ]; then 6 | UNSHARE_DONE=true 7 | export UNSHARE_DONE 8 | exec unshare -m --propagation private -- "$0" "$@" 9 | fi 10 | 11 | ## Helpful functions 12 | INFO(){ echo "INFO:" "$@"; } 13 | ERRO(){ echo "ERROR:" "$@"; exit 1; } 14 | YN(){ [[ "$1" =~ (1|Y|y) ]]; } 15 | 16 | ## Global vars 17 | export BEESHOME BEESSTATUS 18 | export WORK_DIR CONFIG_DIR 19 | export CONFIG_FILE 20 | export UUID AL16M AL128K 21 | 22 | readonly AL128K="$((128*1024))" 23 | readonly AL16M="$((16*1024*1024))" 24 | readonly CONFIG_DIR=@ETC_PREFIX@/bees/ 25 | 26 | readonly bees_bin=$(realpath @DESTDIR@/@LIBEXEC_PREFIX@/bees) 27 | 28 | command -v "$bees_bin" &> /dev/null || ERRO "Missing 'bees' agent" 29 | 30 | uuid_valid(){ 31 | if uuidparse -n -o VARIANT $1 | grep -i -q invalid; then 32 | false 33 | fi 34 | } 35 | 36 | help(){ 37 | echo "Usage: beesd [options] " 38 | echo "- - -" 39 | exec "$bees_bin" --help 40 | } 41 | 42 | for i in $("$bees_bin" --help 2>&1 | grep -E " --" | sed -e "s/^[^-]*-/-/" -e "s/,[^-]*--/ --/" -e "s/ [^-]*$//") 43 | do 44 | TMP_ARGS="$TMP_ARGS $i" 45 | done 46 | IFS=" " read -r -a SUPPORTED_ARGS <<< $TMP_ARGS 47 | NOT_SUPPORTED_ARGS=() 48 | ARGUMENTS=() 49 | 50 | for arg in "${@}"; do 51 | supp=false 52 | for supp_arg in "${SUPPORTED_ARGS[@]}"; do 53 | if [[ "$arg" == ${supp_arg}* ]]; then 54 | supp=true 55 | break 56 | fi 57 | 
done 58 | if $supp; then 59 | ARGUMENTS+=($arg) 60 | else 61 | NOT_SUPPORTED_ARGS+=($arg) 62 | fi 63 | done 64 | 65 | for arg in "${ARGUMENTS[@]}"; do 66 | case $arg in 67 | -h) help;; 68 | --help) help;; 69 | esac 70 | done 71 | 72 | for arg in "${NOT_SUPPORTED_ARGS[@]}"; do 73 | if uuid_valid $arg; then 74 | [ ! -z "$UUID" ] && help 75 | UUID=$arg 76 | fi 77 | done 78 | 79 | [ -z "$UUID" ] && help 80 | 81 | 82 | FILE_CONFIG="$(grep -E -l '^[^#]*UUID\s*=\s*"?'"$UUID" "$CONFIG_DIR"/*.conf | head -1)" 83 | [ ! -f "$FILE_CONFIG" ] && ERRO "No config for $UUID" 84 | INFO "Find $UUID in $FILE_CONFIG, use as conf" 85 | source "$FILE_CONFIG" 86 | 87 | 88 | ## Pre checks 89 | { 90 | [ ! -d "$CONFIG_DIR" ] && ERRO "Missing: $CONFIG_DIR" 91 | [ "$UID" == "0" ] || ERRO "Must be run as root" 92 | } 93 | 94 | 95 | WORK_DIR="${WORK_DIR:-/run/bees/}" 96 | MNT_DIR="${MNT_DIR:-$WORK_DIR/mnt/$UUID}" 97 | BEESHOME="${BEESHOME:-$MNT_DIR/.beeshome}" 98 | BEESSTATUS="${BEESSTATUS:-$WORK_DIR/$UUID.status}" 99 | DB_SIZE="${DB_SIZE:-$((8192*AL128K))}" 100 | 101 | INFO "Check: Disk exists" 102 | if [ ! -b "/dev/disk/by-uuid/$UUID" ]; then 103 | ERRO "Missing disk: /dev/disk/by-uuid/$UUID" 104 | fi 105 | 106 | is_btrfs(){ [ "$(blkid -s TYPE -o value "$1")" == "btrfs" ]; } 107 | 108 | INFO "Check: Disk with btrfs" 109 | if ! is_btrfs "/dev/disk/by-uuid/$UUID"; then 110 | ERRO "Disk not contain btrfs: /dev/disk/by-uuid/$UUID" 111 | fi 112 | 113 | INFO "WORK DIR: $WORK_DIR" 114 | mkdir -p "$WORK_DIR" || exit 1 115 | 116 | INFO "MOUNT DIR: $MNT_DIR" 117 | mkdir -p "$MNT_DIR" || exit 1 118 | 119 | mount --make-private -osubvolid=5,nodev,noexec /dev/disk/by-uuid/$UUID "$MNT_DIR" || exit 1 120 | 121 | if [ ! 
-d "$BEESHOME" ]; then 122 | INFO "Create subvol $BEESHOME for store bees data" 123 | btrfs sub cre "$BEESHOME" 124 | fi 125 | 126 | # Check DB size 127 | { 128 | DB_PATH="$BEESHOME/beeshash.dat" 129 | touch "$DB_PATH" 130 | OLD_SIZE="$(du -b "$DB_PATH" | sed 's/\t/ /g' | cut -d' ' -f1)" 131 | NEW_SIZE="$DB_SIZE" 132 | if (( "$NEW_SIZE"%AL128K > 0 )); then 133 | ERRO "DB_SIZE Must be multiple of 128K" 134 | fi 135 | if (( "$OLD_SIZE" != "$NEW_SIZE" )); then 136 | INFO "Resize db: $OLD_SIZE -> $NEW_SIZE" 137 | rm -f "$BEESHOME/beescrawl.dat" 138 | truncate -s $NEW_SIZE $DB_PATH 139 | fi 140 | chmod 700 "$DB_PATH" 141 | } 142 | 143 | MNT_DIR="$(realpath $MNT_DIR)" 144 | 145 | cd "$MNT_DIR" 146 | exec "$bees_bin" "${ARGUMENTS[@]}" $OPTIONS "$MNT_DIR" 147 | -------------------------------------------------------------------------------- /scripts/beesd@.service.in: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Bees (%i) 3 | Documentation=https://github.com/Zygo/bees 4 | After=sysinit.target 5 | 6 | [Service] 7 | Type=simple 8 | ExecStart=@PREFIX@/sbin/beesd --no-timestamps %i 9 | CPUAccounting=true 10 | CPUSchedulingPolicy=batch 11 | CPUWeight=12 12 | IOSchedulingClass=idle 13 | IOSchedulingPriority=7 14 | IOWeight=10 15 | KillMode=control-group 16 | KillSignal=SIGTERM 17 | MemoryAccounting=true 18 | Nice=19 19 | Restart=on-abnormal 20 | RuntimeDirectoryMode=0700 21 | RuntimeDirectory=bees 22 | StartupCPUWeight=25 23 | StartupIOWeight=25 24 | 25 | # Hide other users' process in /proc/ 26 | ProtectProc=invisible 27 | 28 | # Mount / as read-only 29 | ProtectSystem=strict 30 | 31 | # Forbidden access to /home, /root and /run/user 32 | ProtectHome=true 33 | 34 | # Mount tmpfs on /tmp/ and /var/tmp/. 35 | # Cannot mount at /run/ or /var/run/ for they are used by systemd. 
36 | PrivateTmp=true 37 | 38 | # Disable network access 39 | PrivateNetwork=true 40 | 41 | # Use private IPC namespace, UTS namespace 42 | PrivateIPC=true 43 | ProtectHostname=true 44 | 45 | # Disable write access to kernel variables through /proc 46 | ProtectKernelTunables=true 47 | 48 | # Disable access to control groups 49 | ProtectControlGroups=true 50 | 51 | # Set capabilities of the new program 52 | # The first three are required for accessing any file on the mounted filesystem. 53 | # The last one is required for mounting the filesystem. 54 | AmbientCapabilities=CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SYS_ADMIN 55 | 56 | # With NoNewPrivileges, running sudo cannot gain any new privilege 57 | NoNewPrivileges=true 58 | 59 | [Install] 60 | WantedBy=basic.target 61 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.new.c 2 | bees-usage.c 3 | bees-version.[ch] 4 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | BEES = ../bin/bees 2 | 3 | all: $(BEES) 4 | 5 | include ../makeflags 6 | -include ../localconf 7 | 8 | LIBS = -lcrucible -lpthread 9 | BEES_LDFLAGS = -L../lib $(LDFLAGS) 10 | 11 | BEES_OBJS = \ 12 | bees.o \ 13 | bees-context.o \ 14 | bees-hash.o \ 15 | bees-resolve.o \ 16 | bees-roots.o \ 17 | bees-thread.o \ 18 | bees-trace.o \ 19 | bees-types.o \ 20 | 21 | ALL_OBJS = $(BEES_OBJS) $(PROGRAM_OBJS) 22 | 23 | bees-version.c: bees.h $(BEES_OBJS:.o=.cc) Makefile ../lib/libcrucible.a 24 | echo "const char *BEES_VERSION = \"$(BEES_VERSION)\";" > bees-version.c.new 25 | if ! [ -e "$@" ] || !
cmp -s "$@.new" "$@"; then mv -fv $@.new $@; fi 26 | 27 | bees-usage.c: bees-usage.txt Makefile 28 | (echo 'const char *BEES_USAGE = '; sed -r 's/^(.*)$$/"\1\\n"/' < bees-usage.txt; echo ';') > bees-usage.new.c 29 | mv -f bees-usage.new.c bees-usage.c 30 | 31 | %.dep: %.cc Makefile 32 | $(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $< 33 | 34 | include $(ALL_OBJS:%.o=%.dep) 35 | 36 | %.o: %.c ../makeflags 37 | $(CC) $(BEES_CFLAGS) -o $@ -c $< 38 | 39 | %.o: %.cc ../makeflags 40 | $(CXX) $(BEES_CXXFLAGS) -o $@ -c $< 41 | 42 | $(BEES): $(BEES_OBJS) bees-version.o bees-usage.o ../lib/libcrucible.a 43 | $(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $^ $(LIBS) 44 | 45 | clean: 46 | rm -fv *.o bees-version.c 47 | -------------------------------------------------------------------------------- /src/bees-thread.cc: -------------------------------------------------------------------------------- 1 | #include "bees.h" 2 | 3 | using namespace crucible; 4 | using namespace std; 5 | 6 | BeesThread::BeesThread(string name) : 7 | m_name(name) 8 | { 9 | THROW_CHECK1(invalid_argument, name, !name.empty()); 10 | } 11 | 12 | void 13 | BeesThread::exec(function func) 14 | { 15 | m_timer.reset(); 16 | BEESLOGDEBUG("BeesThread exec " << m_name); 17 | m_thread_ptr = make_shared([=]() { 18 | BeesNote::set_name(m_name); 19 | BEESLOGDEBUG("Starting thread " << m_name); 20 | BEESNOTE("thread function"); 21 | Timer thread_time; 22 | catch_all([&]() { 23 | func(); 24 | }); 25 | BEESLOGDEBUG("Exiting thread " << m_name << ", " << thread_time << " sec"); 26 | }); 27 | } 28 | 29 | BeesThread::BeesThread(string name, function func) : 30 | m_name(name) 31 | { 32 | THROW_CHECK1(invalid_argument, name, !name.empty()); 33 | BEESLOGDEBUG("BeesThread construct " << m_name); 34 | exec(func); 35 | } 36 | 37 | void 38 | BeesThread::join() 39 | { 40 | if (!m_thread_ptr) { 41 | BEESLOGDEBUG("Thread " << m_name << " no thread ptr"); 42 | return; 43 | } 44 | 45 | BEESLOGDEBUG("BeesThread::join " << 
m_name); 46 | if (m_thread_ptr->joinable()) { 47 | BEESLOGDEBUG("Joining thread " << m_name); 48 | Timer thread_time; 49 | m_thread_ptr->join(); 50 | BEESLOGDEBUG("Waited for " << m_name << ", " << thread_time << " sec"); 51 | } else if (!m_name.empty()) { 52 | BEESLOGDEBUG("BeesThread " << m_name << " not joinable"); 53 | } else { 54 | BEESLOGDEBUG("BeesThread else " << m_name); 55 | } 56 | } 57 | 58 | void 59 | BeesThread::set_name(const string &name) 60 | { 61 | m_name = name; 62 | } 63 | 64 | BeesThread::~BeesThread() 65 | { 66 | if (!m_thread_ptr) { 67 | BEESLOGDEBUG("Thread " << m_name << " no thread ptr"); 68 | return; 69 | } 70 | 71 | BEESLOGDEBUG("BeesThread destructor " << m_name); 72 | if (m_thread_ptr->joinable()) { 73 | BEESLOGDEBUG("Waiting for thread " << m_name); 74 | Timer thread_time; 75 | m_thread_ptr->join(); 76 | BEESLOGDEBUG("Waited for " << m_name << ", " << thread_time << " sec"); 77 | } else if (!m_name.empty()) { 78 | BEESLOGDEBUG("Thread " << m_name << " not joinable"); 79 | } else { 80 | BEESLOGDEBUG("Thread destroy else " << m_name); 81 | } 82 | } 83 | 84 | -------------------------------------------------------------------------------- /src/bees-trace.cc: -------------------------------------------------------------------------------- 1 | #include "bees.h" 2 | 3 | // tracing ---------------------------------------- 4 | 5 | int bees_log_level = 8; 6 | 7 | thread_local BeesTracer *BeesTracer::tl_next_tracer = nullptr; 8 | thread_local bool BeesTracer::tl_first = true; 9 | thread_local bool BeesTracer::tl_silent = false; 10 | 11 | #if __cplusplus >= 201703 12 | static 13 | bool 14 | exception_check() 15 | { 16 | return uncaught_exceptions(); 17 | } 18 | #else 19 | static 20 | bool 21 | exception_check() 22 | { 23 | return uncaught_exception(); 24 | } 25 | #endif 26 | 27 | BeesTracer::~BeesTracer() 28 | { 29 | if (!tl_silent && exception_check()) { 30 | if (tl_first) { 31 | BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- BEGIN TRACE --- exception 
---"); 32 | tl_first = false; 33 | } 34 | try { 35 | m_func(); 36 | } catch (exception &e) { 37 | BEESLOG(BEES_TRACE_LEVEL, "TRACE: Nested exception: " << e.what()); 38 | } catch (...) { 39 | BEESLOG(BEES_TRACE_LEVEL, "TRACE: Nested exception ..."); 40 | } 41 | if (!m_next_tracer) { 42 | BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- END TRACE --- exception ---"); 43 | } 44 | } 45 | tl_next_tracer = m_next_tracer; 46 | if (!m_next_tracer) { 47 | tl_silent = false; 48 | tl_first = true; 49 | } 50 | } 51 | 52 | BeesTracer::BeesTracer(const function &f, bool silent) : 53 | m_func(f) 54 | { 55 | m_next_tracer = tl_next_tracer; 56 | tl_next_tracer = this; 57 | tl_silent = silent; 58 | } 59 | 60 | void 61 | BeesTracer::trace_now() 62 | { 63 | BeesTracer *tp = tl_next_tracer; 64 | BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- BEGIN TRACE ---"); 65 | while (tp) { 66 | tp->m_func(); 67 | tp = tp->m_next_tracer; 68 | } 69 | BEESLOG(BEES_TRACE_LEVEL, "TRACE: --- END TRACE ---"); 70 | } 71 | 72 | bool 73 | BeesTracer::get_silent() 74 | { 75 | return tl_silent; 76 | } 77 | 78 | void 79 | BeesTracer::set_silent() 80 | { 81 | tl_silent = true; 82 | } 83 | 84 | thread_local BeesNote *BeesNote::tl_next = nullptr; 85 | mutex BeesNote::s_mutex; 86 | map BeesNote::s_status; 87 | thread_local string BeesNote::tl_name; 88 | 89 | BeesNote::~BeesNote() 90 | { 91 | tl_next = m_prev; 92 | unique_lock lock(s_mutex); 93 | if (tl_next) { 94 | s_status[gettid()] = tl_next; 95 | } else { 96 | s_status.erase(gettid()); 97 | } 98 | } 99 | 100 | BeesNote::BeesNote(function f) : 101 | m_func(f) 102 | { 103 | m_name = get_name(); 104 | m_prev = tl_next; 105 | tl_next = this; 106 | unique_lock lock(s_mutex); 107 | s_status[gettid()] = tl_next; 108 | } 109 | 110 | void 111 | BeesNote::set_name(const string &name) 112 | { 113 | tl_name = name; 114 | pthread_setname(name); 115 | } 116 | 117 | string 118 | BeesNote::get_name() 119 | { 120 | // Use explicit name if given 121 | if (!tl_name.empty()) { 122 | return tl_name; 
123 | } 124 | 125 | // Try a Task name. If there is one, return it, but do not 126 | // remember it. Each output message may be a different Task. 127 | // The current task is thread_local so we don't need to worry 128 | // about it being destroyed under us. 129 | auto current_task = Task::current_task(); 130 | if (current_task) { 131 | return current_task.title(); 132 | } 133 | 134 | // OK try the pthread name next. 135 | 136 | // thread_getname_np returns process name 137 | // ...by default? ...for the main thread? 138 | // ...except during exception handling? 139 | // ...randomly? 140 | return pthread_getname(); 141 | } 142 | 143 | BeesNote::ThreadStatusMap 144 | BeesNote::get_status() 145 | { 146 | unique_lock lock(s_mutex); 147 | ThreadStatusMap rv; 148 | for (auto t : s_status) { 149 | ostringstream oss; 150 | if (!t.second->m_name.empty()) { 151 | oss << t.second->m_name << ": "; 152 | } 153 | if (t.second->m_timer.age() > BEES_TOO_LONG) { 154 | oss << "[" << t.second->m_timer << "s] "; 155 | } 156 | t.second->m_func(oss); 157 | rv[t.first] = oss.str(); 158 | } 159 | return rv; 160 | } 161 | 162 | -------------------------------------------------------------------------------- /src/bees-usage.txt: -------------------------------------------------------------------------------- 1 | Usage: %s [options] fs-root-path 2 | Performs best-effort extent-same deduplication on btrfs. 3 | 4 | fs-root-path MUST be the root of a btrfs filesystem tree (subvol id 5). 5 | Other directories will be rejected. 
6 | 7 | Options: 8 | -h, --help Show this help 9 | 10 | Load management options: 11 | -c, --thread-count Worker thread count (default CPU count * factor) 12 | -C, --thread-factor Worker thread factor (default 1) 13 | -G, --thread-min Minimum worker thread count (default 0) 14 | -g, --loadavg-target Target load average for worker threads (default none) 15 | --throttle-factor Idle time between operations (default 1.0) 16 | 17 | Filesystem tree traversal options: 18 | -m, --scan-mode Scanning mode (0..4, default 4) 19 | 20 | Workarounds: 21 | -a, --workaround-btrfs-send Workaround for btrfs send 22 | (ignore RO snapshots) 23 | 24 | Logging options: 25 | -t, --timestamps Show timestamps in log output (default) 26 | -T, --no-timestamps Omit timestamps in log output 27 | -p, --absolute-paths Show absolute paths (default) 28 | -P, --strip-paths Strip $CWD from beginning of all paths in the log 29 | -v, --verbose Set maximum log level (0..8, default 8) 30 | 31 | Optional environment variables: 32 | BEESHOME Path to hash table and configuration files 33 | (default is .beeshome/ in the root of the filesystem). 34 | 35 | BEESSTATUS File to write status to (tmpfs recommended, e.g. /run). 36 | No status is written if this variable is unset. 
37 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !Makefile 3 | !*.c 4 | !*.cc 5 | !*.h 6 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAMS = \ 2 | chatter \ 3 | crc64 \ 4 | fd \ 5 | limits \ 6 | namedptr \ 7 | path \ 8 | process \ 9 | progress \ 10 | seeker \ 11 | table \ 12 | task \ 13 | 14 | all: test 15 | 16 | test: $(PROGRAMS:%=%.txt) Makefile 17 | FORCE: 18 | 19 | include ../makeflags 20 | -include ../localconf 21 | 22 | LIBS = -lcrucible -lpthread 23 | BEES_LDFLAGS = -L../lib $(LDFLAGS) 24 | 25 | %.dep: %.cc tests.h Makefile 26 | $(CXX) $(BEES_CXXFLAGS) -M -MF $@ -MT $(<:.cc=.o) $< 27 | 28 | include $(PROGRAMS:%=%.dep) 29 | 30 | $(PROGRAMS:%=%.o): %.o: %.cc ../makeflags Makefile 31 | $(CXX) $(BEES_CXXFLAGS) -o $@ -c $< 32 | 33 | $(PROGRAMS): %: %.o ../makeflags Makefile ../lib/libcrucible.a 34 | $(CXX) $(BEES_CXXFLAGS) $(BEES_LDFLAGS) -o $@ $< $(LIBS) 35 | 36 | %.txt: % Makefile FORCE 37 | ./$< >$@ 2>&1 || (RC=$$?; cat $@; exit $$RC) 38 | 39 | clean: 40 | rm -fv $(PROGRAMS:%=%.o) $(PROGRAMS:%=%.txt) $(PROGRAMS) 41 | -------------------------------------------------------------------------------- /test/chatter.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #include "crucible/chatter.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | using namespace crucible; 13 | 14 | static 15 | void 16 | test_chatter_one() 17 | { 18 | cerr << endl; 19 | CHATTER("simple chatter case"); 20 | } 21 | 22 | static 23 | void 24 | test_chatter_two() 25 | { 26 | cerr << endl; 27 | CHATTER("two lines\nof chatter"); 28 | } 29 | 30 | static 31 | void 32 | test_chatter_three() 33 | { 34 | cerr << 
endl; 35 | Chatter c(0, "tct"); 36 | c << "More complicated"; 37 | c << "\ncase with\n"; 38 | c << "some \\ns"; 39 | } 40 | 41 | int 42 | main(int, char**) 43 | { 44 | RUN_A_TEST(test_chatter_one()); 45 | RUN_A_TEST(test_chatter_two()); 46 | RUN_A_TEST(test_chatter_three()); 47 | 48 | exit(EXIT_SUCCESS); 49 | } 50 | -------------------------------------------------------------------------------- /test/crc64.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include "crucible/crc64.h" 3 | 4 | #include 5 | 6 | using namespace crucible; 7 | 8 | static 9 | void 10 | test_getcrc64_byte_arrays() 11 | { 12 | assert(Digest::CRC::crc64("John", 4) == 5942451273432301568); 13 | assert(Digest::CRC::crc64("Paul", 4) == 5838402100630913024); 14 | assert(Digest::CRC::crc64("George", 6) == 6714394476893704192); 15 | assert(Digest::CRC::crc64("Ringo", 5) == 6038837226071130112); 16 | assert(Digest::CRC::crc64("", 0) == 0); 17 | assert(Digest::CRC::crc64("\377\277\300\200", 4) == 15615382887346470912ULL); 18 | } 19 | 20 | int 21 | main(int, char**) 22 | { 23 | RUN_A_TEST(test_getcrc64_byte_arrays()); 24 | 25 | exit(EXIT_SUCCESS); 26 | } 27 | -------------------------------------------------------------------------------- /test/namedptr.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | #include "crucible/error.h" 3 | #include "crucible/namedptr.h" 4 | 5 | #include 6 | #include 7 | 8 | using namespace crucible; 9 | 10 | struct named_thing { 11 | static set s_set; 12 | int m_a, m_b; 13 | named_thing() = delete; 14 | named_thing(const named_thing &that) : 15 | m_a(that.m_a), 16 | m_b(that.m_b) 17 | { 18 | cerr << "named_thing(" << m_a << ", " << m_b << ") " << this << " copied from " << &that << "." 
<< endl; 19 | auto rv = s_set.insert(this); 20 | THROW_CHECK1(runtime_error, *rv.first, rv.second); 21 | } 22 | named_thing(int a, int b) : 23 | m_a(a), m_b(b) 24 | { 25 | cerr << "named_thing(" << a << ", " << b << ") " << this << " constructed." << endl; 26 | auto rv = s_set.insert(this); 27 | THROW_CHECK1(runtime_error, *rv.first, rv.second); 28 | } 29 | ~named_thing() { 30 | auto rv = s_set.erase(this); 31 | assert(rv == 1); 32 | cerr << "named_thing(" << m_a << ", " << m_b << ") " << this << " destroyed." << endl; 33 | m_a = ~m_a; 34 | m_b = ~m_b; 35 | } 36 | void check(int a, int b) { 37 | THROW_CHECK2(runtime_error, m_a, a, m_a == a); 38 | THROW_CHECK2(runtime_error, m_b, b, m_b == b); 39 | } 40 | static void check_empty() { 41 | THROW_CHECK1(runtime_error, s_set.size(), s_set.empty()); 42 | } 43 | }; 44 | 45 | set named_thing::s_set; 46 | 47 | static 48 | void 49 | test_namedptr() 50 | { 51 | NamedPtr names; 52 | names.func([](int a, int b) -> shared_ptr { return make_shared(a, b); }); 53 | 54 | auto a_3_5 = names(3, 5); 55 | auto b_3_5 = names(3, 5); 56 | { 57 | auto c_2_7 = names(2, 7); 58 | b_3_5 = a_3_5; 59 | a_3_5->check(3, 5); 60 | b_3_5->check(3, 5); 61 | c_2_7->check(2, 7); 62 | } 63 | auto d_2_7 = names(2, 7); 64 | a_3_5->check(3, 5); 65 | a_3_5.reset(); 66 | b_3_5->check(3, 5); 67 | d_2_7->check(2, 7); 68 | } 69 | 70 | static 71 | void 72 | test_leak() 73 | { 74 | named_thing::check_empty(); 75 | } 76 | 77 | int 78 | main(int, char**) 79 | { 80 | RUN_A_TEST(test_namedptr()); 81 | RUN_A_TEST(test_leak()); 82 | 83 | exit(EXIT_SUCCESS); 84 | } 85 | -------------------------------------------------------------------------------- /test/path.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #include "crucible/path.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | using namespace crucible; 13 | 14 | unsigned failures = 0; 15 | static 16 | void 17 | 
test_path_basename(string input, string expected) 18 | { 19 | string result = basename(input); 20 | if (expected != result) { 21 | std::cerr << "result was \"" << result << "\"" << std::endl; 22 | ++failures; 23 | } 24 | } 25 | 26 | int 27 | main(int, char**) 28 | { 29 | RUN_A_TEST(test_path_basename("/foo/bar.c", "bar.c")); 30 | RUN_A_TEST(test_path_basename("/foo/bar/", "")); 31 | RUN_A_TEST(test_path_basename("/foo/", "")); 32 | RUN_A_TEST(test_path_basename("/", "")); 33 | RUN_A_TEST(test_path_basename("foo/bar.c", "bar.c")); 34 | RUN_A_TEST(test_path_basename("bar.c", "bar.c")); 35 | RUN_A_TEST(test_path_basename("", "")); 36 | 37 | assert(!failures); 38 | 39 | exit(EXIT_SUCCESS); 40 | } 41 | -------------------------------------------------------------------------------- /test/process.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #include "crucible/process.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | using namespace crucible; 14 | using namespace std; 15 | 16 | static inline 17 | int 18 | return_value(int val) 19 | { 20 | // cerr << "pid " << getpid() << " returning " << val << endl; 21 | return val; 22 | } 23 | 24 | static inline 25 | int 26 | return_value_2(int val, int val2) 27 | { 28 | return val + val2; 29 | } 30 | 31 | static inline 32 | void 33 | test_fork_return(int val) 34 | { 35 | Pid child(return_value, val); 36 | assert(child == child->get_id()); 37 | assert(child == child->native_handle()); 38 | int status = child->join(); 39 | int rv_status = WEXITSTATUS(status); 40 | assert(WIFEXITED(status)); 41 | assert(rv_status == val); 42 | } 43 | 44 | static inline 45 | void 46 | test_fork_return(int val, int val2) 47 | { 48 | Pid child(return_value_2, val, val2); 49 | int status = child->join(); 50 | int rv_status = WEXITSTATUS(status); 51 | assert(WIFEXITED(status)); 52 | assert(rv_status == val + val2); 53 | } 54 | 55 | int 56 | 
main(int, char**) 57 | { 58 | RUN_A_TEST(test_fork_return(0)); 59 | RUN_A_TEST(test_fork_return(1)); 60 | RUN_A_TEST(test_fork_return(9)); 61 | RUN_A_TEST(test_fork_return(2, 3)); 62 | RUN_A_TEST(test_fork_return(7, 9)); 63 | 64 | exit(EXIT_SUCCESS); 65 | } 66 | -------------------------------------------------------------------------------- /test/progress.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #include "crucible/progress.h" 4 | 5 | #include 6 | 7 | #include 8 | 9 | using namespace crucible; 10 | using namespace std; 11 | 12 | /* Walks a single ProgressTracker through a fixed sequence of hold() and reset() calls, asserting the values of begin() and end() after each step. Takes no arguments and returns nothing; a failed assert aborts the test. */ void 13 | test_progress() 14 | { 15 | // On create, begin == end == constructor argument 16 | ProgressTracker pt(123); 17 | assert(pt.begin() == 123); 18 | assert(pt.end() == 123); 19 | 20 | // Holding a position past the end increases the end (and moves begin to match) 21 | auto hold345 = pt.hold(345); 22 | assert(pt.begin() == 345); 23 | assert(pt.end() == 345); 24 | 25 | // Holding a position before begin reduces begin, without changing end 26 | auto hold234 = pt.hold(234); 27 | assert(pt.begin() == 234); 28 | assert(pt.end() == 345); 29 | 30 | // Holding a position past the end increases the end, without affecting begin 31 | auto hold456 = pt.hold(456); 32 | assert(pt.begin() == 234); 33 | assert(pt.end() == 456); 34 | 35 | // Releasing a position in the middle affects neither begin nor end 36 | hold345.reset(); 37 | assert(pt.begin() == 234); 38 | assert(pt.end() == 456); 39 | 40 | // Hold another position in the middle to test begin moving forward 41 | auto hold400 = pt.hold(400); 42 | 43 | // Releasing a position at the beginning moves begin forward 44 | hold234.reset(); 45 | assert(pt.begin() == 400); 46 | assert(pt.end() == 456); 47 | 48 | // Releasing a position at the end doesn't move end backward 49 | hold456.reset(); 50 | assert(pt.begin() == 400); 51 | assert(pt.end() == 456); 52 | 53 | // Releasing a position in the middle doesn't move end backward but 
does move begin forward 54 | hold400.reset(); 55 | assert(pt.begin() == 456); 56 | assert(pt.end() == 456); 57 | 58 | } 59 | 60 | /* Entry point of the progress test: runs test_progress through RUN_A_TEST and exits with EXIT_SUCCESS. */ int 61 | main(int, char**) 62 | { 63 | RUN_A_TEST(test_progress()); 64 | 65 | exit(EXIT_SUCCESS); 66 | } 67 | -------------------------------------------------------------------------------- /test/seeker.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #include "crucible/seeker.h" 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | using namespace crucible; 11 | 12 | /* Lookup callback handed to seek_backward by seeker_test. Copies vec into an ordered set, finds the first element not less than lower (lb), and returns the elements from lb up to an end iterator ub. ub is first advanced by up to two elements unconditionally, so the result can contain up to two elements regardless of upper; after that ub keeps advancing until it reaches an element greater than upper or the end of the set. */ static 13 | set 14 | seeker_finder(const vector &vec, uint64_t lower, uint64_t upper) 15 | { 16 | set s(vec.begin(), vec.end()); 17 | auto lb = s.lower_bound(lower); 18 | auto ub = lb; 19 | if (ub != s.end()) ++ub; 20 | if (ub != s.end()) ++ub; 21 | for (; ub != s.end(); ++ub) { 22 | if (*ub > upper) break; 23 | } 24 | return set(lb, ub); 25 | } 26 | 27 | /* Set to true by seeker_test when a case reports a wrong answer or when catch_all returns true; main turns it into the process exit status. */ static bool test_fails = false; 28 | 29 | /* Runs one seek_backward case and reports it on cerr. Prints the target and the contents of vec, calls seek_backward(target, callback) where the callback counts its invocations in loops and delegates to seeker_finder, then compares the result with my_found: the last element of vec, in iteration order, that is less than or equal to target, or 0 when there is none. A mismatch sets test_fails. NOTE(review): excepted is whatever catch_all returns; presumably true means the lambda threw - confirm against the crucible error helpers. */ static 30 | void 31 | seeker_test(const vector &vec, uint64_t const target) 32 | { 33 | cerr << "Find " << target << " in {"; 34 | for (auto i : vec) { 35 | cerr << " " << i; 36 | } 37 | cerr << " } = "; 38 | size_t loops = 0; 39 | bool excepted = catch_all([&]() { 40 | auto found = seek_backward(target, [&](uint64_t lower, uint64_t upper) { 41 | ++loops; 42 | return seeker_finder(vec, lower, upper); 43 | }); 44 | cerr << found; 45 | uint64_t my_found = 0; 46 | for (auto i : vec) { 47 | if (i <= target) { 48 | my_found = i; 49 | } 50 | } 51 | if (found == my_found) { 52 | cerr << " (correct)"; 53 | } else { 54 | cerr << " (INCORRECT - right answer is " << my_found << ")"; 55 | test_fails = true; 56 | } 57 | }); 58 | cerr << " (" << loops << " loops)" << endl; 59 | if (excepted) { 60 | test_fails = true; 61 | } 62 | } 63 | 64 | /* Table of seeker_test cases: a dense vector 0..5 with every element as target, a sparse vector with targets below, between, on and above its elements, single-element and empty vectors, and targets and elements at or one below numeric_limits max. */ static 65 | void 66 | test_seeker() 67 | { 68 | seeker_test(vector { 0, 1, 2, 3, 4, 5 }, 3); 69 | seeker_test(vector { 0, 1, 2, 3, 4, 5 }, 5); 70 | seeker_test(vector { 0, 1, 2, 3, 4, 5 }, 
0); 71 | seeker_test(vector { 0, 1, 2, 3, 4, 5 }, 1); 72 | seeker_test(vector { 0, 1, 2, 3, 4, 5 }, 4); 73 | seeker_test(vector { 0, 1, 2, 3, 4, 5 }, 2); 74 | 75 | seeker_test(vector { 11, 22, 33, 44, 55 }, 2); 76 | seeker_test(vector { 11, 22, 33, 44, 55 }, 25); 77 | seeker_test(vector { 11, 22, 33, 44, 55 }, 52); 78 | seeker_test(vector { 11, 22, 33, 44, 55 }, 99); 79 | seeker_test(vector { 11, 22, 33, 44, 55, 56 }, 99); 80 | seeker_test(vector { 11, 22, 33, 44, 55 }, 1); 81 | seeker_test(vector { 11, 22, 33, 44, 55 }, 55); 82 | seeker_test(vector { 11 }, 55); 83 | seeker_test(vector { 11 }, 10); 84 | seeker_test(vector { 55 }, 55); 85 | seeker_test(vector { }, 55); 86 | seeker_test(vector { 55 }, numeric_limits::max()); 87 | seeker_test(vector { 55 }, numeric_limits::max() - 1); 88 | seeker_test(vector { }, numeric_limits::max()); 89 | seeker_test(vector { 0, numeric_limits::max() }, numeric_limits::max()); 90 | seeker_test(vector { 0, numeric_limits::max() - 1 }, numeric_limits::max()); 91 | seeker_test(vector { 0, numeric_limits::max() - 1 }, numeric_limits::max()); 92 | } 93 | 94 | 95 | /* Entry point of the seeker test: runs test_seeker and returns EXIT_FAILURE when any case set test_fails, EXIT_SUCCESS otherwise. Unlike the other test programs in this chunk it returns instead of calling exit(). */ int main(int, const char **) 96 | { 97 | 98 | RUN_A_TEST(test_seeker()); 99 | 100 | return test_fails ? 
EXIT_FAILURE : EXIT_SUCCESS; 101 | } 102 | -------------------------------------------------------------------------------- /test/table.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #include "crucible/table.h" 4 | 5 | using namespace crucible; 6 | using namespace std; 7 | 8 | /* Writes t to cerr between a BEGIN TABLE line and an END TABLE line, followed by an empty line. */ void 9 | print_table(const Table::Table& t) 10 | { 11 | cerr << "BEGIN TABLE\n"; 12 | cerr << t; 13 | cerr << "END TABLE\n"; 14 | cerr << endl; 15 | } 16 | 17 | /* Builds a Table step by step and prints it after each change: three insert_row calls at Table::endpos (the third row starts with a Table::Fill cell), then mid/left/right separator strings, then insert_col at index 1, then replacing the cell at(2, 1) with text containing a newline. Nothing is asserted here; the output on cerr is for inspection only. */ void 18 | test_table() 19 | { 20 | Table::Table t; 21 | t.insert_row(Table::endpos, vector { 22 | Table::Text("Hello, World!"), 23 | Table::Text("2"), 24 | Table::Text("3"), 25 | Table::Text("4"), 26 | }); 27 | print_table(t); 28 | t.insert_row(Table::endpos, vector { 29 | Table::Text("Greeting"), 30 | Table::Text("two"), 31 | Table::Text("three"), 32 | Table::Text("four"), 33 | }); 34 | print_table(t); 35 | t.insert_row(Table::endpos, vector { 36 | Table::Fill('-'), 37 | Table::Text("ii"), 38 | Table::Text("iii"), 39 | Table::Text("iv"), 40 | }); 41 | print_table(t); 42 | t.mid(" | "); 43 | t.left("| "); 44 | t.right(" |"); 45 | print_table(t); 46 | t.insert_col(1, vector { 47 | Table::Text("1"), 48 | Table::Text("one"), 49 | Table::Text("i"), 50 | Table::Text("I"), 51 | }); 52 | print_table(t); 53 | t.at(2, 1) = Table::Text("Two\nLines"); 54 | print_table(t); 55 | } 56 | 57 | /* Entry point of the table test: runs test_table through RUN_A_TEST and exits with EXIT_SUCCESS. */ int 58 | main(int, char**) 59 | { 60 | RUN_A_TEST(test_table()); 61 | 62 | exit(EXIT_SUCCESS); 63 | } 64 | -------------------------------------------------------------------------------- /test/task.cc: -------------------------------------------------------------------------------- 1 | #include "tests.h" 2 | 3 | #include "crucible/task.h" 4 | #include "crucible/time.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | using namespace crucible; 17 | using namespace std; 18 | 19 | /* Creates and runs count Tasks; each task body locks mtx, sets its own slot in task_done and calls cv.notify_one(). The caller then loops, counting set slots and waiting on cv, and returns once all count slots are set. Queue and worker status is printed into a local ostringstream that is never read. */ void 20 | test_tasks(size_t count) 21 
| { 22 | TaskMaster::set_thread_count(); 23 | 24 | vector task_done(count, false); 25 | 26 | mutex mtx; 27 | condition_variable cv; 28 | 29 | /* mtx is held from here on and only given up inside cv.wait below, so a task body stops at its own lock on mtx until the caller is waiting. */ unique_lock lock(mtx); 30 | 31 | // Run several tasks in parallel 32 | for (size_t c = 0; c < count; ++c) { 33 | ostringstream oss; 34 | oss << "task #" << c; 35 | Task t( 36 | oss.str(), 37 | [c, &task_done, &mtx, &cv]() { 38 | unique_lock lock(mtx); 39 | // cerr << "Task #" << c << endl; 40 | task_done.at(c) = true; 41 | cv.notify_one(); 42 | } 43 | ); 44 | t.run(); 45 | } 46 | 47 | // Get current status 48 | ostringstream oss; 49 | TaskMaster::print_queue(oss); 50 | TaskMaster::print_workers(oss); 51 | 52 | while (true) { 53 | size_t tasks_done = 0; 54 | for (auto i : task_done) { 55 | if (i) { 56 | ++tasks_done; 57 | } 58 | } 59 | if (tasks_done == count) { 60 | return; 61 | } 62 | // cerr << "Tasks done: " << tasks_done << endl; 63 | 64 | cv.wait(lock); 65 | } 66 | } 67 | 68 | /* Prints queue and worker status into a local ostringstream that is discarded, calls TaskMaster::set_thread_count(0), and writes a progress marker to cerr. NOTE(review): presumably a thread count of 0 stops the worker threads - confirm in crucible/task.h. */ void 69 | test_finish() 70 | { 71 | ostringstream oss; 72 | TaskMaster::print_queue(oss); 73 | TaskMaster::print_workers(oss); 74 | TaskMaster::set_thread_count(0); 75 | cerr << "finish done..."; 76 | } 77 | 78 | /* Calls TaskMaster::set_thread_count() with no argument and writes a progress marker to cerr; main calls it after test_finish and before the next test that runs tasks. */ void 79 | test_unfinish() 80 | { 81 | TaskMaster::set_thread_count(); 82 | cerr << "unfinish done..."; 83 | } 84 | 85 | 86 | /* Creates and runs count Tasks that each capture a copy of Barrier b; a task body locks mtx, sets its slot in task_done and calls release() on its copy. A separate task named Waiting for Barrier, which sets done_flag and calls cv.notify_all(), is inserted into b, and then the caller releases its own b. The worker task bodies do not capture cv, so the only notification comes from the inserted task. The caller loops until every slot is set and done_flag is true. NOTE(review): presumably Barrier runs inserted tasks once every copy has been released - confirm in crucible/task.h. */ void 87 | test_barrier(size_t count) 88 | { 89 | vector task_done(count, false); 90 | 91 | mutex mtx; 92 | condition_variable cv; 93 | bool done_flag = false; 94 | 95 | unique_lock lock(mtx); 96 | 97 | Barrier b; 98 | 99 | // Run several tasks in parallel 100 | for (size_t c = 0; c < count; ++c) { 101 | ostringstream oss; 102 | oss << "task #" << c; 103 | auto b_hold = b; 104 | Task t( 105 | oss.str(), 106 | [c, &task_done, &mtx, b_hold]() mutable { 107 | // ostringstream oss; 108 | // oss << "Task #" << c << endl; 109 | unique_lock lock(mtx); 110 | // cerr << oss.str(); 111 | task_done.at(c) = true; 112 | b_hold.release(); 113 | } 114 | ); 115 | t.run(); 116 | } 117 | 118 | // Need completed to go out of local scope 
so it will release b 119 | { 120 | Task completed( 121 | "Waiting for Barrier", 122 | [&mtx, &cv, &done_flag]() { 123 | unique_lock lock(mtx); 124 | // cerr << "Running cv notify" << endl; 125 | done_flag = true; 126 | cv.notify_all(); 127 | } 128 | ); 129 | b.insert_task(completed); 130 | } 131 | 132 | // Get current status 133 | // TaskMaster::print_queue(cerr); 134 | // TaskMaster::print_workers(cerr); 135 | 136 | // Release our b 137 | b.release(); 138 | 139 | while (true) { 140 | size_t tasks_done = 0; 141 | for (auto i : task_done) { 142 | if (i) { 143 | ++tasks_done; 144 | } 145 | } 146 | cerr << "Tasks done: " << tasks_done << " done_flag " << done_flag << endl; 147 | if (tasks_done == count && done_flag) { 148 | break; 149 | } 150 | 151 | cv.wait(lock); 152 | } 153 | // cerr << "test_barrier return" << endl; 154 | } 155 | 156 | /* Creates and runs count Tasks that share one Exclusion object through a shared pointer. Each task body calls excl->try_lock(Task::current_task()); when that yields a false value it increments lock_failure_count and returns. Otherwise it increments lock_success_count, asserts that the plain mutex only_one can be try_locked (so no other task body is between that try_lock and unlock at the same time), sleeps briefly, unlocks only_one, and then under mtx decrements tasks_running, increments pings[c] and notifies cv. The caller increments tasks_running under mtx before each run(), drops its own excl reference, and waits until tasks_running is zero, printing pings and the counters whenever wait_for times out. Finally every pings entry must equal 1. NOTE(review): a task whose try_lock fails returns without decrementing tasks_running, so the wait loop only finishes if each task body eventually runs to the end exactly once; presumably Exclusion::try_lock arranges for the failed task to be run again - confirm in crucible/task.h. */ void 157 | test_exclusion(size_t count) 158 | { 159 | mutex only_one; 160 | auto excl = make_shared(); 161 | 162 | mutex mtx; 163 | condition_variable cv; 164 | 165 | size_t tasks_running(0); 166 | atomic lock_success_count(0); 167 | atomic lock_failure_count(0); 168 | 169 | vector pings; 170 | pings.resize(count); 171 | 172 | // Run several tasks in parallel 173 | for (size_t c = 0; c < count; ++c) { 174 | ostringstream oss; 175 | oss << "task #" << c; 176 | Task t( 177 | oss.str(), 178 | [c, &only_one, excl, &lock_success_count, &lock_failure_count, &pings, &tasks_running, &cv, &mtx]() mutable { 179 | // cerr << "Task #" << c << endl; 180 | (void)c; 181 | auto lock = excl->try_lock(Task::current_task()); 182 | if (!lock) { 183 | ++lock_failure_count; 184 | return; 185 | } 186 | ++lock_success_count; 187 | bool locked = only_one.try_lock(); 188 | assert(locked); 189 | nanosleep(0.0001); 190 | only_one.unlock(); 191 | unique_lock mtx_lock(mtx); 192 | --tasks_running; 193 | ++pings[c]; 194 | cv.notify_all(); 195 | } 196 | ); 197 | unique_lock mtx_lock(mtx); 198 | ++tasks_running; 199 | t.run(); 200 | } 201 | 202 | excl.reset(); 203 | 
204 | unique_lock lock(mtx); 205 | while (tasks_running) { 206 | auto cv_rv = cv.wait_for(lock, chrono::duration(1)); 207 | if (cv_rv == cv_status::timeout) { 208 | // TaskMaster::print_tasks(cerr); 209 | for (auto i : pings) { 210 | cerr << i << " "; 211 | } 212 | cerr << endl << "tasks_running = " << tasks_running << endl; 213 | cerr << "lock_success_count " << lock_success_count << endl; 214 | cerr << "lock_failure_count " << lock_failure_count << endl; 215 | } 216 | } 217 | cerr << "lock_success_count " << lock_success_count << endl; 218 | cerr << "lock_failure_count " << lock_failure_count << endl; 219 | 220 | bool oops = false; 221 | for (size_t c = 0; c < pings.size(); ++c) { 222 | if (pings[c] != 1) { 223 | cerr << "pings[" << c << "] = " << pings[c] << endl; 224 | oops = true; 225 | } 226 | } 227 | if (oops) { 228 | assert(!"Pings not OK"); 229 | } else { 230 | cerr << "Pings OK" << endl; 231 | } 232 | } 233 | 234 | /* Entry point of the task test: arms alarm(9) first, then runs test_tasks, test_barrier and test_exclusion with 256 tasks each, calling test_finish after every one of them and test_unfinish before the next, and exits with EXIT_SUCCESS. */ int 235 | main(int, char**) 236 | { 237 | // in case of deadlock 238 | alarm(9); 239 | 240 | RUN_A_TEST(test_tasks(256)); 241 | RUN_A_TEST(test_finish()); 242 | RUN_A_TEST(test_unfinish()); 243 | RUN_A_TEST(test_barrier(256)); 244 | RUN_A_TEST(test_finish()); 245 | RUN_A_TEST(test_unfinish()); 246 | RUN_A_TEST(test_exclusion(256)); 247 | RUN_A_TEST(test_finish()); 248 | 249 | exit(EXIT_SUCCESS); 250 | } 251 | -------------------------------------------------------------------------------- /test/tests.h: -------------------------------------------------------------------------------- 1 | #ifndef CRUCIBLE_TESTS_H 2 | #define CRUCIBLE_TESTS_H 3 | 4 | /* NOTE(review): NDEBUG is undefined ahead of the include that follows, presumably so that assert() stays active in the test programs whatever the build flags are - confirm which header that include names. */ #undef NDEBUG 5 | 6 | #include 7 | 8 | /* RUN_A_TEST(test): writes the word Testing, the stringified argument and three dots to std::cerr and flushes, evaluates the argument as a statement inside its own do/while(0) scope, then writes OK and a newline. The outer do/while(0) makes the macro usable as a single statement. */ #define RUN_A_TEST(test) do { \ 9 | std::cerr << "Testing " << #test << "..." 
<< std::flush; \ 10 | do { test ; } while (0); \ 11 | std::cerr << "OK" << std::endl; \ 12 | } while (0) 13 | 14 | #endif // CRUCIBLE_TESTS_H 15 | -------------------------------------------------------------------------------- /test/tmp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | --------------------------------------------------------------------------------