├── src
    ├── cover
    │   └── README.md
    ├── background
    │   ├── capability-format.jpg
    │   ├── README.md
    │   ├── architectural-rules.md
    │   └── cheri-capabilities.md
    ├── cheriabi
    │   ├── README.md
    │   ├── handling-capability-signals.md
    │   ├── revocation-apis.md
    │   └── posix-api-changes.md
    ├── printf
    │   ├── README.md
    │   ├── printf.md
    │   └── strfcap.md
    ├── impact
    │   ├── other-sources-of-bounds.md
    │   ├── data-structure-and-memory-allocation-alignment.md
    │   ├── bounds.md
    │   ├── bounds-from-compiler.md
    │   ├── README.md
    │   ├── capability-alignment-in-memory.md
    │   ├── bounds-from-heap-allocator.md
    │   ├── function-prototypes-and-calling-conventions.md
    │   ├── pointer-comparison.md
    │   ├── capability-faults.md
    │   ├── restrictions-in-capability-locations.md
    │   ├── revocation.md
    │   ├── out-of-bounds-pointers.md
    │   ├── capability-representation.md
    │   ├── pointer-provenance-validity.md
    │   ├── single-origin-provenance.md
    │   ├── bitwise-operations.md
    │   ├── recommended-use-c-types.md
    │   └── subobject-bounds.md
    ├── compiler
    │   ├── README.md
    │   ├── ambiguous-provenance.md
    │   ├── underaligned-capabilities.md
    │   └── loss-of-provenance.md
    ├── cheri-ccpp
    │   ├── README.md
    │   ├── cheri-runtime.md
    │   ├── referential-spatial-temporal-safety.md
    │   └── nonaliasing-vs-trapping.md
    ├── apis
    │   ├── retrieving-capability-properties.md
    │   ├── README.md
    │   ├── cheri-related-header-files.md
    │   ├── capability-permissions.md
    │   ├── modifying-or-restricting-capability-properties.md
    │   ├── implications-for-memory-allocator-design.md
    │   └── bounds-alignment-due-to-compression.md
    ├── acks
    │   └── README.md
    ├── introduction
    │   ├── README.md
    │   ├── history.md
    │   └── definitions.md
    ├── limitations
    │   ├── stack-temporal-safety.md
    │   ├── README.md
    │   ├── unions.md
    │   ├── integer-pointer-safety.md
    │   ├── bounds-imprecision.md
    │   ├── compiler-optimizations.md
    │   └── compile-time-type-uncertainty.md
    ├── SUMMARY.md
    └── reading
    │   └── README.md
├── filters
    ├── latex-abstract.lua
    ├── md-source-list.lua
    ├── latex-xref-fixup.lua
    └── latex-custom-formats.lua
├── .gitignore
├── book.toml
├── TODO.txt
├── Makefile
├── .github
    └── workflows
    │   └── book.yml
├── LICENSE
├── README.md
└── latex
    └── cheri-c-programming.tex


/src/cover/README.md:
--------------------------------------------------------------------------------
1 | {{#include ../../README.md:cover}}
2 | 


--------------------------------------------------------------------------------
/filters/latex-abstract.lua:
--------------------------------------------------------------------------------
1 | function Header(header)  -- pandoc filter hook for Header elements; the `header` argument is never read
2 |   return {}  -- always returns an empty list; NOTE(review): per pandoc Lua-filter semantics this should delete the heading, leaving only body text for latex/abstract.tex -- confirm
3 | end
4 | 
5 | 


--------------------------------------------------------------------------------
/src/background/capability-format.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CTSRD-CHERI/cheri-c-programming/HEAD/src/background/capability-format.jpg


--------------------------------------------------------------------------------
/filters/md-source-list.lua:
--------------------------------------------------------------------------------
 1 | local filenames = {}
 2 | 
 3 | function Link(link)  -- records each link target in the file-level `filenames` list; returns nothing
 4 |   local path = "src/" .. link.target  -- prefix the target with "src/" (the book source dir configured in book.toml)
 5 |   if path ~= "src/cover/README.md" then  -- the cover page is the one path deliberately left out of the list
 6 |     table.insert(filenames, path)  -- appended in the order links are visited
 7 |   end
 8 | end
 9 | 
10 | function Pandoc(doc)  -- `doc` is ignored: the input document is replaced wholesale
11 |   return pandoc.Pandoc(pandoc.Para(table.concat(filenames, " ")))  -- new document with a single paragraph: the collected paths joined by single spaces
12 | end
13 | 


--------------------------------------------------------------------------------
/src/cheriabi/README.md:
--------------------------------------------------------------------------------
1 | # The CheriABI POSIX process environment
2 | 
3 | The CheriABI process environment implements a standard POSIX/UNIX API, but in
4 | some areas there are changes to API semantics (e.g., in the handling of tagged
5 | pointer values and I/O) or new functionality (such as relates to handling
6 | capability-related faults).
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | *.aux
 3 | *.bbl
 4 | *.blg
 5 | *.fls
 6 | *.log
 7 | *.fdb_latexmk
 8 | *.sw?
 9 | *.toc
10 | *.cb
11 | *.cb?
12 | *.idx
13 | *.ilg
14 | *.ind
15 | *.out
16 | /cheri-c-programming.pdf
17 | /cheri-c-programming-techreport.pdf
18 | /latex/build
19 | /latex/build-techreport
20 | /book
21 | /latex/sections.tex
22 | /latex/abstract.tex
23 | 


--------------------------------------------------------------------------------
/src/printf/README.md:
--------------------------------------------------------------------------------
1 | ## Printing capabilities from C
2 | 
3 | Capability pointers carry additional metadata that it can sometimes be useful
4 | to print to a human-readable string.
5 | CHERI C/C++ defines a decoded string format for capabilities, which may be
6 | accessed indirectly via existing C APIs such as `printf(3)`, `snprintf(3)`, or
7 | directly via calls to the `strfcap(3)` function itself.
8 | 


--------------------------------------------------------------------------------
/book.toml:
--------------------------------------------------------------------------------
 1 | [book]
 2 | authors = ["Robert N. M. Watson", "Alexander Richardson", "Brooks Davis",
 3 |   "John Baldwin", "David Chisnall", "Jessica Clarke", "Nathaniel Filardo",
 4 |   "Simon W. Moore", "Edward Napierala", "Peter Sewell", "Peter G. Neumann"]
 5 | language = "en"
 6 | multilingual = false
 7 | src = "src"
 8 | title = "CHERI C/C++ Programming Guide"
 9 | 
10 | [output.html]
11 | git-repository-url = "https://github.com/CTSRD-CHERI/cheri-c-programming"
12 | site-url = "/cheri-c-programming/"
13 | 


--------------------------------------------------------------------------------
/src/impact/other-sources-of-bounds.md:
--------------------------------------------------------------------------------
 1 | ### Other sources of bounds
 2 | 
 3 | Bounds may also be set by other parts of the implementation.
 4 | For example, the kernel may set bounds on pointers to new memory mappings (see
 5 | [The CheriABI POSIX process environment](../cheriabi)),
 6 | and the system library may set bounds on pointers
 7 | into returned buffers from APIs &mdash; e.g., `fgetln`.
 8 | More detailed information on how C/C++ code can set bounds can be found in
 9 | [C APIs to get and set capability properties](../apis).
10 | 


--------------------------------------------------------------------------------
/src/printf/printf.md:
--------------------------------------------------------------------------------
 1 | ### Printing capabilities with the printf(3) API family
 2 | 
 3 | When using the `printf(3)` family of APIs, the `#` qualifier to the `p` format
 4 | string will cause additional architecture-specific information to be printed
 5 | about a pointer.
 6 | In CHERI C/C++, this prints out capability metadata as rendered using
 7 | `strfcap(3)`'s `%C` format string.
 8 | For example, the following code fragment:
 9 | 
10 | ```
11 | int foo;
12 | ...
13 |         printf("%%p:\t%p\n", &foo);
14 |         printf("%%#p:\t%#p\n", &foo);
15 | ```
16 | 
17 | Will print out the following output in CheriBSD's CheriABI:
18 | 
19 | ```
20 | %p:	0x130b60
21 | %#p:	0x130b60 [rwRW,0x130b60-0x130b64]
22 | ```
23 | 


--------------------------------------------------------------------------------
/src/compiler/README.md:
--------------------------------------------------------------------------------
 1 | # CHERI compiler warnings and errors
 2 | 
 3 | The CHERI Clang compiler includes many diagnostic warnings to identify code
 4 | that is incompatible with CHERI C/C++ or may result in behavioral
 5 | differences.
 6 | In many cases, a successful compilation that does not emit any CHERI-specific
 7 | warnings will result in a functional spatially-safe program.
 8 | However, some incompatibilities (e.g., memory allocators returning
 9 | insufficiently aligned pointers) cannot yet be diagnosed statically.
10 | This section describes some of the more-commonly seen compiler warnings and
11 | provides suggestions on how to change the source code to be compatible with
12 | CHERI C/C++.
13 | All these warnings are enabled when the `-Wall` compiler flag is
14 | set.
15 | 


--------------------------------------------------------------------------------
/TODO.txt:
--------------------------------------------------------------------------------
 1 | It may be useful to restructure this guide to have three sections:
 2 | - What changes relative to ISO-conformant C code (effectively nothing, alignment
 3 |    is already required for some architectures, but byte-by-byte copies do break)
 4 | - Information for programmers who would like to make use of CHERI features (e.g. if
 5 |    you write an allocator you should be setting bounds). Information on low
 6 |    pointer bits could also go here
 7 | - Advanced topics for OS kernel developers/language runtime/etc: CHERI intrinsics, etc.
 8 | - Try to avoid the word "should" (suggestion from Paul Gotch) due to potential
 9 |   confusion that could arise.
10 | - Make observation about 32/64-bit transition and impact on recent / updated code, as well as longer-term maintainability
11 | - Any thoughts on standardisation?
12 | 


--------------------------------------------------------------------------------
/src/impact/data-structure-and-memory-allocation-alignment.md:
--------------------------------------------------------------------------------
 1 | ## Data-structure and memory-allocation alignment
 2 | 
 3 | CHERI C/C++ have stronger alignment requirements than C/C++ on conventional
 4 | architectures.
 5 | These requirements arise from two sources: that capabilities themselves must
 6 | be aligned at twice the integer architectural pointer width, and that
 7 | capability compression constrains the addresses that can be used for bounds
 8 | on larger objects.
 9 | 
10 | <!--
11 | \amnote{Is it worth mentioning compiler flags to warn on excessive padding?
12 |   In particular, it seems that it is often the case that the ordering of
13 |   struct elements that was devised for 32bit and 64bit architectures does
14 |   not help much to avoid extra padding with capabilities. It more or less
15 |   depends on how much the pointers are scattered in the struct definition.}
16 | -->
17 | 


--------------------------------------------------------------------------------
/src/impact/bounds.md:
--------------------------------------------------------------------------------
 1 | ## Bounds
 2 | 
 3 | CHERI C/C++ pointers are implemented using capabilities that enforce lower and
 4 | upper bounds on access.
 5 | In the pure-capability run-time environment, those bounds are normally set to
 6 | the range of the memory allocation into which the pointer is intended to
 7 | point.
 8 | Because of capability compression, increased alignment requirements may apply
 9 | to larger allocations (see [Bounds alignment due to compression](../apis/bounds-alignment-due-to-compression.html)).
10 | 
11 | Bounds may be set on pointers returned by multiple system components including
12 | the OS kernel, the run-time linker, compiler-generated code, system libraries,
13 | and other utility functions.
14 | As with violations of provenance validity, out-of-bounds accesses &mdash; including
15 | load, store, and instruction fetch &mdash; trigger a hardware exception (see
16 | [Capability-related faults](capability-faults.html)).
17 | 


--------------------------------------------------------------------------------
/src/impact/bounds-from-compiler.md:
--------------------------------------------------------------------------------
 1 | ### Bounds from the compiler and linker
 2 | 
 3 | The compiler will arrange that language-level pointers to stack allocations have suitable
 4 | bounds, and that the run-time linker will return bounded pointers to global
 5 | and thread-local variables.
 6 | Bounds will typically be set based on an explicitly requested allocation size
 7 | (e.g., via the size passed to `alloca`) or, for compiler-generated
 8 | code or linker-allocated memory, by the C type mechanism (e.g.,
 9 | `sizeof(foo)`), adjusted for precision requirements arising from
10 | capability compression.
11 | In some cases, such as with global variables allocated in multiple object
12 | files, the actual size of the allocation may not be resolved until run time,
13 | by the run-time linker.
14 | These bounds will typically not cause observable changes in behavior &mdash; other than hardware exceptions when (accidentally) performing an out-of-bounds access.
15 | 


--------------------------------------------------------------------------------
/src/cheri-ccpp/README.md:
--------------------------------------------------------------------------------
 1 | # CHERI C/C++
 2 | 
 3 | The architectural-capability type can be used in a variety of ways by
 4 | software.
 5 | One particularly useful use case is in implementing *CHERI C/C++*.
 6 | In this model, all C/C++ language-visible pointer types, as well as any
 7 | implied pointers implementing vtables, return addresses, function pointers,
 8 | global variables, arrays of variadic-function arguments, and so on, are
 9 | implemented using capabilities with tight bounds.
10 | This allows the architecture to imbue pointers with protection by virtue of
11 | architectural provenance validity, bounds checking, and permission checking,
12 | protecting pointers from corruption and providing strong spatial memory
13 | safety.
14 | In some execution environments, such as in CHERIoT and CheriBSD's CheriABI
15 | process environment, capabilities are combined with efficient architectural
16 | revocation features to enable strong heap temporal safety.
17 | 


--------------------------------------------------------------------------------
/src/cheriabi/handling-capability-signals.md:
--------------------------------------------------------------------------------
 1 | ## Handling capability-related signals
 2 | 
 3 | When a capability hardware exception fires, the operating system will map it
 4 | into the UNIX `SIGPROT` signal.
 5 | By default, this signal terminates the process, but the signal can be caught
 6 | by registering a `SIGPROT` handler.
 7 | When the signal handler fires, `siginfo.si_code` will be set to
 8 | describe the cause of the fault; available values, defined in
 9 | `signal.h`, include:
10 | 
11 | * **`PROT_CHERI_BOUNDS`**: Capability bounds fault &mdash; an out-of-bounds access was
12 |   attempted.
13 | * **`PROT_CHERI_PERM`**: Capability permission fault &mdash; the attempted access
14 |   exceeded the permissions granted by a capability.
15 | * **`PROT_CHERI_SEALED`**: Capability sealed fault &mdash; dereferencing a sealed
16 |   capability was attempted.
17 | * **`PROT_CHERI_TAG`**: Capability tag fault &mdash; dereferencing an invalid
18 |   capability was attempted.
19 | 


--------------------------------------------------------------------------------
/src/apis/retrieving-capability-properties.md:
--------------------------------------------------------------------------------
 1 | ## Retrieving capability properties
 2 | 
 3 | The following APIs allow capability properties to be retrieved from pointers:
 4 | 
 5 | * **`ptraddr_t cheri_address_get(void *c)`**: Return the address of the capability `c`.
 6 | 
 7 | * **`ptraddr_t cheri_base_get(void *c)`**: Return the lower bound of capability `c`.
 8 | 
 9 | * **`size_t cheri_length_get(void *c)`**: Return the length of the bounds for the capability `c`.
10 |   The base plus the length gives the upper bound on `c`'s address.
11 | 
12 | * **`size_t cheri_offset_get(void *c)`**: Return the difference between the address and the lower bound of the capability `c`.
13 | 
14 | * **`size_t cheri_perms_get(void *c)`**: Return the permissions of capability `c`.
15 |   (See [Capability permissions](capability-permissions.html).)
16 | 
17 | * **`_Bool cheri_tag_get(void *c)`**: Return whether capability `c` has its
18 |   validity tag set.
19 | 
20 | <!--
21 |   \arnote{This returns the raw tag value, cheriintrin.h may also provide `cheri_is_valid` and `cheri_is_invalid`}
22 | -->
23 | 


--------------------------------------------------------------------------------
/src/apis/README.md:
--------------------------------------------------------------------------------
 1 | # C APIs to get and set capability properties
 2 | <!--
 3 | \label{sec:cheri-apis}
 4 | -->
 5 | 
 6 | <!--
 7 | \rwnote{I wonder if we should talk more about permissions?  Perhaps not in
 8 |   this document, in which case possibly we should talk about them less?}
 9 | \amnote{If this is intended as a document to guide porting efforts perhaps
10 |   we should mention them only as background info? If this becomes a summary
11 |   of CHERI programming patterns then we probably want a section that talks
12 |   about permissions as well.}
13 | -->
14 | 
15 | CHERI C/C++ supports a number of new APIs to get and set capability
16 | properties given a pointer argument.
17 | Although most software does not need to directly manage capability properties,
18 | there are some cases when application code needs to further constrain
19 | permissions or limit bounds associated with pointers.
20 | For example, high-performance applications may contain custom memory
21 | allocators and wish to narrow bounds and permissions on returned pointers
22 | to prevent overflows between their own allocations.
23 | 


--------------------------------------------------------------------------------
/src/impact/README.md:
--------------------------------------------------------------------------------
 1 | # Impact on the C/C++ programming model
 2 | 
 3 | Several kinds of changes may be required by programmers; the extent to which
 4 | these changes impact a particular library or application will depend
 5 | significantly on its idiomatic use of C.
 6 | Our experience suggests that low-level system components such as run-time
 7 | linkers, debuggers, memory allocators, and language runtimes require a modest
 8 | but non-trivial porting effort.
 9 | Similarly, support classes that include, for example, custom synchronization
10 | features, may also require moderate adaptation.
11 | Other applications may compile with few or no changes &mdash; especially if they
12 | are already portable across 32-bit and 64-bit platforms and are written in a contemporary C or C++ dialect.
13 | In the following sections, we consider various kinds of programmer-visible
14 | changes required in the CHERI C/C++ programming environment.
15 | In many cases, compiler warnings and errors can be used to identify potential
16 | issues compiling code as CHERI C/C++ (see [CHERI compiler warnings and errors](../compiler)).
17 | 
18 | <!--
19 | \rwnote{Alex: Can we use the word "most" instead of "many"?}
20 | -->
21 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | NAME=cheri-c-programming
 2 | BIB=latex/cheri.bib
 3 | .PHONY: all latex pdf techreport clean  # every non-file target, including techreport, so a stray file of the same name cannot mask the rule
 4 | 
 5 | # Extract list of Markdown source files from mdbook TOC
 6 | MD_SRC_FILES=`pandoc src/SUMMARY.md --lua-filter=filters/md-source-list.lua -t plain`
 7 | 
 8 | all: latex pdf
 9 | 
10 | latex: latex/sections.tex latex/abstract.tex
11 | 
12 | latex/abstract.tex: src/introduction/README.md
13 | 	pandoc src/introduction/README.md --lua-filter=filters/latex-abstract.lua -t latex -o latex/abstract.tex
14 | 
15 | latex/sections.tex: src/SUMMARY.md src/*/*.md
16 | 	pandoc $(MD_SRC_FILES) --lua-filter=filters/latex-custom-formats.lua --lua-filter=filters/latex-xref-fixup.lua -t latex -o latex/sections.tex
17 | 
18 | pdf: latex/$(NAME).tex latex/sections.tex latex/abstract.tex
19 | 	latexmk -pdf latex/$(NAME) --jobname=$(NAME)-final -output-directory=latex/build
20 | 	cp -f "latex/build/$(NAME)-final.pdf" "$(NAME).pdf"
21 | 
22 | techreport: latex/$(NAME).tex latex/sections.tex latex/abstract.tex
23 | 	latexmk -pdf latex/$(NAME) --jobname=$(NAME)-techreport -output-directory=latex/build-techreport
24 | 	cp -f "latex/build-techreport/$(NAME)-techreport.pdf" "$(NAME)-techreport.pdf"
25 | 
26 | clean:
27 | 	rm -rf latex/build latex/build-techreport
28 | 


--------------------------------------------------------------------------------
/src/impact/capability-alignment-in-memory.md:
--------------------------------------------------------------------------------
 1 | ### Capability alignment in memory
 2 | 
 3 | Because tags apply only to memory locations that are capability-aligned
 4 | and capability-sized,
 5 | unaligned storage of pointers will either generate a run-time
 6 | hardware exception (if a capability-aware load or store is performed), or discard the
 7 | tag (if a capability-oblivious memory copy is performed &mdash; e.g., using
 8 | `memcpy` to copy from an aligned location to an unaligned one).
 9 | One example of this is Berkeley DB (BDB) when used as an in-memory
10 | implementation rather than as an on-disk database format.
11 | Even when patched to use `memcpy` to copy objects stored as data, it
12 | does not ensure sufficient alignment in its internal storage to preserve tags.
13 | We therefore recommend against using BDB for this purpose.
14 | While unaligned pointer use is uncommon in C programs, as data-structure
15 | layouts are normally designed to keep them strongly aligned for performance
16 | and atomicity reasons, any code depending on unaligned pointers will need
17 | to be changed.
18 | 
19 | <!--
20 | \amnote{Should we mention code that assumes that it is ok to go out of bounds
21 | for optimization purposes? E.g., strcmp loading a word at a time?}
22 | \psnote{yes}
23 | -->
24 | 


--------------------------------------------------------------------------------
/.github/workflows/book.yml:
--------------------------------------------------------------------------------
 1 | name: Build book
 2 | 
 3 | on:
 4 |   push:
 5 |     branches-ignore:
 6 |       - gh-pages
 7 |   release:
 8 |     types:
 9 |       - created
10 | 
11 | jobs:
12 |   build:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - name: Install cargo
16 |         uses: actions-rs/toolchain@v1
17 |         with:
18 |           toolchain: stable
19 |           profile: minimal
20 | 
21 |       - name: Install mdbook
22 |         run: curl -sSL https://github.com/rust-lang/mdBook/releases/download/v0.4.45/mdbook-v0.4.45-x86_64-unknown-linux-gnu.tar.gz | tar -xz --directory=$HOME/.cargo/bin
23 | 
24 |       - name: Checkout source
25 |         uses: actions/checkout@v4
26 | 
27 |       - name: Build book
28 |         run: mdbook build
29 | 
30 |       - name: Upload book artifact
31 |         uses: actions/upload-artifact@v4
32 |         with:
33 |           name: book
34 |           path: book
35 | 
36 |   deploy:
37 |     if: github.event_name == 'push' && github.ref == 'refs/heads/master'
38 |     needs: build
39 |     runs-on: ubuntu-latest
40 |     steps:
41 |       - name: Download book artifact
42 |         uses: actions/download-artifact@v4
43 |         with:
44 |           name: book
45 |           path: public
46 | 
47 |       - name: Deploy
48 |         uses: peaceiris/actions-gh-pages@v4
49 |         with:
50 |           github_token: ${{ secrets.GITHUB_TOKEN }}
51 | 
52 | 


--------------------------------------------------------------------------------
/src/apis/cheri-related-header-files.md:
--------------------------------------------------------------------------------
 1 | ## CHERI-related header files
 2 | 
 3 | A set of compiler built-in functions provide access to capability properties
 4 | of pointers.
 5 | Two new header files (distributed as part of the CHERI Clang compiler)
 6 | provide access to further CHERI-related programming
 7 | interfaces including more human-friendly macro wrappers around the compiler
 8 | builtins, and also definitions of key CHERI constants:
 9 | 
10 | * **`cheriintrin.h`**: defines interfaces to access and
11 |   modify capability properties.
12 |   It also defines constants for capability permissions that are portable
13 |   across all implementations of CHERI.
14 | 
15 | * **`cheri.h`**: provides macros for slightly higher-level operations
16 |   such as the manipulation of low pointer bits (see
17 |   [Bitwise operations on capability types](../impact/bitwise-operations.html)).
18 | 
19 | When compiling for CheriBSD, the following header provides additional
20 | constants relating to OS use of capabilities &mdash; for example, software-defined
21 | permission bits:
22 | 
23 | * **`cheri/cheri.h`**: defines constants such as those used in the
24 |   capability permission mask.
25 | 
26 | <!--
27 | %`cheri/cheric.h`: defines interfaces to access and
28 | %  modify capability properties.
29 | 
30 | \rwnote{This section may need updating once we've converged OS and compiler
31 |   versions of cheri.h, and done any necessary header refactoring.}
32 | -->
33 | 


--------------------------------------------------------------------------------
/src/acks/README.md:
--------------------------------------------------------------------------------
 1 | # Acknowledgements
 2 | 
 3 | We gratefully acknowledge the helpful feedback from our colleagues, including
 4 | Hesham Almatary, Ruben Ayrapetyan, Silviu Baranga, Jacob Bramley, Rod Chapman,
 5 | Paul Gotch, Al Grant, Brett Gutstein, Alfredo Mazzinghi, Alan Mycroft, and Lee
 6 | Smith.
 7 | 
 8 | This work was supported by the Defense Advanced Research Projects Agency
 9 | (DARPA) and the Air Force Research Laboratory (AFRL), under contracts
10 | FA8750-10-C-0237 ("CTSRD") and HR0011-18-C-0016 ("ECATS").
11 | The views, opinions, and/or findings contained in this report are those of the
12 | authors and should not be interpreted as representing the official views or
13 | policies of the Department of Defense or the U.S. Government.
14 | 
15 | This work was supported in part by the Innovate UK project Digital Security by
16 | Design (DSbD) Technology Platform Prototype, 105694.
17 | 
18 | This work was supported in part by the Engineering and Physical Sciences
19 | Research Council (EPSRC) under UKRI3001: CHERI Research Centre, and under the
20 | EPSRC REMS Programme Grant (EP/K008528/1).
21 | 
22 | This project has received funding from the European Research Council (ERC)
23 | under the European Union’s Horizon 2020 research and innovation programme
24 | (grant agreement No 789108), ERC Advanced Grant ELVER.
25 | 
26 | We also acknowledge Arm Limited, HP Enterprise, and Google, Inc.
27 | Approved for Public Release, Distribution Unlimited.
28 | 


--------------------------------------------------------------------------------
/src/background/README.md:
--------------------------------------------------------------------------------
 1 | # Background
 2 | 
 3 | CHERI extends conventional processor Instruction-Set Architectures (ISAs) with
 4 | support for *architectural capabilities*.
 5 | One important use for this new hardware data type is in the implementation
 6 | of safer C/C++ pointers and the code or data they point at.
 7 | 
 8 | Our 2019 technical report, [*An Introduction to
 9 | CHERI*](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-941.pdf), provides a
10 | more detailed overview of the CHERI architecture, ISA modeling, hardware
11 | implementations, and software stack[^1].
12 | Our more recent 2024 article in IEEE Security and Privacy,
13 | [*CHERI: Hardware-Enabled C/C++ Memory Protection at
14 | Scale*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10568212),
15 | reviews recent research and results[^2].
16 | 
17 | [^1]: Robert N. M. Watson, Simon W. Moore, Peter Sewell, and Peter G. Neumann.
18 | [An Introduction to
19 | CHERI](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-941.pdf), Technical
20 | Report UCAM-CL-TR-941, Computer Laboratory, September 2019.
21 | 
22 | [^2]: Robert N.M. Watson, David Chisnall, Jessica Clarke, Brooks Davis,
23 | Nathaniel Wesley Filardo, Ben Laurie, Simon W. Moore, Peter G. Neumann,
24 | Alexander Richardson, Peter Sewell, Konrad Witaszczyk, and Jonathan Woodruff.
25 | [CHERI: Hardware-Enabled C/C++ Memory Protection at
26 | Scale](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10568212),
27 | IEEE Security & Privacy, vol. 22, no. 04, pp. 50-61, July-August 2024.
28 | 


--------------------------------------------------------------------------------
/src/apis/capability-permissions.md:
--------------------------------------------------------------------------------
 1 | ## Capability permissions
 2 | 
 3 | <!--
 4 | \label{sec:capability_permissions}
 5 | -->
 6 | 
 7 | A number of capability permissions are available for use; only those relating
 8 | to CHERI memory protection are enumerated here:
 9 | 
10 | * **`CHERI_PERM_EXECUTE`**: Authorize instruction fetch via this
11 |    capability.
12 | 
13 | * **`CHERI_PERM_LOAD`**: Authorize data load via this capability.
14 | 
15 | * **`CHERI_PERM_LOAD_CAP`**: Authorize capability load via this capability.
16 |   If the permission is not present, the tag on the loaded value
17 |   will be silently cleared.
18 | 
19 | * **`CHERI_PERM_STORE`**: Authorize data store via this capability.
20 | 
21 | * **`CHERI_PERM_STORE_CAP`**: Authorize capability store via this capability.
22 |   If the permission is not present, and the tag on the stored capability is
23 |   valid, then a hardware exception will be thrown.
24 | 
25 | In addition to architectural permissions, CHERI capabilities have
26 | software-defined permissions.
27 | CheriBSD defines the following additional memory-protection-related
28 | permission:
29 | 
30 | * **`CHERI_PERM_CHERIABI_VMMAP`**: A CheriABI-specific user
31 |   permission that the kernel uses to authorize modifications to
32 |   virtual-memory mappings.
33 |   If the permission is not present, system calls that alter the contents
34 |   or the presentation of memory mappings will reject the request.
35 |   As this is a CheriBSD-specific permission, it is not defined in
36 |   `cheriintrin.h` and requires inclusion of `cheri/cheri.h`.
37 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2022 Robert N. M. Watson
 2 | 
 3 | This work was created by the University of Cambridge Department of Computer
 4 | Science and Technology with support from Innovate UK project 105694, "Digital
 5 | Security by Design (DSbD) Technology Platform Prototype".
 6 | 
 7 | Redistribution and use in source and binary forms, with or without
 8 | modification, are permitted provided that the following conditions
 9 | are met:
10 | 1. Redistributions of source code must retain the above copyright
11 |    notice, this list of conditions and the following disclaimer.
12 | 2. Redistributions in binary form must reproduce the above copyright
13 |    notice, this list of conditions and the following disclaimer in the
14 |    documentation and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 | ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 | SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/src/introduction/README.md:
--------------------------------------------------------------------------------
 1 | # Introduction
 2 | 
 3 | This document is a brief introduction to the CHERI C/C++
 4 | programming languages, which employ CHERI's architectural capability
 5 | primitive to implement C/C++-language memory safety.
 6 | We explain the principles underlying these language variants, and their
 7 | grounding in CHERI's multiple architectural instantiations:
 8 | CHERI-RISC-V application cores, CHERIoT microcontrollers, and Arm's Morello.
 9 | 
10 | We describe the most commonly encountered differences between these
11 | dialects and C/C++ on conventional architectures, and where existing
12 | software may require minor changes.
13 | We document new compiler warnings and errors that may be experienced compiling
14 | code with the CHERI Clang/LLVM compiler, and suggest how they may be addressed
15 | through typically minor source-code changes.
16 | We explain how modest language extensions allow selected software, such
17 | as memory allocators, to further refine permissions and bounds on pointers,
18 | as well as consistently print capability metadata to human-readable strings.
19 | 
20 | This guidance is based on our experience adapting the FreeBSD operating system
21 | kernel and userspace, and applications such as the PostgreSQL database, nginx
22 | web server, and Chromium web browser, to run in a CHERI C/C++ capability-based
23 | programming environment.
24 | It has also benefited from the considerable efforts taken by others to adapt
25 | large-scale code bases such as the Linux kernel and OpenJDK.
26 | 
27 | We conclude by recommending further reading.
28 | 


--------------------------------------------------------------------------------
/src/impact/bounds-from-heap-allocator.md:
--------------------------------------------------------------------------------
 1 | ### Bounds from the heap allocator
 2 | 
 3 | `malloc` will set bounds on pointers to new heap allocations.
 4 | In typical C use, this is not a problem, as programmers expect to access
 5 | addresses only within an allocation.
 6 | 
 7 | However, in some uses of C, there may be an expectation that memory access can
 8 | occur outside the allocation bounds of the pointer via which memory access
 9 | takes place.
10 | For example, if an integer pointer difference `D` is taken between
11 | pointers to two different allocations (`B` and `A`), and later
12 | added to pointer `A`, the new pointer will have an address
13 | within `B`, but permit access only to `A`.
14 | This idiom is most likely to be found with non-trivial uses of `realloc` (e.g., cases where multiple pointers into a buffer allocated or reallocated by `realloc` need to be updated).
15 | We note that the subtraction of two pointers from different
16 | allocations is undefined behavior in ISO C, and risks mis-optimization from
17 | breaking compiler alias analysis assumptions.
18 | Further, *any* operation on the pointer passed to `realloc` is undefined upon
19 | return.  Instead, we suggest that the programmer measure a pointer `P`'s
20 | offset into an object `A` *prior to* `realloc` and derive new pointers
21 | from the `realloc` result `B` and these offsets. (i.e., compute
22 | *`B + (P - A)`* rather than
23 | *`P + (B - A)`*).[^4]
24 | 
25 | [^4]: While it may seem that `A` remains available after `realloc`, our
26 | revocation sweeps which enforce temporal safety may have atomically replaced
27 | this with a non-pointer value.  The scalar value
28 | *`D = P - A`* will naturally be preserved by revocation.
29 | 


--------------------------------------------------------------------------------
/src/printf/strfcap.md:
--------------------------------------------------------------------------------
 1 | ### Generating string representations of capabilities
 2 | 
 3 | ```
 4 | ssize_t
 5 |      strfcap(char * restrict buf, size_t maxsize,
 6 |          const char * restrict format, uintcap_t cap);
 7 | ```
 8 | 
 9 | The `strfcap(3)` API accepts multiple arguments:
10 | 
11 |  * `buf` is a target character buffer for the resulting generated string.
12 |  * `maxsize` is the size of the target character buffer.
13 |  * `format` is a string containing zero or more conversion specifiers or
14 |    ordinary characters.
15 |  * `cap` is the capability to decode.
16 | 
17 | The return value is the number of characters that would have been printed if
18 | `maxsize` were unlimited, excluding the trailing nul terminator.
19 | A negative value is returned on failure.
20 | 
21 | Various format specifiers, documented in the [CheriBSD `strfcap(3)` man
22 | page](https://man.cheribsd.org/cgi-bin/man.cgi/strfcap.3), include various
23 | individual field specifiers for capability metadata such as its address,
24 | attributes, base address, length, offset, permissions, and so on.
25 | The `%C` format string will print out capabilities with the following format
26 | `%#xa [%P,%#xb-%#xt]%? %A`:
27 | 
28 |  * `%xa`: Hex formatted capability address
29 |  * `%P`: Abbreviated human-readable capability permissions
30 |  * `%xb`: Hex formatted capability base
31 |  * `%xt`: Hex formatted capability top address
32 |  * `%A`: Textual representation of capability attributes, such as `invalid` or `sentry`
33 | 
34 | For example, the `strfcap(3)` output `0x130b60 [rwRW,0x130b60-0x130b64]`
35 | describes a capability whose:
36 | 
37 |  * Capability address is `0x130b60`
38 |  * Capability permissions are `rwRW` (can read and write both data and
39 |    capabilities)
40 |  * Capability base address is `0x130b60`
41 |  * Capability top address is `0x130b64`
42 |  * Has a valid tag
43 |  * Is not sealed
44 | 
45 | The `strfcap(3)` man page should be referenced for full details.
46 | 


--------------------------------------------------------------------------------
/filters/latex-xref-fixup.lua:
--------------------------------------------------------------------------------
 1 | if FORMAT:match 'latex' then
 2 |   --Normalize LaTeX xref names in a lookup table. (mdBook defines the
 3 |   --targets by section name but the xrefs by filename, so they don't
 4 |   --always match.) Keys are normalized link targets with "-" -> "_".
 5 |   local xreflookup = {
 6 |     apis="c-apis-to-get-and-set-capability-properties",
 7 |     bitwise_operations="bitwise-operations-on-capability-types",
 8 |     capability_faults="capability-related-faults",
 9 |     cheriabi="the-cheriabi-posix-process-environment",
10 |     compiler="cheri-compiler-warnings-and-errors",
11 |     recommended_use_c_types="recommended-use-of-c-language-types",
12 |     restrictions_in_capability_locations="restrictions-in-capability-locations-in-memory"
13 |   }
14 | 
15 |   --Pandoc filter for Link elements: URIs pass through unchanged; any
16 |   --other target becomes a raw-LaTeX \Cref{sec:...} cross-reference.
17 |   function Link(link)
18 |     --URI schemes (RFC 3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )).
19 |     --"-" is escaped so it is a literal, not an (undefined) range.
20 |     if link.target:find("^%a[%a%d+%-.]*:") then
21 |       return link
22 |     end
23 | 
24 |     --Reduce the target to a bare section name: drop the file
25 |     --extension, a trailing "/", a leading "../", and one directory.
26 |     local normalized = link.target:gsub("%.%a+$","")
27 |     normalized = normalized:gsub("/$","")
28 |     normalized = normalized:gsub("^%.%./?","")
29 |     normalized = normalized:gsub("^[%a-]+/","")
30 | 
31 |     --Apply the lookup table for xrefs that don't match their target
32 |     local lookupkey = normalized:gsub("%-","_")
33 |     local lookupvalue = xreflookup[lookupkey]
34 |     if lookupvalue ~= nil then
35 |       normalized = lookupvalue
36 |     end
37 | 
38 |     --Replace the original Pandoc element with raw LaTeX for the
39 |     --cleveref xref.
40 |     return pandoc.RawInline('latex', "\\Cref{sec:" .. normalized .. "}")
41 |   end
42 | 
43 |   --Pandoc filter for Header elements: prefix the identifier with
44 |   --"sec:" so it matches the \Cref{sec:...} labels emitted by Link.
45 |   function Header(header)
46 |     header.identifier = "sec:" .. header.identifier
47 |     return header
48 |   end
49 | end
50 | 


--------------------------------------------------------------------------------
/src/limitations/stack-temporal-safety.md:
--------------------------------------------------------------------------------
 1 | ## Stack temporal safety
 2 | 
 3 | CHERI includes different temporal-safety mechanisms at different
 4 | microarchitectural scales.
 5 | In general, those mechanisms scale well to providing temporal safety for
 6 | services such as memory mappings and heap allocations, but do not perform
 7 | sufficiently well to enable generalized stack temporal safety able to address
 8 | use-after-function-return and use-out-of-scope vulnerabilities.
 9 | In the presence of compartmentalization support (e.g., for libraries in
10 | CheriBSD or between compartments in CHERIoT RTOS), there may be stronger
11 | protections preventing stack reuse or limiting the flow and reuse of stack
12 | memory, but these are not provided within compartments.
13 | 
14 | **Advice to developers**: Existing sanitizers used to detect some
15 |   use-after-function-return and use-out-of-scope bugs should be used when
16 |   writing code intended to be robust on CHERI.
17 |   As exploitation of stack temporal-safety vulnerabilities often involves the
18 |   use of uninitialized stack values, compiler features that automatically
19 |   initialize stack variables can also play an important role in mitigation
20 |   (see below).
21 |   Finally, integer-pointer and other CHERI protections, such as sealing of
22 |   code pointers, provide significant robustness against exploitation of such
23 |   vulnerabilities -- but cannot be argued to strongly mitigate stack
24 |   temporal-safety vulnerabilities in the same way that can be argued for heap
25 |   allocations.
26 | 
27 | **Ongoing research**: The broader CHERI research community has been exploring
28 |   a variety of mechanisms to provide probabilistic or deterministic stack
29 |   protections at varying costs, including concepts such as capability flow
30 |   control and capability linearity.
31 |   None have yet been deemed appropriate for widespread adoption, but we hope
32 |   that at least one mechanism will proceed to maturity in due course.
33 | 


--------------------------------------------------------------------------------
/src/compiler/ambiguous-provenance.md:
--------------------------------------------------------------------------------
 1 | ## Ambiguous provenance
 2 | 
 3 | For arithmetic and bitwise binary operations between `uintptr_t`/`intptr_t`, the compiler can generally infer which side of the expression should be used as the provenance (and bounds) source.
 4 | However, as noted in [Single-origin provenance](../impact/single-origin-provenance.html), there are cases that are ambiguous as far as the compiler is concerned.
 5 | 
 6 | Consider for example a structure that holds a pointer and a small number of flags.
 7 | In this case the pointer is known to be aligned to at least 8 bytes, so the programmer uses the lowest 3 bits to store additional data:
 8 | 
 9 | ```{.clisting}
10 | typedef struct { uintptr_t data; } pointer_and_flags;
11 | void set_ptr(pointer_and_flags *p, void *value) {
12 |     p->data = (p->data & (uintptr_t)7) | (uintptr_t)(value);
13 | }
14 | void set_flags(pointer_and_flags *p, unsigned flags) {
15 |     p->data = p->data | (flags & 7);
16 | }
17 | ```
18 | 
19 | ```{.compilerwarning}
20 | <source>:3:40: warning: binary expression on capability types '__uintcap_t'
21 | and 'uintptr_t' (aka '__uintcap_t'); it is not clear which should be used as
22 | the source of provenance; currently provenance is inherited from the left-hand
23 | side [-Wcheri-provenance]
24 |     p->data = (p->data & (uintptr_t)7) | (uintptr_t)(value);
25 |               ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~
26 | 1 warning generated.
27 | ```
28 | 
29 | Unlike the compiler, the programmer knows that inside `set_ptr` capability metadata should always be taken from the `value` argument.
30 | The suggested fix for this problem is to cast the non-pointer argument to an integer type:
31 | 
32 | <pre><code>void set_ptr(pointer_and_flags *p, void *value) {
33 |     p->data = <mark id="FixAmbig" style="background-color: #77DD77">(size_t)</mark>(p->data & (uintptr_t)7) | (uintptr_t)(value);
34 | }
35 | </code></pre>
36 | 
37 | <!--
38 | \nwfnote{Not use cheri\_low\_bits\_set()?}
39 | -->
40 | 
41 | <!--
42 | \arnote{TODO: this section should have more examples.}
43 | -->
44 | 


--------------------------------------------------------------------------------
/src/limitations/README.md:
--------------------------------------------------------------------------------
 1 | # Limitations to CHERI C/C++ memory safety
 2 | 
 3 | The idea of memory-safe C and C++ variants able to compile and run the vast
 4 | majority of extant code, despite research exploration for several
 5 | decades, remains surprising: Both languages have long histories of
 6 | catastrophic memory-safety vulnerabilities, leading to disastrous reputations
 7 | for their unsafety.
 8 | It is our experience in developing CHERI C and C++ that there are indeed
 9 | fundamental limits to the nature of improvements that can be made: Software
10 | written in C and C++ expects significant type-system flexibility, making many
11 | forms of static and dynamic enforcement difficult, and inherently embeds
12 | assumptions that lead rapidly to "type confusion," a historically exploitable
13 | condition that frequently allows minor memory corruption to be escalated into
14 | arbitrary code execution.
15 | However, it is also our practical experience that quite substantial headway
16 | can be made in achieving strong memory safety for C and C++ despite this.
17 | 
18 | In this section we document and explore a number of constraints and
19 | limitations to CHERI C and C++ memory safety.
20 | Some originate from limitations in capturing language-level notions of type
21 | safety (e.g., preventing integer-pointer confusion while not introducing
22 | dynamic typing to differentiate pointer types from one another) when mapping
23 | into CHERI's simpler capability model, some from CHERI's performance and
24 | memory overhead (such as bounds compression), and others from limitations of
25 | the current implementations -- especially of the compiler and its
26 | optimizations -- that we hope further research will rectify.
27 | We explore:
28 | 
29 |  * Integer-pointer safety vs. pointer type safety
30 |  * Compile-time uncertainty regarding pointer types
31 |  * Bounds imprecision, sub-object bounds, and custom allocators
32 |  * Unions
33 |  * Stack temporal safety
34 |  * Compiler optimizations and undefined behavior
35 | 
36 | For each issue, we provide advice to developers and, where applicable,
37 | information on current research directions that may address these issues in
38 | the future.
39 | 


--------------------------------------------------------------------------------
/src/introduction/history.md:
--------------------------------------------------------------------------------
 1 | ## Version history
 2 | 
 3 | The current web version is a living document being prepared for release as a
 4 | second edition in late 2025, based on five years of deployed experience with
 5 | hundreds of CHERI C/C++ developers working on various CHERI platforms
 6 | including Arm's Morello prototype, Microsoft's CHERIoT, Codasip's X730,
 7 | the University of Cambridge's CHERI-Toooba, and Capabilities Limited's
 8 | CVA6-CHERI platforms.
 9 | 
10 | ### 2020
11 | 
12 | We published the first version of the *CHERI C/C++ Programming Guide* in June
13 | 2020.
14 | 
15 | ### 2025
16 | 
17 | This work-in-progress version of the *CHERI C/C++ Programming Guide* contains
18 | the following changes:
19 | 
20 |  * Conversion to mdbook from LaTeX to enable a live web version.
21 |  * Update cited articles and technical reports.
22 |  * Better define, and discourage use of, CHERI Hybrid C/C++.
23 |  * Include information on using CHERI C/C++ on a more diverse range of
24 |    platforms, including Morello and CHERIoT, as well as work on arising
25 |    CHERI-adapted OSes such as CHERI Linux and seL4.
26 |  * Include information on printing capability values via `strfcap(3)` and
27 |    `printf(3)`.
28 |  * Discuss the goal of non-aliasing spatial and temporal memory safety, and
29 |    explore when exceptions may be, or must be, delivered.
30 |  * Document a subset of `malloc_revoke(3)` APIs controlling revocation for the
31 |    system heap allocator.
32 |  * Note that some behaviors, such as bounds precision and revocation behavior,
33 |    are implementation defined.
34 |  * Document expectations for in-memory capabilities, in particular the
35 |    portability of `NULL` pointer values, and non-portability of any other
36 |    assumptions.
37 |  * Document that subobject bounds, as currently implemented, are
38 |    opportunistic, and may not be precise.
39 |  * Provide more detailed discussion of the limitations of the CHERI C/C++
40 |    approach including with respect to integer-pointer type safety vs full
41 |    pointer type safety, compile-time uncertainty on types, bounds imprecision,
42 |    unions, stack temporal safety, and compiler optimizations.
43 |  * Numerous minor editorial and formatting improvements.
44 | 


--------------------------------------------------------------------------------
/src/compiler/underaligned-capabilities.md:
--------------------------------------------------------------------------------
 1 | ## Underaligned capabilities
 2 | 
 3 | <!--
 4 | %\begin{compilerwarning}
 5 | %alignment (<N>) of '<type>' is less than the required capability alignment
 6 | %\end{compilerwarning}
 7 | -->
 8 | 
 9 | This warning is triggered when packed structures contain pointers.
10 | As mentioned in [Restrictions in capability locations in memory](../impact/restrictions-in-capability-locations.md), pointers must always be aligned to the size of a CHERI capability (16 bytes for a 64-bit architecture).
11 | This warning can be triggered by code that attempts to align pointers to at least 8 bytes (e.g., for compatibility between 32- and 64-bit architectures). For example:
12 | 
13 | <pre><code>struct AtLeast8ByteAlignedBad {
14 |     void *data;
15 | } __attribute__((packed, <mark id="BadAlignPacked" style="background-color: #EE918D">aligned(8)</mark>));
16 | </code></pre>
17 | 
18 | ```{.compilerwarning}
19 | <source>:1:8: warning: alignment (8) of 'struct AtLeast8ByteAlignedBad' is
20 | less than the required capability alignment (16) [-Wcheri-capability-misuse]
21 | struct AtLeast8ByteAlignedBad {
22 |        ^
23 | <source>:1:8: note: If you are certain that this is correct you can silence
24 | the warning by adding __attribute__((annotate("underaligned_capability")))
25 | 1 warning generated.
26 | ```
27 | 
28 | The simplest fix for this issue is to either increase alignment to be CHERI-compatible, or use a ternary expression to include `alignof(void *)`:
29 | 
30 | <pre><code><mark id="FixAlign1" style="background-color: #77DD77">#include &lt;stdalign.h&gt;</mark>
31 | struct AtLeast8ByteAlignedGood {
32 |     void *data;
33 | } __attribute__((packed,aligned(<mark id="FixAlign2" style="background-color: #77DD77">alignof(void *) > 8 ? alignof(void *) : 8</mark>)));
34 | </code></pre>
35 | 
36 | In the rare case that creating a potentially underaligned pointer is actually intended, the warning can be silenced by adding an `annotate("underaligned_capability")` attribute:
37 | 
38 | <pre><code>struct UnderalignPointerIgnoreWarning {
39 |     void *data;
40 | } __attribute__((packed, aligned(4), <mark id="SilenceAlign" style="background-color: #77DD77">annotate("underaligned_capability")</mark>));
41 | </code></pre>
42 | 


--------------------------------------------------------------------------------
/src/cheriabi/revocation-apis.md:
--------------------------------------------------------------------------------
 1 | ## Revocation APIs
 2 | 
 3 | Some CHERI run-time environments implement heap temporal safety, including
 4 | CheriBSD's CheriABI process environment and CHERIoT RTOS.
 5 | As described in [Referential, spatial, and temporal
 6 | safety](../cheri-ccpp/referential-spatial-temporal-safety.md), this is done on
 7 | the basis of quarantining with deferred (amortized) revocation of pointers that
 8 | must be performed before corresponding memory can be reallocated.
 9 | 
10 | For CheriABI, the
11 | [malloc_revoke(3)](https://man.cheribsd.org/cgi-bin/man.cgi/malloc_revoke)
12 | APIs allow interaction with, and control of, heap revocation.
13 | These APIs relate only to quarantining and revocation of memory
14 | allocated by the system heap allocator, `malloc(3)`.
15 | Other memory allocators, such as the system memory-map allocator, may
16 | independently implement temporal safety regardless of the heap allocator's
17 | own configuration.
18 | At the time of writing, these APIs are present only in CheriBSD.
19 | See that manual page for complete documentation on available APIs.
20 | 
21 | We briefly consider two APIs: `malloc_revoke_enabled(3)`, which tests for
22 | temporal safety support being enabled, and
23 | `malloc_revoke_quarantine_force_flush(3)`, which triggers revocation and acts
24 | as a barrier on its completion.
25 | 
26 | ### Checking whether heap revocation is enabled
27 | 
28 | Heap allocator use of revocation is configurable by process.
29 | If it is enabled in the current process, `malloc_revoke_enabled(3)` will
30 | return true; otherwise, it will return false.
31 | 
32 | ### Forcing revocation of outstanding freed pointers
33 | 
34 | Revocation of freed pointers is normally deferred, with the memory placed in
35 | quarantine, for performance reasons.
36 | `malloc_revoke_quarantine_force_flush(3)` flushes the current quarantine,
37 | performing two functions:
38 | 
39 |  1. It initiates revocation of all memory quarantined when the API is called.
40 |  2. It acts as a barrier to ensure that all pointers to memory allocations
41 |     freed prior to the API being called have been revoked.
42 | 
43 | There are no guarantees made for frees occurring concurrent to, or after, the
44 | call is made.
45 | 
46 | Use of this API is discouraged: It is provided primarily for testing,
47 | debugging, and demonstration purposes, and can come with a very high
48 | performance overhead.
49 | 


--------------------------------------------------------------------------------
/src/impact/function-prototypes-and-calling-conventions.md:
--------------------------------------------------------------------------------
 1 | ## Function prototypes and calling conventions
 2 | 
 3 | CHERI C/C++ distinguishes between integer and pointer types at an
 4 | architectural level, which can lead to compatibility problems with older C
 5 | programming styles that fail to unambiguously differentiate these types:
 6 | 
 7 | * **Unprototyped (*K&R*) functions**: Because pointers can no longer
 8 |   be loaded and stored without using capability-aware instructions, the
 9 |   compiler must know whenever a load or store might operate on a pointer
10 |   value.
11 |   The C-language default of using an integer type for function arguments when
12 |   there is not an appropriate function prototype will cause pointer values to
13 |   be handled improperly; this is also true on LP64 ABIs (e.g., most 64-bit
14 |   POSIX systems).[^10]
15 | 
16 |   To avoid these problems, the CHERI Clang compiler emits a warning (`-Wcheri-prototypes`) by default when a function without a declared prototype is called.
17 |   This warning is less strict than `-Wstrict-prototypes` and can be
18 |   used to convert *K&R* functions that may cause problems.[^11]
19 |   This should not be an issue for C code written in the last 20 years, but
20 |   many core operating-system components can be significantly older.
21 | 
22 | * **Variadic arguments**: The calling convention for variadic functions
23 |   passes all variadic arguments via the stack and accesses them via an
24 |   appropriately bounded capability.
25 |   This provides memory-protection benefits, but means that vararg functions
26 |   must be declared and called via a correct prototype.
27 | 
28 |   Some C code assumes that the calling convention of variadic and non-variadic
29 |   functions is sufficiently similar that they may be used interchangeably.
30 |   Historically, this included the FreeBSD kernel's implementation of
31 |   `open`, `fcntl`, and `syscall`.
32 | 
33 | <!--
34 |   \rwnote{I wonder if we need to be more specific with an example here.}\arnote{TODO: Add example such as missing open() mode arguments?}
35 | -->
36 | 
37 | [^10]: The forthcoming ISO C2x standard makes function declarations with an
38 | empty parameter list equivalent to a parameter list consisting of a single
39 | `void`.
40 | 
41 | [^11]: If the *K&R* function is defined within the same file, the compiler can
42 | determine the correct calling convention and will not emit a warning.
43 | 


--------------------------------------------------------------------------------
/src/impact/pointer-comparison.md:
--------------------------------------------------------------------------------
 1 | ## Pointer comparison
 2 | 
 3 | In CHERI C/C++, pointer comparison considers only the
 4 | integer address part of a capability.
 5 | This means that differences in tag validity, bounds, permissions, and so on,
 6 | will not be considered by C operators such as `==`, `<`, and `<=`.
 7 | On the whole, this leads to intuitive behavior in systems software, where,
 8 | for example, `malloc` adjusts bounds on a pointer before returning it to
 9 | a caller, and then expects an address-wise comparison to succeed when the
10 | pointer is later returned via a call to `free`.  <!--
11 | \nwfnote{I don't think I particularly like that example, since the thing `free`
12 | is nominally comparing against is the bounded return from `malloc`.}
13 | -->
14 | However, this behavior could also lead to potentially confusing results; for
15 | example:
16 | 
17 | * If a tag on a pointer is lost due to non-provenance-preserving
18 |   `memcpy` (e.g., a `for` loop copying a sequence of bytes), the
19 |   source and destination pointers will compare as equal even though the
20 |   destination will not be dereferenceable.
21 | 
22 | * If a `realloc` implementation returns a pointer to the same
23 |   address, but with different bounds, a caller check to see if the passed and
24 |   returned pointers are equal will return `true` even though an access
25 |   might be permitted via one pointer but not the other.
26 | 
27 | <!--
28 | \psnote{I'm curious about the impact on compiler optimisation, where in the scope of \texttt{if (p==q)} compilers will often assume the two are interchangeable.  Comment on that?
29 |  }
30 |  \arnote{The choice between exact vs non-exact equals is made extremely late in code generation, it just chooses between emitting CEq and CExEq.
31 |  Compiler analyses use a stricter definition of equality.
32 |  In clang that should include some cases of taking provenance into account for alias information.}
33 | -->
34 | 
35 | However, practical experience has suggested that the current semantics produce fewer
36 | subtle bugs, and require fewer changes, than having comparison operators take
37 | the tag or other metadata into account.[^6]
38 | 
39 | [^6]: The CHERI Clang compiler supports an experimental flag `-cheri-comparison=exact` that causes capability equality comparisons to also include capability metadata and the tag bit.
40 | 
41 | <!--
42 | \arnote{default behavior=\texttt{-cheri-comparison=address}}
43 | }
44 | -->
45 | 


--------------------------------------------------------------------------------
/src/limitations/unions.md:
--------------------------------------------------------------------------------
 1 | ## Unions
 2 | 
 3 | A significant benefit to introducing strong spatial and temporal safety in the
 4 | C and C++ programming languages is to reduce the opportunity for *type
 5 | confusion*, in which two different views (aliases) on the same memory region
 6 | allow that memory to be interpreted in different ways.
 7 | A classic example of this in widespread memory-corruption attacks is to store
 8 | to a memory region via characters (e.g., provided by network input) and then
 9 | to cause the application to load from the region interpreting it as a pointer
10 | (e.g., as a control-flow pointer) making it possible to substantially
11 | manipulate software including by achieving arbitrary code execution.
12 | By implementing non-aliasing protection, pointers to one allocation of memory
13 | cannot be used to implement type confusion with another pointer from a past or
14 | future allocation of that memory.
15 | 
16 | Unions, however, are a C-language feature that intentionally allows multiple
17 | interpretations of the same memory even within a single allocation.
18 | While the precise impact is specific to its use, it is frequently the case
19 | that unions may be used to reduce memory overhead or implement object
20 | orientation, allowing code to interpret regions of memory as (for example)
21 | both an array of characters and a control-flow pointer, enabling common attack
22 | patterns.
23 | 
24 | CHERI's underlying robustness to integer-pointer type confusion is valuable in
25 | mitigating such potential vulnerabilities, but as this does not provide strong
26 | type safety, nor prevent other types of type confusion, ultimately this is
27 | mitigation against attack techniques, rather than elimination of
28 | vulnerability.
29 | 
30 | **Advice to developers**: Avoiding the use of unions in C will avoid such
31 |   situations arising, especially in programming environments where the memory
32 |   savings from such techniques may have little impact.
33 |   Where unions must be used, attention can be paid to the types of confusion
34 |   that might arise, as well as robust programming techniques to avoid type
35 |   confusion.
36 | 
37 | **Ongoing research**: While SRI/Cambridge do not have current research in this
38 |   space, it is easy to imagine introducing static limitations on union use,
39 |   which could be evaluated for their impact on C/C++ code corpora to establish
40 |   how disruptive this limitation might be at scale.
41 | 


--------------------------------------------------------------------------------
/src/introduction/definitions.md:
--------------------------------------------------------------------------------
 1 | ## Definitions
 2 | 
 3 | CHERI Clang/LLVM and LLD implement the following new language,
 4 | code-generation, and linkage models:
 5 | 
 6 | * **CHERI C/C++** are C/C++-language dialects tuned to requirements arising from
 7 | implementing all pointers using CHERI capabilities.
 8 | This includes all explicit pointers (i.e., those declared by the programmer)
 9 | and all implied pointers (e.g., those used to access local and global
10 | variables).
11 | For example, they diverge from C/C++ implementations on conventional
12 | architectures by preventing pointers passed through integer types other
13 | than `uintptr_t` and `intptr_t` from being dereferenced.
14 | New Application Programming Interfaces (APIs) provide access to capability
15 | features of pointers, including getting and setting their bounds, required
16 | by selected software such as memory allocators.
17 | The vast majority of C/C++ source code we have encountered requires little
18 | or no modification to be compiled as CHERI C/C++.
19 | 
20 | * **Pure-capability machine code** is compiled code (or hand-written assembly)
21 | that utilizes CHERI capabilities for all memory accesses &mdash; including
22 | loads, stores, and instruction fetches &mdash; rather than integer addresses.
23 | Capabilities are used to implement pointers explicitly described in the source
24 | program, and also to implement implied pointers in the C execution
25 | environment, such as those used for control flow.
26 | Pure-capability machine code is not binary compatible with capability-unaware
27 | code using integer pointers, not least due to the different size of the
28 | pointer data type.
29 | 
30 | * **CHERI hybrid C/C++** are further language dialects in which only selected
31 | pointers are implemented using capabilities, with the remainder implemented
32 | using integers as on conventional architectures.
33 | We have primarily used hybrid C in systems software that bridges between
34 | environments executing pure-capability machine code and those running largely
35 | or entirely non-CHERI-aware machine code.
36 | While hybrid machine code has stronger binary compatibility with conventionally
37 | generated code, it provides little or no memory protection, and its use is not
38 | generally recommended.
39 | 
40 | The remainder of this document describes the CHERI C/C++ programming languages,
41 | as mapped into pure-capability machine code; hybrid C/C++ will not be
42 | considered further.
43 | 


--------------------------------------------------------------------------------
/src/impact/capability-faults.md:
--------------------------------------------------------------------------------
 1 | ## Capability-related faults
 2 | <!--
 3 | \label{sec:faults}
 4 | -->
 5 | 
 6 | When architectural capability properties are violated, such as by an attempt
 7 | to dereference an invalid capability, access memory outside the bounds of a
 8 | capability, or perform accesses not authorized by the permissions on a
 9 | capability, this typically leads to a hardware exception (trap).
10 | Operating-system kernels are able to catch this exception via a trap handler,
11 | optionally delivering it to the run-time environment via OS-specific
12 | mechanisms.
13 | 
14 | However, these exceptions are not guaranteed under the CHERI C/C++ model;
15 | instead, a combination of hardware and software implement non-aliasing spatial
16 | and temporal memory safety.
17 | This means that pointers are never permitted to access memory from another
18 | allocation, no matter how they are manipulated, but overruns into padding, or
19 | access after free while memory is in quarantine, may still be permitted.
20 | This topic is explored in greater detail in [Non-aliasing vs trapping memory
21 | safety](../cheri-ccpp/nonaliasing-vs-trapping.md).
22 | 
23 | Further, the language-level behavior of CHERI C/C++ is considerably more
24 | subtle: existing undefined behavior semantics in C are retained.
25 | The compiler is free to assume that loads and stores will not trap (i.e., that
26 | any program is free of undefined behavior), and may optimize under this
27 | assumption, including reordering code.
28 | Architectural traps occur when dynamic loads and stores are attempted, and
29 | reordering could lead to potentially confusing behavior for programmers.
30 | 
31 | In the CheriABI process environment, the operating system catches the hardware
32 | exception and delivers a `SIGPROT` signal to the user process;
33 | further information may be found in [CheriABI](../cheriabi).
34 | In other environments, such as bare metal or under an embedded OS, behavior is
35 | specific to those environments, as it will depend both on how architectural
36 | exceptions are handled, and how those events are delivered to the C-language
37 | stack.
38 | Fail stop may be appropriate behavior in some environments, and is the default
39 | behavior in CheriABI when `SIGPROT` is not handled.
40 | 
41 | <!--
42 | \rwnote{We've opted to use the term "hardware exception" throughout, and
43 |   mention "traps" only here.  This could cause confusion with respect to C++
44 |   exceptions .. but perhaps less so than if we used the word "exception"
45 |   unadorned.}
46 | -->
47 | 


--------------------------------------------------------------------------------
/src/cheriabi/posix-api-changes.md:
--------------------------------------------------------------------------------
 1 | ## POSIX API changes
 2 | 
 3 | * **Writing and reading pointers via files**: In the CheriABI process
 4 |   environment, only untagged data (not tagged pointers) may be written to or
 5 |   read from files.
 6 |   If a region of memory containing valid pointers is written to a file, and
 7 |   then read back, the pointers in that region will no longer be valid.
 8 |   If a file is memory mapped, then pages mapped copy-on-write
 9 |   (`MAP_PRIVATE`) are able to hold tagged pointers, since they are
10 |   swap-backed rather than file-backed, but pages mapped directly from the
11 |   buffer cache (`MAP_SHARED`) are not.
12 | 
13 | * **Passing pointers via IPC**: In the CheriABI process environment, only
14 |   untagged data, not tagged pointers, may be passed via various forms of
15 |   message-passing Inter-Process Communication (IPC).
16 |   Some existing software takes advantage of a shared address-space layout
17 |   (via `fork`) to pass pointers to elements of shared data structures
18 |   (e.g., entries in dispatch tables).
19 |   This code must be converted to use indexes into tables or other lookup
20 |   mechanisms rather than passing pointers via IPC.
21 | 
22 | * **`mmap` bounds**: In CheriABI, the `mmap` system
23 |    call returns a bounded capability to the allocated address space.
24 |    To ensure the capability does not overlap other allocations,
25 |    lengths that would otherwise be unrepresentable are rounded up
26 |    and padded with a new type of guard pages.
27 |    These guard pages fault on access and may not be mapped over.
28 |    They are unmapped when the rest of the mapping is unmapped.
29 | 
30 | * **`mmap` permissions**: The permissions of the capability
31 |    returned by `mmap` are determined by a combination of the
32 |    requested page protections and the capability passed as an address hint
33 |    (or fixed address with `MAP_FIXED`).
34 |    When using the pattern of requesting a mapping with `PROT_NONE`
35 |    and then filling in sections (as is done in run-time linkers, VM host
36 |    environments, etc.), it is necessary to ensure that the initial
37 |    capability has the right permissions.
38 |    The `prot` argument has been extended to accept additional
39 |    flags indicating the maximum permission the page can have so that a
40 |    linker might request a reservation for a library with the permissions
41 |    `(PROT_MAX(PROT_READ|PROT_WRITE|PROT_EXEC) | PROT_NONE)`, which
42 |    would return a capability permitting loads, stores, and instruction
43 |    fetch while mapping the pages with no (MMU) permissions.
44 | 


--------------------------------------------------------------------------------
/src/cheri-ccpp/cheri-runtime.md:
--------------------------------------------------------------------------------
 1 | ## The CHERI C/C++ run-time environment
 2 | 
 3 | CHERI C code executes within a capability-aware run-time environment
 4 | &mdash; whether "bare metal" with a suitable runtime, or in a richer, OS-based
 5 | process environment such as CheriABI (see [CheriABI](../cheriabi)) or
 6 | CHERIoT RTOS, which ensures that:
 7 | 
 8 |  * capabilities are context switched (if required);
 9 |  * tags are maintained by the OS virtual-memory subsystem (if present);
10 |  * capabilities are supported in OS control operations such as
11 |     debugging (as needed);
12 |  * system-call arguments, the
13 | run-time linker, and other aspects of the OS Application Binary Interface
14 | (ABI) utilize capabilities rather than integer pointers;
15 |  * the C/C++-language runtime implements suitable capability preservation
16 |     (e.g., in `memcpy`) and restriction (e.g., in `malloc`); and
17 |  * temporal safety is enforced by heap allocators (if supported).
18 | 
19 | CHERI is supported by a growing set of operating systems:
20 | 
21 |  * In CheriBSD, the CHERI-extended version of the open-source FreeBSD operating
22 |    system, CheriABI operates as a complete additional OS ABI.
23 |    CheriABI is implemented in the style of a 32-bit or 64-bit OS personality,
24 |    in that it requires its own set of suitably compiled system libraries and
25 |    classes.
26 |    Userlevel runs with referential, spatial, and temporal safety.
27 |    At the time of writing, the kernel supports referential and spatial
28 |    safety, but not temporal safety.
29 |  * CHERI Linux also implements a pure-capability kernel and process
30 |    environment modeled on CheriABI that support referential and spatial
31 |    safety.
32 |  * A number of bare-metal runtimes, such as newlib, and embedded operating
33 |    systems, such as FreeRTOS (CheriFreeRTOS) and RTEMS (CHERI-RTEMS), have
34 |    been adapted to support referential and spatial memory protection using
35 |    CHERI.
36 |  * seL4 has been updated (out of tree) to support referential and spatial
37 |    memory protection using CHERI.
38 |  * CHERIoT RTOS implements referential, spatial, and temporal memory
39 |    protection using CHERI.
40 | 
41 | Outside of the OS and language runtime themselves, CHERI C/C++ require
42 | relatively few source-code-level changes to C/C++-language software.
43 | Exceptions to this rule of thumb typically take the form of compiler
44 | toolchains, low-level C/C++ runtimes such as run-time linkers, and high-level
45 | language runtimes that may (for example) include just-in-time compilers.
46 | 
47 | We explore the changes required to software in the remainder of this document.
48 | 


--------------------------------------------------------------------------------
/src/apis/modifying-or-restricting-capability-properties.md:
--------------------------------------------------------------------------------
 1 | ## Modifying or restricting capability properties
 2 | 
 3 | The following APIs allow capability properties to be refined on pointers:
 4 | 
 5 | * **`void *cheri_address_set(void *c, ptraddr_t a)`**: Return a new capability with the same permissions and bounds as `c` with the address set to `a`.
 6 | This can be useful to re-derive a valid pointer from an address.
 7 | 
 8 |   `cheri_address_set` is able to set an address `a` that is
 9 |   outside of the current bounds of `c`.  The resulting capability
10 |   is treated as an out-of-bounds pointer as described in [Out-of-bounds
11 |   pointers](../impact/out-of-bounds-pointers.html).
12 |   However, if the address `a` is not representable in the current
13 |   bounds of `c` due to capability compression,
14 |   `cheri_address_set` returns a capability without the tag bit set.
15 | 
16 | <!--
17 |   %  This macro wraps the compiler built-in
18 |   %  `__builtin_cheri_address_set`.
19 | -->
20 | 
21 | * **`void *cheri_bounds_set(void *c, size_t x)`**: Narrow the bounds of capability
22 |   `c` so that the lower bound is the current address (which may
23 |   have been increased relative to `c`'s original lower bound), and its
24 |   upper bound is suitable for a length of `x`.
25 | 
26 |   Note that the effective bounds of the returned capability may be
27 |   wider than the range [`cheri_address_get(c)`,
28 |   `cheri_address_get(c) + x`) due to capability compression (see
29 |   [Bounds alignment due to
30 |   compression](../apis/bounds-alignment-due-to-compression.html)),
31 |   but they will always be a subset of
32 |   the original bounds. <!--% of `c`.-->
33 | 
34 | * **`void *cheri_bounds_set_exact(void *c, size_t x)`**: Narrow the bounds of capability
35 |   `c` so that the lower bound is the current address, and its
36 |   upper bound is `cheri_address_get(c) + x`.
37 |   This is similar to `cheri_bounds_set` but will raise a hardware exception if the resulting capability is not precisely representable instead of rounding the bounds.
38 | 
39 | <!--
40 | \nwfnote{No mention of cheri\_bounds\_set\_exact?}
41 | -->
42 | 
43 | * **`void *cheri_perms_and(void *c, size_t x)`**: Perform a bitwise-AND of capability
44 |   `c`'s permissions and the value `x`, returning the new
45 |   capability (see [Capability permissions](capability-permissions.html)).
46 | 
47 | <!--
48 |   %  This macro wraps the compiler built-in
49 |   %  `__builtin_cheri_perms_and`.
50 | -->
51 | 
52 | * **`void *cheri_tag_clear(void *c)`**: Clear the tag on `c`, returning the
53 |   new capability.
54 | 
55 | <!--
56 |   % \note{Are the references to the `__builtin_` forms useful?  Do we
57 |   % want to encourage their use or the `cheric.h` macros?}{nwf}
58 | -->
59 | 


--------------------------------------------------------------------------------
/src/apis/implications-for-memory-allocator-design.md:
--------------------------------------------------------------------------------
 1 | ## Implications for memory-allocator design
 2 | 
 3 | One use case of these APIs is high-performance applications that contain custom memory
 4 | allocators and wish to narrow the bounds of returned pointers.
 5 | Two kinds of modifications are typically required:
 6 | 
 7 | * **Changes to alignment to allow for capabilities and bounds**:
 8 |   Changes relating to alignment fall into two categories.
 9 |   First, those required to allow pointers to be stored within allocations,
10 |   which requires that allocations be aligned to the pointer width (128 bits).
11 |   Second, further alignment changes will be required to ensure that bounds can
12 |   be represented precisely.
13 |   This requires suitably aligning both the bottom and top bounds to exclude
14 |   any other live allocations, as described in [Bounds alignment due to
15 |   compression](bounds-alignment-due-to-compression.html).
16 | <!--
17 | \arnote{May want to switch order of sections?}
18 | -->
19 | 
20 | * **Reaching allocation metadata on `free`**:
21 |   It is often the case that allocators utilize the value of the pointer passed
22 |   to their custom `free` function to locate corresponding metadata &mdash;
23 |   for example, by always placing that metadata immediately before the
24 |   allocation, which would be outside of the allocation's bounds.
25 |   Therefore, some additional work may be required to derive a pointer to the
26 |   allocation's metadata via another global capability, rather than the one
27 |   that has been passed to `free`.
28 | 
29 | These two concerns may interact: When a custom allocator places metadata at
30 | the beginning of the allocation, care must be taken that the resulting pointer
31 | is still strongly aligned.
32 | While porting programs to run on CHERI, we found multiple sub-allocators
33 | that used 8 bytes of metadata after the result from `malloc`.
34 | This causes the resulting pointer to no longer be sufficiently aligned to
35 | store capabilities without faulting or stripping tag bits.
36 | <!--
37 | \nwfnote{Does CHERI ISAv7 still fault in any of these scenarios?}
38 | -->
39 | 
40 | Note that it is also possible to use the above APIs to validate inputs to
41 | `free`, which is useful when the consumer of `free` is, for example,
42 | an untrusted compartment or a component of a web browser that might be
43 | influenced by an attacker. In such cases, `free` should validate that the
44 | passed-in capability is tagged, is in-bounds, and points to a legitimate,
45 | still-allocated allocation.  For allocators engaged in revocation for temporal
46 | safety, concurrent revocation opens the door to TOCTTOU races within
47 | `free`; additional care must be taken to prevent a double-`free`
48 | using a stale pointer from freeing an object allocated after revocation.
49 | 


--------------------------------------------------------------------------------
/src/impact/restrictions-in-capability-locations.md:
--------------------------------------------------------------------------------
 1 | ### Restrictions in capability locations in memory
 2 | <!--
 3 | \label{sec:restricted-capability-locations}
 4 | -->
 5 | 
 6 | CHERI C/C++ constrain how and where pointers can be stored in memory in two
 7 | ways:
 8 | 
 9 | * **Alignment**: CHERI's tags are associated with capability-aligned,
10 |   capability-sized locations in physical memory.
11 |   Because of this, all valid pointers must be stored at such locations,
12 |   potentially disrupting code that may use other alignments.
13 | 
14 |   On the whole, for performance and atomicity reasons, pointers are strongly
15 |   aligned even on non-tagged architectures &mdash; however, when C constructs such
16 |   as `__packed` are used, unaligned pointers can arise, and will not
17 |   work with CHERI.
18 |   While the compiler and native allocators (stack, heap, ...) will
19 |   provide sufficient alignment for capability-based pointers, custom
20 |   allocators may align allocations to `sizeof(intmax_t)` rather than
21 |   `alignof(max_align_t)`.
22 | 
23 | * **Size**: CHERI capabilities are twice the size of an integer able to
24 |   describe the full address space.
25 |   On 64-bit systems, this means that CHERI pointers will have a width of 128
26 |   bits &mdash; while maintaining the arithmetic properties of a 64-bit integer
27 |   address.
28 |   C code historically embeds assumptions about pointer size in a number of forms,
29 |   all of which will need to be addressed when porting to CHERI,
30 |   including:
31 | 
32 |   * Assuming that a pointer will fit into the largest integer type.
33 |   * Assuming that the number of bits in a pointer type is the same
34 |     as the number of bits indexing the address space it can refer to.
35 |   * Assuming that the number of bits in a pointer type is the same as the
36 |     number of bits suitable for use in performing bit-wise manipulations of
37 |     pointer values.
38 |   * Assuming that pointers must either be 32 or 64 bits.
39 |   * Assuming that aligning to `sizeof(double)` is sufficient to store any type.
40 |   * Assuming that high bits of the pointer address can be used for
41 |   additional metadata. This is not true on CHERI since toggling high bits of a
42 |   pointer can cause it to be so far out of bounds that it is no longer representable
43 |   due to the compression of pointer bounds. However, it is still possible to use
44 |   the low bits for additional metadata (see [Bitwise operations on capability types](bitwise-operations.html)).
45 | 
46 | <!--
47 |   \rwnote{Should there be more things in this list?}
48 | -->
49 | 
50 | These portability problems will typically be found due to hardware exceptions
51 | thrown on attempted unaligned accesses of capability values
52 | (see [Capability-related faults](capability-faults.html)).
53 | However, they can also arise in the form of stripped tag bits, leading to
54 | invalid capabilities that cannot be dereferenced, if, for example, pointer
55 | values are copied into inappropriately aligned allocations.
56 | 


--------------------------------------------------------------------------------
/src/impact/revocation.md:
--------------------------------------------------------------------------------
 1 | ## Implications of capability revocation for temporal safety
 2 | 
 3 | Heap temporal safety utilizes revocation sweeps, which, after some quarantine
 4 | period, replace in-register and in-memory capabilities to freed memory with
 5 | non-dereferenceable values.
 6 | For performance reasons, that replacement may be substantially deferred, or,
 7 | if there is little demand for fresh allocations, may never occur.
 8 | Pointer value replacement may also permit some instances of
 9 | a pointer to continue to be usable for longer than others, but the referenced
10 | memory will not be reallocated or otherwise reused until all instances have been rendered unusable.
11 | This model does permit non-exploitable *use-after-free* of heap memory,
12 | but prohibits exploitable memory aliasing by disallowing *use-after-reallocation*.
13 | 
14 | A pointer's value after `free` is undefined, and so dereferencing it is
15 | undefined behavior.
16 | In practice, however, the value of a `free`-d pointer may still be
17 | observed in a number of situations, including in lockless algorithms, which
18 | may compare an allocated pointer to a freed one.
19 | 
20 | Our systems have a choice of replacement values for revoked pointers; all that
21 | is required for correct temporal safety is that the replacement not authorize
22 | access to memory.
23 | Our prototype implementation clears the tag when replacing, as this
24 | certainly removes authority and possibly simplifies debugging and
25 | non-dereferencing operations, as the original capability bits are left behind.
26 | For example, pointer equality checks that compare only the addresses of the two
27 | pointers (and not their tag values) will continue to work as expected.  With
28 | revocation performed this way, software making explicit use of tags must be
29 | designed to tolerate capability tag clearing by revocation.
30 | 
31 | Unfortunately, tag-clearing risks type confusion if programmers intend to use
32 | the capability tag to distinguish between integers and pointers in tagged
33 | unions (we have so far generally discouraged this idea, but understand why it
34 | may remain attractive).  Therefore, we have considered other options for
35 | revocation, including *permission*-zeroing (with tag
36 | preservation) and wholesale replacement with `NULL` (i.e., the untagged
37 | all-zero value).  These options may be more attractive for some software, and
38 | would have different implications for the C/C++ programming model.
39 | 
40 | We anticipate that revocation will remain a tag-clearing operation by default,
41 | as tag-clearing removes any risk of needlessly re-examining the capability in
42 | later revocations.  However, it may be possible to allow coarse control over
43 | revocation behavior either per process or by region of the address space.  In
44 | the latter case, `mmap` may gain flags specifying which revocation
45 | behavior is desirable for capabilities pointing *into* the mapped region
46 | and/or `madvise` may gain flags controlling the revocation behavior of
47 | capabilities *within* a target region.  Which of these or similar
48 | mechanisms provide utility to software and can be offered at reasonable
49 | performance remains an open question.
50 | 


--------------------------------------------------------------------------------
/src/impact/out-of-bounds-pointers.md:
--------------------------------------------------------------------------------
 1 | ### Out-of-bounds pointers
 2 | <!--
 3 | \label{sec:oob}
 4 | -->
 5 | 
 6 | <!--
 7 | \note{I feel like this section wants a reference to CHERI Concentrate?}{nwf}
 8 | -->
 9 | 
10 | ISO C permits pointers to go only one byte beyond their original
11 | allocation, but widely used code sometimes constructs transient pointer
12 | values that are further out of bounds.
13 | For example, `for` loops iterating over an array may increment a pointer
14 | into the array by the array entry size before performing an overflow check
15 | that terminates the loop.
16 | This temporarily constructs an out-of-bounds pointer without an out-of-bounds
17 | dereference taking place.
18 | 
19 | <!--
20 | \nwfnote{In the straightforward case, tho, that still results in the pointer
21 | being only one past the end of its allocation, doesn't it?}
22 | -->
23 | 
24 | To support this behavior, capabilities
25 | can hold a range of out-of-bounds addresses while retaining a valid
26 | tag, and CHERI-enabled hardware performs bounds checks only on pointer
27 | use (i.e., dereference), not on pointer manipulation.  Dereferencing
28 | an out-of-bounds pointer will raise a hardware exception (see
29 | [Capability-related faults](capability-faults.md)).
30 | However, an out-of-bounds pointer can be
31 | dereferenced once it has been brought back in bounds, by adjusting the
32 | address or supplying a suitable offset in the dereference.
33 | 
34 | There is, however, a limit to the range of out-of-bounds addresses a capability can hold.
35 | The capability compression model exploits redundancy between the pointer's address and
36 | its bounds to reduce memory overhead (see [CHERI
37 | capabilities](../background/cheri-capabilities.html)).
38 | However, when a pointer goes out of bounds, this redundancy is reduced, and at
39 | some point the bounds can no longer be represented within the capability.
40 | The architecture prohibits manipulations that would produce such
41 | a capability.
42 | Depending on the architecture and context, this may lead to the
43 | tag being cleared, resulting in an invalid capability, or in an immediate
44 | hardware exception being thrown.
45 | Attempting to dereference the invalid capability will fail in the same
46 | manner as a loss of pointer provenance validity (see [Pointer provenance
47 | validity](pointer-provenance-validity.html)).<!--
48 | \psnote{Comment on whether that should immediately trap instead?} -->
49 | The range of out-of-bounds addresses permitted for a capability is
50 | a function of the length of the bounded region and the number of bits used for bounds in the capability representation.
51 | With 27 bits of the capability used for bounds, 64-bit
52 | CHERI-RISC-V provides the following guarantees:
53 | 
54 | * A pointer is able to travel at least 1/4 the size of the object, or
55 |   2 KiB (2<sup>*floor*(*bounds_bits*/2)-2</sup>), whichever is greater,
56 |   above its upper bound.
57 | 
58 | * It is able to travel at least 1/8 the size of the object, or 1 KiB
59 |   (2<sup>*floor*(*bounds_bits*/2)-3</sup>), whichever is greater, below
60 |   its lower bound.
61 | 
62 | In general, programmers should not rely on support for arbitrary out-of-bounds
63 | pointers.  Nevertheless, in practice, we have found that the CHERI capability
64 | compression scheme supports almost all in-the-field out-of-bounds behavior in
65 | widely used software such as FreeBSD, PostgreSQL, and WebKit.
66 | 


--------------------------------------------------------------------------------
/src/cheri-ccpp/referential-spatial-temporal-safety.md:
--------------------------------------------------------------------------------
 1 | ## Referential, spatial, and temporal safety
 2 | 
 3 | Pure-capability C/C++ introduces a number of new types of protection not
 4 | present in compilation to conventional architectures:
 5 | 
 6 | * **Referential safety** protects pointers (references) themselves.
 7 |   This includes *integrity* (corrupted pointers cannot be dereferenced)
 8 |   and *provenance validity* (only pointers derived from valid pointers
 9 |   via valid manipulations can be dereferenced).
10 | 
11 |   When pointers are implemented using architectural capabilities, CHERI's
12 |   capability tags and provenance validity naturally provide this protection.
13 | 
14 | * **Spatial safety** ensures that pointers may be used only to access memory
15 |   within bounds of their associated allocation; dually, manipulating an
16 |   out-of-bounds pointer will not grant access to another allocation.
17 | 
18 |   This is accomplished by adapting various memory allocators, including the run-time
19 |   linker for global variables, thread-local variables, the stack allocator,
20 |   and the heap allocator,
21 |   to set the bounds on the capability implementing a pointer before returning
22 |   it to the caller.
23 |   Due to precision constraints on capability bounds, bounds on returned
24 |   pointers may include additional padding, but will still not permit access to any
25 |   other allocations (see [Bounds alignment due to
26 |   compression](../apis/bounds-alignment-due-to-compression.html)).
27 |   Monotonicity ensures that callers cannot later broaden the bounds to cover
28 |   other allocations.
29 | 
30 | Referential safety and spatial safety are implemented in CheriBSD's and
31 | CHERI Linux's kernels and pure-capability CheriABI execution environments, in
32 | CHERIoT RTOS, CHERI-adapted seL4, and a number of CHERI-adapted embedded
33 | operating systems including FreeRTOS and RTEMS.
34 | 
35 | * **Temporal safety** prevents a pointer retained after the release of its
36 |   underlying allocation from being used to access its memory if that memory
37 |   has been reused for a fresh allocation (e.g., after a fresh pointer to that
38 |   memory has been returned by a further call to `malloc` after the
39 |   current pointer was passed to `free`).
40 | 
41 |   Heap temporal safety is accomplished by preventing new pointers being
42 |   returned to a previously allocated region of memory while any prior pointers
43 |   to that memory persist in application-accessible memory.
44 |   Memory will be held in *quarantine* until any prior pointers have
45 |   been revoked; then the memory may be reallocated.
46 | 
47 |   Architectural capability tags and revocation features allow intermittent
48 |   *revocation sweeps* to accurately and efficiently locate and
49 |   overwrite any capabilities implementing stale pointers.
50 |   On application cores, architectural support includes MMU features to allow
51 |   the tracking of "capability versions" of pages (a la loadside
52 |   garbage-collection techniques), and on microcontroller cores, direct
53 |   revocation bits may be stored directly in SRAM.
54 |   Spatial safety ensures that pointers cannot be used to reference other
55 |   memory, including other freed memory.
56 | 
57 | Temporal heap safety is implemented in CheriBSD's pure-capability CheriABI
58 | execution environment running on Arm Morello and CHERI-RISC-V application
59 | cores, and in CHERIoT RTOS running on CHERIoT microcontroller cores.
60 | 


--------------------------------------------------------------------------------
/src/compiler/loss-of-provenance.md:
--------------------------------------------------------------------------------
 1 | ## Loss of provenance
 2 | 
 3 | <!--
 4 | %\begin{compilerwarning}
 5 | %cast from provenance-free integer type to pointer type will give pointer that can not be dereferenced
 6 | %\end{compilerwarning}
 7 | -->
 8 | 
 9 | This common compiler warning<!--
10 | \arnote{that should be an error by default?}
11 | -->
12 | is triggered when casting a non-capability type (e.g., `long`) to a pointer.
13 | As mentioned in [Pointer provenance
14 | validity](../impact/pointer-provenance-validity.html), the result of this cast is a `NULL`-derived capability with the address set to the integer value.
15 | As any `NULL`-derived capability is untagged, any attempt to dereference it will trap.
16 | 
17 | Usually, this warning is caused by programmers incorrectly assuming that `long` is able to store pointers.
18 | The fix for this problem is to change the type of the cast source to a provenance-carrying type such as `intptr_t` or `uintptr_t` (see [Recommended use of
19 | C-language types](../impact/recommended-use-c-types.md)):
20 | 
21 | <!--
22 | Need to use HTML formatting here for mdBook to get the highlight
23 | colors in the code examples. The "id" attribute is required, because
24 | we use that unique identifier in Pandoc to generate LaTeX highlight
25 | formatting.
26 | -->
27 | <pre><code>char *example_bad(<mark id="BadParamTy" style="background-color: #EE918D">long</mark> ptr_or_int) {
28 |     return strdup((const char *)ptr_or_int);
29 | }
30 | char *example_good(<mark id="GoodParamTy" style="background-color: #77DD77">intptr_t</mark> ptr_or_int) {
31 |   return strdup((const char *)ptr_or_int);
32 | }
33 | </code></pre>
34 | 
35 | 
36 | ```{.compilerwarning}
37 | <source>:2:17: warning: cast from provenance-free integer type to pointer type
38 | will give pointer that can not be dereferenced [-Wcheri-capability-misuse]
39 |   return strdup((const char *)ptr_or_int);
40 |                 ^
41 | 1 warning generated.
42 | ```
43 | 
44 | In some cases, this warning can be a false positive.
45 | For example, it is common for C callback APIs to take a `void *` data argument that is passed to the callback.
46 | If this value is in fact an integer constant, the warning can be silenced by casting to `uintptr_t` first:
47 | 
48 | <!--
49 | Need to use HTML formatting here for mdBook to get the highlight
50 | colors in the code examples. The "id" attribute is required, because
51 | we use that unique identifier in Pandoc to generate LaTeX highlight
52 | formatting.
53 | -->
54 | <pre><code>void invoke_cb(void (*cb)(void *), void *);
55 | void callback(void *arg);
56 | void false_positive_example(int callback_data) {
57 |     invoke_cb(&callback, (void *)callback_data); // warning
58 |     invoke_cb(&callback, (void *)<mark id="SilenceProv" style="background-color: #77DD77">(uintptr_t)</mark>callback_data); // no warning
59 | }
60 | </code></pre>
61 | 
62 | 
63 | ```{.compilerwarning}
64 | <source>:4:24: warning: cast from provenance-free integer type to pointer type
65 | will give pointer that can not be dereferenced [-Wcheri-capability-misuse]
66 |   invoke_cb(&callback, (void *)callback_data); // warning
67 |                        ^
68 | <source>:15:24: warning: cast to 'void *' from smaller integer type 'int'
69 | [-Wint-to-void-pointer-cast]
70 |   invoke_cb(&callback, (void *)callback_data); // warning
71 |                        ^
72 | 2 warnings generated.
73 | ```
74 | 
75 | <!--
76 | \nwfnote{The ``:15:24'' above should also be ``:4:24''?}
77 | -->
78 | 


--------------------------------------------------------------------------------
/src/background/architectural-rules.md:
--------------------------------------------------------------------------------
 1 | ## Architectural rules for capability use
 2 | 
 3 | The architecture enforces several important security properties on changes to
 4 | this metadata:
 5 | 
 6 | * **Provenance validity** ensures that capabilities can be used &mdash; for
 7 |   load, store, instruction fetch, etc. &mdash; only if they are derived via valid
 8 |   transformations of valid capabilities.
 9 |   This property holds for capabilities in both registers and memory.
10 | 
11 | * **Monotonicity** requires that any capability derived from another
12 |   cannot exceed the permissions and bounds of the capability from which it was
13 |   derived (leaving aside sealed capabilities, used for domain transition,
14 |   whose mechanism is not detailed in this report).
15 | 
16 | ### Root capabilities and capability derivation
17 | 
18 | At boot time, the architecture provides initial capabilities to the firmware,
19 | allowing data access and instruction fetch across the full address space.
20 | Additionally, all tags are cleared in memory.
21 | Further capabilities can then be derived (in accordance with the monotonicity
22 | property) as they are passed from firmware to boot loader, from boot loader to
23 | hypervisor, from hypervisor to the OS, and from the OS to the application.
24 | At each stage in the derivation chain, bounds and permissions may be
25 | restricted to further limit access.
26 | For example, the OS may assign capabilities for only a limited portion of the
27 | address space to the user software, preventing use of other portions of the
28 | address space.
29 | 
30 | ### Intentionality
31 | 
32 | Similarly, capabilities carry with them *intentionality*: when a
33 | process passes a capability as an argument to a system call, the OS kernel can
34 | carefully use only that capability to ensure that it does not access other
35 | process memory that was not intended by the user process &mdash; even though the
36 | kernel may in fact have permission to access the entire address space through
37 | other capabilities it holds.
38 | This is important, as it prevents "confused deputy" problems, in which a more
39 | privileged party uses an excess of privilege when acting on behalf of a less
40 | privileged party, performing operations that were not intended to be
41 | authorized.
42 | For example, this prevents the kernel from overflowing the bounds on a
43 | userspace buffer when a pointer to the buffer is passed as a
44 | system-call argument.
45 | 
46 | ### Capability atomicity
47 | 
48 | The hardware furthermore guarantees that capability tags and capability data are written atomically.
49 | For example, if one thread stores a valid capability and another writes arbitrary data to the same location, it is impossible to observe the arbitrary data with the validity bit set.
50 | 
51 | ### Formally verified groundings for memory safety
52 | 
53 | These architectural properties provide the foundation on which a
54 | capability-based OS, compiler, and runtime can implement C/C++-language memory
55 | safety.
56 | They have been made precise and have been proved, with machine-checked proof,
57 | to hold for the CHERI-MIPS architecture.[^1]
58 | 
59 | [^1]: Kyndylan Nienhuis, Alexandre Joannou, Thomas Bauereiss, Anthony Fox, Michael Roe, Brian Campbell, Matthew Naylor, Robert M. Norton, Simon W. Moore, Peter G. Neumann, Ian Stark, Robert N. M. Watson, and Peter Sewell. [Rigorous engineering for hardware security: Formal modelling and proof in the CHERI design and implementation process](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/202005oakland-cheri-formal.pdf). In Proceedings of the 41st IEEE Symposium on Security and Privacy (Oakland 2020). San Jose, CA, USA, May 18-20, 2020.
60 | 


--------------------------------------------------------------------------------
/src/apis/bounds-alignment-due-to-compression.md:
--------------------------------------------------------------------------------
 1 | ## Bounds alignment due to compression
 2 | 
 3 | <!--
 4 | \label{sec:bounds_alignment}
 5 | -->
 6 | 
 7 | Bounds imprecisions may require a memory allocator to increase the alignment
 8 | of an allocation, or increase padding on an allocation, to prevent bounds from
 9 | spanning more than one object.
10 | When the length of an object exceeds 2<sup>*floor*(*bounds_bits*/2)-1</sup> (i.e., 4 KiB for 64-bit CHERI-RISC-V), additional alignment requirements
11 | apply to the lower and upper bounds.
12 | The alignment required for allocations exceeding the minimum representable range (4 KiB for 64-bit CHERI-RISC-V) is 2<sup>*E*+3</sup> bytes, where
13 | *E* is determined from the length, *l*, by
14 | *E* = 52 - CountLeadingZeros(*l*[64: *floor*(*bounds_bits*/2)]).
15 | 
16 | <!--
17 | \arnote{Is this too much detail?}
18 | -->
19 | 
20 | <!--
21 | %\jrtcnote{Do we want to clarify that this is a 65-bit length? One would naively
22 | %expect it to be 64-bit and thus be off by one in all calculations. We should
23 | %probably also steer people towards CRRL/CRAM regardless (and add cheri\_foo
24 | %APIs for them).}
25 | %\arnote{65-bit length is probably too much detail. But CRRL/CRAM now documented}
26 | -->
27 | 
28 | Correctly computing the rounded size and minimum alignment for a given
29 | allocation is non-trivial and may require many instructions to compute,
30 | especially in the context of fast allocators such as the stack allocator.
31 | Moreover, the architectural constants used for bounds precision differ across
32 | architectures or their variations, and so alignment constraints also vary.
33 | For example, the number of bits available for bounds differs between 32-bit and
34 | 64-bit CHERI-RISC-V, and also between 64-bit CHERI-RISC-V and Morello.
35 | 
36 | To avoid overly specific software knowledge of alignment requirements, and also to allow efficient calculation of alignment constraints during (for example) stack allocation, the CHERI ISA provides instructions that allow determining precisely representable allocations.
37 | These instructions can be generated using compiler builtins that are provided by `cheriintrin.h`:
38 | 
39 | * **`size_t cheri_representable_length(size_t len)`**: returns the length that a capability would have after using `cheri_bounds_set` to set the length to `len` (assuming appropriate alignment of the base).
40 | 
41 | * **`size_t cheri_representable_alignment_mask(size_t len)`**: returns a bitmask that can be used to align an address downwards such that it is sufficiently aligned to create a precisely bounded capability.
42 | 
43 | The precisely representable base address can be computed using:
44 | 
45 | ```{.clisting}
46 | base = base & cheri_representable_alignment_mask(len);
47 | ```
48 | 
49 | When allocating from a contiguous buffer, the base needs to be aligned upwards instead of downwards.
50 | This can be done with the following code:
51 | 
52 | ```{.clisting}
53 | size_t required_alignment(size_t len) {
54 |     return ~cheri_representable_alignment_mask(len) + 1;
55 | }
56 | struct Buffer {
57 |     void *data;
58 |     size_t allocated;
59 | };
60 | void *allocate_next(struct Buffer *buf, size_t len) {
61 |     char *result = buf->data + buf->allocated;
62 |     result = __builtin_align_up(result, required_alignment(len));
63 |     size_t rounded_len = cheri_representable_length(len);
64 |     buf->allocated = (result + rounded_len) - (char *)buf->data;
65 |     return cheri_bounds_set_exact(result, rounded_len);
66 | }
67 | ```
68 | 
69 | Software written to use these compiler builtins, rather than encoding alignment
70 | requirements directly, is more likely to be portable between
71 | CHERI-RISC-V and Morello.
72 | 


--------------------------------------------------------------------------------
/src/impact/capability-representation.md:
--------------------------------------------------------------------------------
 1 | ## Capability representation in memory
 2 | 
 3 | Underlying implementations of CHERI are diverse, spanning 32-bit
 4 | microcontrollers (such as Microsoft's CHERIoT) to 64-bit server-class
 5 | processors (such as Arm's Morello).
 6 | CHERI C/C++ provide broad flexibility for implementations to represent
 7 | capability metadata in the ways most suitable to their individual
 8 | requirements.
 9 | One specific area in which CHERI implementations may differ is in the specific
10 | in-memory representations of capabilities, due to not just different address
11 | sizes, but also different tradeoffs around bounds compression, permissions,
12 | and so on.
13 | 
14 | CHERI C/C++ in general expect that capabilities will be accessed via pointer
15 | types, with operations such as dereferencing a pointer or performing pointer
16 | arithmetic implemented by compiler-generated code.
17 | Broadly, a capability consists of three parts: An address, inline metadata,
18 | and a validity tag.
19 | When stored in memory, CHERI capabilities are twice the size of the native
20 | address type (e.g., 128 bits on 64-bit systems, and 64 bits on 32-bit
21 | systems), in addition to an unaddressable tag bit.
22 | There is one tag per capability aligned region of memory, and hence
23 | capabilities must themselves be stored at capability alignment.
24 | 
25 | ### Non-portability of the in-memory representation
26 | 
27 | To the greatest extent possible, it is desirable to write *portable CHERI
28 | C/C++ code* that never directly interprets the in-memory representation of a
29 | capability, with the exception of `NULL` values (see below).
30 | Portable access to capability fields must be made using the [CHERI C APIs to
31 | get and set capability
32 | properties](../apis/retrieving-capability-properties.md).
33 | 
34 | However, there are cases in which writing *non-portable CHERI C/C++ code* is
35 | both acceptable and essential, such as in the implementation of compilation,
36 | linking, debugging, and tracing tools intentionally targeting specific target
37 | architectures.
38 | This is especially true when code will not be operating on the target
39 | architecture itself, such as for cross-compilation, cross-linkage, and
40 | cross-debugging, including in accessing core dumps.
41 | In these cases, architecture specifications must be referenced in writing
42 | encoding and decoding code, as there are significant variations between
43 | platforms, and platforms themselves may also have parameterizable elements to
44 | their encoding.
45 | 
46 | ### In-memory representation of NULL pointers
47 | 
48 | Conventional, integer-based architectures implement `NULL` pointers
49 | as integers with a value of `0`.
50 | CHERI C/C++ similarly represents `NULL` as an all-zero capability value with
51 | zero tag value, which is the only *portable* aspect of the in-memory
52 | representation of a CHERI capability.
53 | 
54 | This has a number of implications, including that zero-filled memory with
55 | zeroed tag values will be interpreted as being `NULL`-filled, as is the case
56 | with conventional runtimes for C/C++.
57 | This is particularly relevant for automatically initialized variable values
58 | (such as global variables without specific initialization values), pre-zeroed
59 | memory allocated by `calloc()`, or memory explicitly zeroed using
60 | `memset(p, 0, n)`.
61 | Similarly, storing `NULL` pointer values in memory will result in that memory
62 | being zeroed.
63 | 
64 | The following code will always succeed:
65 | ```
66 | 	void *p = NULL;
67 | 	char zeroes[sizeof(p)];
68 | 
69 | 	/* NULL == 0. */
70 | 	assert(p == 0);
71 | 	assert(cheri_address_get(p) == 0);
72 | 
73 | 	/* All bytes in the NULL pointer are 0. */
74 | 	memset(zeroes, 0, sizeof(zeroes));
75 | 	assert(memcmp(&p, zeroes, sizeof(p)) == 0);
76 | ```
77 | 


--------------------------------------------------------------------------------
/src/limitations/integer-pointer-safety.md:
--------------------------------------------------------------------------------
 1 | ## Integer-pointer safety vs. pointer type safety
 2 | 
 3 | CHERI C/C++ provide strong, dynamic differentiation of integer and pointer
 4 | values by virtue of capability tags: Integer values are not architecturally
 5 | dereferenceable.
 6 | CHERI C/C++ also strongly differentiate executable pointers (which will be
 7 | sealed and have execute permission) from data pointers (which will have load
 8 | and store permissions), which prevents execution of data as well as pointer
 9 | arithmetic on control-flow pointers.
10 | 
11 | However, tagged capabilities do not dynamically enforce C/C++-language types.
12 | For example, casting from `struct foo *` to `struct bar *` is permitted
13 | dynamically as long as the compiler accepts the cast, regardless of whether
14 | that leads to unintended type confusion.
15 | This is required because idiomatic C/C++ software has a strong expectation for
16 | flexible casting, and enforcing these types breaks a significant proportion of
17 | real-world software.
18 | 
19 | Common usage patterns requiring this sort of cast include object-oriented
20 | programming styles used in C, and it is not clear what dynamic type system
21 | might be both strong enough to provide useful vulnerability mitigation while
22 | also being flexible enough to accept most software.
23 | In C++, there are well defined rules for casting between classes and
24 | subclasses, but those rules are extremely flexible and are not easily or
25 | efficiently implemented architecturally.
26 | 
27 | CHERI uses permissions to prevent certain very narrow types of dynamic type
28 | confusion that are essential to low-level memory safety: Specifically,
29 | CHERI-enabled kernels and run-time linkers will by invariant prevent the
30 | creation of code pointers with store privilege, or data pointers with execute
31 | privilege.
32 | Sealed capabilities also prevent undesired mutation of code pointers outside
33 | of run-time linkers, just-in-time compilers, or the dynamic creation of
34 | return addresses during function calls.
35 | 
36 | As such, while CHERI C/C++'s prevention of integer-pointer type confusion, as well as certain
37 | narrow forms of pointer-pointer type confusion, is incredibly valuable in
38 | implementing memory safety, its benefits should not be confused with those of
39 | full dynamic type safety.
40 | 
41 | **Advice to developers**: While many integer-pointer type confusions are
42 |   strongly prevented in CHERI C/C++, pointer-pointer type confusions other
43 |   than between code and data are not prevented.
44 |   Given the current absence of static analysis tools addressing this problem,
45 |   the best recourse will be defensive programming styles that avoid the
46 |   opportunity for pointer-pointer type confusion, and careful tagging and
47 |   checking of invariants dynamically.
48 | 
49 | **Ongoing research**: SRI/Cambridge have been exploring dynamic enforcement
50 |   of language-level types using the architectural *otype* feature, but have
51 |   yet to find a satisfactory middle ground between idiomatic type flexibility
52 |   and strong vulnerability mitigation.
53 |   Frequently, type errors of interest in C++ involve complex class hierarchies
54 |   and inheritance, which are not directly mappable into a flat type space.
55 |   New types of dynamic type enforcement, using *otype* or other capability 
56 |   features, seem a promising area to explore in the future.
57 | 
58 |   In the CHERIoT design, sealing is frequently used for type safety between
59 |   compartments to allow compartment state to be efficiently referenced without
60 |   connoting access to that state, which was one of the fundamental design aims
61 |   of the capability *otype* mechanism.
62 |   This is driven via direct use of types in the source code, rather than being
63 |   an automatic compiler feature.
64 |   Similarly, we have used sealing in earlier explorations of how vtables in
65 |   language runtimes (e.g., Java runtimes) can be used to improve robustness
66 |   through explicit use of the *otype* mechanism.
67 | 


--------------------------------------------------------------------------------
/src/impact/pointer-provenance-validity.md:
--------------------------------------------------------------------------------
 1 | ## Pointer provenance validity
 2 | <!--
 3 | \label{sec:pointer_provenance_validity}
 4 | -->
 5 | 
 6 | CHERI C/C++ implement pointers using architectural
 7 | capabilities, rather than using conventional 32-bit or 64-bit integers.
 8 | This allows the provenance validity of language-level pointers to be
 9 | protected by the provenance properties of CHERI architectural capabilities:
10 | only pointers implemented using valid capabilities can be dereferenced.
11 | Other types that contain pointers, `uintptr_t` and `intptr_t`,
12 | are similarly implemented
13 | using architectural capabilities, so that casts through these types
14 | can retain capability properties.
15 | When a dereference is attempted on a capability without a valid tag &mdash;
16 | including load, store, and instruction fetch &mdash; a hardware exception fires
17 | (see [Capability-related faults](capability-faults.html)).
18 | <!--
19 | %\psnote{It would be better to exhaustively list them (is it just intptr\_t and uintptr\_t?) rather than this vague "such as"}
20 | %\arnote{There are also cases such as C++11 strongly typed enums that use uintcap\_t as the underlying type, but we really don't need to mention this here. And I'm also not sure if we want to keep allowing that since enums should really be integer values only}
21 | -->
22 | 
23 | On the whole, the effects of pointer provenance validity are non-disruptive to
24 | C/C++ source code.
25 | However, a number of cases exist in language runtimes and other
26 | (typically less portable) C code that conflate integers and pointers, which can
27 | disrupt provenance validity.
28 | In general, generated code will propagate provenance validity in only two
29 | situations:
30 | 
31 | * **Pointer types** The compiler will generate suitable code to propagate
32 |   the provenance validity of pointers by using capability load and store
33 |   instructions.
34 |   This occurs when using a pointer type (e.g., `void *`) or an
35 |   integer type defined as being able to hold a pointer (e.g.,
36 |   `intptr_t`).
37 |   As with attempting to store 64-bit pointers in 32-bit integers on 64-bit
38 |   architectures, passing a pointer through an inappropriate type will lead to
39 |   truncation of metadata (e.g., the validity tag and bounds).
40 |   It is therefore important that a suitable type be used to hold pointers.
41 | 
42 |   This pattern often occurs where an opaque field exists in a data structure
43 |   &mdash; e.g., a `long` argument to a callback in older C code &mdash; that
44 |   needs to be changed to use a capability-oblivious type such as `intptr_t`.
45 | 
46 | <!--
47 | \psnote{I'm not sure this document has explained the ISA behavior concretely enough for this stuff to really make sense &mdash; the previous description was quite high-level.  Maybe somewhere it should be explicit that registers have tags, that load and store instructions must be via a capability, and that there are both capability and non-capability load and store instructions, with the former preserving tags (both ways) and the latter clearing them?}
48 | -->
49 | 
50 | * **Capability-oblivious code** In some portions of the C/C++ runtime and
51 |   compiler-generated code, it may not be possible to know whether memory is
52 |   intended to contain a pointer or not &mdash; and yet preserving pointers is
53 |   desirable.
54 |   In those cases, memory accesses must be performed in a way that preserves
55 |   pointer provenance.
56 |   In the C runtime itself, this includes `memcpy`, which must use
57 |   capability load and store instructions to transparently propagate capability
58 |   metadata and tags.
59 | 
60 |   A useful example of potentially surprising code requiring modification for
61 |   CHERI C/C++ is `qsort`.
62 |   Some C programs assume that `qsort` on an array of data structures
63 |   containing pointers will preserve the usability of those pointers.
64 |   As a result, `qsort` must be modified to perform memory copies using
65 |   pointer-based types, such as `intptr_t`, when size and alignment
66 |   require it.
67 | 


--------------------------------------------------------------------------------
/src/SUMMARY.md:
--------------------------------------------------------------------------------
 1 | [CHERI C/C++ Programming Guide](cover/README.md)
 2 | 
 3 | - [Introduction](introduction/README.md)
 4 |   - [Definitions](introduction/definitions.md)
 5 |   - [Version history](introduction/history.md)
 6 | - [Background](background/README.md)
 7 |   - [CHERI capabilities](background/cheri-capabilities.md)
 8 |   - [Architectural rules for capability use](background/architectural-rules.md)
 9 | - [CHERI C/C++](cheri-ccpp/README.md)
10 |   - [The CHERI C/C++ run-time environment](cheri-ccpp/cheri-runtime.md)
11 |   - [Referential, spatial, and temporal safety](cheri-ccpp/referential-spatial-temporal-safety.md)
12 |   - [Non-aliasing vs trapping memory safety](cheri-ccpp/nonaliasing-vs-trapping.md)
13 | - [Impact on the C/C++ programming model](impact/README.md)
14 |   - [Capability-related faults](impact/capability-faults.md)
15 |   - [Pointer provenance validity](impact/pointer-provenance-validity.md)
16 |     - [Recommended use of C-language types](impact/recommended-use-c-types.md)
17 |     - [Capability alignment in memory](impact/capability-alignment-in-memory.md)
18 |     - [Single-origin provenance](impact/single-origin-provenance.md)
19 |   - [Capability representation in memory](impact/capability-representation.md)
20 |   - [Bounds](impact/bounds.md)
21 |     - [Bounds from the compiler and linker](impact/bounds-from-compiler.md)
22 |     - [Bounds from the heap allocator](impact/bounds-from-heap-allocator.md)
23 |     - [Subobject bounds](impact/subobject-bounds.md)
24 |     - [Other sources of bounds](impact/other-sources-of-bounds.md)
25 |     - [Out-of-bounds pointers](impact/out-of-bounds-pointers.md)
26 |   - [Pointer comparison](impact/pointer-comparison.md)
27 |   - [Implications of capability revocation for temporal safety](impact/revocation.md)
28 |   - [Bitwise operations on capability types](impact/bitwise-operations.md)
29 |   - [Function prototypes and calling conventions](impact/function-prototypes-and-calling-conventions.md)
30 |   - [Data-structure and memory-allocation alignment](impact/data-structure-and-memory-allocation-alignment.md)
31 |     - [Restrictions in capability locations in memory](impact/restrictions-in-capability-locations.md)
32 | - [Limitations to CHERI C/C++ memory safety](limitations/README.md)
33 |   - [Integer-pointer safety vs. pointer type safety](limitations/integer-pointer-safety.md)
34 |   - [Compile-time uncertainty on regarding pointer types](limitations/compile-time-type-uncertainty.md)
35 |   - [Bounds imprecision, sub-object bounds, and custom allocators](limitations/bounds-imprecision.md)
36 |   - [Unions](limitations/unions.md)
37 |   - [Stack temporal safety](limitations/stack-temporal-safety.md)
38 |   - [Compiler optimizations and undefined behavior](limitations/compiler-optimizations.md)
39 | - [CHERI compiler warnings and errors](compiler/README.md)
40 |   - [Loss of provenance](compiler/loss-of-provenance.md)
41 |   - [Ambiguous provenance](compiler/ambiguous-provenance.md)
42 |   - [Underaligned capabilities](compiler/underaligned-capabilities.md)
43 | - [C APIs to get and set capability properties](apis/README.md)
44 |   - [CHERI-related header files](apis/cheri-related-header-files.md)
45 |   - [Retrieving capability properties](apis/retrieving-capability-properties.md)
46 |   - [Modifying or restricting capability properties](apis/modifying-or-restricting-capability-properties.md)
47 |   - [Capability permissions](apis/capability-permissions.md)
48 |   - [Bounds alignment due to compression](apis/bounds-alignment-due-to-compression.md)
49 |   - [Implications for memory-allocator design](apis/implications-for-memory-allocator-design.md)
50 | - [Printing capabilities from C](printf/README.md)
51 |   - [Generating string representations of capabilities](printf/strfcap.md)
52 |   - [Printing capabilities with the printf(3) API family](printf/printf.md)
53 | - [The CheriABI POSIX process environment](cheriabi/README.md)
54 |   - [POSIX API changes](cheriabi/posix-api-changes.md)
55 |   - [Handling capability-related signals](cheriabi/handling-capability-signals.md)
56 |   - [Revocation APIs](cheriabi/revocation-apis.md)
57 | - [Further reading](reading/README.md)
58 | - [Acknowledgements](acks/README.md)
59 | 


--------------------------------------------------------------------------------
/filters/latex-custom-formats.lua:
--------------------------------------------------------------------------------
  1 | if FORMAT:match 'latex' then
  2 |   function RawInline(element)
  3 |     -- Superscripts appear in the Markdown sources as raw HTML
  4 |     -- <sup>...</sup> tags; map each tag to its LaTeX counterpart.
  5 |     local latex_for_tag = {
  6 |       ["<sup>"] = "\\textsuperscript{",
  7 |       ["</sup>"] = "}",
  8 |     }
  9 |     local replacement = latex_for_tag[element.text]
 10 |     -- Every other RawInline element passes through untouched.
 11 |     if replacement == nil then return element end
 12 |     return pandoc.RawInline('latex', replacement)
 13 |   end
 14 | 
 15 |   function RawBlock(block)
 16 |     -- Only raw HTML blocks are rewritten; all others are left unmodified.
 17 |     if block.format ~= "html" then
 18 |       return block
 19 |     end
 20 | 
 21 |     -- Highlighted text within code blocks (to mark good or bad CHERI
 22 |     -- C/C++ examples) is added to the Markdown source files as
 23 |     -- embedded HTML, so we need a filter to convert these to LaTeX.
 24 |     if block.text:find('^<pre><code>') ~= nil then
 25 |       -- Remove the HTML wrapper tags
 26 |       local listing = block.text:gsub("<pre><code>", "")
 27 |       listing = listing:gsub("</code></pre>", "")
 28 | 
 29 |       -- Define Tikz code listing highlights from HTML mark tags.
 30 |       -- In LaTeX, each highlight must have a unique name, so we use
 31 |       -- the HTML "id" attribute from the mark tag.
 32 |       local highlights = {}  -- kept local so nothing leaks into globals
 33 |       for id, color in listing:gmatch('<mark id="(%w+)" style="background%-color: #?(%w+)">') do
 34 |         -- Known hex colours map to LaTeX names; others pass through as-is.
 35 |         if color == "77DD77" then
 36 |           color = "green"
 37 |         elseif color == "EE918D" then
 38 |           color = "red"
 39 |         end
 40 |         highlights[#highlights + 1] =
 41 |             "\\TikzListingHighlightStartEnd[" .. color .. "]{" .. id .. "}\n"
 42 |       end
 43 | 
 44 |       -- Replace HTML format text highlights with LaTeX
 45 |       listing = listing:gsub(
 46 |           '<mark id="(%w+)" style="background%-color: #?%w+">([^<]+)</mark>',
 47 |           "£\\vcpgfmark{Start%1}£%2£\\vcpgfmark{End%1}£")
 48 | 
 49 |       -- Clean up inline HTML escaping; "&amp;" is decoded last so that
 50 |       -- text such as "&amp;lt;" is not unescaped twice.
 51 |       listing = listing:gsub("&lt;", "<")
 52 |       listing = listing:gsub("&gt;", ">")
 53 |       listing = listing:gsub("&amp;", "&")
 54 | 
 55 |       -- Wrap the code block in a LaTeX code listing environment
 56 |       listing = table.concat(highlights) .. latex_code_listing(listing, true)
 57 |       return pandoc.RawBlock('latex', listing)
 58 |     end
 59 | 
 60 |     -- Pass-through blocks of inline LaTeX: HTML comments annotated with
 61 |     -- {=latex}, which keeps them invisible to mdBook.
 62 |     if block.text:find('^<!%-%-{=latex}') ~= nil then
 63 |       local latexblock = block.text:gsub("^<!%-%-{=latex}", "")
 64 |       latexblock = latexblock:gsub("%-%->$", "")
 65 |       return pandoc.RawBlock('latex', latexblock)
 66 |     end
 67 |     return block
 68 |   end
 69 | 
 70 |   function CodeBlock(block)
 71 |     local classes = block.classes
 72 | 
 73 |     -- C listings: the optional "numbered" class selects line numbers.
 74 |     if classes:includes("clisting") then
 75 |       local numbered = classes:includes("numbered") and true or false
 76 |       return pandoc.RawBlock('latex', latex_code_listing(block.text, numbered))
 77 |     end
 78 | 
 79 |     -- Compiler diagnostics get their own LaTeX environment.
 80 |     if classes:includes("compilerwarning") then
 81 |       local parts = {"\\begin{compilerwarning}", block.text, "\\end{compilerwarning}"}
 82 |       return pandoc.RawBlock('latex', table.concat(parts, "\n"))
 83 |     end
 84 | 
 85 |     -- All other CodeBlocks are left unmodified
 86 |     return block
 87 |   end
 88 | end
 89 | 
 90 | -- Helper functions
 91 | 
 92 | -- Construct a LaTeX code listing block in a custom style
 93 | function latex_code_listing(code, numbered)
 94 |   -- Pick the environment; "numbered" selects the line-numbered variant.
 95 |   local env = numbered and "numberedclisting" or "clisting"
 96 |   -- \end{...} must begin on its own line. Plain listings always get a
 97 |   -- separating newline; numbered ones only when the code does not already
 98 |   -- end with one (pandoc CodeBlock text normally has no trailing newline).
 99 |   local separator = "\n"
100 |   if numbered and (code == "" or code:sub(-1) == "\n") then
101 |     separator = ""
102 |   end
103 |   return "\\begin{" .. env .. "}\n" .. code .. separator .. "\\end{" .. env .. "}"
104 | end
105 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!-- ANCHOR: cover -->
 2 | 
 3 | # CHERI C/C++ Programming Guide
 4 | 
 5 | * Robert N. M. Watson (University of Cambridge, Capabilities Limited),
 6 | * Alexander Richardson (University of Cambridge),
 7 | * Brooks Davis (SRI International),
 8 | * John Baldwin (Ararat River Consulting, LLC),
 9 | * David Chisnall (Microsoft Research),
10 | * Jessica Clarke (University of Cambridge),
11 | * Nathaniel Filardo (Microsoft Research),
12 | * Simon W. Moore (University of Cambridge),
13 | * Edward Napierala (University of Cambridge, Capabilities Limited),
14 | * Allison Randal (Capabilities Limited),
15 | * Peter Sewell (University of Cambridge), and
16 | * Peter G. Neumann (SRI International)
17 | 
18 | This is the CHERI Pure-Capability C/C++ Programming Guide, a short guide to
19 | help developers working with pure-capability C/C++ understand the benefits
20 | that it brings, any code adaptations they might need to make, and how to
21 | interpret new compiler warnings and errors that arise with pure-capability
22 | code.
23 | 
24 | *This is a living document; feedback and contributions are welcomed.
25 | Please see our
26 | [GitHub Repository](https://github.com/CTSRD-CHERI/cheri-c-programming) for
27 | source code and an issue tracker.
28 | There is a [rendered version on the web](https://ctsrd-cheri.github.io/cheri-c-programming/), which is automatically updated when the git repository is
29 | committed to.*
30 | 
31 | The [2020 published version of the CHERI C/C++ Programmers
32 | Guide](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-947.html) can be cited
33 | as follows:
34 | 
35 | Robert N. M. Watson, Alexander Richardson, Brooks Davis, John Baldwin, David Chisnall, Jessica Clarke, Nathaniel Filardo, Simon W. Moore, Edward Napierala, Peter Sewell, and Peter G. Neumann. CHERI C/C++ Programming Guide, Technical Report UCAM-CL-TR-947, Computer Laboratory, June 2020.
36 | 
37 | Or in BibTeX:
38 | 
39 | ```
40 | @TechReport{UCAM-CL-TR-947,
41 |   author =	 {Watson, Robert N. M. and Richardson, Alexander and Davis,
42 |           	  Brooks and Baldwin, John and Chisnall, David and Clarke,
43 |           	  Jessica and Filardo, Nathaniel and Moore, Simon W. and
44 |           	  Napierala, Edward and Sewell, Peter and Neumann, Peter G.},
45 |   title = 	 {{CHERI C/C++ Programming Guide}},
46 |   year = 	 2020,
47 |   month = 	 jun,
48 |   url = 	 {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-947.pdf},
49 |   institution =  {University of Cambridge, Computer Laboratory},
50 |   doi = 	 {10.48456/tr-947},
51 |   number = 	 {UCAM-CL-TR-947}
52 | }
53 | ```
54 | 
55 | ## Acknowledgments
56 | 
57 | We gratefully acknowledge the helpful feedback from our colleagues, including
58 | Hesham Almatary, Ruben Ayrapetyan, Silviu Baranga, Jacob Bramley, Rod Chapman,
59 | Paul Gotch, Al Grant, Brett Gutstein, Alfredo Mazzinghi, Alan Mycroft, and Lee
60 | Smith.
61 | 
62 | This work was supported by the Defense Advanced Research Projects Agency
63 | (DARPA) and the Air Force Research Laboratory (AFRL), under contracts
64 | FA8750-10-C-0237 ("CTSRD") and HR0011-18-C-0016 ("ECATS").
65 | The views, opinions, and/or findings contained in this report are those of the
66 | authors and should not be interpreted as representing the official views or
67 | policies of the Department of Defense or the U.S. Government.
68 | 
69 | This work was supported in part by the Innovate UK project Digital Security by
70 | Design (DSbD) Technology Platform Prototype, 105694.
71 | 
72 | This work was supported in part by the Engineering and Physical Sciences
73 | Research Council (EPSRC) under UKRI3001: CHERI Research Centre, and under the
74 | EPSRC REMS Programme Grant (EP/K008528/1).
75 | 
76 | This project has received funding from the European Research Council (ERC)
77 | under the European Union’s Horizon 2020 research and innovation programme
78 | (grant agreement No 789108), ERC Advanced Grant ELVER.
79 | 
80 | We also acknowledge Arm Limited, HP Enterprise, and Google, Inc.
81 | Approved for Public Release, Distribution Unlimited.
82 | 
83 | ## Building
84 | 
85 | Building the book from the Markdown sources requires
86 | [mdBook](https://github.com/rust-lang/mdBook). Once installed, `mdbook build`
87 | will build the static HTML files in the `book/` directory, whilst `mdbook
88 | serve` will build and serve them at `http://localhost:3000`. Please refer to
89 | the mdBook documentation for further options.
90 | 


--------------------------------------------------------------------------------
/src/cheri-ccpp/nonaliasing-vs-trapping.md:
--------------------------------------------------------------------------------
 1 | ## Non-aliasing vs. trapping for memory safety
 2 | 
 3 | The CHERI architecture accepts a number of tradeoffs for performance reasons,
 4 | including imprecise bounds for spatial safety (to reduce pointer-size growth),
 5 | and quarantining for temporal safety (to mitigate revocation overheads).
 6 | CHERI C/C++ therefore adopt the following definitions and approximations:
 7 | 
 8 |  * **Referential safety** guarantees that a corrupted or reinjected pointer
 9 |    will be non-dereferenceable.
10 |    This protection is guaranteed from the point of mis-manipulation (e.g., a
11 |    partial overwrite in memory, manipulation using an inappropriate arithmetic
12 |    operation, an attempt to violate monotonicity) but an architectural
13 |    exception is not guaranteed to take place until a dereference (e.g., a
14 |    load, store, or jump) is attempted.
15 | 
16 |  * **Spatial safety** guarantees non-aliasing between allocations: A pointer
17 |    returned for one allocation will throw an architectural exception rather
18 |    than permit out-of-bounds access to memory associated with another
19 |    allocation.
20 |    However, it is not guaranteed to throw an architectural exception if an
21 |    out-of-bounds access would not access memory associated with another
22 |    allocation.
23 |    For example, non-trapping access may be permitted to padding after the end
24 |    of a heap allocation for larger allocation sizes, due to bounds
25 |    imprecision.
26 |    It is the responsibility of the allocator to ensure that any non-trapping
27 |    out-of-bounds access is *safe*.
28 | 
29 |  * **Temporal safety** guarantees non-aliasing between freed and current
30 |    allocations: A pointer returned to a previously freed allocation will throw
31 |    an architectural exception rather than permit use of the memory after
32 |    reallocation.
33 |    However, it is not guaranteed to throw an architectural exception if a use
34 |    after free occurs before reallocation of that memory.
35 |    For example, non-trapping access may be permitted to memory immediately
36 |    after a call to `free()` but prior to asynchronous revocation or a further
37 |    call to `malloc()` that reallocates the same memory.
38 |    It is the responsibility of the allocator to ensure that any non-trapping
39 |    use-after-free access is *safe*.
40 | 
41 | These practical design choices have some important implications, including:
42 | 
43 |  * **Exception delivery semantics** are implementation defined, as: (a) bounds
44 |    precision varies by underlying architecture; (b) memory allocators and
45 |    revocation support may vary by software runtime; and (c) compiler behavior,
46 |    and in particular optimization, will vary in the presence of statically
47 |    identifiable undefined behavior.
48 |    Software developers should not make strong assumptions about whether an
49 |    overflow on a particular size object may lead immediately to an exception.
50 |    However, they may depend on dynamically enforced, deterministic,
51 |    non-aliasing memory protection.
52 | 
53 |  * **Security arguments** are often easier to make in the presence of
54 |    fail-stop behavior.
55 |    Immediate trapping on a bounds or temporal-safety violation may make it
56 |    easier to understand that code does not then proceed to other insecure
57 |    behavior.
58 |    Allocator authors may therefore choose to avoid aligning and padding
59 |    strategies that unnecessarily introduce bounds imprecision.
60 |    However, these tradeoffs do not weaken a security argument based on
61 |    deterministic non-aliasing as the security guarantee, which tolerates
62 |    continued execution beyond undefined behavior caused by an exception-free
63 |    memory-safety bug.
64 | 
65 |  * **Debuggability** is greatest when software fails stop close to the point
66 |    of a bug occurring.
67 |    CHERI will frequently ease debugging by ensuring trapping when aliasing
68 |    takes place, as well as in many other situations.
69 |    Deferred architectural exceptions until the point of dereference (for
70 |    referential safety), or the point of potential aliasing (for spatial and
71 |    temporal safety) do not weaken current debuggability, but also may not
72 |    improve it in some situations.
73 |    This is especially true when working with large or non-aligned memory
74 |    allocations (for spatial safety) or rapid use-after-free without
75 |    reallocation (for temporal safety).
76 |    These design choices differ from those made in, for example, LLVM's address
77 |    sanitizer, where rapid exception throwing is weighted more greatly than
78 |    security mitigation.
79 | 


--------------------------------------------------------------------------------
/src/impact/single-origin-provenance.md:
--------------------------------------------------------------------------------
 1 | ### Single-origin provenance
 2 | <!--
 3 | \label{sec:ambiguous-provenance}
 4 | -->
 5 | 
 6 | In the CHERI memory protection model, capabilities are derived from a single other
 7 | capability.
 8 | However, in C code, expressions may construct a new `intptr_t` value from more
 9 | than one provenance-carrying parent `intptr_t` &mdash; for example, by casting both a
10 | pointer and a literal value to `intptr_t`-s, and then adding them. <!--
11 | \psnote{That literal value wouldn't have a non-empty provenance, so this isn't the best example.   Maybe better to have something like \texttt{p+(q1-q2)} ?}
12 | \psnote{More generally, there is a bit of a mismatch between this and our C provenance treatment of \cintptrt, which there is a plain integer type with no provenance &mdash; but which regains provenance in some cases when cast back to a pointer.  To ponder...}
13 | -->
14 | In that case, the compiler must decide which input capability provides the
15 | capability metadata (bounds, permissions, ...) to be used in the output
16 | value.
17 | Consider for example the following code:
18 | 
19 | ```{.clisting}
20 | void *c1 = (void *)((uintptr_t)input_ptr + 1);
21 | void *c2 = (void *)(1 + (uintptr_t)input_ptr);
22 | uintptr_t offset = 1;
23 | void *c3 = (void *)(offset + (uintptr_t)input_ptr);
24 | ```
25 | 
26 | In C with integer pointers, the values of `c1`, `c2`, and `c3` might be expected to have the
27 | same value as `input_ptr`, except with the address incremented by one.
28 | In CHERI C, each expression includes an arithmetic operation between provenance-carrying types.
29 | While not visible in the source code, the constant `1` is promoted to a capability type, `uintptr_t`.
30 | In the current implementation, the compiler will return the expected provenance-carrying result for cases `c1` and `c2` but not `c3`.[^2]
31 | 
32 | For `c1` and `c2`, the compiler sees that one of the sides is a non-provenance-carrying integer type that was promoted to `uintptr_t` and therefore selects the other operand as the provenance source.
33 | It is not feasible to infer the correct provenance source for the third case, so the compiler will emit a warning.[^3]
34 | 
35 | The current behavior for such ambiguous cases is to select the left-hand-side as the provenance source, but we are considering making this an error in the future.
36 | 
37 | The recommended approach to resolve such ambiguous cases is to change the type of one operand to a non-provenance-carrying type such as `size_t`.
38 | Alternatively, if the variable declaration cannot be changed, it is also possible to use a cast in the expression itself.
39 | 
40 | ```{.clisting}
41 | size_t offset_size_t = 1;
42 | void *c3_good1 = (void *)(offset_size_t + (uintptr_t)input_ptr);
43 | 
44 | uintptr_t offset_uintptr_t = 1;
45 | void *c3_good2 = (void *)((size_t)offset_uintptr_t + (uintptr_t)input_ptr);
46 | ```
47 | 
48 | We also provide a new attribute `cheri_no_provenance` that can be used to annotate variables or fields of type `intptr_t`/`uintptr_t` where the underlying type cannot be changed:
49 | 
50 | ```{.clisting}
51 | struct S {
52 |     uintptr_t maybe_tagged;
53 |     uintptr_t never_tagged __attribute__((cheri_no_provenance));
54 | };
55 | void test(struct S s, uintptr_t ptr) {
56 |     void *x1 = (void *)(s.maybe_tagged + ptr); // ambiguous, currently uses LHS
57 |     void *x2 = (void *)(s.never_tagged + ptr); // not ambiguous, uses RHS
58 | }
59 | ```
60 | 
61 | <!--
62 | \psnote{This doesn't really explain what `cheri_no_provenance` does?  And what it means when applied to other types?}\arnote{compiler error if it's not \cuintptrt. Will try to improve example later.}
63 | -->
64 | 
65 | [^2]: Historically, the CHERI compiler would select the left-hand-most pointer in the expression as the provenance source.
66 | While this model follows a single consistent rule, it can lead to surprising behavior if an expression places the provenance-carrying value to the right-hand-side.
67 | In the example above, the value of `c1` would be a valid capability, but `c2` and `c3` would hold an untagged value (albeit with the expected address).
68 | 
69 | [^3]: We could add a data-flow-sensitive analysis to determine whether values are the result of promotion from a non-provenance-carrying type.
70 | However, this would add significant complexity to the compiler and we have not seen many cases where this would have avoided changes to the source code.
71 | <!--
72 | \psnote{from a language-design POV, it'd be pretty horrid to have substantial semantics depend on just how smart one's analysis is}
73 | \arnote{I agree. Even the current behavior is quite ugly, but at least it has measurable compatibility benefits.}
74 | -->
75 | 


--------------------------------------------------------------------------------
/src/limitations/bounds-imprecision.md:
--------------------------------------------------------------------------------
 1 | ## Bounds imprecision, sub-object bounds, and custom allocators
 2 | 
 3 | CHERI capabilities employ bounds compression to fit both the lower and upper
 4 | bounds into a single address-sized word of metadata.
 5 | See [Bounds precision](../background/cheri-capabilities.html#bounds-precision)
 6 | for further details.
 7 | 
 8 | As a result, it is not possible to represent all possible combinations of
 9 | lower and upper bounds with a given address, leading to stronger alignment and
10 | padding requirements for memory allocations.
11 | The compiler, linker, and run-time environment are aware of these constraints,
12 | and hence generally introduce necessary alignment and padding as required --
13 | for example, by placing additional padding around some memory allocations to
14 | ensure that more coarse bounds do not allow underruns or overruns to access
15 | memory associated with other allocations.
16 | See [Bounds alignment due to compression](../apis/bounds-alignment-due-to-compression.html#bounds-alignment-due-to-compression)
17 | for further details.
18 | 
19 | There are, however, situations in which programmers must be explicitly aware
20 | of this imprecise bounding behavior, including:
21 | 
22 |  - Optional use of sub-object bounds, which is currently considered
23 |    *opportunistic protection* as sub-object bounds do not adjust structure
24 |    alignment and padding for fields.
25 |    This feature is not currently enabled by default in the compiler due to
26 |    these limitations.
27 | 
28 |  - Application-specific memory allocators, which require modest extensions to
29 |    not just set bounds, but also ensure suitable alignment and padding such
30 |    that non-aliasing can be enforced using CHERI bounds.
31 | 
32 |  - Sub-allocation patterns, where the result of a single call to `malloc()` is used
33 |    to allocate two or more related but disjoint objects, possibly of different types.
34 |    This case is different from custom allocators, because the intention is not to write
35 |    a memory allocator, but rather to optimise the allocation of multiple
36 |    related objects. Consider, for example, the contiguous allocation of an
37 |    array and a structure that references the array. In this case, bounds must be
38 |    set manually and considering precision will be necessary.
39 | 
40 |  - Other uses of manually set bounds in libraries and applications to limit
41 |    the potential for underruns and overruns, such as in packet parsing, which
42 |    must similarly take into account new alignment and padding requirements.
43 | 
44 | Where possible, the compiler will emit warnings in situations where sub-object
45 | bounds cannot be guaranteed to provide precise spatial protection.
46 | 
47 | **Advice to developers**: When using sub-object bounds, additional padding and
48 |   alignment may be required to ensure precise protection.
49 |   In some situations, it may be more efficient to use external allocations
50 |   pointed to by a primary allocation rather than embed sub-objects with large
51 |   sizes and poor alignment within a larger structure.
52 |   Compiler warnings about limited precision should be observed.
53 |   When implementing protection in memory allocators, guidance provided in this
54 |   document should be observed to ensure precise spatial safety is achieved.
55 | 
56 | **Advice to developers**: Sub-allocation patterns should be avoided. If multiple
57 |   objects need to be allocated, the system allocator (or application-specific
58 |   allocators) should be used to allocate each disjoint object separately, and
59 |   should guarantee that the allocation is properly aligned and padded for
60 |   representability. If sub-allocation can not be avoided, care must be taken to
61 |   ensure that each sub-allocated object is placed at a representable boundary
62 |   within the main allocation (note that this is platform-specific).
63 |   Splitting a large allocation into multiple representable objects is not
64 |   straightforward, see [Bounds alignment](../apis/bounds-alignment-due-to-compression.md#bounds-alignment-due-to-compression).
65 | 
66 | **Ongoing research**: SRI/Cambridge/SCI have ongoing research to improve the
67 |   safety of sub-object bounds in the hopes of transitioning them from being
68 |   "opportunistic" to being deterministically safe.
69 |   This involves improvements to compiler analysis, the use of trapping
70 |   bounds-setting instructions to ensure fail-closed behavior, and optional
71 |   support for additional alignment and padding.
72 |   Early experiments with the CheriBSD kernel suggest that migrating to a
73 |   deterministically safe sub-object bounds mode can be feasible with potentially
74 |   modest disruption to the codebase (at least for the kernel). The main sources
75 |   of friction appear to be Variable Length Arrays (VLAs). The toolchain can be
76 |   extended to introduce padding to ensure sub-object representability; however,
77 |   there are open questions to quantify the disruption to structure padding,
78 |   whether this is acceptable and whether this can be automated.
79 | 


--------------------------------------------------------------------------------
/src/background/cheri-capabilities.md:
--------------------------------------------------------------------------------
  1 | ## CHERI capabilities
  2 | 
  3 | CHERI capabilities are twice the width of the native integer pointer type of
  4 | the baseline architecture: there are 128-bit capabilities on 64-bit platforms,
  5 | and 64-bit capabilities on 32-bit platforms.
  6 | Each capability consists of an integer (virtual) address of the natural size for
  7 | the architecture (e.g., 32 or 64 bit), and also additional metadata that is
  8 | compressed in order to fit in the remaining 32 or 64 bits of the capability
  9 | (see Figure 1 for an example; details
 10 | vary across underlying architectures and word sizes).
 11 | In addition, capabilities are associated with a 1-bit validity "tag" whose
 12 | value is maintained in registers and memory by the architecture, but is not
 13 | part of addressable memory.
 14 | 
 15 | <!--{=latex}\begin{comment}
 16 | In LaTeX, we want to use the bitbox figure below instead of the jpeg
 17 | image file, so we comment it out.
 18 | -->
 19 | 
 20 | ![CHERI capability format illustration](capability-format.jpg)
 21 | *Figure 1: 128-bit CHERI Concentrate capability representation used in 64-bit CHERI-RISC-V as described in CHERI ISAv9: 64-bit address and metadata in addressable memory; and 1-bit out-of-band tag.*
 22 | 
 23 | <!--{=latex}
 24 | \end{comment}
 25 | 
 26 | \begin{figure}[b]
 27 | \hspace{2.5cm}
 28 | % Tag
 29 | \begin{subfigure}[t!]{0.1\textwidth}
 30 | \begin{bytefield}[bitwidth=3pt]{1}
 31 | % \bitheader[endianness=big]{~,~} \\
 32 | \begin{leftwordgroup}{1-bit tag}
 33 | \bitbox{1}{}
 34 | \end{leftwordgroup}
 35 | \end{bytefield}
 36 | \end{subfigure}
 37 | % Capability
 38 | \begin{subfigure}[t!]{0.1\textwidth}
 39 | \begin{bytefield}[bitwidth=3pt]{64}
 40 | \bitheader[endianness=big]{0,63} \\
 41 | \begin{rightwordgroup}{128-bit \\ in-memory \\ capability}
 42 | \bitbox{16}{perms} & \bitbox{3}{\color{lightgray}\rule{\width}{\height}} & \bitbox{15}{otype} & \bitbox{30}{bounds} \\
 43 | \bitbox[lrb]{64}{64-bit~address}
 44 | \end{rightwordgroup}
 45 | \end{bytefield}
 46 | \end{subfigure}
 47 | \caption{128-bit CHERI Concentrate capability representation used in
 48 |   64-bit CHERI-RISC-V: 64-bit address
 49 |   and metadata in addressable memory; and 1-bit out-of-band tag.}
 50 | \label{figure:cheri-capability-representation}
 51 | \end{figure}
 52 | -->
 53 | 
 54 | Each element of the additional metadata and tag of the capability contributes
 55 | to the protection model:
 56 | 
 57 | 
 58 | * **Validity tag**: The tag tracks the validity of a capability.
 59 |   If invalid, the capability cannot be used for load, store, instruction
 60 |   fetch, or other operations.
 61 |   It is still possible to extract fields from an invalid capability,
 62 |   including its address.
 63 | 
 64 | * **Bounds**: The lower and upper bounds are addresses restricting the
 65 |   portion of the address space within which the capability can be used for
 66 |   load, store, and instruction fetch.
 67 |   Setting a capability's address (i.e., where it points) within
 68 |   bounds will retain the capability's validity tag.
 69 |   Setting addresses out of bounds is subject to the precision limits of the
 70 |   bounds compression model (see below and [Out-of-bounds
 71 |   pointers](../impact/out-of-bounds-pointers.html)).
 72 |   Broadly speaking, setting addresses "nearly within" the capability's bounds
 73 |   will be "representable" in the model, and will preserve the validity tag.
 74 |   Out-of-bounds capabilities continue to authorize access only to memory
 75 |   within bounds.
 76 | 
 77 | * **Permissions**: The permissions mask controls how the capability can be
 78 |   used &mdash; for example, by authorizing the loading and storing of data and/or
 79 |   capabilities.
 80 | 
 81 | * **Object type**: If this value is not equal to the unsealed object type, the capability is "sealed" and
 82 |   cannot be modified or dereferenced, but can be used to implement opaque
 83 |   pointer types.
 84 |   This feature is not described further in this document, as it is primarily
 85 |   used to implement software compartmentalization rather than object-level
 86 |   memory protection.
 87 | 
 88 | <!-- XXXRW: Should talk about sealed entry capabilities here. -->
 89 | 
 90 | ### Capability tagging
 91 | 
 92 | When stored in memory, valid capabilities must be naturally aligned &mdash; i.e.,
 93 | at 64-bit or 128-bit boundaries, depending on capability size &mdash; as that is
 94 | the granularity at which in-memory tags are maintained.
 95 | Partial or complete overwrites with data, rather than a complete overwrite
 96 | with a valid capability, lead to the in-memory tag being cleared, preventing
 97 | corrupted capabilities from later being dereferenced.
 98 | 
 99 | ### Bounds precision
100 | 
101 | In order to reduce the memory footprint of capabilities, capability
102 | compression is used to reduce the overhead of bounds so that the full
103 | capability, including address, permissions, and bounds fits within 64 or
104 | 128 bits (plus the 1-bit out-of-band tag).
105 | Bounds compression takes advantage of redundancy between the address
106 | and the bounds, which occurs because a pointer typically falls within (or
107 | close to) its associated allocation, and because allocations are typically
108 | well aligned.
109 | The compression scheme uses a floating-point representation, allowing high-precision bounds for small
110 | objects, but requiring stronger alignment and padding for larger allocations
111 | (see [Bounds alignment due to compression](../apis/bounds-alignment-due-to-compression.html)).
112 | As a result, allocators handling large objects may define "spatial safety" as
113 | providing non-aliasing rather than the guarantee of a trap, as allocation
114 | bounds may not exactly align to architectural capability bounds.
115 | 


--------------------------------------------------------------------------------
/src/limitations/compiler-optimizations.md:
--------------------------------------------------------------------------------
  1 | ## Compiler optimizations and undefined behavior
  2 | 
  3 | Many of CHERI's protections can be seen as an effort to determine new, and
  4 | safer, dynamic outcomes to undefined behavior specified in the C/C++
  5 | languages.
  6 | For example, CHERI's dynamic bounds checking and exception throwing replaces
  7 | the common (and specification-compliant) behavior of enabling arbitrary code
  8 | execution found in many C/C++ implementations.
  9 | There is, however, an important interaction with compiler optimizations:
 10 | Compilers are allowed to (and not infrequently do) assume that software will
 11 | not behave in undefined ways, and are permitted to optimize on that basis.
 12 | In this section, we explore several such cases that are important to be aware
 13 | of in understanding key limitations to the CHERI C/C++ approach.
 14 | This is an active area of research at SRI/Cambridge.
 15 | 
 16 | ### Uninitialized local variables
 17 | 
 18 | In the C language, it is undefined behavior to depend on the value of an
 19 | uninitialized local variable, and bugs triggering this behavior are sometimes
 20 | exploitable vulnerabilities.
 21 | For example, an integer or pointer value may "shadow through" from a now-free
 22 | stack frame, which could lead to misbehavior such as attacker-controlled
 23 | control flows.
 24 | 
 25 | CHERI does not, itself, prevent the use of uninitialized values, although it
 26 | does impose a number of protections that hamper exploitation (such as tagged
 27 | pointers, sealed control-flow pointers, spatial safety, and temporal safety).
 28 | Further, compartmentalization may limit the scope for stack reuse, preventing
 29 | (for example) application compartments from reusing stack space previously
 30 | used for privileged components such as the run-time linker and heap
 31 | allocators.
 32 | However, CHERI in isolation does not prevent the use of undefined values
 33 | exposed by uninitialized local variables, and it remains important to prevent or
 34 | mitigate these vulnerabilities in other ways.
 35 | 
 36 | **Advice to developers**: Compilers support both warnings regarding the use of
 37 |   uninitialized local variables, as well as options to automatically zero (or
 38 |   otherwise initialize) uninitialized local variables.
 39 |   We strongly recommend that uninitialized local variables be considered a
 40 |   compile-time error or that automatic initialization be enabled, when using
 41 |   CHERI C/C++ for memory protection.
 42 | 
 43 | **Ongoing research**: The CHERI research community is actively exploring
 44 |   potential extensions to CHERI to detect or prevent undefined behavior.
 45 | 
 46 | ### Uninitialized arguments and return values
 47 | 
 48 | As with software bugs involving uninitialized local variables, CHERI does not
 49 | directly prevent vulnerabilities from arising as a result of uninitialized
 50 | arguments or return values.
 51 | And, as with uninitialized local variables, compilers are permitted to optimize
 52 | based on the assumption that no undefined behavior occurs in execution, and
 53 | may generate code that causes surprising results in the presence of improper
 54 | initialization.
 55 | CHERI indirectly affects the exploitability of those vulnerabilities by
 56 | virtue of pointer tagging, spatial safety, and temporal safety, but these
 57 | limit exploitation rather than preventing the underlying vulnerability.
 58 | 
 59 | **Advice to developers**: As with uninitialized local variables, compilers
 60 |   support warnings regarding the use of uninitialized arguments and failures
 61 |   to return values as defined by a function's prototype.
 62 |   We strongly recommend that uninitialized arguments and return values be
 63 |   considered compile-time errors.
 64 | 
 65 | ### Out-of-bounds memory accesses
 66 | 
 67 | Compilers are, however, permitted to assume that undefined behavior does not
 68 | occur for the purposes of optimization, and may, for example, elide stores
 69 | that can be statically determined to be out of bounds.
 70 | 
 71 | For example, dynamic bounds checking on memory allocations frequently replaces
 72 | the specification-compliant corruption of other in-memory structures when a
 73 | buffer overflow occurs with a dynamic exception that, by default, terminates
 74 | an application.
 75 | 
 76 | Further, for a buffer with suitably scoped lifetime, not only may the store be
 77 | elided, but a later load may carry forward the expected value for being
 78 | loaded, despite the store not taking place.
 79 | 
 80 | This interacts importantly with the definitions of spatial and temporal safety
 81 | in CHERI, which are focused on *non-aliasing* rather than *precise
 82 | exceptions*.
 83 | If the compiler optimizes out an out-of-bounds store, then no CHERI exception
 84 | will be thrown dynamically.
 85 | Further, out-of-bounds loads may not only not throw an exception, but they may
 86 | see the value that was not stored.
 87 | CHERI's spatial safety is not violated, as no other object was corrupted.
 88 | However, this behavior may be surprising, and is a more broad example of how
 89 | memory-unsafe code may not fail stop, leading to further undefined execution
 90 | that could have surprising or insecure behavior.
 91 | 
 92 | **Advice to developers**: If memory-safety vulnerabilities are reported in
 93 |   software, it is important to validate the protection CHERI provides through
 94 |   testing and not just source-code inspection.
 95 |   This will help differentiate cases in which a clean "fail stop" is generated
 96 |   vs. those in which other undefined behavior may be reached, which must be
 97 |   analyzed in order to determine impact.
 98 | 
 99 | **Ongoing research**: The SRI/Cambridge team is actively investigating the
100 |   impact of undefined behavior and compiler optimizations alongside CHERI
101 |   memory protection.
102 | 


--------------------------------------------------------------------------------
/src/reading/README.md:
--------------------------------------------------------------------------------
  1 | # Further reading
  2 | 
  3 | ## The CHERI ISA
  4 | 
  5 | The primary reference for the CHERI Instruction-Set Architecture (ISA) is the
  6 | ISA specification; at the time of writing, the most recent version is
  7 | [Capability Hardware Enhanced RISC Instructions: CHERI Instruction-Set
  8 | Architecture (Version
  9 | 7)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-927.pdf)[^1].
 10 | 
 11 | ## An Introduction to CHERI
 12 | 
 13 | Our technical report, [An Introduction to
 14 | CHERI](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-941.pdf), provides a
 15 | high-level overview of the CHERI architecture, ISA modeling, hardware
 16 | implementations, and software stack[^2].
 17 | 
 18 | ## C/C++ Memory Safety
 19 | 
 20 | Published at ASPLOS 2015, [Beyond the PDP-11: Architectural support for a
 21 | memory-safe C abstract
 22 | machine](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201503-asplos2015-cheri-cmachine.pdf)
 23 | describes idiomatic C and spatial memory protection[^3].
 24 | 
 25 | We published a paper on our memory-safe OS userspace and application suite,
 26 | [CheriABI: Enforcing Valid Pointer Provenance and Minimizing Pointer Privilege
 27 | in the POSIX C Run-time
 28 | Environment](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201904-asplos-cheriabi.pdf),
 29 | at ASPLOS 2019[^4].
 30 | We also released an [extended technical-report version of this
 31 | paper](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-932.pdf) that includes
 32 | greater implementation detail[^5].
 33 | 
 34 | We published [Cornucopia: Temporal Safety for CHERI
 35 | Heaps](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/2020oakland-cornucopia.pdf) at Oakland 2020, explaining how to implement "sweeping revocation"
 36 | using virtual-memory acceleration[^6].
 37 | 
 38 | We published our paper, [Exploring C Semantics and Pointer
 39 | Provenance](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201901-popl-cerberus.pdf),
 40 | and the implications for software design, at POPL 2019; CHERI C was a case
 41 | study in the practical enforcement of capability provenance
 42 | validity[^7].
 43 | 
 44 | [^1]: Robert N. M. Watson, Peter G. Neumann, Jonathan Woodruff, Michael Roe,
 45 | Hesham Almatary, Jonathan Anderson, John Baldwin, David Chisnall,
 46 | Brooks Davis, Nathaniel Wesley Filardo, Alexandre Joannou, Ben Laurie,
 47 | A. Theodore Markettos, Simon W. Moore, Steven J. Murdoch, Kyndylan Nienhuis,
 48 | Robert Norton, Alex Richardson, Peter Rugg, Peter Sewell, Stacey Son,
 49 | Hongyan Xia. [Capability Hardware Enhanced RISC Instructions: CHERI
 50 | Instruction-Set Architecture (Version
 51 | 7)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-927.pdf), Technical Report
 52 | UCAM-CL-TR-927, Computer Laboratory, June 2019.
 53 | 
 54 | [^2]: Robert N. M. Watson, Simon W. Moore, Peter Sewell, and Peter G. Neumann.
 55 | [An Introduction to
 56 | CHERI](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-941.pdf), Technical
 57 | Report UCAM-CL-TR-941, Computer Laboratory, September 2019.
 58 | 
 59 | [^3]: David Chisnall, Colin Rothwell, Robert N.M. Watson, Jonathan Woodruff,
 60 | Munraj Vadera, Simon W. Moore, Michael Roe, Brooks Davis, and
 61 | Peter G. Neumann. [Beyond the PDP-11: Architectural support for a memory-safe
 62 | C abstract machine](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201503-asplos2015-cheri-cmachine.pdf),
 63 | Proceedings of the Twentieth International Conference on Architectural Support
 64 | for Programming Languages and Operating Systems (ASPLOS 2015), Istanbul,
 65 | Turkey, March 2015.
 66 | 
 67 | [^4]: Brooks Davis, Robert N. M. Watson, Alexander Richardson,
 68 | Peter G. Neumann, Simon W. Moore, John Baldwin, David Chisnall,
 69 | Jessica Clarke, Nathaniel Wesley Filardo, Khilan Gudka, Alexandre Joannou,
 70 | Ben Laurie, A. Theodore Markettos, J. Edward Maste, Alfredo Mazzinghi,
 71 | Edward Tomasz Napierala, Robert M. Norton, Michael Roe, Peter Sewell,
 72 | Stacey Son, and Jonathan Woodruff. [CheriABI: Enforcing Valid Pointer
 73 | Provenance and Minimizing Pointer Privilege in the POSIX C Run-time
 74 | Environment](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201904-asplos-cheriabi.pdf).
 75 | In Proceedings of 2019 Architectural Support for Programming Languages and
 76 | Operating Systems (ASPLOS’19). Providence, RI, USA, April 13-17, 2019.
 77 | 
 78 | [^5]: Brooks Davis, Robert N. M. Watson, Alexander Richardson,
 79 | Peter G. Neumann, Simon W. Moore, John Baldwin, David Chisnall,
 80 | Jessica Clarke, Nathaniel Wesley Filardo, Khilan Gudka, Alexandre Joannou,
 81 | Ben Laurie, A. Theodore Markettos, J. Edward Maste, Alfredo Mazzinghi,
 82 | Edward Tomasz Napierala, Robert M. Norton, Michael Roe, Peter Sewell,
 83 | Stacey Son, and Jonathan Woodruff. [CheriABI: Enforcing valid pointer
 84 | provenance and minimizing pointer privilege in the POSIX C run-time
 85 | environment](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-932.pdf),
 86 | Technical Report UCAM-CL-TR-932, Computer Laboratory, January 2019.
 87 | 
 88 | [^6]: Nathaniel Wesley Filardo, Brett F. Gutstein, Jonathan Woodruff,
 89 | Sam Ainsworth, Lucian Paul-Trifu, Brooks Davis, Hongyan Xia,
 90 | Edward Tomasz Napierala, Alexander Richardson, John Baldwin, David Chisnall,
 91 | Jessica Clarke, Khilan Gudka, Alexandre Joannou, A. Theodore Markettos,
 92 | Alfredo Mazzinghi, Robert M. Norton, Michael Roe, Peter Sewell, Stacey Son,
 93 | Timothy M. Jones, Simon W. Moore, Peter G. Neumann, and Robert N. M. Watson.
 94 | [Cornucopia: Temporal Safety for CHERI
 95 | Heaps](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/2020oakland-cornucopia.pdf).
 96 | In Proceedings of the 41st IEEE Symposium on Security and Privacy (Oakland
 97 | 2020). San Jose, CA, USA, May 18-20, 2020.
 98 | 
 99 | [^7]: Kayvan Memarian, Victor B. F. Gomes, Brooks Davis, Stephen Kell,
100 | Alexander Richardson, Robert N. M. Watson, and Peter Sewell. [Exploring C
101 | Semantics and Pointer
102 | Provenance](https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201901-popl-cerberus.pdf).
103 | In Proceedings of the 46th ACM SIGPLAN Symposium on Principles of Programming
104 | Languages (POPL), Cascais, Portugal, 13-19 January, 2019.
105 | 


--------------------------------------------------------------------------------
/latex/cheri-c-programming.tex:
--------------------------------------------------------------------------------
  1 | \documentclass[12pt,twoside,openright,a4paper]{article}
  2 | \usepackage[inner=25mm,outer=25mm,top=20mm,bottom=20mm]{geometry}
  3 | 
  4 | \usepackage[headings]{fullpage}
  5 | \usepackage[hidelinks]{hyperref}
  6 | 
  7 | \usepackage[font={footnotesize,it}]{caption}
  8 | \usepackage{comment}
  9 | \def\tightlist{} % Ignore some Pandoc LaTeX formatting
 10 | 
 11 | \usepackage{bytefield}
 12 | \usepackage{color}
 13 | \usepackage[scaled=0.8]{DejaVuSansMono}
 14 | \usepackage[T1]{fontenc}
 15 | \usepackage{listings}
 16 | \usepackage{mdframed} % To avoid linebreaks in lstlistings
 17 | \lstnewenvironment{clisting}[1][]{\endgraf\noindent\minipage{\linewidth}\lstset{language={C}}}{\endminipage\endgraf}
 18 | \lstnewenvironment{numberedclisting}[1][]{\endgraf\noindent\minipage{\linewidth}\lstset{language={C},numbers=left,breaklines=true,frame=L,#1}}{\endminipage\endgraf}
 19 | \lstnewenvironment{compilerwarning}[1][]{\endgraf\noindent\minipage{\linewidth}\lstset{language={},breaklines=true,basicstyle=\scriptsize\ttfamily\bfseries,frame=L,#1}}{\endminipage\endgraf}
 20 | 
 21 | \usepackage{subcaption}
 22 | \usepackage{times}
 23 | \usepackage{url}
 24 | \usepackage[svgnames]{xcolor}
 25 | \definecolor{lightgray}{gray}{0.8}
 26 | \usepackage{xspace}
 27 | \usepackage{xfrac}
 28 | 
 29 | \usepackage[nameinlink,noabbrev,capitalise]{cleveref}
 30 | 
 31 | % drawing over lstlistings (code stolen from nwf)
 32 | \usepackage{tikz}
 33 |    \usetikzlibrary{decorations.pathreplacing}
 34 |    \usetikzlibrary{fit}
 35 |    \usetikzlibrary{tikzmark}
 36 |    \usetikzlibrary{calc}
 37 |    \usetikzlibrary{patterns}
 38 | \newcommand*{\vcpgfmark}[1]{\ensuremath{\vcenter{\hbox{\pgfmark{#1}}}}}
 39 | % GBP symbol should be safe since it's easy to enter (at least on a UK keyboard) and won't be in any valid lstlistings
 40 | \lstset{escapechar=£} % Note: ensure this doesn't occur in any of the code
 41 | \newcommand{\TikzListingHighlight}[3][]{\tikz[overlay,remember picture]{\draw[\ifstrempty{#1}{yellow}{#1}, line width=10pt,opacity=0.3](#2) -- (#3);}}
 42 | \newcommand*{\TikzListingHighlightStartEnd}[2][]{\tikz[overlay,remember picture]{\draw[\ifstrempty{#1}{yellow}{#1}, line width=10pt,opacity=0.3](pic cs:Start#2) -- (pic cs:End#2);}}
 43 | 
 44 | 
 45 | \renewcommand{\UrlFont}{\ttfamily\small}
 46 | 
 47 | \newcommand{\baselineboxformatting}[1]{%
 48 |   % Measure size of contents
 49 |   \sbox0{#1}%
 50 |   % Use the difference between the contents' height and the bitbox's height,
 51 |   % clamped to [-.44\baselineskip, 0], as our minimum depth.
 52 |   \setlength{\skip0}{\ht0 - \height}%
 53 |   \ifdim\skip0>0pt%
 54 |     \setlength{\skip0}{0}%
 55 |   \else%
 56 |     \ifdim\skip0<-.44\baselineskip%
 57 |       \setlength{\skip0}{-.44\baselineskip}%
 58 |     \fi%
 59 |   \fi%
 60 |   \centering\rule[\skip0]{0pt}{\height}#1}
 61 | \bytefieldsetup{boxformatting=\baselineboxformatting}
 62 | 
 63 | \lstset{basicstyle=\footnotesize\ttfamily}
 64 | %\newcommand{\ccode}[1]{\lstinline[language={C}]{#1}}
 65 | %\newcommand{\cxxcode}[1]{\lstinline[language={C++}]{#1}}
 66 | \newcommand{\ccode}[1]{{\small\ttfamily{#1}}}
 67 | \newcommand{\cxxcode}[1]{{\ccode{#1}}}
 68 | \newcommand{\cconst}[1]{{\ccode{#1}}}
 69 | \newcommand{\cfunc}[1]{{\ccode{#1()}}}
 70 | \newcommand{\cvar}[1]{{\ccode{#1}}}
 71 | \newcommand{\pathname}[1]{{\ccode{#1}}}
 72 | \newcommand{\commandline}[1]{{\ccode{#1}}}
 73 | 
 74 | \renewcommand{\texttt}[1]{{\ccode{#1}}}
 75 | 
 76 | \newcommand{\ptrdifft}{{\ccode{ptrdiff\_t}}\xspace}
 77 | \newcommand{\maxalignt}{{\ccode{max\_align\_t}}\xspace}
 78 | \newcommand{\sizet}{{\ccode{size\_t}}\xspace}
 79 | \newcommand{\ssizet}{{\ccode{ssize\_t}}\xspace}
 80 | \newcommand{\ptraddrt}{{\ccode{ptraddr\_t}}\xspace}
 81 | \newcommand{\cuintptrt}{{\ccode{uintptr\_t}}\xspace}
 82 | \newcommand{\cintptrt}{{\ccode{intptr\_t}}\xspace}
 83 | \newcommand{\ccharstar}{{\ccode{char *}}\xspace}
 84 | \newcommand{\cvoidstar}{{\ccode{void *}}\xspace}
 85 | \newcommand{\clongt}{{\ccode{long}}\xspace}
 86 | \newcommand{\cintt}{{\ccode{int}}\xspace}
 87 | \newcommand{\cintttt}{{\ccode{int32\_t}}\xspace}
 88 | \newcommand{\cintsft}{{\ccode{int64\_t}}\xspace}
 89 | 
 90 | \newcommand{\SIGPROT}{{\ccode{SIGPROT}}\xspace}
 91 | 
 92 | \usepackage{xstring}
 93 | \IfSubStr*{\jobname}{techreport}{
 94 |   %%%% Enable University of Cambridge tech-report formatting
 95 |   %%%% Use \ifdefined\trformat in document to check the mode
 96 |   \def\trformat{}
 97 | }{
 98 |   % Default is no tech-report formatting
 99 | }
100 | 
101 | % typeset C++ sensibly
102 | \usepackage{relsize}
103 | \newcommand*{\cpp}[1][]{C\textsmaller[2]{\nolinebreak[4]\hspace{-.05em}\raisebox{.45ex}{\textbf{++}}}}
104 | % And another macro for sensible PDF metadata:
105 | \newcommand*{\cppInHeader}[1][]{\texorpdfstring{\cpp{}}{C++}}
106 | \newcommand*{\purecapCOrCpp}[1]{CHERI C/\cpp{}}
107 | 
108 | \hyphenation{Free-BSD}
109 | \hyphenation{Free-RTOS}
110 | \hyphenation{Cheri-BSD}
111 | \hyphenation{Cheri-Free-RTOS}
112 | \hyphenation{Cheri-ABI}
113 | \hyphenation{Web-Kit}
114 | \hyphenation{Postgre-SQL}
115 | 
116 | \title{CHERI C/\cppInHeader{} Programming Guide}
117 | \author{Robert N. M. Watson$^*$, Alexander Richardson$^*$,
118 |   Brooks Davis$^\dagger$, \\
119 |   John Baldwin$^\ddagger$, David Chisnall$^\S$, Jessica Clarke$^*$,
120 |   Nathaniel Filardo$^*$, \\
121 |   Simon W. Moore$^*$,  Edward Napierala$^*$, Peter Sewell$^*$, and \\
122 |   Peter G. Neumann$^\dagger$ \\
123 |   \\
124 |   $^*$University of Cambridge, $^\dagger$SRI International, \\
125 |   $^\ddagger$Ararat River Consulting, LLC and $^\S$Microsoft Research}
126 | 
127 | 
128 | \begin{document}
129 | \sloppy
130 | 
131 | %% Cambridge CL tech-report format provides its own cover page, so skip the
132 | %% title page for the TR format.
133 | \ifdefined\trformat
134 |   % no title page
135 | \else
136 |   \maketitle
137 | \fi
138 | 
139 | %% Cambridge CL tech-report format requires page numbering to start at 3.
140 | \ifdefined\trformat
141 |   \setcounter{page}{3}
142 | \fi
143 | 
144 | %
145 | % Keep Abstract in sync with the Introduction.
146 | %
147 | \newpage
148 | \begin{abstract}
149 | \input{latex/abstract}
150 | \end{abstract}
151 | 
152 | \newpage
153 | \setcounter{tocdepth}{2}
154 | \tableofcontents
155 | 
156 | \newpage
157 | 
158 | 
159 | \input{latex/sections}
160 | 
161 | \end{document}
162 | 


--------------------------------------------------------------------------------
/src/impact/bitwise-operations.md:
--------------------------------------------------------------------------------
 1 | ## Bitwise operations on capability types
 2 | 
 3 | In most cases bitwise operations &mdash; such as those used to store or clear flags
 4 | in the lower bits of pointers to well-aligned allocations &mdash; will result in the expected `uintptr_t` value being created.
 5 | However, there are some corner cases where the result may be a tagged (but out-of-bounds)
 6 | capability when an integer value is expected. <!--
 7 | \arnote{TODO: add an example. Maybe the mutex example checking low pointer bits + some alignment checks?}
 8 | -->
 9 | Dually, bitwise operations may also result in the loss of tags if intermediate results become unrepresentable (recall [Out-of-bounds pointers](out-of-bounds-pointers.html)).[^7]
10 | 
11 | Most bitwise operations on `uintptr_t` fall into one of three categories for which we provide
12 | higher-level abstractions.
13 | 
14 | **Aligning pointer values**:
15 | If the C code is attempting to align a pointer or check the alignment of pointers,
16 | the following compiler builtins should be used instead:
17 | 
18 | * **`T __builtin_align_down(T ptr, size_t alignment)`**:
19 |   This builtin returns `ptr` rounded down to the next multiple of `alignment`.
20 | * **`T __builtin_align_up(T ptr, size_t alignment)`**:
21 |   This builtin returns `ptr` rounded up to the next multiple of `alignment`.
22 | * **`_Bool __builtin_is_aligned(T ptr, size_t alignment)`**:
23 |   This builtin returns `true` if `ptr` is aligned to at least `alignment` bytes.
24 | 
25 | <!--
26 | \rwnote{It would be nice if we had, and could document here, `cheri_` versions
27 |   of these macros.}
28 | \arnote{Probably best to use the `__builtin` versions since that also works for upstream clang.}
29 | -->
30 | 
31 | One advantage of these builtins compared to `intptr_t` arithmetic is that they preserve the
32 | type of the argument and can therefore remove the need for intermediate casts to `uintptr_t`.
33 | Moreover, using these builtins allows for improved compiler diagnostics and can result in better code-generation compared to hand-written functions or macros.
34 | We have submitted these builtins as part of the upstream Clang 10.0 release, so they can also be used for code that does not depend on CHERI.
35 | 
36 | <!--
37 | \arnote{Should I include some of the documentation I wrote for upstream LLVM? (\url{https://clang.llvm.org/docs/LanguageExtensions.html\#alignment-builtins})}
38 | -->
39 | 
40 | **Storing additional data in pointers**: <!--
41 | \label{sec:low-pointer-bits}
42 | -->
43 | In many cases the minimum alignment of pointer values is known and therefore
44 | programmers assume that the low bits (which will always be zero) can be
45 | used to store additional data.[^8]
46 | Unused high pointer bits cannot be used for additional metadata since toggling them causes a large change to the address field, and capabilities that are significantly far out-of-bounds cannot be represented (see
47 | [Out-of-bounds pointers](out-of-bounds-pointers.html)).
48 | 
49 | The compiler-provided header `<cheri.h>` provides explicit macros for this
50 | use of bitwise arithmetic on pointers.
51 | The use of these macros is currently optional,[^9]
52 | but we believe that they can improve readability compared to hand-written bitwise operations.
53 | Additionally, the bitwise-AND operation is ambiguous since it can be used both to clear bits (which should return a provenance-carrying `uintptr_t`) and to check bits (which should return an integer value).
54 | In complex nested expressions, these macros can avoid ambiguous provenance sources (see [Ambiguous provenance](../compiler/ambiguous-provenance.html)) since they show the compiler which intermediate results can carry provenance.
55 | 
56 | * **`uintptr_t cheri_low_bits_clear(uintptr_t ptr, ptraddr_t mask)`**:
57 |   This function clears the low bits of `ptr` in the same way as `ptr & ~mask`.
58 |   It returns a new `uintptr_t` value that can be used for memory accesses when cast to a pointer.
59 |   `mask` should be a bitwise-AND mask less than `_Alignof(ptr)`.
60 | * **`ptraddr_t cheri_low_bits_get(uintptr_t ptr, ptraddr_t mask)`**:
61 |   This function returns the low bits of `ptr` in the same way as `ptr & mask`.
62 |   It should be used instead of the raw bitwise operation since it can never return
63 |   an unexpectedly tagged value.
64 |   `mask` should be a bitwise-AND mask less than `_Alignof(ptr)`.
65 | * **`uintptr_t cheri_low_bits_or(uintptr_t ptr, ptraddr_t bits)`**:
66 |   This function performs a bitwise-OR of `ptr` with `bits`.
67 |   In order to retain compatibility with a non-CHERI architecture, `bits` should be less than the known alignment of `ptr`.
68 | * **`uintptr_t cheri_low_bits_set(uintptr_t ptr, ptraddr_t mask, ptraddr_t bits)`**:
69 |   This function sets the low bits of `ptr` to `bits` by clearing the low bits in `mask` first.
70 | 
71 | **Computing hash values**:
72 | The compiler will also warn when operators such as modulus or shifts are used on
73 | `uintptr_t`. This usually indicates that the pointer is being used as the input to a hash
74 | function or similar computations.
75 | In this case, the programmer should not be using `uintptr_t` but instead cast the pointer
76 | to `ptraddr_t` and perform the arithmetic on this type instead.
77 | 
78 | [^7]: Previous versions of the compiler used the capability offset (address
79 | minus base) instead of the address for arithmetic on `uintptr_t`.
80 | This often resulted in unexpected results and therefore we switched to using
81 | the address in `uintptr_t` arithmetic instead.
82 | The old offset-based mode may be interesting for garbage collected C where
83 | addresses are less useful and therefore it can still be enabled by
84 | passing `-cheri-uintcap=offset`.
85 | However, this may result in significantly reduced compatibility with legacy C code.
86 | 
87 | [^8]: CHERI actually provides many more usable bits than a conventional architecture.
88 | In the current implementation of 128-bit CHERI, any bit between<!--
89 | \psnote{inclusive?} --> the least
90 | significant and the 9th least significant bit may be toggled without causing
91 | the tag to be cleared in pointers that point to the beginning of an allocation (i.e., whose *offset* is zero).  <!--
92 | \psnote{This is confusing &mdash; not clearing the tag isn't the same as not destroying part of the pointer data...} -->
93 | If the pointer is strongly aligned, further bits may be toggled without clearing the tag.
94 | <!--
95 | \nwfnote{But the macros only permit the use of the bottom 5.  We should say that somewhere.}
96 | -->
97 | 
98 | [^9]: Until recently, not using these macros could result in subtle bugs at run time since pointer equality comparisons included the tag bit in addition to the address.
99 | 


--------------------------------------------------------------------------------
/src/impact/recommended-use-c-types.md:
--------------------------------------------------------------------------------
  1 | ### Recommended use of C-language types
  2 | <!--
  3 | \label{sec:recommended-c-types}
  4 | -->
  5 | 
  6 | As confusion frequently arises about the most appropriate types to use for
  7 | integers, pointers, and pointer-related values, we make the following
  8 | recommendations:
  9 | 
 10 | * **`int`, `int32_t`, `long`, `int64_t`, ...**: These pure integer types
 11 |   should be used to hold integer values
 12 |   that will never be cast to a pointer type without first combining them with
 13 |   another pointer value &mdash; e.g., by using them as an array offset.
 14 |   Most integers in a C/C++-language program will be of these types.
 15 | 
 16 | * **`ptraddr_t`**: This is a new integer type introduced by CHERI C and should be
 17 |   used to hold addresses.
 18 |   `ptraddr_t` should not be directly cast to a pointer type for
 19 |   dereference; instead, it must be combined with an existing valid capability
 20 |   to the address space to generate a dereferenceable pointer.
 21 |   Typically, this is done using the `cheri_address_set(c, x)` function.
 22 | 
 23 | * **`size_t`, `ssize_t`**: These integer types should be used
 24 |   to hold the unsigned or signed lengths of regions of address space.
 25 | <!--
 26 |   \arnote{\sizet not necessarily the same as unsigned `ptrdiff_t`.}
 27 | -->
 28 | 
 29 | * **`ptrdiff_t`**: This integer type describes the difference of indices
 30 |   between two pointers to elements of the same array, and should not be used
 31 |   for any other purpose.
 32 |   It can be added to a pointer to obtain a new pointer, but the result will
 33 |   be dereferenceable only if the address lies within the bounds of the
 34 |   pointer from which it was derived.
 35 | 
 36 |   <!--
 37 |   \note{Isn't that last sentence true of any combination?}{nwf}
 38 |   -->
 39 | 
 40 |   Less standards-compliant code sometimes uses `ptrdiff_t` when the
 41 |   programmer more likely meant `intptr_t` or (less commonly)
 42 |   `size_t`.
 43 |   When porting code, it is worthwhile to audit use of `ptrdiff_t`.
 44 | 
 45 |   <!--
 46 |   \note{Should we recommend that \sizet be used to hold lengths of
 47 |   allocations and \ptrdifft be used to talk about spans of
 48 |   address space (e.g., the offsets between two subobjects of an allocation)?  I feel
 49 |   like the recommendations here are not as concrete as I'd like.}{nwf}
 50 |   -->
 51 | 
 52 | * **`intptr_t`, `uintptr_t`**: These integer types should be
 53 |   used to hold values that may be valid pointers if cast back to a pointer
 54 |   type.
 55 |   When an `intptr_t` is assigned an integer value &mdash; e.g., due to
 56 |   constant initialization to an integer in the source &mdash; and the result is
 57 |   cast to a pointer type, the pointer will be invalid and hence
 58 |   non-dereferenceable.
 59 |   These types will be used in two cases: (1) Where there is uncertainty as to
 60 |   whether the value to be held will be an integer or a pointer &mdash; e.g., for an
 61 |   opaque argument to a callback function; or (2) Where it is more convenient
 62 |   to place a pointer value in an integer type for the purposes of arithmetic
 63 |   (which takes place on the capability's address and in units of bytes, as if
 64 |   the pointer had been cast to `char *`).
 65 | 
 66 |   The observable, integer range of a `uintptr_t` is the same as
 67 |   that of a `ptraddr_t` (or `ptrdiff_t` for `intptr_t`), despite the increased
 68 |   *alignment* and *storage* requirements.
 69 | 
 70 | * **`intmax_t`, `uintmax_t`**: According to the C standard, <!--
 71 |   \arnote{7.20.1.5 Greatest-width integer types}
 72 |   -->
 73 |   these integer types should be *capable of representing any value of any (unsigned) integer type*.
 74 |   In CHERI C/C++, they are not provenance-carrying and can represent the integer *range* of `uintptr_t`/`intptr_t`, but not the capability metadata or tag bit.
 75 |   As the observable value of `uintptr_t`/`intptr_t` is the pointer address
 76 |   range, we believe this choice to be compatible with the C standard.
 77 | 
 78 |   Additionally, due to ABI constraints, it would be extremely difficult to change the width of these types from 64 to 129 bits.
 79 |   This is also true for other architectures such as x86: despite Clang and GCC supporting an `__int128` type, `intmax_t` remains 64 bits wide.
 80 | 
 81 |   We generally do not recommend use of these types in CHERI C/C++.
 82 |   However, the types may be useful in `printf` calls (using the `%j` format string width modifier) as the `inttypes.h` `PRI*` macros can be rather verbose.
 83 | 
 84 | * **`max_align_t`**: This type is defined in C as *an object type whose alignment is the greatest fundamental alignment*
 85 |   and this includes capability types for CHERI C/C++.  <!--
 86 |   \arnote{C2x \S{}7.19.2} 
 87 |   % and in C++ as a \enquote{type whose alignment requirement is at least as great as that of every scalar type}\arnote{C++17 \S{}21.2.4p5}
 88 |   -->
 89 |   We found that some custom allocators use `sizeof(long double)` or `sizeof(uint64_t)` to align their return values.
 90 |   While this appears to work on most architectures, in CHERI C/C++ this must be changed to `alignof(max_align_t)`.[^1]
 91 | 
 92 | * **`char *`, ...**: These pointer types are suitable for
 93 |   dereference, but in general <!--
 94 |   \psnote{that "in general" makes me wonder about the exceptions?}
 95 |   \arnote{The only exception I can think of is requiring `void *` due to bad API design (callback parameters, etc).}
 96 |   -->
 97 |   should not be cast to or from arbitrary integer
 98 |   values.
 99 |   Valid pointers are always derived from other valid pointers (including those cast to `intptr_t` or `uintptr_t`), and cannot be
100 |   constructed using arbitrary integer arithmetic.
101 | 
102 | It is important to note that `uintptr_t` is no longer the same size as
103 | `size_t`. This difference may require making some changes to
104 | existing code to use the correct type depending on whether the variable
105 | needs to be able to store a pointer type. In cases where this is not obvious
106 | (such as for a callback argument), we recommend the use of `uintptr_t`.
107 | This ensures that provenance is maintained.
108 | 
109 | <!--
110 | \pgnnote{The above section begs questions relating to what is the
111 |   responsibility of programmers and what can be aided or managed by
112 |   compilers.  Ideally, the latter would be preferable to requiring
113 |   programmers to understand things are possibly beyond their so-called
114 |   experience.}
115 | -->
116 | 
117 | [^1]: It is important to use `alignof` instead of `sizeof` since many
118 | common implementations, such as GCC and FreeBSD, define `max_align_t` as a
119 | `struct` and not a `union`.
120 | 


--------------------------------------------------------------------------------
/src/limitations/compile-time-type-uncertainty.md:
--------------------------------------------------------------------------------
  1 | ## Compile-time uncertainty on pointer types
  2 | 
  3 | CHERI C/C++ provide strong dynamic differentiation of pointer and integer
  4 | values by virtue of the capability tag, which prevents their confusion at run
  5 | time.
  6 | For example, C code intended to increment a pure integer type will clear the
  7 | tag on a previously valid pointer, preventing its future dereference.
  8 | However, there are some necessary situations in which there is ambiguity --
  9 | perhaps required by the language specification, or perhaps just idiomatic use
 10 | -- and either integer or pointer values must be loaded or stored.
 11 | These fall into two common cases: Capability-oblivious copying, and explicit
 12 | type ambiguity.
 13 | 
 14 | ### Capability-oblivious memory copying
 15 | 
 16 | Memory copying is both explicit and implied in the C and C++ languages, and
 17 | also a construct that programmers can implement.
 18 | 
 19 | The C `memcpy()` API copies a fixed quantity of data from one memory location
 20 | to another.
 21 | In CHERI C/C++, `memcpy()` is capability-oblivious: It is not, in general,
 22 | possible to know whether the originating memory should or does contain
 23 | capabilities, or whether the destination should or can accept their storage.
 24 | For example, a pointer to a structure that does contain a pointer field could
 25 | be cast to `void *`, losing that information.
 26 | Similarly, a pointer to an array of integer types, and no pointer fields, could
 27 | be cast to `void *`, losing that information.
 28 | While a manual copy of fields might do so using variables that do (or do not)
 29 | preserve tagged values, `memcpy()` implementations must be *capability
 30 | oblivious*: They copy any capabilities present, preserving rather than
 31 | stripping tags.
 32 | 
 33 | The situation is further complicated by compiler optimizations that may either
 34 | inline or outline `memcpy()`.
 35 | For example, a large structure assignment may appear to be type aware,
 36 | generating a series of suitably typed loads and stores, preserving or
 37 | stripping tagged values as appropriate, but the compiler is permitted to
 38 | replace that sequence with a call to `memcpy()` that will preserve tags even
 39 | if the source or destination types would not permit it.
 40 | 
 41 | Finally, there are many APIs in C, common libraries, and applications that are
 42 | in fact `memcpy()` implementations that must similarly be oblivious to dynamic
 43 | enforcement for the same reason.
 44 | For example, `qsort()` might be used on structures that contain pointers, and
 45 | therefore must preserve pointer types.
 46 | This imposes both a compatibility burden (custom memory-copying routines
 47 | require adaptation to preserve pointers) and also in effect causes capability
 48 | values to be propagated even if the C types themselves would not generally
 49 | cause that to take place.
 50 | 
 51 | **Advice to developers**: In general, C APIs such as `memcpy()`, and in fact
 52 |   structure assignment statements, can be assumed to always preserve pointers
 53 |   when they need to, but may also preserve them when not expected to.
 54 |   If it is important to prevent propagation, use the
 55 |   `cheri_perms_and()` API to strip the `CHERI_PERM_LOAD_CAP` permission before
 56 |   passing it to a routine that may perform a memory copy.
 57 |   In the CheriBSD kernel, which frequently needs to limit the flow of
 58 |   capabilities, `memcpynocap()` exists as a wrapper to this.
 59 | 
 60 | **Ongoing research**: SRI/Cambridge are continuing our research into the
 61 |   effects of compiler optimisations and when to constrain optimisations to
 62 |   better enforce protection properties.
 63 |   However, the tradeoffs here are tricky given the practical goal of minimizing
 64 |   source-code disruption.
 65 |   It may be useful to add a new `memcpy_nocap()` API usable by both userlevel
 66 |   and the kernel.
 67 | 
 68 | ### Intentional integer-pointer type ambiguity
 69 | 
 70 | Sometimes, programmers require an integer type that can be used to hold both
 71 | integer and pointer values, and further, require that pointer arithmetic
 72 | performed on that type result in a dereferenceable pointer.
 73 | This is typically performed using the types `intptr_t` and `uintptr_t`, which
 74 | will frequently be found in software such as language runtimes, but also in
 75 | code implementing *callbacks* or similar programming behaviors where arbitrary
 76 | arguments or return values must be passed around by code not aware of the true
 77 | data types being used.
 78 | Stripping the tag on values calculated via these types will seriously disrupt
 79 | real-world source code.
 80 | When these types are used in CHERI C/C++, there are two important implications
 81 | with programmer impact:
 82 | 
 83 |   1. Capability-sized storage will be allocated, rather than that of the
 84 |      largest integer type, which can be confusing; i.e., `sizeof(intptr_t)`
 85 |      may not be the same as `sizeof(intmax_t)`.
 86 |      Further, if these types have been used extensively, perhaps in preference
 87 |      to other integer types, this can lead to a significant memory overhead
 88 |      beyond that seen just from increasing the size of pointer types.
 89 | 
 90 |   2. Instructions are therefore used that will preserve the tag on a
 91 |      capability dynamically by virtue of using arithmetic instructions normally
 92 |      used only for pointer types.
 93 |      However, this means that CHERI C/C++ are not able to provide certain
 94 |      types of dynamic integer-pointer type-confusion prevention, as the types
 95 |      are inherently ambiguous.
 96 | 
 97 |      For example, while with non-`intptr_t` integer types, the tag will always
 98 |      be cleared when its arithmetic operations are applied to a pointer, this
 99 |      is not true when `intptr_t` is used for integers.
100 |      If `intptr_t` is used extensively for integer types (e.g., as the atom
101 |      type in a language runtime), then the opportunity for dynamic confusion
102 |      is restored: arithmetic operations intended only to operate on integer
103 |      values will also operate on pointers preserving the tag.
104 | 
105 | It is worth further noting that the C types `long` and `unsigned long` have
106 | historically been used for these purposes, although that has been discouraged
107 | for many years.
108 | Code using `long` and `unsigned long` to hold pointer values in CHERI C/C++
109 | will not preserve tags, and hence casting a pointer via `long` or `unsigned
110 | long` will lead to the pointer no longer being dereferenceable.
111 | 
112 | **Advice to developers**: `intptr_t` and `uintptr_t` should be used only where
113 |   essential to achieving the programming goals of either holding a pointer or
114 |   integer in the same type (perhaps as an opaque argument), or to enable more
115 |   rich forms of arithmetic on pointers.
116 |   Where programmers wish to compute on the address of pointers without provenance, `ptraddr_t` should be used to make this clear.
117 |   Pointers can be unambiguously reconstructed using `ptraddr_t` computations and `cheri_address_set()`.
118 |   `ptraddr_t` is currently under consideration for standardization as paper [P3744R0](https://isocpp.org/files/papers/P3744R0.html).
119 |   `long` and `unsigned long` should never be used to hold pointers that must
120 |   remain dereferenceable.
121 | 
122 | **Advice to developers**: `ptraddr_t` should be used in place of `long`,
123 | `unsigned long` or `uint64_t` where an integer type is required to hold
124 | a virtual address.
125 | As [previously introduced](../impact/recommended-use-c-types.md), `ptraddr_t`
126 | is not dereferenceable on CHERI, and must be combined with a valid capability
127 | to generate a dereferenceable pointer.
128 | 


--------------------------------------------------------------------------------
/src/impact/subobject-bounds.md:
--------------------------------------------------------------------------------
  1 | ### Opportunistic subobject bounds
  2 | 
  3 | CHERI C/C++ also supports opportunistically restricting the
  4 | bounds when a pointer is taken to a subobject &mdash; for example, an array
  5 | embedded within another structure that itself has been heap allocated.
  6 | Subject to limitations arising from imprecise bounds (see
  7 | [Bounds precision](../background/cheri-capabilities.md#bounds-precision)),
  8 | this will prevent an overflow on that array from affecting the remainder of
  9 | the structure, improving spatial safety.
 10 | 
 11 | Subobject bounds are not enabled by default as they may require additional source code changes
 12 | for compatibility, but can be enabled using the `-Xclang -cheri-bounds=subobject-safe` compiler flag.
 13 | This is an active area of research, with consideration being given to enabling
 14 | a subset of subobject bounds checks by default in the future due to the
 15 | measurable security benefit and relatively low adoption friction.
 16 | 
 17 | One example of C code that requires changes for subobject bounds is the `containerof`
 18 | pattern, in which pointer arithmetic on a pointer to a subobject is used to
 19 | recover a pointer to the container object &mdash; for example, as seen in the
 20 | widely used BSD `queue.h` linked-list macros or the generic C
 21 | hash-table implementation, `uthash.h`.
 22 | 
 23 | In these cases, an opt-out annotation can be applied to a given type, field or variable
 24 | that instructs the compiler to not tighten bounds when creating pointers to subobjects.
 25 | We currently define three opt-out annotations that can be used to allow
 26 | existing code to disable use of subobject bounds:
 27 | 
 28 | **Completely disable subobject bounds**: It is possible to annotate a typedef,
 29 | record member, or variable declaration with:
 30 | 
 31 | ```{.clisting}
 32 | __attribute__((cheri_no_subobject_bounds))
 33 | ```
 34 | 
 35 | to indicate that the compiler should not tighten bounds when taking the address or a C++ reference. In C++11/C23 mode this can also be spelled as `[[cheri::no_subobject_bounds]]`.
 36 | 
 37 | ```{.clisting}
 38 | struct str {
 39 |     /*
 40 |      * Nul-terminated string array -- pointers taken to this subobject will
 41 |      * use the array's bounds, not those of the container structure.
 42 |      */
 43 |     char               str_array[128];
 44 | 
 45 |     /*
 46 |      * Linked-list entry element -- because of the additional attribute,
 47 |      * pointers taken to this subobject will use the container structure's
 48 |      * bounds, not those of the specific field.
 49 |      */
 50 |     struct list_entry  str_le __attribute__((cheri_no_subobject_bounds));
 51 | } str_instance;
 52 | 
 53 | void
 54 | fn(void)
 55 | {
 56 |     /* Struct pointer gets bounds of str_instance. */
 57 |     struct str *strp = &str_instance;
 58 | 
 59 |     /* Character pointer gets bounds of the subobject, not str_instance. */
 60 |     char *c = str_instance.str_array;
 61 | 
 62 |     /* Struct pointer gets bounds of str_instance, not the subobject. */
 63 |     struct list_entry *lep = &str_instance.str_le;
 64 | }
 65 | ```
 66 | 
 67 | **Disable subobject bounds in specific expressions**:
 68 | It is also possible to opt out of bounds-tightening on a per-expression
 69 | granularity by casting to an annotated type:
 70 | 
 71 | ```{.clisting}
 72 | char *foo(struct str *strp) {
 73 |     return (&((__attribute__((cheri_no_subobject_bounds))struct str *)
 74 |         strp)->str_array);
 75 | }
 76 | ```
 77 | 
 78 | **Use remaining allocation size**:
 79 | In certain cases, the size of the subobject is not known, but we still know that data
 80 | before the field member will not be accessed (e.g., variable size array members
 81 | inside structs).
 82 | Pre-C99 code will declare such members as fixed-size arrays, which will cause
 83 | a hardware exception if the allocation does not grant access to that many bytes.
 84 | [^5]
 85 | To use the remaining allocation size instead of completely disabling bounds
 86 | (and thus protecting against buffer underflows) the annotation:
 87 | 
 88 | ```{.clisting}
 89 | __attribute__((cheri_subobject_bounds_use_remaining_size))
 90 | ```
 91 | 
 92 | can be used.
 93 | When targeting C++11/C23:
 94 | 
 95 | ```{.clisting}
 96 | [[cheri::subobject_bounds_use_remaining_size]]
 97 | ```
 98 | 
 99 | is also supported.
100 | Examples of this pattern include FreeBSD's `struct dirent`, which uses
101 | `char d_name[255]` for an array that is actually of variable size, with
102 | the containing allocation (e.g., of the heap) being sized to allow additional
103 | space for array entries regardless of size in the type definition.
104 | For example:
105 | 
106 | ```{.clisting}
107 | struct message {
108 |     int     m_type;
109 | 
110 |     /*
111 |      * Variable-length character array -- because of the additional
112 |      * attribute, pointers taken to this subobject will have a lower bound
113 |      * at the first address of the array, but retain an upper bound of the
114 |      * allocation containing the array, rather than 252 bytes higher.
115 |      */
116 |     char    m_data[252]
117 |                  __attribute__((cheri_subobject_bounds_use_remaining_size));
118 | };
119 | ```
120 | 
121 | The use of subobject bounds imposes additional compatibility constraints on
122 | existing C and C++ code.
123 | While we have not encountered many issues related to subobject bounds in
124 | existing code, it does slightly increase the porting effort.
125 | 
126 | ## Effects of imprecise bounds
127 | 
128 | Subobject bounds are considered *opportunistic* because it may not be possible
129 | to prevent aliasing within the bounds of a subobject pointer without
130 | disturbing the binary layout policy for containing structures to permit
131 | greater alignment and padding.
132 | This particularly affects larger objects embedded within otherwise short
133 | structures, such as large buffers with a short header.
134 | Furthermore, variable-size structures pose a challenge because their size is
135 | determined at run-time and the code requires explicit changes to the layout and
136 | at the point of allocation to ensure representability.
137 | 
138 | This is an active area of research.
139 | The problem of subobject bounds imprecision is also found in other programming
140 | patterns, where an allocation is subdivided into multiple chunks, without any
141 | cooperation from the allocator. We refer to these patterns as
142 | intra-allocation bounds.
143 | 
144 | There are multiple approaches to address subobject bounds imprecision.
145 | In general, precise bounds can be achieved by separately heap
146 | allocating storage for each imprecise structure member, rather than embedding
147 | them in the same allocation. This has trade-offs with respect to the added
148 | complexity of managing an additional allocation, as well as additional indirection.
149 | In some limited cases, ordering structure fields can also assist with bounds
150 | precision for subobjects.
151 | 
152 | In the future, new compiler modes may be supported that allow fail stops to
153 | occur if non-aliasing is not achieved, or to implement required alignment and
154 | padding additions -- which may have significant memory overheads.
155 | We are exploring potential improvements to compiler warnings and errors to
156 | assist developers in debugging structure layouts that may lead to imprecise
157 | bounds, a fail stop, or potentially unacceptable memory overhead.
158 | 
159 | Bounds precision of variable-size structures is determined by the offset
160 | and size of the last member. This is more complicated to address with
161 | compile-time warnings, because the size is not known.
162 | Multiple approaches are possible in this case as well.
163 | One option is to continue best-effort for variable size structs, specifically
164 | for the variable-size member, to maximise source code compatibility.
165 | A compiler option could control the exact bounds behaviour for variable-size
166 | structure members, so that the programmer can opt-in to the fail-open behaviour.
167 | It is also possible to introduce annotations on variable-size members that specify
168 | the maximum expected size, so that the compiler can insert the appropriate
169 | amount of padding.
170 | 
171 | Finally, note that variable-size structures can only exhibit bounds aliasing on
172 | the base, because the variable-size member is necessarily at the end of the
173 | structure and, assuming the allocator is well-behaved, any rounding on the top
174 | will only alias with extra padding space that is already part of the
175 | representability padding for the whole allocation.
176 | 
177 | [^5]: If flexible array members are declared using the C99 syntax with empty
178 | square brackets, the compiler will automatically use the remaining allocation
179 | size.
180 | 
181 | 


--------------------------------------------------------------------------------