├── .circleci
    └── config.yml
├── .github
    └── workflows
    │   ├── archive.yml
    │   ├── ghpages.yml
    │   └── publish.yml
├── .gitignore
├── .note.xml
├── CONTRIBUTING.md
├── LICENSE.md
├── Makefile
├── README.md
├── draft-ietf-mls-architecture-14.md
├── draft-ietf-mls-architecture.md
└── rfc9750-draft.xml


/.circleci/config.yml:
--------------------------------------------------------------------------------
  1 | version: 2  # CircleCI configuration schema version
  2 | jobs:
  3 |   build:  # the only job: builds drafts, publishes, archives issues, refreshes caches
  4 |     docker:
  5 |       - image: martinthomson/i-d-template:latest
  6 |     working_directory: ~/draft  # its .git directory is what "Saving Cache - Git" stores
  7 | 
  8 |     steps:
  9 |       - run:  # print the versions of xml2rfc, kramdown-rfc2629 and mmark
 10 |           name: "Print Configuration"
 11 |           command: |
 12 |             xml2rfc --version
 13 |             gem list -q kramdown-rfc2629
 14 |             echo -n 'mmark '; mmark --version
 15 | 
 16 |       - restore_cache:  # keys are listed from most specific to least specific
 17 |           name: "Restoring cache - Git"
 18 |           keys:
 19 |             - v2-cache-git-{{ .Branch }}-{{ .Revision }}
 20 |             - v2-cache-git-{{ .Branch }}
 21 |             - v2-cache-git-
 22 | 
 23 |       - restore_cache:
 24 |           name: "Restoring cache - References"
 25 |           keys:
 26 |             - v1-cache-references-{{ epoch }}  # presumably never an exact hit (timestamped); TODO confirm
 27 |             - v1-cache-references-  # prefix fallback shared with the key saved by "Saving Cache - Drafts"
 28 | 
 29 |       # Workaround for https://discuss.circleci.com/t/22437
 30 |       - run:  # tag builds with a cached .git: force-fetch the tag, or drop the cache if that fails
 31 |           name: Tag Checkout
 32 |           command: |
 33 |             if [ -n "$CIRCLE_TAG" ] && [ -d .git ]; then
 34 |               remote=$(echo "$CIRCLE_REPOSITORY_URL" | \
 35 |                        sed -e 's,^git.github.com:,https://github.com/,')
 36 |               git fetch -f "$remote" "refs/tags/$CIRCLE_TAG:refs/tags/$CIRCLE_TAG" || \
 37 |                 (echo 'Removing .git cache for tag build'; rm -rf .git)
 38 |             fi
 39 | 
 40 |       - checkout
 41 | 
 42 |       # Build txt and html versions of drafts
 43 |       - run:
 44 |           name: "Build Drafts"
 45 |           command: "make 'CLONE_ARGS=--reference ~/git-reference'"  # CLONE_ARGS is passed to the Makefile's git clone / submodule update
 46 | 
 47 |       # Update editor's copy on gh-pages
 48 |       - run:  # runs unless the tag starts with "draft-" (prefix strip leaves the value unchanged)
 49 |           name: "Update GitHub Pages"
 50 |           command: |
 51 |             if [ "${CIRCLE_TAG#draft-}" = "$CIRCLE_TAG" ]; then
 52 |               make gh-pages
 53 |             fi
 54 | 
 55 |       # For tagged builds, upload to the datatracker.
 56 |       - deploy:  # NOTE(review): CircleCI documents `deploy` as deprecated in favour of `run` - confirm before migrating
 57 |           name: "Upload to Datatracker"
 58 |           command: |
 59 |             if [ "${CIRCLE_TAG#draft-}" != "$CIRCLE_TAG" ]; then
 60 |               make upload
 61 |             fi
 62 | 
 63 |       # Archive GitHub Issues
 64 |       - run:
 65 |           name: "Archive GitHub Issues"
 66 |           command: "make archive || make archive DISABLE_ARCHIVE_FETCH=true && make gh-archive"  # retry with fetching disabled; gh-archive runs once either archive build succeeds
 67 | 
 68 |       # Create and store artifacts
 69 |       - run:
 70 |           name: "Create Artifacts"
 71 |           command: "make artifacts CI_ARTIFACTS=/tmp/artifacts"
 72 | 
 73 |       - store_artifacts:
 74 |           path: /tmp/artifacts  # same directory the previous step passes as CI_ARTIFACTS
 75 | 
 76 |       - run:
 77 |           name: "Prepare for Caching"
 78 |           command: "git reflog expire --expire=now --all && git gc --prune=now"  # expire reflogs and prune objects before .git is cached
 79 | 
 80 |       - save_cache:
 81 |           name: "Saving Cache - Git"
 82 |           key: v2-cache-git-{{ .Branch }}-{{ .Revision }}  # identical to the first key tried by "Restoring cache - Git"
 83 |           paths:
 84 |             - ~/draft/.git
 85 | 
 86 |       - save_cache:
 87 |           name: "Saving Cache - Drafts"
 88 |           key: v1-cache-references-{{ epoch }}  # same key pattern as "Restoring cache - References"
 89 |           paths:
 90 |             - ~/.cache/xml2rfc
 91 | 
 92 | 
 93 | workflows:
 94 |   version: 2
 95 |   build:
 96 |     jobs:
 97 |       - build:  # refers to the `build` job defined under `jobs`
 98 |           filters:
 99 |             tags:
100 |               only: /.*?/  # regex matches any tag name, so the tag filter excludes nothing
101 | 


--------------------------------------------------------------------------------
/.github/workflows/archive.yml:
--------------------------------------------------------------------------------
 1 | name: "Archive Issues and Pull Requests"
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: '0 0 * * 0,2,4'  # 00:00 on Sunday, Tuesday and Thursday
 6 |   repository_dispatch:
 7 |     types: [archive]  # also runs on a repository_dispatch event of type "archive"
 8 | 
 9 | jobs:
10 |   build:
11 |     name: "Archive Issues and Pull Requests"
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - name: "Checkout"
15 |       uses: actions/checkout@v4  # same major version as ghpages.yml
16 | 
17 |     - name: "Update Archive"
18 |       uses: martinthomson/i-d-template@v1
19 |       with:
20 |         make: archive
21 |         token: ${{ secrets.GITHUB_TOKEN }}
22 | 
23 |     - name: "Update GitHub Pages"
24 |       uses: martinthomson/i-d-template@v1
25 |       with:
26 |         make: gh-archive
27 |         token: ${{ secrets.GITHUB_TOKEN }}
28 | 
29 |     - name: "Save Archive"
30 |       uses: actions/upload-artifact@v4  # v2 is a retired major version; v4 matches ghpages.yml
31 |       with:
32 |         path: archive.json
33 | 


--------------------------------------------------------------------------------
/.github/workflows/ghpages.yml:
--------------------------------------------------------------------------------
 1 | name: "Update Editor's Copy"
 2 | 
 3 | on:
 4 |   push:
 5 |     paths-ignore:  # same ignore list as the pull_request trigger below
 6 |     - README.md
 7 |     - CONTRIBUTING.md
 8 |     - LICENSE.md
 9 |     - .gitignore
10 |   pull_request:
11 |     paths-ignore:
12 |     - README.md
13 |     - CONTRIBUTING.md
14 |     - LICENSE.md
15 |     - .gitignore
16 | 
17 | jobs:
18 |   build:
19 |     name: "Update Editor's Copy"
20 |     runs-on: ubuntu-latest
21 |     steps:
22 |     - name: "Checkout"
23 |       uses: actions/checkout@v4
24 | 
25 |     - name: "Setup"
26 |       id: setup  # writes a `date` output (UTC timestamp) that the cache key below consumes
27 |       run: date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
28 | 
29 |     - name: "Caching"
30 |       uses: actions/cache@v4
31 |       with:
32 |         path: |
33 |           .refcache
34 |           .venv
35 |           .gems
36 |           node_modules
37 |           .targets.mk
38 |         key: i-d-${{ steps.setup.outputs.date }}  # timestamped key from the "Setup" step
39 |         restore-keys: i-d-  # prefix fallback when the timestamped key has no exact match
40 | 
41 |     - name: "Build Drafts"
42 |       uses: martinthomson/i-d-template@v1
43 |       with:
44 |         token: ${{ github.token }}
45 | 
46 |     - name: "Update GitHub Pages"
47 |       uses: martinthomson/i-d-template@v1
48 |       if: ${{ github.event_name == 'push' }}  # skipped for pull_request events
49 |       with:
50 |         make: gh-pages
51 |         token: ${{ github.token }}
52 | 
53 |     - name: "Archive Built Drafts"
54 |       uses: actions/upload-artifact@v4
55 |       with:
56 |         path: |
57 |           draft-*.html
58 |           draft-*.txt
59 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: "Publish New Draft Version"
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "draft-*"  # runs only for pushed tags whose name starts with "draft-"
 7 | 
 8 | jobs:
 9 |   build:
10 |     name: "Publish New Draft Version"
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |     - name: "Checkout"
14 |       uses: actions/checkout@v4  # same major version as ghpages.yml
15 | 
16 |     # See https://github.com/actions/checkout/issues/290
17 |     - name: "Get Tag Annotations"
18 |       run: git fetch -f origin ${{ github.ref }}:${{ github.ref }}
19 | 
20 |     - name: "Cache Setup"
21 |       id: cache-setup  # outputs: `path` (xml2rfc cache directory) and `date` (UTC timestamp)
22 |       run: |
23 |         mkdir -p "$HOME"/.cache/xml2rfc
24 |         echo "path=$HOME/.cache/xml2rfc" >>"$GITHUB_OUTPUT"
25 |         date -u "+date=%FT%T" >>"$GITHUB_OUTPUT"
26 | 
27 |     - name: "Cache References"
28 |       uses: actions/cache@v4
29 |       with:
30 |         path: ${{ steps.cache-setup.outputs.path }}
31 |         key: refcache-${{ steps.cache-setup.outputs.date }}  # was steps.date.*, a step id that does not exist
32 |         restore-keys: |
33 |           refcache-${{ steps.cache-setup.outputs.date }}
34 |           refcache-
35 | 
36 |     - name: "Build Drafts"
37 |       uses: martinthomson/i-d-template@v1
38 | 
39 |     - name: "Upload to Datatracker"
40 |       uses: martinthomson/i-d-template@v1
41 |       with:
42 |         make: upload
43 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.html
 2 | *.pdf
 3 | *.redxml
 4 | *.swp
 5 | *.txt
 6 | *.upload
 7 | *~
 8 | .refcache
 9 | .tags
10 | .targets.mk
11 | /*-[0-9][0-9].xml
12 | archive.json
13 | report.xml
14 | venv/
15 | lib
16 | draft-ietf-mls-architecture.xml
17 | 


--------------------------------------------------------------------------------
/.note.xml:
--------------------------------------------------------------------------------
1 | <note title="Discussion Venues" removeInRFC="true">
2 | <t>Discussion of this document takes place on the
3 |   MLS Working Group mailing list (mls@ietf.org),
4 |   which is archived at <eref target="https://mailarchive.ietf.org/arch/browse/mls/"/>.</t>
5 | <t>Source for this draft and an issue tracker can be found at
6 |   <eref target="https://github.com/mlswg/mls-architecture"/>.</t>
7 | </note>
8 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | This repository relates to activities in the Internet Engineering Task Force
 4 | ([IETF](https://www.ietf.org/)). All material in this repository is considered
 5 | Contributions to the IETF Standards Process, as defined in the intellectual
 6 | property policies of IETF currently designated as
 7 | [BCP 78](https://www.rfc-editor.org/info/bcp78),
 8 | [BCP 79](https://www.rfc-editor.org/info/bcp79) and the
 9 | [IETF Trust Legal Provisions (TLP) Relating to IETF Documents](http://trustee.ietf.org/trust-legal-provisions.html).
10 | 
11 | Any edit, commit, pull request, issue, comment or other change made to this
12 | repository constitutes Contributions to the IETF Standards Process
13 | (https://www.ietf.org/).
14 | 
15 | You agree to comply with all applicable IETF policies and procedures, including,
16 | BCP 78, 79, the TLP, and the TLP rules regarding code components (e.g. being
17 | subject to a Simplified BSD License) in Contributions.
18 | 
19 | 
20 | ## Other Resources
21 | 
22 | Discussion of this work occurs on the
23 | [mls working group mailing list](https://mailarchive.ietf.org/arch/browse/mls/)
24 | ([subscribe](https://www.ietf.org/mailman/listinfo/mls)).  In addition to
25 | contributions in GitHub, you are encouraged to participate in discussions there.
26 | 
27 | **Note**: Some working groups adopt a policy whereby substantive discussion of
28 | technical issues needs to occur on the mailing list.
29 | 
30 | You might also like to familiarize yourself with other
31 | [working group documents](https://datatracker.ietf.org/wg/mls/documents/).
32 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # License
2 | 
3 | See the
4 | [guidelines for contributions](https://github.com/mlswg/mls-architecture/blob/master/CONTRIBUTING.md).
5 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | LIBDIR := lib
 2 | include $(LIBDIR)/main.mk
 3 | # Rule for the included main.mk: update the submodule if .gitmodules lists $(LIBDIR), else clone i-d-template into it.
 4 | $(LIBDIR)/main.mk:
 5 | ifneq (,$(shell grep "path *= *$(LIBDIR)" .gitmodules 2>/dev/null))
 6 | 	git submodule sync
 7 | 	git submodule update $(CLONE_ARGS) --init
 8 | else
 9 | 	git clone -q --depth 10 $(CLONE_ARGS) \
10 | 	    -b main https://github.com/martinthomson/i-d-template $(LIBDIR)
11 | endif
12 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # The Messaging Layer Security (MLS) Architecture
 2 | 
 3 | This is the working area for the IETF [MLS Working Group](https://datatracker.ietf.org/wg/mls/documents/) Internet-Draft, "The Messaging Layer Security (MLS) Architecture".
 4 | 
 5 | * [Editor's Copy](https://mlswg.github.io/mls-architecture/#go.draft-ietf-mls-architecture.html)
 6 | * [Working Group Draft](https://datatracker.ietf.org/doc/html/draft-ietf-mls-architecture)
 7 | * [Compare Editor's Copy to Working Group Draft](https://mlswg.github.io/mls-architecture/#go.draft-ietf-mls-architecture.diff)
 8 | 
 9 | ## Building the Draft
10 | 
11 | Formatted text and HTML versions of the draft can be built using `make`.
12 | 
13 | ```sh
14 | $ make
15 | ```
16 | 
17 | This requires that you have the necessary software installed.  See
18 | [the instructions](https://github.com/martinthomson/i-d-template/blob/main/doc/SETUP.md).
19 | 
20 | 
21 | ## Contributing
22 | 
23 | See the
24 | [guidelines for contributions](https://github.com/mlswg/mls-architecture/blob/master/CONTRIBUTING.md).
25 | 


--------------------------------------------------------------------------------
/draft-ietf-mls-architecture-14.md:
--------------------------------------------------------------------------------
   1 | ---
   2 | title: The Messaging Layer Security (MLS) Architecture
   3 | abbrev: MLS Architecture
   4 | docname: draft-ietf-mls-architecture-14
   5 | category: info
   6 | 
   7 | ipr: trust200902
   8 | area: Security
   9 | keyword: Internet-Draft
  10 | 
  11 | stand_alone: yes
  12 | pi: [toc, sortrefs, symrefs]
  13 | 
  14 | author:
  15 |  -
  16 |     ins: B. Beurdouche
  17 |     name: Benjamin Beurdouche
  18 |     organization: Inria & Mozilla
  19 |     email: ietf@beurdouche.com
  20 |  -
  21 |     ins: E. Rescorla
  22 |     name: Eric Rescorla
  23 |     organization: Windy Hill Systems, LLC
  24 |     email: ekr@rtfm.com
  25 |  -
  26 |     ins: E. Omara
  27 |     name: Emad Omara
  28 |     email: emad.omara@gmail.com
  29 |  -
  30 |     ins: S. Inguva
  31 |     name: Srinivas Inguva
  32 |     email: singuva@yahoo.com
  33 |  -
  34 |     ins: A. Duric
  35 |     name: Alan Duric
  36 |     organization: Wire
  37 |     email: alan@wire.com
  38 | 
  39 | contributor:
  40 | - name: Richard Barnes
  41 |   org: Cisco
  42 |   email: rlb@ipv.sx
  43 | - name: Katriel Cohn-Gordon
  44 |   org: Meta Platforms
  45 |   email: me@katriel.co.uk
  46 | - name: Cas Cremers
  47 |   org: CISPA Helmholtz Center for Information Security
  48 |   email: cremers@cispa.de
  49 | - name: Britta Hale
  50 |   org: Naval Postgraduate School
  51 |   email: britta.hale@nps.edu
  52 | - name: Albert Kwon
  53 |   org: Badge Inc.
  54 |   email: kwonalbert@badgeinc.com
  55 | - name: Konrad Kohbrok
  56 |   org: Phoenix R&D
  57 |   email: konrad.kohbrok@datashrine.de
  58 | - name: Rohan Mahy
  59 |   org: Wire
  60 |   email: rohan.mahy@wire.com
  61 | - name: Brendan McMillion
  62 |   email: brendanmcmillion@gmail.com
  63 | - name: Thyla van der Merwe
  64 |   email: tjvdmerwe@gmail.com
  65 | - name: Jon Millican
  66 |   org: Meta Platforms
  67 |   email: jmillican@meta.com
  68 | - name: Raphael Robert
  69 |   org: Phoenix R&D
  70 |   email: ietf@raphaelrobert.com
  71 | 
  72 | informative:
  73 | 
  74 |   KT: I-D.draft-ietf-keytrans-architecture
  75 | 
  76 |   CONIKS:
  77 |     target: https://www.usenix.org/system/files/conference/usenixsecurity15/sec15-paper-melara.pdf
  78 |     title: "CONIKS: Bringing Key Transparency to End Users"
  79 |     date: 2015
  80 |     author:
  81 |       - name: Marcela Melara
  82 |       - name: Aaron Blankstein
  83 |       - name: Joseph Bonneau
  84 |       - name: Edward Felten
  85 |       - name: Michael Freedman
  86 | 
  87 |   CAPBR: DOI.10.1145/343477.343502
  88 | 
  89 |   ACCKKMPPWY19:
  90 |     title: "Keep the Dirt: Tainted TreeKEM, Adaptively and Actively Secure Continuous Group Key Agreement"
  91 |     date: 2019
  92 |     author:
  93 |       - name: Joel Alwen
  94 |       - name: Margarita Capretto
  95 |       - name: Miguel Cueto
  96 |       - name: Chethan Kamath
  97 |       - name: Karen Klein
  98 |       - name: Ilia Markov
  99 |       - name: Guillermo Pascual-Perez
 100 |       - name: Krzysztof Pietrzak
 101 |       - name: Michael Walter
 102 |       - name: Michelle Yeo
 103 |     target: https://eprint.iacr.org/2019/1489
 104 | 
 105 |   ACDT19:
 106 |     title: "Security Analysis and Improvements for the IETF MLS Standard for Group Messaging"
 107 |     date: 2019
 108 |     author:
 109 |       - name: Joel Alwen
 110 |       - name: Sandro Coretti
 111 |       - name: Yevgeniy Dodis
 112 |       - name: Yiannis Tselekounis
 113 |     target: https://eprint.iacr.org/2019/1189.pdf
 114 | 
 115 |   ACDT21:
 116 |     title: "Modular Design of Secure Group Messaging Protocols and the Security of MLS"
 117 |     date: 2021
 118 |     author:
 119 |       - name: Joel Alwen
 120 |       - name: Sandro Coretti
 121 |       - name: Yevgeniy Dodis
 122 |       - name: Yiannis Tselekounis
 123 |     target: https://eprint.iacr.org/2021/1083.pdf
 124 | 
 125 |   ACJM20:
 126 |     title: "Continuous Group Key Agreement with Active Security"
 127 |     date: 2020
 128 |     author:
 129 |       - name: Joel Alwen
 130 |       - name: Sandro Coretti
 131 |       - name: Daniel Jost
 132 |       - name: Marta Mularczyk
 133 |     target: https://eprint.iacr.org/2020/752.pdf
 134 | 
 135 |   AHKM21:
 136 |     title: "Server-Aided Continuous Group Key Agreement"
 137 |     date: 2021
 138 |     author:
 139 |       - name: Joel Alwen
 140 |       - name: Dominik Hartmann
 141 |       - name: Eike Kiltz
 142 |       - name: Marta Mularczyk
 143 |     target: https://eprint.iacr.org/2021/1456.pdf
 144 | 
 145 |   AJM20:
 146 |     title: "On The Insider Security of MLS"
 147 |     date: 2020
 148 |     author:
 149 |       - name: Joel Alwen
 150 |       - name: Daniel Jost
 151 |       - name: Marta Mularczyk
 152 |     target: https://eprint.iacr.org/2020/1327.pdf
 153 | 
 154 |   BBN19:
 155 |     title: "Formal Models and Verified Protocols for Group Messaging: Attacks and Proofs for IETF MLS"
 156 |     date: 2019
 157 |     author:
 158 |       - name: Karthikeyan Bhargavan
 159 |       - name: Benjamin Beurdouche
 160 |       - name: Prasad Naldurg
 161 |     target: https://inria.hal.science/hal-02425229/document
 162 | 
 163 |   BBR18:
 164 |     title: "TreeKEM: Asynchronous Decentralized Key Management for Large Dynamic Groups A protocol proposal for Messaging Layer Security (MLS)"
 165 |     date: 2018
 166 |     author:
 167 |       - name: Karthikeyan Bhargavan
 168 |       - name: Richard Barnes
 169 |       - name: Eric Rescorla
 170 |     target: "https://hal.inria.fr/hal-02425247/file/treekem+%281%29.pdf"
 171 | 
 172 |   BCK21:
 173 |     title: "Cryptographic Security of the MLS RFC, Draft 11"
 174 |     date: 2021
 175 |     author:
 176 |       - name: Chris Brzuska
 177 |       - name: Eric Cornelissen
 178 |       - name: Konrad Kohbrok
 179 |     target: https://eprint.iacr.org/2021/137.pdf
 180 | 
 181 |   CHK21:
 182 |     title: "The Complexities of Healing in Secure Group Messaging: Why Cross-Group Effects Matter"
 183 |     date: 2021
 184 |     author:
 185 |       - name: Cas Cremers
 186 |       - name: Britta Hale
 187 |       - name: Konrad Kohbrok
 188 |     target: https://www.usenix.org/system/files/sec21-cremers.pdf
 189 | 
 190 |   WPBB22:
 191 |     title: "TreeSync: Authenticated Group Management for Messaging Layer Security"
 192 |     date: 2022
 193 |     author:
 194 |       - name: Théophile Wallez
 195 |       - name: Jonathan Protzenko
 196 |       - name: Benjamin Beurdouche
 197 |       - name: Karthikeyan Bhargavan
 198 |     target: https://eprint.iacr.org/2022/1732.pdf
 199 | 
 200 |   Loopix:
 201 |     title: "The Loopix Anonymity System"
 202 |     date: 2017
 203 |     author:
 204 |       -
 205 |         ins: A.M. Piotrowska
 206 |         name: Ania M. Piotrowska
 207 |       -
 208 |         ins: J. Hayes
 209 |         name: Jamie Hayes
 210 |       -
 211 |         ins: T. Elahi
 212 |         name: Tariq Elahi
 213 |       -
 214 |         ins: S. Meiser
 215 |         name: Sebastian Meiser
 216 |       -
 217 |         ins: G. Danezis
 218 |         name: George Danezis
 219 | 
 220 | 
 221 | 
 222 | --- abstract
 223 | 
 224 | The Messaging Layer Security (MLS) protocol (I-D.ietf-mls-protocol)
 225 | provides a Group Key Agreement protocol for messaging applications.
 226 | MLS is meant to protect against eavesdropping, tampering, and message
 227 | forgery, and to provide Forward Secrecy (FS) and Post-Compromise Security
 228 | (PCS).
 229 | 
 230 | This document describes the architecture for using MLS in a general
 231 | secure group messaging infrastructure and defines the security goals
 232 | for MLS.  It provides guidance on building a group messaging system
 233 | and discusses security and privacy tradeoffs offered by multiple
 234 | security mechanisms that are part of the MLS protocol (e.g., frequency
 235 | of public encryption key rotation). The document also provides
 236 | guidance for parts of the infrastructure that are not standardized by
 237 | MLS and are instead left to the application.
 238 | 
 239 | While the recommendations of this document are not mandatory to follow in order
 240 | to interoperate at the protocol level, they affect the overall security
 241 | guarantees that are achieved by a messaging application. This is especially true
 242 | in the case of active adversaries that are able to compromise clients, the
 243 | delivery service, or the authentication service.
 244 | 
 245 | --- middle
 246 | 
 247 | # Introduction
 248 | 
 249 | RFC EDITOR: PLEASE REMOVE THE FOLLOWING PARAGRAPH
 250 | 
 251 | The source for this draft is maintained in GitHub.  Suggested changes should
 252 | be submitted as pull requests at https://github.com/mlswg/mls-architecture.
 253 | Instructions are on that page as well.  Editorial changes can be
 254 | managed in GitHub, but any substantive change should be discussed on
 255 | the MLS mailing list.
 256 | 
 257 | End-to-end security is used in the vast majority of instant messaging systems,
 258 | and is also deployed in systems for other purposes such as calling and conferencing.
 259 | In this context, "end-to-end" captures
 260 | the notion that users of the system enjoy some level of security -- with the
 261 | precise level depending on the system design -- even in the face of malicious
 262 | actions by the operator of the messaging system.
 263 | 
 264 | Messaging Layer Security (MLS) specifies an architecture (this document) and a
 265 | protocol {{!I-D.ietf-mls-protocol}} for providing end-to-end security in this
 266 | setting. MLS is not intended as a full instant messaging protocol but rather is
 267 | intended to be embedded in concrete protocols, such as XMPP {{?RFC6120}}.
 268 | Implementations of the MLS protocol will interoperate at the cryptographic
 269 | level, though they may have incompatibilities in terms of how protected messages
 270 | are delivered, contents of protected messages, and identity/authentication
 271 | infrastructures.
 272 | The MLS protocol has been designed to provide the same security guarantees to
 273 | all users, for all group sizes, including groups of only two clients.
 274 | 
 275 | # General Setting
 276 | 
 277 | ## Protocol Overview
 278 | 
 279 | MLS provides a way for _clients_ to form _groups_ within which they can
 280 | communicate securely.  For example, a set of users might use clients on their
 281 | phones or laptops to join a group and communicate with each other. A group may
 282 | be as small as two clients (e.g., for simple person-to-person messaging) or as
 283 | large as hundreds of thousands.  A client that is part of a group is a _member_
 284 | of that group. As groups change membership and group or member properties, they
 285 | advance from one _epoch_ to another and the cryptographic state of the group
 286 | evolves.
 287 | 
 288 | The group is represented as a tree, with the members of the group as its
 289 | leaves. This tree is used to efficiently encrypt to subsets of the members. Each
 290 | member has a state called a _LeafNode_ object holding the client's identity,
 291 | credentials, and capabilities.
 292 | 
 293 | Various messages are used in the evolution from epoch to epoch.
 294 | A _Proposal_ message proposes
 295 | a change to be made in the next epoch, such as adding or removing a member.
 296 | A _Commit_ message initiates a new epoch by instructing members of the group to
 297 | implement a collection of proposals. Proposals and Commits are collectively
 298 | called _Handshake messages_.
 299 | A _KeyPackage_ provides keys that can be used to add the client to a group,
 300 | including its LeafNode and _Signature Key_.
 301 | A _Welcome_ message provides a new member of the group with the information to
 302 | initialize their state for the epoch in which they were added.
 303 | 
 304 | Of course, most (but not all) applications use MLS to send encrypted group messages.
 305 | An _application message_ is an MLS message with an arbitrary application payload.
 306 | 
 307 | Finally, a _PublicMessage_ contains an integrity-protected MLS Handshake message,
 308 | while a _PrivateMessage_ contains a confidential, integrity-protected Handshake
 309 | or application message.
 310 | 
 311 | For a more detailed explanation of these terms, please consult the MLS protocol
 312 | specification {{?RFC9420}}.
 313 | 
 314 | ## Abstract Services
 315 | 
 316 | MLS is designed to operate within the context of a messaging service, which
 317 | may be a single service provider, a federated system, or some kind of
 318 | peer-to-peer system. The service needs to provide two services that
 319 | facilitate client communication using MLS:
 320 | 
 321 | - An Authentication Service (AS) which is responsible for
 322 |   attesting to bindings between application-meaningful identifiers and the
 323 |   public key material used for authentication in the MLS protocol. The
 324 |   AS must also be able to generate credentials that encode these
 325 |   bindings and validate credentials provided by MLS clients.
 326 | 
 327 | - A Delivery Service (DS)  which can receive and distribute
 328 |   messages between group members. In the case of group messaging, the delivery
 329 |   service may also be responsible for acting as a "broadcaster" where the sender
 330 |   sends a single message which is then forwarded to each recipient in the group
 331 |   by the DS. The DS is also responsible for storing and delivering initial
 332 |   public key material required by MLS clients in order to proceed with the group
 333 |   secret key establishment that is part of the MLS protocol.
 334 | 
 335 | For presentation purposes, this document treats the AS and DS as conventional
 336 | network services; however, MLS does not require a specific implementation
 337 | for the AS or DS. These services may reside on the same server or different
 338 | servers, they may be distributed between server and client components, and they
 339 | may even involve some action by users.  For example:
 340 | 
 341 | * Several secure messaging services today provide a centralized DS, and rely on
 342 |   manual comparison of clients' public keys as the AS.
 343 | 
 344 | * MLS clients connected to a peer-to-peer network could instantiate a
 345 |   decentralized DS by transmitting MLS messages over that network.
 346 | 
 347 | * In an MLS group using a Public Key Infrastructure (PKI) for authentication,
 348 |   the AS would comprise the certificate issuance and validation processes,
 349 |   both of which involve logic inside MLS clients as well as various
 350 |   existing PKI roles (e.g., Certification Authorities).
 351 | 
 352 | It is important to note that the Authentication Service can be
 353 | completely abstract in the case of a Service Provider which allows MLS
 354 | clients to generate, distribute, and validate credentials themselves.
 355 | As with the AS, the Delivery Service can be completely abstract if
 356 | users are able to distribute credentials and messages without relying
 357 | on a central Delivery Service (as in a peer-to-peer system).  Note,
 358 | though, that in such scenarios, clients will need to implement logic
 359 | that assures the delivery properties required of the DS (see
 360 | {{delivery-guarantees}}).
 361 | 
 362 | ~~~ aasvg
 363 |      +----------------+    +--------------+
 364 |      | Authentication |    |   Delivery   |
 365 |      |  Service (AS)  |    | Service (DS) |
 366 |      +----------------+    +-------+------+
 367 |                           /        |       \            Group
 368 |                          / ........|........\................
 369 |                         /  .       |         \              .
 370 |               +--------+-+ .  +----+-----+    +----------+  .
 371 |               | Client 1 | .  | Client 2 |    | Client 3 |  .
 372 |               +----------+ .  +----------+    +----------+  .
 373 |                            .   Member 1        Member 2     .
 374 |                            .                                .
 375 |                            ..................................
 376 | ~~~
 377 | {: #fig-mls-overview title="A Simplified Messaging System"}
 378 | 
 379 | {{fig-mls-overview}} shows the relationship of these concepts,
 380 | with three clients and one group, and clients 2 and 3 being
 381 | part of the group and client 1 not being part of any group.
 382 | 
 383 | 
 384 | # Overview of Operation
 385 | 
 386 | {{fig-group-formation-example}} shows the formation of an example
 387 | group consisting of Alice, Bob, and Charlie, with Alice
 388 | driving the creation of the group.
 389 | 
 390 | ~~~ aasvg
 391 | Alice     Bob       Charlie                     AS        DS
 392 | 
 393 | Create account --------------------------------->                |
 394 | <------------------------------------- Credential                |
 395 |           Create account ----------------------->                | Step 1
 396 |           <--------------------------- Credential                |
 397 |                     Create account ------------->                |
 398 |                     <----------------- Credential                |
 399 | 
 400 | Initial Keying Material ----------------------------------->     |
 401 |           Initial Keying Material ------------------------->     | Step 2
 402 |                     Initial Keying Material --------------->     |
 403 | 
 404 | Get Bob Initial Keying Material --------------------------->     |
 405 | <------------------------------- Bob Initial Keying Material     |
 406 | Add Bob to Group ------------------------------------------>     | Step 3
 407 | Welcome (Bob)---------------------------------------------->     |
 408 |           <-------------------------------- Add Bob to Group     |
 409 |           <----------------------------------- Welcome (Bob)     |
 410 | 
 411 | Get Charlie Initial Keying Material ----------------------->     |
 412 | <--------------------------- Charlie Initial Keying Material     |
 413 | Add Charlie to Group -------------------------------------->     |
 414 | Welcome (Charlie) ----------------------------------------->     | Step 4
 415 |           <---------------------------- Add Charlie to Group     |
 416 |                      <----------------- Add Charlie to Group     |
 417 |                      <-------------------- Welcome (Charlie)     |
 418 | ~~~
 419 | {: #fig-group-formation-example title="Group Formation Example"}
 420 | 
 421 | This process proceeds as follows.
 422 | 
 423 | ## Step 1: Account Creation
 424 | 
 425 | Alice, Bob, and Charlie create accounts with a service provider and obtain
 426 | credentials from the AS. This is a one-time setup phase.
 427 | 
 428 | ## Step 2: Initial Keying Material
 429 | 
 430 | Alice, Bob, and Charlie authenticate to the DS and store some initial
 431 | keying material which can be used to send encrypted messages to them
 432 | for the first time. This keying material is authenticated with their
 433 | long-term credentials. Although in principle this keying material
 434 | can be reused for multiple senders, in order to provide forward secrecy
 435 | it is better for this material to be regularly refreshed so that each
 436 | sender can use a new key.
 437 | 
 438 | ## Step 3: Adding Bob to the Group
 439 | 
 440 | When Alice wants to create a group including Bob, she first uses the DS to look
 441 | up his initial keying material. She then generates two messages:
 442 | 
 443 | * A message to the entire group (which at this point is just her and Bob)
 444 |   that adds Bob to the group.
 445 | 
 446 | * A _Welcome_ message just to Bob encrypted with his initial keying material that
 447 |   includes the secret keying information necessary to join the group.
 448 | 
 449 | She sends both of these messages to the Delivery Service, which is responsible
 450 | for sending them to the appropriate people. Note that the security of MLS
 451 | does not depend on the DS forwarding the Welcome message only to Bob, as it
 452 | is encrypted for him; it is simply not necessary for other group members
 453 | to receive it.
 454 | 
 455 | ## Step 4: Adding Charlie to the Group
 456 | 
 457 | If Alice then wants to add Charlie to the group, she follows a similar procedure
 458 | as with Bob: she first uses the DS to look
 459 | up his initial keying material and then generates two messages:
 460 | 
 461 | * A message to the entire group (consisting of her, Bob, and Charlie) adding
 462 |   Charlie to the group.
 463 | 
 464 | * A _Welcome_ message just to Charlie encrypted with his initial keying material that
 465 |   includes the secret keying information necessary to join the group.
 466 | 
 467 | At the completion of this process, we have a group with Alice, Bob, and Charlie,
 468 | which means that they share a single encryption key which can be used to
 469 | send messages or to key other protocols.
 470 | 
 471 | ## Other Group Operations
 472 | 
 473 | Once the group has been created, clients can perform other actions,
 474 | such as:
 475 | 
 476 |  -  sending a message to everyone in the group
 477 | 
 478 |  -  receiving a message from someone in the group
 479 | 
 480 |  -  adding one or more clients to an existing group
 481 | 
 482 |  -  removing one or more members from an existing group
 483 | 
 484 |  -  updating their own key material
 485 | 
 486 |  -  leaving a group (by asking to be removed)
 487 | 
 488 | Importantly, MLS does not itself enforce any access control on group
 489 | operations. For instance, any member of the group can send a message
 490 | to add a new member or to evict an existing member.
 491 | This is in contrast to some designs in which there is a single group
 492 | controller who can modify the group. MLS-using applications are
 493 | responsible for setting their own access control policies. For instance,
 494 | if only the group administrator is allowed to change group members,
 495 | then it is the responsibility of the application to inform members
 496 | of this policy and who the administrator is.
 497 | 
 498 | ## Proposals and Commits
 499 | 
 500 | The general pattern for any change in the group state (e.g., to add or remove
 501 | a user) is that it consists of two messages:
 502 | 
 503 | Proposal
 504 | : This message describes the change to be made (e.g., add Bob to the group)
 505 | but does not effect a change.
 506 | 
 507 | Commit
 508 | : This message changes the group state to include the changes described in
 509 | a set of proposals.
 510 | 
 511 | The simplest pattern is for a client to just send a Commit which contains one or
 512 | more Proposals; for instance, Alice could send a Commit with the Proposal
 513 | Add(Bob) embedded to add Bob to the group. However, there are situations in
 514 | which one client might send a proposal and another might send the commit. For
 515 | instance, Bob might wish to remove himself from the group and send a Remove
 516 | Proposal to do so (see {{Section 12.1.3 of ?RFC9420}}). Because Bob cannot send
 517 | the Commit, an existing member must do so.  Commits can apply to multiple valid
 518 | Proposals, in which case all the listed changes are applied.
 519 | 
 520 | It is also possible for a Commit to apply to an empty set of Proposals
 521 | in which case it just updates the cryptographic state of the group
 522 | without changing its membership.
 523 | 
 524 | ## Users, Clients, and Groups {#group-members}
 525 | 
 526 | While it's natural to think of a messaging system as consisting of groups of
 527 | users, possibly using different devices, in MLS the basic unit of operation is
 528 | not the user but rather the "client".  Formally, a client is a set of
 529 | cryptographic objects composed of public values such as a name (an identity), a
 530 | public encryption key, and a public signature key. As usual, a user demonstrates
 531 | ownership of the client by demonstrating knowledge of the associated secret
 532 | values.
 533 | 
 534 | In some messaging systems, clients belonging to the same user must all share the
 535 | same signature key pair, but MLS does not assume this; instead a user may have
 536 | multiple clients with the same identity and different keys. In this case, each
 537 | client will have its own cryptographic state, and it is up to the application to
 538 | determine how to present this situation to users. For instance, it may render
 539 | messages to and from a given user identically regardless of which client they
 540 | are associated with, or may choose to distinguish them.
 541 | 
 542 | When a client is part of a Group, it is called a Member.  A group in MLS is
 543 | defined as the set of clients that have knowledge of the shared group secret
 544 | established in the group key establishment phase.  Note that until a client has
 545 | been added to the group and contributed to the group secret in a manner
 546 | verifiable by other members of the group, other members cannot assume that the
 547 | client is a member of the group; for instance, the newly added member might not
 548 | have received the Welcome message or might have been unable to decrypt it for some reason.
 549 | 
 550 | 
 551 | # Authentication Service
 552 | 
 553 | The Authentication Service (AS) has to provide three services:
 554 | 
 555 | 1. Issue credentials to clients that attest to bindings between identities and
 556 |    signature key pairs
 557 | 
 558 | 2. Enable a client to verify that a credential presented by another client is
 559 |    valid with respect to a reference identifier
 560 | 
 561 | 3. Enable a group member to verify that a credential represents the same client
 562 |    as another credential
 563 | 
 564 | A member with a valid credential authenticates its MLS messages by signing them
 565 | with the private key corresponding to the public key bound by its credential.
 566 | 
 567 | The AS is considered an abstract layer by the MLS specification and part of this
 568 | service could be, for instance, running on the members' devices, while another
 569 | part is a separate entity entirely.  The following examples illustrate the
 570 | breadth of this concept:
 571 | 
 572 | * A PKI could be used as an AS {{?RFC5280}}.  The issuance function would be
 573 |   provided by the certificate authorities in the PKI, and the verification
 574 |   function would correspond to certificate verification by clients.
 575 | 
 576 | * Several current messaging applications rely on users verifying each other's
 577 |   key fingerprints for authentication.  In this scenario, the issuance function
 578 |   is simply the generation of a key pair (i.e., a credential is just an
 579 |   identifier and public key, with no information to assist in verification).
 580 |   The verification function is the application function that enables users
 581 |   to verify keys.
 582 | 
 583 | * In a system based on {{CONIKS}} end-user Key Transparency (KT) {{KT}}, the
 584 |   issuance function would correspond to the insertion of a key in a KT log under
 585 |   a user's identity. The verification function would correspond to verifying a
 586 |   key's inclusion in the log for a claimed identity, together with the KT log's
 587 |   mechanisms for a user to monitor and control which keys are associated with
 588 |   their identity.
 589 | 
 590 | By the nature of its roles in MLS authentication, the AS is invested with a
 591 | large amount of trust and the compromise of the AS could
 592 | allow an adversary to, among other things, impersonate group members. We discuss
 593 | security considerations regarding the compromise of the different AS
 594 | functions in detail in {{as-compromise}}.
 595 | 
 596 | The association between members' identities and their signature keys is fairly
 597 | flexible in MLS.  As noted above, there is no requirement that all clients
 598 | belonging to a given user have the same signature key (in fact, having duplicate
 599 | signature keys in a group is forbidden). A member can
 600 | also rotate the signature key they use within a group.  These mechanisms allow
 601 | clients to use different signature keys in different contexts and at different
 602 | points in time, providing unlinkability and post-compromise security benefits.
 603 | Some security trade-offs related to this flexibility are discussed in the
 604 | security considerations.
 605 | 
 606 | In many applications, there are multiple MLS clients that represent a single
 607 | entity, for example a human user with a mobile and desktop version of an
 608 | application. Often the same set of clients is represented in exactly the same
 609 | list of groups. In applications where this is the intended situation, other
 610 | clients can check that a user is consistently represented by the same set of
 611 | clients.  This would make it more difficult for a malicious AS to issue fake
 612 | credentials for a particular user because clients would expect the credential to
 613 | appear in all groups of which the user is a member. If a client credential does
 614 | not appear in all groups after some relatively short period of time, clients
 615 | have an indication that the credential might have been created without the
 616 | user's knowledge. Due to the asynchronous nature of MLS, however, there may be
 617 | transient inconsistencies in a user's client set, so correlating users' clients
 618 | across groups is more of a detection mechanism than a prevention mechanism.
 619 | 
 620 | # Delivery Service
 621 | 
 622 | The Delivery Service (DS) plays two major roles in MLS:
 623 | 
 624 | * As a directory service providing the initial keying material for
 625 |   clients to use. This allows a client to establish a shared key and send
 626 |   encrypted messages to other clients even if they're offline.
 627 | 
 628 | * Routing MLS messages among clients.
 629 | 
 630 | While MLS depends on correct behavior by the Authentication Service in
 631 | order to provide endpoint authentication and hence confidentiality of
 632 | the group key, these properties do not depend on correct behavior by
 633 | the DS; even a malicious DS cannot add itself to groups or recover
 634 | the group key. However, depending precisely on how MLS is used, the DS may
 635 | be able to determine group membership or prevent changes to the
 636 | group from taking place (e.g., by blocking group change messages).
 637 | 
 638 | ## Key Storage and Retrieval
 639 | 
 640 | Upon joining the system, each client stores its initial cryptographic key
 641 | material with the Delivery Service. This key material, called a KeyPackage,
 642 | advertises the functional abilities of the client such as supported protocol
 643 | versions, supported extensions, and the following cryptographic information:
 644 | 
 645 | * A credential from the Authentication Service attesting to the binding between
 646 |   the identity and the client's signature key.
 647 | 
 648 | * The client's asymmetric encryption public key.
 649 | 
 650 | All the parameters in the KeyPackage are signed with the signature
 651 | private key corresponding to the credential.
 652 | As noted in {{group-members}}, users may own multiple clients, each
 653 | with their own keying material. Each KeyPackage is specific to an MLS version
 654 | and ciphersuite, but a client may want to offer support for multiple protocol
 655 | versions and ciphersuites. As such, there may be multiple KeyPackages stored by
 656 | each user for a mix of protocol versions, ciphersuites, and end-user devices.
 657 | 
 658 | When a client wishes to establish a group or add clients to a group, it first
 659 | contacts the Delivery Service to request KeyPackages for each other client,
 660 | authenticates the KeyPackages using the signature keys, includes the KeyPackages
 661 | in Add Proposals, encrypts the information needed to join the group
 662 | (the _GroupInfo_ object) with an ephemeral key, then separately encrypts the
 663 | ephemeral key with the `init_key` from each KeyPackage.
 664 | When a client requests a KeyPackage in order to add a user to a group, the
 665 | Delivery Service should provide the minimum number of KeyPackages necessary to
 666 | satisfy the request.  For example, if the request specifies the MLS version, the
 667 | DS might provide one KeyPackage per supported ciphersuite, even if it has
 668 | multiple such KeyPackages to enable the corresponding client to be added to
 669 | multiple groups before needing to upload more fresh KeyPackages.
 670 | 
 671 | In order to avoid replay attacks and provide forward secrecy for messages sent
 672 | using the initial keying material, KeyPackages are intended to be used only
 673 | once. The Delivery Service is responsible for ensuring that each KeyPackage is
 674 | only used to add its client to a single group, with the possible exception of a
 675 | "last resort" KeyPackage that is specially designated by the client to be used
 676 | multiple times. Clients are responsible for providing new KeyPackages as
 677 | necessary in order to minimize the chance that the "last resort" KeyPackage will
 678 | be used.
 679 | 
 680 | > **RECOMMENDATION:** Ensure that "last resort" KeyPackages don't get used by
 681 | > provisioning enough standard KeyPackages.
 682 | 
 683 | > **RECOMMENDATION:** Rotate "last resort" KeyPackages as soon as possible
 684 | > after being used or if they have been stored for a prolonged period of time.
 685 | > Overall, avoid reusing last resort KeyPackages as much as possible.
 686 | 
 687 | > **RECOMMENDATION:** Ensure that the client for which a last resort KeyPackage
 688 | > has been used is updating leaf keys as early as possible.
 689 | 
 690 | Overall, it needs to be noted that KeyPackages need to be updated when
 691 | signature keys are changed.
 692 | 
 693 | ## Delivery of Messages {#delivery-guarantees}
 694 | 
 695 | The main responsibility of the Delivery Service is to ensure delivery of
 696 | messages. Some MLS messages need only be delivered to specific clients (e.g., a
 697 | Welcome message initializing a new member's state), while others need to be
 698 | delivered to all the members of a group.  The Delivery Service may enable the
 699 | latter delivery pattern via unicast channels (sometimes known as "client
 700 | fanout"), broadcast channels ("server fanout"), or a mix of both.
 701 | 
 702 | For the most part, MLS does not require the Delivery Service to deliver messages
 703 | in any particular order. Applications can set policies that control their
 704 | tolerance for out-of-order messages (see {{operational-requirements}}), and
 705 | messages that arrive significantly out-of-order can be dropped without otherwise
 706 | affecting the protocol. There are two exceptions to this. First, Proposal
 707 | messages should all arrive before the Commit that references them.  Second,
 708 | because an MLS group has a linear history of epochs, the members of the group
 709 | must agree on the order in which changes are applied.  Concretely, the group
 710 | must agree on a single MLS Commit message that ends each epoch and begins the
 711 | next one.
 712 | 
 713 | In practice, there's a realistic risk of two members generating Commit messages
 714 | at the same time, based on the same epoch, and both attempting to send them to
 715 | the group at the same time. The extent to which this is a problem, and the
 716 | appropriate solution, depends on the design of the Delivery Service. Per the CAP
 717 | theorem {{CAPBR}}, there are two general classes of distributed system that the
 718 | Delivery Service might fall into:
 719 | 
 720 | * Consistent and Partition-tolerant, or Strongly Consistent, systems can provide
 721 |   a globally consistent view of data but have the inconvenience of clients needing
 722 |   to handle rejected messages;
 723 | * Available and Partition-tolerant, or Eventually Consistent, systems continue
 724 |   working despite network issues but may return different views of data to
 725 |   different users.
 726 | 
 727 | Strategies for sequencing messages in strongly and eventually consistent systems
 728 | are described in the next two subsections. Most Delivery Services will use the
 729 | Strongly Consistent paradigm but this remains a choice that can be handled in
 730 | coordination with the client and advertised in the KeyPackages.
 731 | 
 732 | However, note that a malicious Delivery Service could also reorder messages or
 733 | provide an inconsistent view to different users.  The "generation" counter in
 734 | MLS messages provides per-sender loss detection and ordering that cannot be
 735 | manipulated by the DS, but this does not provide complete protection against
 736 | partitioning.  A DS can cause a partition in the group by partitioning key
 737 | exchange messages; this can be detected only by out-of-band comparison (e.g.,
 738 | confirming that all clients have the same `epoch_authenticator` value). A
 739 | mechanism for more robust protections is discussed in
 740 | {{?I-D.ietf-mls-extensions}}.
 741 | 
 742 | Other forms of Delivery Service misbehavior are still possible that are not easy
 743 | to detect. For instance, a Delivery Service can simply refuse to relay messages
 744 | to and from a given client. Without some sort of side information, other clients
 745 | cannot generally detect this form of Denial of Service (DoS) attack.
 746 | 
 747 | ### Strongly Consistent
 748 | 
 749 | With this approach, the Delivery Service ensures that some types of incoming
 750 | messages have a linear order and all members agree on that order.  The Delivery
 751 | Service is trusted to break ties when two members send a Commit message at the
 752 | same time.
 753 | 
 754 | As an example, there could be an "ordering server" Delivery Service that
 755 | broadcasts all messages received to all users and ensures that all clients see
 756 | handshake messages in the same order. Clients that send a Commit would then wait
 757 | to apply it until it's broadcast back to them by the Delivery Service, assuming
 758 | they don't receive another Commit first.
 759 | 
 760 | The Delivery Service can rely on the `epoch` and `content_type` fields of an
 761 | MLSMessage for providing an order only to handshake messages, and possibly even
 762 | filter or reject redundant Commit messages proactively to prevent them from
 763 | being broadcast. Alternatively, the Delivery Service could simply apply an order
 764 | to all messages and rely on clients to ignore redundant Commits.
 765 | 
 766 | There is some risk associated with filtering.  Situations can arise where a
 767 | malicious or buggy client sends a Commit that is not accepted by some members of
 768 | the group, and the DS is not able to detect this and reject the Commit.  For
 769 | example, a buggy client might send an encrypted Commit with an invalid set of
 770 | proposals.  Or a malicious client might send a malformed Commit of the form
 771 | described in {{Section 16.12 of RFC9420}}.
 772 | 
 773 | In such situations, the DS might update its internal state under the assumption
 774 | that the Commit has succeeded and thus end up in a state inconsistent with the
 775 | members of the group.  For example, the DS might think that the current epoch is
 776 | now `n+1` and reject any commits from other epochs, while the members think the
 777 | epoch is `n`, and as a result, the group is stuck -- no member can send a Commit
 778 | that the DS will accept.
 779 | 
 780 | Given these risks, it is effectively impossible for a strongly consistent DS to
 781 | know with absolute certainty when it is safe to update its internal state.  It
 782 | is up to the designers and operators of a DS to ensure that sufficient
 783 | mechanisms are in place to address these risks.
 784 | 
 785 | ### Eventually Consistent
 786 | 
 787 | With this approach, the Delivery Service is built in a way that may be
 788 | significantly more available or performant than a strongly consistent system,
 789 | but offers weaker consistency guarantees. Messages may arrive at different
 790 | clients in different orders and with varying amounts of latency, which means
 791 | clients are responsible for reconciliation.
 792 | 
 793 | This type of Delivery Service might arise, for example, when group members are
 794 | sending each message to each other member individually, or when a distributed
 795 | peer-to-peer network is used to broadcast messages.
 796 | 
 797 | Upon receiving a Commit from the Delivery Service, clients can either:
 798 | 
 799 | 1. Pause sending new messages for a short amount of time to account for a
 800 |    reasonable degree of network latency and see if any other Commits are
 801 |    received for the same epoch. If multiple Commits are received, the clients
 802 |    can use a deterministic tie-breaking policy to decide which to accept, and
 803 |    then resume sending messages as normal.
 804 | 
 805 | 2. Accept the Commit immediately but keep a copy of the previous group state for
 806 |    a short period of time. If another Commit for a past epoch is received,
 807 |    clients use a deterministic tie-breaking policy to decide if they should
 808 |    continue using the Commit they originally accepted or revert and use the
 809 |    later one. Note that any copies of previous or forked group states must be
 810 |    deleted within a reasonable amount of time to ensure the protocol provides
 811 |    forward-secrecy.
 812 | 
 813 | If the Commit references an unknown proposal, group members may need to solicit
 814 | the Delivery Service or other group members individually for the contents of the
 815 | proposal.
 816 | 
 817 | ### Welcome Messages
 818 | 
 819 | Whenever a commit adds new members to a group, MLS requires the committer to
 820 | send a Welcome message to the new members. Applications should ensure that
 821 | Welcome messages are coupled with the tie-breaking logic for commits, discussed
 822 | in {{strongly-consistent}} and {{eventually-consistent}}. That is, when multiple
 823 | commits are sent for the same epoch, applications need to ensure that only
 824 | Welcome messages corresponding to the commit that "succeeded" are processed by
 825 | new members.
 826 | 
 827 | This is particularly important when groups are being reinitialized. When a group
 828 | is reinitialized, it is restarted with a different protocol version and/or
 829 | ciphersuite but identical membership. Whenever an authorized member sends and
 830 | commits a ReInit proposal, this immediately freezes the existing group and
 831 | triggers the creation of a new group with a new `group_id`.
 832 | 
 833 | Ideally, the new group would be created by the same member that committed the
 834 | `ReInit` proposal (including sending Welcome messages for the new group to all
 835 | of the previous group's members). However, this operation is not always atomic,
 836 | so it's possible for a member to go offline after committing a ReInit proposal
 837 | but before creating the new group. If this happens, it's necessary for another
 838 | member to continue the reinitialization by creating the new group and sending
 839 | out Welcome messages.
 840 | 
 841 | This has the potential to create a race condition, where multiple members try to
 842 | continue the reinitialization at the same time, and members receive multiple
 843 | Welcome messages for each attempt at reinitializing the same group. Ensuring
 844 | that all members agree on which reinitialization attempt is "correct" is key to
 845 | preventing this from causing forks.
 846 | 
 847 | # Functional Requirements
 848 | 
 849 | MLS is designed as a large-scale group messaging protocol and hence aims to
 850 | provide both performance and security (e.g., integrity and confidentiality)
 851 | to its users. Messaging systems that implement MLS provide support for
 852 | conversations involving two or more members, and aim to scale to groups with
 853 | tens of thousands of members, typically including many users using multiple devices.
 854 | 
 855 | ## Membership Changes
 856 | 
 857 | MLS aims to provide agreement on group membership, meaning that all group
 858 | members have agreed on the list of current group members.
 859 | 
 860 | Some applications may wish to enforce ACLs to limit addition or removal of group
 861 | members to privileged clients or users. Others may wish to require
 862 | authorization from the current group members or a subset thereof.  Such policies
 863 | can be implemented at the application layer, on top of MLS. Regardless, MLS does
 864 | not allow for or support addition or removal of group members without informing
 865 | all other members.
 866 | 
 867 | Membership of an MLS group is managed at the level of individual clients.  In
 868 | most cases, a client corresponds to a specific device used by a user. If a user
 869 | has multiple devices, the user will generally be represented in a group by
 870 | multiple clients (although applications could choose to have devices share
 871 | keying material).  If an application wishes to implement operations at the level
 872 | of users, it is up to the application to track which clients belong to a given
 873 | user and ensure that they are added / removed consistently.
 874 | 
 875 | MLS provides two mechanisms for changing the membership of a group.  The primary
 876 | mechanism is for an authorized member of the group to send a Commit that adds or
 877 | removes other members.  The second mechanism is an "external join": A member of
 878 | the group publishes certain information about the group, which a new member can
 879 | use to construct an "external" Commit message that adds the new member to the
 880 | group.  (There is no similarly unilateral way for a member to leave the group;
 881 | they must be removed by a remaining member.)
 882 | 
 883 | With both mechanisms, changes to the membership are initiated from inside the
 884 | group.  When members perform changes directly, this is clearly the case.
 885 | External joins are authorized indirectly, in the sense that a member publishing
 886 | a GroupInfo object authorizes anyone to join who has access to the GroupInfo
 887 | object, subject to whatever access control policies the application applies
 888 | for external joins.
 889 | 
 890 | Both types of joins are done via a Commit message, which could be
 891 | blocked by the DS or rejected by clients if the join is not authorized.  The
 892 | former approach requires that Commits be visible to the DS; the latter approach
 893 | requires that clients all share a consistent policy. In the unfortunate event
 894 | that an unauthorized member is able to join, MLS enables any member to remove
 895 | them.
 896 | 
 897 | Application setup may also determine other criteria for membership validity. For
 898 | example, per-device signature keys can be signed by an identity key recognized
 899 | by other participants. If a certificate chain is used to authenticate device
 900 | signature keys, then revocation by the owner adds an alternative mechanism to prompt
 901 | membership removal.
 902 | 
 903 | An MLS group's secrets change on every change of membership, so each client only
 904 | has access to the secrets used by the group while they are a member.  Messages
 905 | sent before a client joins or after they are removed are protected with keys
 906 | that are not accessible to the client.  Compromise of a member removed from a
 907 | group does not affect the security of messages sent after their removal.
 908 | Messages sent during the client's membership are also secure as long as the
 909 | client has properly implemented the MLS deletion schedule, which calls for the
 910 | secrets used to encrypt or decrypt a message to be deleted after use, along with
 911 | any secrets that could be used to derive them.
 912 | 
 913 | ## Parallel Groups
 914 | 
 915 | Any user or client may have membership in several groups simultaneously.  The
 916 | set of members of any group may or may not form a subset of the members of
 917 | another group. MLS guarantees that the FS and PCS goals within a given group are
 918 | maintained and not weakened by user membership in multiple groups. However,
 919 | actions in other groups likewise do not strengthen the FS and PCS guarantees
 920 | within a given group, e.g., key updates within a given group following a device
 921 | compromise do not provide PCS healing in other groups; each group must be
 922 | updated separately to achieve these security objectives.  This also applies to
 923 | future groups that a member has yet to join, which are likewise unaffected by
 924 | updates performed in current groups.
 925 | 
 926 | Applications can strengthen connectivity among parallel groups by requiring
 927 | periodic key updates from a user across all groups in which they have
 928 | membership.
 929 | 
 930 | MLS provides a pre-shared key (PSK) that can be used to link healing properties
 931 | among parallel groups.  For example, suppose a common member M of two groups A
 932 | and B has performed a key update in group A but not in group B.  The key update
 933 | provides PCS with regard to M in group A.  If a PSK is exported from group A and
 934 | injected into group B, then some of these PCS properties carry over to group B,
 935 | since the PSK and secrets derived from it are only known to the new, updated
 936 | version of M, not to the old, possibly compromised version of M.
 937 | 
 938 | ## Asynchronous Usage
 939 | 
 940 | No operation in MLS requires two distinct clients or members to be online
 941 | simultaneously. In particular, members participating in conversations protected
 942 | using MLS can update the group's keys, add or remove members, and send
 943 | messages without waiting for another user's reply.
 944 | 
 945 | Messaging systems that implement MLS have to provide a transport layer for
 946 | delivering messages asynchronously and reliably.
 947 | 
 948 | ## Access Control
 949 | 
 950 | Because all clients within a group (members) have access to the shared
 951 | cryptographic material, the MLS protocol allows each member of the messaging group
 952 | to perform operations. However, every service/infrastructure has control over
 953 | policies applied to its own clients. Applications managing MLS clients can be
 954 | configured to allow for specific group operations. On the one hand, an
 955 | application could decide that a group administrator will be the only member to
 956 | perform add and remove operations. On the other hand, in many settings such as
 957 | open discussion forums, joining can be allowed for anyone.
 958 | 
 959 | While MLS Application messages are always encrypted,
 960 | MLS handshake messages can be sent either encrypted (in an MLS
 961 | PrivateMessage) or unencrypted (in an MLS PublicMessage). Applications
 962 | may be designed such that intermediaries need to see handshake
 963 | messages, for example to enforce policy on which commits are allowed,
 964 | or to provide MLS ratchet tree data in a central location. If
 965 | handshake messages are unencrypted, it is especially important that
 966 | they be sent over a channel with strong transport encryption
 967 | (see {{security-and-privacy-considerations}}) in order to prevent external
 968 | attackers from monitoring the status of the group. Applications that
 969 | use unencrypted handshake messages may take additional steps to reduce
 970 | the amount of metadata that is exposed to the intermediary. Everything
 971 | else being equal, using encrypted handshake messages provides stronger
 972 | privacy properties than using unencrypted handshake messages,
 973 | as it prevents intermediaries from learning about the structure
 974 | of the group.
 975 | 
 976 | If handshake messages are encrypted, any access
 977 | control policies must be applied at the client, so the application must ensure
 978 | that the access control policies are consistent across all clients to make sure
 979 | that they remain in sync.  If two different policies were applied, the clients
 980 | might disagree on whether to accept or reject a group operation and end up in
 981 | different cryptographic states, breaking their ability to communicate.
 982 | 
 983 | > **RECOMMENDATION:** Avoid using inconsistent access control policies in the
 984 | > case of encrypted group operations.
 985 | 
 986 | MLS allows actors outside the group to influence the group in two ways: External
 987 | signers can submit proposals for changes to the group, and new joiners can use
 988 | an external join to add themselves to the group.  The `external_senders`
 989 | extension ensures that all members agree on which signers are allowed to send
 990 | proposals, but any other policies must be assured to be consistent as above.
 991 | 
 992 | > **RECOMMENDATION:** Have an explicit group policy setting the conditions under
 993 | > which external joins are allowed.
 994 | 
 995 | ## Handling Authentication Failures
 996 | 
 997 | Within an MLS group, every member is authenticated to every other member by
 998 | means of credentials issued and verified by the Authentication Service.  MLS
 999 | does not prescribe what actions, if any, an application should take in the event
1000 | that a group member presents an invalid credential.  For example, an application
1001 | may require such a member to be immediately evicted, or may allow some grace
1002 | period for the problem to be remediated. To avoid operational problems, it is
1003 | important for all clients in a group to have a consistent view of which
1004 | credentials in a group are valid, and how to respond to invalid credentials.
1005 | 
1006 | > **RECOMMENDATION:** Have a uniform credential validation process to ensure
1007 | > that all group members evaluate other members' credentials in the same way.
1008 | 
1009 | > **RECOMMENDATION:** Have a uniform policy for how invalid credentials are
1010 | > handled.
1011 | 
1012 | In some authentication systems, it is possible for a previously-valid credential
1013 | to become invalid over time.  For example, in a system based on X.509
1014 | certificates, credentials can expire or be revoked.  The MLS update mechanisms
1015 | allow a client to replace an old credential with a new one. This is best done
1016 | before the old credential becomes invalid.
1017 | 
1018 | > **RECOMMENDATION:** Proactively rotate credentials, especially if a credential
1019 | > is about to become invalid.
1020 | 
1021 | ## Recovery After State Loss {#state-loss}
1022 | 
1023 | Group members whose local MLS state is lost or corrupted can reinitialize their
1024 | state by re-joining the group as a new member and removing the member
1025 | representing their earlier state.  An application can require that a client
1026 | performing such a reinitialization prove its prior membership with a PSK that
1027 | was exported from the previous state.
1028 | 
1029 | There are a few practical challenges to this approach.  For example, the
1030 | application will need to ensure that all members have the required PSK,
1031 | including any new members that have joined the group since the epoch in which
1032 | the PSK was issued.  And of course, if the PSK is lost or corrupted along with
1033 | the member's other state, then it cannot be used to recover.
1034 | 
1035 | Reinitializing in this way does not provide the member with access to group
1036 | messages sent during the state loss window, but enables proof of prior
1037 | membership in the group. Applications may choose various configurations for
1038 | providing lost messages to valid group members that are able to prove prior
1039 | membership.
1040 | 
1041 | ## Support for Multiple Devices
1042 | 
1043 | It is typically expected for users within a group to own various devices. A new
1044 | device can be added to a group and be considered as a new client by the
1045 | protocol. This client will not gain access to the history even if it is owned by
1046 | someone who owns another member of the group.  MLS does not provide direct
1047 | support for restoring history in this case, but applications can elect to
1048 | provide such a mechanism outside of MLS.  Such mechanisms, if used, may reduce
1049 | the FS and PCS guarantees provided by MLS.
1050 | 
1051 | ## Extensibility
1052 | 
1053 | The MLS protocol provides several extension points where additional information
1054 | can be provided.  Extensions to KeyPackages allow clients to disclose additional
1055 | information about their capabilities.  Groups can also have extension data
1056 | associated with them, and the group agreement properties of MLS will confirm
1057 | that all members of the group agree on the content of these extensions.
1058 | 
1059 | ## Application Data Framing and Type Advertisements
1060 | 
1061 | Application messages carried by MLS are opaque to the protocol; they can contain
1062 | arbitrary data. Each application which uses MLS needs to define the format of
1063 | its `application_data` and any mechanism necessary to determine the format of
1064 | that content over the lifetime of an MLS group. In many applications this means
1065 | managing format migrations for groups with multiple members who may each be
1066 | offline at unpredictable times.
1067 | 
1068 | > **RECOMMENDATION:** Use the default content mechanism defined in
1069 | > {{Section 3.3 of I-D.ietf-mls-extensions}}, unless the specific application defines another
1070 | > mechanism which more appropriately addresses the same requirements for that
1071 | > application of MLS.
1072 | 
1073 | The MLS framing for application messages also provides a field where clients can
1074 | send information that is authenticated but not encrypted.  Such information can
1075 | be used by servers that handle the message, but group members are assured that
1076 | it has not been tampered with.
1077 | 
1078 | ## Federation
1079 | 
1080 | The protocol aims to be compatible with federated environments. While this
1081 | document does not specify all necessary mechanisms required for federation,
1082 | multiple MLS implementations can interoperate to form federated systems if they
1083 | use compatible authentication mechanisms, ciphersuites, application content, and
1084 | infrastructure functionalities. Federation is described in more detail in
1085 | {{?I-D.ietf-mls-federation}}.
1086 | 
1087 | ## Compatibility with Future Versions of MLS
1088 | 
1089 | It is important that multiple versions of MLS be able to coexist in the
1090 | future. Thus, MLS offers a version negotiation mechanism; this mechanism
1091 | prevents version downgrade attacks where an attacker would actively rewrite
1092 | messages with a lower protocol version than the ones originally offered by the
1093 | endpoints. When multiple versions of MLS are available, the negotiation protocol
1094 | guarantees that the version agreed upon will be the highest version supported in
1095 | common by the group.
1096 | 
1097 | In MLS 1.0, the creator of the group is responsible for selecting the best
1098 | ciphersuite supported across clients. Each client is able to verify availability
1099 | of protocol version, ciphersuites and extensions at all times once it has at
1100 | least received the first group operation message.
1101 | 
1102 | Each member of an MLS group advertises the protocol functionality they support.
1103 | These capability advertisements can be updated over time, e.g., if client
1104 | software is updated while the client is a member of a group. Thus, in addition
1105 | to preventing downgrade attacks, the members of a group can also observe when it
1106 | is safe to upgrade to a new ciphersuite or protocol version.
1107 | 
1108 | # Operational Requirements
1109 | 
1110 | MLS is a security layer that needs to be integrated with an application. A
1111 | fully-functional deployment of MLS will have to make a number of decisions about
1112 | how MLS is configured and operated.  Deployments that wish to interoperate will
1113 | need to make compatible decisions. This section lists all of the dependencies of
1114 | an MLS deployment that are external to the protocol specification, but would
1115 | still need to be aligned within a given MLS deployment, or for two deployments
1116 | to potentially interoperate.
1117 | 
1118 | The protocol has a built-in ability to negotiate protocol versions,
1119 | ciphersuites, extensions, credential types, and additional proposal types. For
1120 | two deployments to interoperate, they must have overlapping support in each of
1121 | these categories. The `required_capabilities` extension (Section 7.2 of
1122 | {{!RFC9420}}) can promote interoperability with a wider set of clients by
1123 | ensuring that certain functionality continues to be supported by a group, even
1124 | if the clients in the group aren't currently relying on it.
1125 | 
1126 | MLS relies on the following network services, which need to be compatible in
1127 | order for two different deployments based on them to interoperate.
1128 | 
1129 | - An **Authentication Service**, described fully in {{authentication-service}},
1130 |   defines the types of credentials which may be used in a deployment and
1131 |   provides methods for:
1132 |   1. Issuing new credentials with a relevant credential lifetime,
1133 |   2. Validating a credential against a reference identifier,
1134 |   3. Validating whether or not two credentials represent the same client, and
1135 |   4. Optionally revoking credentials which are no longer authorized.
1136 | 
1137 | - A **Delivery Service**, described fully in {{delivery-service}}, provides
1138 |   methods for:
1139 |   1. Delivering messages for a group to all members in the group.
1140 |   2. Delivering Welcome messages to new members of a group.
1141 |   3. Uploading new KeyPackages for a user's own clients.
1142 |   4. Downloading KeyPackages for specific clients. Typically, KeyPackages are
1143 |      used once and consumed.
1144 | 
1145 | - Additional services may or may not be required depending on the application
1146 |   design:
1147 | 
1148 |   - In cases where group operations are not encrypted, the DS has the ability to
1149 |     observe and maintain a copy of the public group state. In particular, this
1150 |     is useful for clients that do not have the ability to send the full public
1151 |     state in a Welcome message when inviting a user, or for a client that needs to
1152 |     recover from losing their state. Such public state can contain privacy
1153 |     sensitive information such as group members' credentials and related public
1154 |     keys, hence services need to carefully evaluate the privacy impact of
1155 |     storing this data on the DS.
1156 |   - If external joiners are allowed, there must be a method to publish a
1157 |     serialized `GroupInfo` object (with an `external_pub` extension) that
1158 |     corresponds to a specific group and epoch, and keep that object in sync with
1159 |     the state of the group.
1160 |   - If an application chooses not to allow external joining, it may
1161 |     instead provide a method for external users to solicit group members (or a
1162 |     designated service) to add them to a group.
1163 |   - If the application uses PSKs that members of a group may not have access to
1164 |     (e.g., to control entry into the group or to prove membership in the group
1165 |     in the past, as in {{state-loss}}) there must be a method for distributing
1166 |     these PSKs to group members who might not have them, for instance if they
1167 |     joined the group after the PSK was generated.
1168 |   - If an application wishes to detect and possibly discipline members that send
1169 |     malformed commits with the intention of corrupting a group's state, there
1170 |     must be a method for reporting and validating malformed commits.
1171 | 
1172 | MLS requires the following parameters to be defined, which must be the same for
1173 | two implementations to interoperate:
1174 | 
1175 | - The maximum total lifetime that is acceptable for a KeyPackage.
1176 | 
1177 | - How long to store the resumption PSK for past epochs of a group.
1178 | 
1179 | - The degree of tolerance that's allowed for out-of-order message delivery:
1180 |   - How long to keep unused nonce and key pairs for a sender.
1181 |   - A maximum number of unused key pairs to keep.
1182 |   - A maximum number of steps that clients will move a secret tree ratchet
1183 |     forward in response to a single message before rejecting it.
1184 |   - Whether to buffer messages that aren't able to be understood yet due to
1185 |     other messages not arriving first, and if so, how many and for how long. For
1186 |     example, Commit messages that arrive before a proposal they reference, or
1187 |     application messages that arrive before the Commit starting an epoch.
1188 | 
1189 | If implementations differ in these parameters, they will interoperate to some
1190 | extent but may experience unexpected failures in certain situations, such as
1191 | extensive message reordering.
1192 | 
1193 | MLS provides the following locations where an application may store arbitrary
1194 | data. The format and intention of any data in these locations must align for two
1195 | deployments to interoperate:
1196 | 
1197 | - Application data, sent as the payload of an encrypted message.
1198 | 
1199 | - Additional authenticated data, sent unencrypted in an otherwise encrypted
1200 |   message.
1201 | 
1202 | - Group IDs, as decided by group creators and used to uniquely identify a group.
1203 | 
1204 | - Application-level identifiers of public key material (specifically
1205 |   the `application_id` extension as defined in {{Section 5.3.3 of ?RFC9420}}).
1206 | 
1207 | MLS requires the following policies to be defined, which restrict the set of
1208 | acceptable behavior in a group. These policies must be consistent between
1209 | deployments for them to interoperate:
1210 | 
1211 | - A policy on which ciphersuites are acceptable.
1212 | 
1213 | - A policy on any mandatory or forbidden MLS extensions.
1214 | 
1215 | - A policy on when to send proposals and commits in plaintext instead of
1216 |   encrypted.
1217 | 
1218 | - A policy for which proposals are valid to have in a commit, including but not
1219 |   limited to:
1220 |   - When a member is allowed to add or remove other members of the group.
1221 |   - When, and under what circumstances, a reinitialization proposal is allowed.
1222 |   - When proposals from external senders are allowed and how to authorize
1223 |     those proposals.
1224 |   - When external joiners are allowed and how to authorize those external
1225 |     commits.
1226 |   - Which other proposal types are allowed.
1227 | 
1228 | - A policy on when members should commit pending proposals in a group.
1229 | 
1230 | - A policy on how to protect and share the GroupInfo objects needed for
1231 |   external joins.
1232 | 
1233 | - A policy for when two credentials represent the same client. Note that many
1234 |   credentials may be issued attesting the same identity but for different
1235 |   signature keys, because each credential corresponds to a different client
1236 |   owned by the same application user. However, one device may control multiple
1237 |   signature keys -- for instance if they have keys corresponding to multiple
1238 |   overlapping time periods -- but should still only be considered a single
1239 |   client.
1240 | 
1241 | - A policy on how long to allow a member to stay in a group without updating its
1242 |   leaf keys before removing them.
1243 | 
1244 | Finally, there are some additional application-defined behaviors that are
1245 | partially an individual application's decision but may overlap with
1246 | interoperability:
1247 | 
1248 | - When and how to pad messages.
1249 | 
1250 | - When to send a reinitialization proposal.
1251 | 
1252 | - How often clients should update their leaf keys.
1253 | 
1254 | - Whether to prefer sending full commits or partial/empty commits.
1255 | 
1256 | - Whether there should be a `required_capabilities` extension in groups.
1257 | 
1258 | 
1259 | # Security and Privacy Considerations
1260 | 
1261 | MLS adopts the Internet threat model {{?RFC3552}} and therefore assumes that the
1262 | attacker has complete control of the network. It is intended to provide the
1263 | security services described in {{intended-security-guarantees}} in the face of
1264 | attackers who can:
1265 | 
1266 | - Monitor the entire network.
1267 | 
1268 | - Read unprotected messages.
1269 | 
1270 | - Generate, inject and delete any message in the unprotected
1271 |   transport layer.
1272 | 
1273 | While MLS should be run over a secure transport such as QUIC {{?RFC9000}} or TLS
1274 | {{?RFC8446}}, the security guarantees of MLS do not depend on the
1275 | transport. This departs from the usual design practice of trusting the transport
1276 | because MLS is designed to provide security even in the face of compromised
1277 | network elements, especially the DS.
1278 | 
1279 | Generally, MLS is designed under the assumption that the transport layer is
1280 | present to keep metadata private from network observers, while the MLS protocol
1281 | provides confidentiality, integrity, and authentication guarantees for the
1282 | application data (which could pass through multiple systems). Additional
1283 | properties such as partial anonymity or deniability could also be achieved in
1284 | specific architecture designs.
1285 | 
1286 | In addition, these guarantees are intended to degrade gracefully in the presence
1287 | of compromise of the transport security links as well as of both clients and
1288 | elements of the messaging system, as described in the remainder of this section.
1289 | 
1290 | 
1291 | ## Assumptions on Transport Security Links
1292 | 
1293 | As discussed above, MLS provides the highest level of security when its messages
1294 | are delivered over an encrypted transport.  The main use of the secure transport
1295 | layer for MLS is to protect the already limited amount of metadata. Very little
1296 | information is contained in the unencrypted header of the MLS protocol message
1297 | format for group operation messages, and application messages are always
1298 | encrypted in MLS.
1299 | 
1300 | > **RECOMMENDATION:** Use transports that provide reliability and metadata
1301 | > confidentiality whenever possible, e.g., by transmitting MLS messages over
1302 | > a protocol such as TLS {{?RFC8446}} or QUIC {{?RFC9000}}.
1303 | 
1304 | MLS avoids needing to send the full list of recipients to the server for
1305 | dispatching messages because that list could potentially contain tens of
1306 | thousands of recipients. Header metadata in MLS messages typically consists of
1307 | an opaque `group_id`, a numerical value to determine the epoch of the group (the
1308 | number of changes that have been made to the group), and whether the message is
1309 | an application message, a proposal, or a commit.
1310 | 
1311 | Even though some of this metadata information does not consist of sensitive
1312 | information, in correlation with other data a network observer might be able to
1313 | reconstruct sensitive information. Using a secure channel to transfer this
1314 | information will prevent a network attacker from accessing this MLS protocol
1315 | metadata if it cannot compromise the secure channel.
1316 | 
1317 | ### Integrity and Authentication of Custom Metadata
1318 | 
1319 | MLS provides an authenticated "Additional Authenticated Data" (AAD) field for
1320 | applications to make data available outside a PrivateMessage, while
1321 | cryptographically binding it to the message.
1322 | 
1323 | > **RECOMMENDATION:** Use the "Additional Authenticated Data" field of the
1324 | > PrivateMessage instead of using other unauthenticated means of sending
1325 | > metadata throughout the infrastructure. If the data should be kept private, the
1326 | > infrastructure should use encrypted Application messages instead.
1327 | 
1328 | ### Metadata Protection for Unencrypted Group Operations
1329 | 
1330 | Having no secure channel to exchange MLS messages can have a serious impact on
1331 | privacy when transmitting unencrypted group operation messages. Observing the
1332 | contents and signatures of the group operation messages may lead an adversary to
1333 | extract information about the group membership.
1334 | 
1335 | > **RECOMMENDATION:** Never use the unencrypted mode for group operations
1336 | > without using a secure channel for the transport layer.
1337 | 
1338 | ### DoS Protection
1339 | 
1340 | In general we do not consider Denial of Service (DoS) resistance to be the
1341 | responsibility of the protocol. However, it should not be possible for anyone
1342 | aside from the Delivery Service to perform a trivial DoS attack from which it is
1343 | hard to recover. This can be achieved through the secure transport layer.
1344 | 
1345 | In the centralized setting, DoS protection can typically be performed by using
1346 | tickets or cookies which identify users to a service for a certain number of
1347 | connections. Such a system helps in preventing anonymous clients from sending
1348 | arbitrary numbers of group operation messages to the Delivery Service or the MLS
1349 | clients.
1350 | 
1351 | > **RECOMMENDATION:** Use credentials uncorrelated with specific users to help
1352 | > prevent DoS attacks, in a privacy-preserving manner. Note that the privacy of
1353 | > these mechanisms has to be adjusted in accordance with the privacy expected
1354 | > from secure transport links. (See more discussion in the next section.)
1355 | 
1356 | ### Message Suppression and Error Correction
1357 | 
1358 | As noted above, MLS is designed to provide some robustness in the face of
1359 | tampering within the secure transport, i.e., tampering by the Delivery Service.
1360 | The confidentiality and authenticity properties of MLS prevent the DS from
1361 | reading or writing messages.  MLS also provides a few tools for detecting
1362 | message suppression, with the caveat that message suppression cannot always be
1363 | distinguished from transport failure.
1364 | 
1365 | Each encrypted MLS message carries a "generation" number which is a per-sender
1366 | incrementing counter.  If a group member observes a gap in the generation
1367 | sequence for a sender, then they know that they have missed a message from that
1368 | sender.  MLS also provides a facility for group members to send authenticated
1369 | acknowledgments of application messages received within a group.
1370 | 
1371 | As discussed in {{delivery-service}}, the Delivery Service is trusted to select
1372 | the single Commit message that is applied in each epoch from among the ones sent
1373 | by group members.  Since only one Commit per epoch is meaningful, it's not
1374 | useful for the DS to transmit multiple Commits to clients.  The risk remains
1375 | that the DS will use this ability maliciously.
1376 | 
1377 | While it is difficult or impossible to prevent a network adversary from
1378 | suppressing payloads in transit, in certain infrastructures such as banks or
1379 | government settings, unidirectional transports can be used and be enforced via
1380 | electronic or physical devices such as diodes. This can lead to payload
1381 | corruption which does not affect the security or privacy properties of the MLS
1382 | protocol but does affect the reliability of the service. In that case specific
1383 | measures can be taken to ensure the appropriate level of redundancy and quality
1384 | of service for MLS.
1385 | 
1386 | ## Intended Security Guarantees
1387 | 
1388 | MLS aims to provide a number of security guarantees, covering authentication, as
1389 | well as confidentiality guarantees to different degrees in different scenarios.
1390 | 
1391 | ### Message Secrecy and Authentication {#message-secrecy-authentication}
1392 | 
1393 | MLS enforces the encryption of application messages and thus generally
1394 | guarantees authentication and confidentiality of application messages sent in a
1395 | group.
1396 | 
1397 | In particular, this means that only other members of a given group can decrypt
1398 | the payload of a given application message, which includes information about the
1399 | sender of the message.
1400 | 
1401 | Similarly, group members receiving a message from another group member can
1402 | authenticate that group member as the sender of the message and verify the
1403 | message's integrity.
1404 | 
1405 | Message content can be deniable if the signature keys are exchanged over a
1406 | deniable channel prior to signing messages.
1407 | 
1408 | Depending on the group settings, handshake messages can be encrypted as well. If
1409 | that is the case, the same security guarantees apply.
1410 | 
1411 | MLS optionally allows the addition of padding to messages, mitigating the amount
1412 | of information leaked about the length of the plaintext to an observer on the
1413 | network.
1414 | 
1415 | ### Forward and Post-Compromise Security {#fs-and-pcs}
1416 | 
1417 | MLS provides additional protection regarding secrecy of past messages and future
1418 | messages. These cryptographic security properties are Forward Secrecy (FS) and
1419 | Post-Compromise Security (PCS).
1420 | 
1421 | FS means that access to all encrypted traffic history combined with access to
1422 | all current keying material on clients will not defeat the secrecy properties of
1423 | messages older than the oldest key of the compromised client.  Note that this
1424 | means that clients have the extremely important role of deleting appropriate
1425 | keys as soon as they have been used with the expected message, otherwise the
1426 | secrecy of the messages and the security for MLS is considerably weakened.
1427 | 
1428 | PCS means that if a group member's state is compromised at some time t1 but the
1429 | group member subsequently performs an update at some time t2, then all MLS
1430 | guarantees apply to messages sent by the member after time t2, and by other
1431 | members after they have processed the update. For example, if an attacker learns
1432 | all secrets known to Alice at time t1, including both Alice's long-term secret
1433 | keys and all shared group keys, but Alice performs a key update at time t2, then
1434 | the attacker is unable to violate any of the MLS security properties after the
1435 | updates have been processed.
1436 | 
1437 | Both of these properties are satisfied even against compromised DSs and ASs in
1438 | the case where some other mechanism for verifying keys is in use, such as Key
1439 | Transparency {{KT}}.
1440 | 
1441 | Confidentiality is mainly ensured on the client side.  Because Forward Secrecy
1442 | (FS) and Post-Compromise Security (PCS) rely on the active deletion and
1443 | replacement of keying material, any client which is persistently offline may
1444 | still be holding old keying material and thus be a threat to both FS and PCS if
1445 | it is later compromised.
1446 | 
1447 | MLS partially defends against this problem by having active members include
1448 | freshness; however, not much can be done on the inactive side, especially in
1449 | the case where the client has not processed messages.
1450 | 
1451 | > **RECOMMENDATION:** Mandate key updates from clients that are not otherwise
1452 | > sending messages and evict clients which are idle for too long.
1453 | 
1454 | These recommendations will reduce the ability of idle compromised clients to
1455 | decrypt a potentially long set of messages that might have followed the point of
1456 | the compromise.
1457 | 
1458 | The precise details of such mechanisms are a matter of local policy and beyond
1459 | the scope of this document.
1460 | 
1461 | ### Non-Repudiation vs Deniability {#Non-Repudiation-vs-Deniability}
1462 | 
1463 | 
1464 | MLS provides strong authentication within a group, such that a group member
1465 | cannot send a message that appears to be from another group member.
1466 | Additionally, some services require that a recipient be able to prove to the
1467 | service provider that a message was sent by a given client, in order to report
1468 | abuse. MLS supports both of these use cases. In some deployments, these services
1469 | are provided by mechanisms which allow the receiver to prove a message's origin
1470 | to a third party. This is often called "non-repudiation".
1471 | 
1472 | Roughly speaking, "deniability" is the opposite of "non-repudiation", i.e., the
1473 | property that it is impossible to prove to a third party that a message was sent
1474 | by a given sender.  MLS does not make any claims with regard to deniability.  It
1475 | may be possible to operate MLS in ways that provide certain deniability
1476 | properties, but defining the specific requirements and resulting notions of
1477 | deniability requires further analysis.
1478 | 
1479 | 
1480 | ### Associating a User's Clients
1481 | 
1482 | When a user has multiple devices, the base MLS protocol only describes how to
1483 | operate each device as a distinct client in the MLS groups that the user is a
1484 | member of. As a result, the other members of the group will be able to identify
1485 | which of a user's devices sent each message, and therefore which device the user
1486 | was using at the time. Group members would also be able to detect when the user
1487 | adds or removes authorized devices from their account. For some applications,
1488 | this may be an unacceptable breach of the user's privacy.
1489 | 
1490 | This risk only arises when the leaf nodes for the clients in question provide
1491 | data that can be used to correlate the clients.  So one way to mitigate this
1492 | risk is by only doing client-level authentication within MLS. If user-level
1493 | authentication is still desirable, the application would have to provide it
1494 | through some other mechanism.
1495 | 
1496 | It is also possible to maintain user-level authentication while hiding
1497 | information about the clients that a user owns.  This can be done by having the
1498 | clients share cryptographic state, so that they appear as a single client within
1499 | the MLS group. Appearing as a single client has the privacy benefits of no
1500 | longer leaking which device was used to send a particular message, and no longer
1501 | leaking the user's authorized devices. However, the application would need to
1502 | provide a synchronization mechanism so that the clients' state remains consistent
1503 | across changes to the MLS group. Flaws in this synchronization mechanism may
1504 | impair the ability of the user to recover from a compromise of one of their
1505 | devices. In particular, state synchronization may make it easier for an attacker
1506 | to use one compromised device to establish exclusive control of a user's
1507 | account, locking them out entirely and preventing them from recovering.
1508 | 
1509 | ## Endpoint Compromise
1510 | 
1511 | The MLS protocol adopts a threat model which includes multiple forms of
1512 | endpoint/client compromise. While adversaries are in a strong position if
1513 | they have compromised an MLS client, there are still situations where security
1514 | guarantees can be recovered thanks to the PCS properties achieved by the MLS
1515 | protocol.
1516 | 
1517 | In this section we will explore the consequences and recommendations regarding
1518 | the following compromise scenarios:
1519 | 
1520 | - The attacker has access to a symmetric encryption key
1521 | 
1522 | - The attacker has access to an application ratchet secret
1523 | 
1524 | - The attacker has access to the group secrets for one group
1525 | 
1526 | - The attacker has access to a signature oracle for any group
1527 | 
1528 | - The attacker has access to the signature key for one group
1529 | 
1530 | - The attacker has access to all secrets of a user for all groups (full state
1531 |   compromise)
1532 | 
1533 | ### Compromise of Symmetric Keying Material {#symmetric-key-compromise}
1534 | 
1535 | As described above, each MLS epoch creates a new Group Secret.
1536 | 
1537 | These group secrets are then used to create a per-sender Ratchet Secret, which
1538 | in turn is used to create a per-sender Authenticated Encryption with Associated Data (AEAD) {{!RFC5116}}
1539 | key that is then used to encrypt MLS Plaintext messages.  Each time a message is
1540 | sent, the Ratchet Secret is used to create a new Ratchet Secret and a new
1541 | corresponding AEAD key.  Because of the properties of the key derivation
1542 | function, it is not possible to compute a Ratchet Secret from its corresponding
1543 | AEAD key or compute Ratchet Secret n-1 from Ratchet Secret n.
1544 | 
1545 | Below, we consider the compromise of each of these pieces of keying material in
1546 | turn, in ascending order of severity.  While this is a limited kind of
1547 | compromise, it can be realistic in cases of implementation vulnerabilities where
1548 | only part of the memory leaks to the adversary.
1549 | 
1550 | #### Compromise of AEAD Keys
1551 | 
1552 | In some circumstances, adversaries may have access to specific AEAD keys and
1553 | nonces which protect an Application or a Group Operation message. Compromise of
1554 | these keys allows the attacker to decrypt the specific message encrypted with
1555 | that key but no other; because the AEAD keys are derived from the Ratchet
1556 | Secret, the attacker cannot generate the next Ratchet Secret and hence not the next AEAD
1557 | key.
1558 | 
1559 | In the case of an Application message, an AEAD key compromise means that the
1560 | encrypted application message will be leaked as well as the signature over that
1561 | message. This means that the compromise has both confidentiality and privacy
1562 | implications on the future AEAD encryptions of that chain.  In the case of a
1563 | Group Operation message, only the privacy is affected, as the signature is
1564 | revealed, because the secrets themselves are protected by HPKE encryption.  Note
1565 | that under that compromise scenario, authentication is not affected in either of
1566 | these cases.  As every member of the group can compute the AEAD keys for all the
1567 | chains (they have access to the Group Secrets) in order to send and receive
1568 | messages, the authentication provided by the AEAD encryption layer of the common
1569 | framing mechanism is weak. Successful decryption of an AEAD encrypted message
1570 | only guarantees that some member of the group sent the message.
1571 | 
1572 | Compromise of the AEAD keys allows the attacker to send an encrypted message
1573 | using that key, but the attacker cannot send a message to a group which appears to be from
1574 | any valid client since they cannot forge the signature. This applies to all the
1575 | forms of symmetric key compromise described in {{symmetric-key-compromise}}.
1576 | 
1577 | #### Compromise of Ratchet Secret material
1578 | 
1579 | When a Ratchet Secret is compromised, the adversary can compute both the current
1580 | AEAD keys for a given sender as well as any future keys for that sender in this
1581 | epoch. Thus, it can decrypt current and future messages by the corresponding
1582 | sender. However, because it does not have previous Ratchet Secrets, it cannot
1583 | decrypt past messages as long as those secrets and keys have been deleted.
1584 | 
1585 | Because of its Forward Secrecy guarantees, MLS will also retain secrecy of all
1586 | other AEAD keys generated for *other* MLS clients, outside this dedicated chain
1587 | of AEAD keys and nonces, even within the epoch of the compromise.  MLS provides
1588 | Post-Compromise Security against an active adaptive attacker across epochs for
1589 | AEAD encryption, which means that as soon as the epoch is changed, if the
1590 | attacker does not have access to more secret material they won't be able to
1591 | access any protected messages from future epochs.
1592 | 
1593 | #### Compromise of the Group Secrets of a single group for one or more group epochs
1594 | 
1595 | An adversary who gains access to a set of Group secrets--as when a member of the
1596 | group is compromised--is significantly more powerful. In this section, we
1597 | consider the case where the signature keys are not compromised, which can occur
1598 | if the attacker has access to part of the memory containing the group secrets
1599 | but not to the signature keys which might be stored in a secure enclave.
1600 | 
1601 | In this scenario, the adversary gains the ability to compute any number of
1602 | Ratchet Secrets for the epoch and their corresponding AEAD encryption keys and
1603 | thus can encrypt and decrypt all messages for the compromised epochs.
1604 | 
1605 | If the adversary is passive, it is expected from the PCS properties of the MLS
1606 | protocol that, as soon as the compromised party remediates the compromise and
1607 | sends an honest Commit message, the next epochs will provide message secrecy.
1608 | 
1609 | If the adversary is active, the adversary can engage in the protocol itself and
1610 | perform updates on behalf of the compromised party with no ability for an honest
1611 | group to recover message secrecy. However, MLS provides PCS against active
1612 | adaptive attackers through its Remove group operation. This means that, as long
1613 | as other members of the group are honest, the protocol will guarantee message
1614 | secrecy for all messages exchanged in the epochs after the compromised party has
1615 | been removed.
1616 | 
1617 | ### Compromise by an active adversary with the ability to sign messages
1618 | 
1619 | If an active adversary has compromised an MLS client and can sign messages, two
1620 | different settings emerge. In the strongest compromise scenario, the attacker
1621 | has access to the signing key and can forge authenticated messages. In a weaker,
1622 | yet realistic scenario, the attacker has compromised a client but the client
1623 | signature keys are protected with dedicated hardware features which do not allow
1624 | direct access to the value of the private key and instead provide a signature
1625 | API.
1626 | 
1627 | When considering an active adaptive attacker with access to a signature oracle,
1628 | the compromise scenario implies a significant impact on both the secrecy and
1629 | authentication guarantees of the protocol, especially if the attacker also has
1630 | access to the group secrets. In that case both secrecy and authentication are
1631 | broken.  The attacker can generate any message, for the current and future
1632 | epochs, until the compromise is remediated and the formerly compromised client
1633 | sends an honest update.
1634 | 
1635 | Note that under this compromise scenario, the attacker can perform all
1636 | operations which are available to a legitimate client even without access to the
1637 | actual value of the signature key.
1638 | 
1639 | ### Compromise of the authentication with access to a signature key
1640 | 
1641 | The difference between having access to the value of the signature key and only
1642 | having access to a signing oracle is not about the ability of an active adaptive
1643 | network attacker to perform different operations during the time of the
1644 | compromise; the attacker can perform every operation available to a legitimate
1645 | client in both cases.
1646 | 
1647 | There is a significant difference, however, in terms of recovery after a
1648 | compromise.
1649 | 
1650 | Because of the PCS guarantees provided by the MLS protocol, when a previously
1651 | compromised client recovers from compromise and performs an honest Commit, both
1652 | secrecy and authentication of future messages can be recovered as long as the
1653 | attacker doesn't otherwise get access to the key. Because the adversary doesn't
1654 | have the signing key, they cannot authenticate messages on behalf of the
1655 | compromised party, even if they still have control over some group keys by
1656 | colluding with other members of the group.
1657 | 
1658 | This is in contrast with the case where the signature key is leaked. In that
1659 | case the compromised endpoint needs to refresh its credentials and invalidate
1660 | the old credentials before the attacker will be unable to authenticate messages.
1661 | 
1662 | Beware that in both oracle and private key access, an active adaptive attacker
1663 | can follow the protocol and request to update its own credential. This in turn
1664 | induces a signature key rotation which could provide the attacker with part or
1665 | the full value of the private key depending on the architecture of the service
1666 | provider.
1667 | 
1668 | > **RECOMMENDATION:** Signature private keys should be compartmentalized from
1669 | > other secrets and preferably protected by an HSM or dedicated hardware
1670 | > features to allow recovery of the authentication for future messages after a
1671 | > compromise.
1672 | 
1673 | > **RECOMMENDATION:** When the credential type supports revocation, the users of
1674 | > a group should check for revoked keys.
1675 | 
1676 | ### Security consideration in the context of a full state compromise
1677 | 
1678 | In real-world compromise scenarios, it is often the case that adversaries target
1679 | specific devices to obtain parts of the memory or even the ability to execute
1680 | arbitrary code in the targeted device.
1681 | 
1682 | Also, recall that in this setting, the application will often retain the
1683 | unencrypted messages. If so, the adversary does not have to break encryption at
1684 | all to access sent and received messages. Messages may also be sent by using the
1685 | application to instruct the protocol implementation.
1686 | 
1687 | > **RECOMMENDATION:** If messages are stored on the device, they should be
1688 | > protected using encryption at rest, and the keys used should be stored
1689 | > securely using dedicated mechanisms on the device.
1690 | 
1691 | > **RECOMMENDATION:** If the threat model of the system is against an adversary
1692 | > which can access the messages on the device without even needing to attack
1693 | > MLS, the application should delete plaintext and ciphertext messages as soon
1694 | > as practical after encryption or decryption.
1695 | 
1696 | Note that this document makes a clear distinction between the way signature keys
1697 | and other group shared secrets must be handled.  In particular, a large set of
1698 | group secrets cannot necessarily be assumed to be protected by an HSM or secure
1699 | enclave features. This is especially true because these keys are frequently used
1700 | and changed with each message received by a client.
1701 | 
1702 | However, the signature private keys are mostly used by clients to send a
1703 | message. They also provide strong authentication guarantees to other clients,
1704 | hence we consider that their protection by additional security mechanisms should
1705 | be a priority.
1706 | 
1707 | Overall, there is no way to detect or prevent these compromises. As discussed in
1708 | the previous sections, performing separation of the application secret states
1709 | can help recovery after compromise; this is the case for signature keys, but
1710 | similar concerns exist for a client's encryption private keys.
1711 | 
1712 | > **RECOMMENDATION:** The secret keys used for public key encryption should be
1713 | > stored similarly to the way the signature keys are stored, as keys can be used
1714 | > to decrypt the group operation messages and contain the secret material used
1715 | > to compute all the group secrets.
1716 | 
1717 | Even if secure enclaves are not perfectly secure, or even completely broken,
1718 | adopting additional protections for these keys can ease recovery of the secrecy
1719 | and authentication guarantees after a compromise where, for instance, an
1720 | attacker can sign messages without having access to the key. In certain
1721 | contexts, the rotation of credentials might only be triggered by the AS through
1722 | ACLs, hence be outside of the capabilities of the attacker.
1723 | 
1724 | ## Service Node Compromise
1725 | 
1726 | ### General considerations
1727 | 
1728 | #### Privacy of the network connections
1729 | 
1730 | There are many scenarios leading to communication between the application on a
1731 | device and the Delivery Service or the Authentication Service. In particular
1732 | when:
1733 | 
1734 | - The application connects to the Authentication Service to generate or validate
1735 |   a new credential before distributing it.
1736 | 
1737 | - The application fetches credentials at the Delivery Service prior to creating
1738 |   a messaging group (one-to-one or more than two clients).
1739 | 
1740 | - The application fetches service provider information or messages on the
1741 |   Delivery Service.
1742 | 
1743 | - The application sends service provider information or messages to the Delivery
1744 |   Service.
1745 | 
1746 | In all these cases, the application will often connect to the device via a
1747 | secure transport which leaks information about the origin of the request such as
1748 | the IP address and, depending on the protocol, the MAC address of the device.
1749 | 
1750 | Similar concerns exist in the peer-to-peer use cases of MLS.
1751 | 
1752 | > **RECOMMENDATION:** In the case where privacy or anonymity is
1753 | > important, using adequate protection such as MASQUE
1754 | > {{?I-D.schinazi-masque-proxy}}, Tor, or a VPN can improve metadata
1755 | > protection.
1756 | 
1757 | More generally, using anonymous credentials in an MLS based architecture might
1758 | not be enough to provide strong privacy or anonymity properties.
1759 | 
1760 | #### Storage of Metadata and Encryption at rest on the Servers
1761 | 
1762 | In the case where private data or metadata has to be persisted on the servers
1763 | for functionality (mappings between identities and push tokens, group
1764 | metadata...), it should be stored encrypted at rest and only decrypted upon need
1765 | during the execution. Honest Service Providers can rely on such an
1766 | encryption-at-rest mechanism to prevent access to the data when it is not in use.
1767 | 
1768 | > **RECOMMENDATION:** Store cryptographic material used for server-side
1769 | > decryption of sensitive meta-data on the clients and only send it when needed.
1770 | > The server can use the secret to open and update encrypted data containers
1771 | > after which they can delete these keys until the next time they need it, in
1772 | > which case those can be provided by the client.
1773 | 
1774 | > **RECOMMENDATION:** Rely on group secrets exported from the MLS session for
1775 | > server-side encryption at rest and update the key after each removal from the
1776 | > group. Rotate those keys on a regular basis otherwise.
1777 | 
1778 | ### Delivery Service Compromise
1779 | 
1780 | MLS is intended to provide strong guarantees in the face of compromise of the
1781 | DS. Even a totally compromised DS should not be able to read messages or inject
1782 | messages that will be acceptable to legitimate clients. It should also not be
1783 | able to undetectably remove, reorder or replay messages.
1784 | 
1785 | However, a malicious DS can mount a variety of DoS attacks on the system,
1786 | including total DoS attacks (where it simply refuses to forward any messages)
1787 | and partial DoS attacks (where it refuses to forward messages to and from
1788 | specific clients).  As noted in {{delivery-guarantees}}, these attacks are only
1789 | partially detectable by clients without an out-of-band channel. Ultimately,
1790 | failure of the DS to provide reasonable service must be dealt with as a customer
1791 | service matter, not via technology.
1792 | 
1793 | Because the DS is responsible for providing the initial keying material to
1794 | clients, it can provide stale keys. This does not inherently lead to compromise
1795 | of the message stream, but does allow it to attack forward security to a limited
1796 | extent. This threat can be mitigated by having initial keys expire.
1797 | 
1798 | Initial keying material (KeyPackages) using the `basic` Credential type is more
1799 | vulnerable to replacement by a malicious or compromised DS, as there is no
1800 | built-in cryptographic binding between the identity and the public key of the
1801 | client.
1802 | 
1803 | > **RECOMMENDATION:** Prefer a Credential type in KeyPackages which includes a
1804 | > strong cryptographic binding between the identity and its key (for example the
1805 | > `x509` Credential type). When using the `basic` Credential type take extra
1806 | > care to verify the identity (typically out-of-band).
1807 | 
1808 | #### Privacy of delivery and push notifications
1809 | 
1810 | An important mechanism that is often ignored in privacy considerations is the use
1811 | of push-tokens. In many modern messaging architectures, applications are using
1812 | push notification mechanisms typically provided by OS vendors. This is to make
1813 | sure that when messages are available at the Delivery Service (or by other
1814 | mechanisms if the DS is not a central server), the recipient application on a
1815 | device knows about it. Sometimes the push notification can contain the
1816 | application message itself which saves a round trip with the DS.
1817 | 
1818 | To "push" this information to the device, the service provider and the OS
1819 | infrastructures use unique per-device, per-application identifiers called
1820 | push-tokens. This means that the push notification provider and the service
1821 | provider have information on which devices receive information and at which
1822 | point in time. Alternatively, non-mobile applications could use a websocket or
1823 | persistent connection for notifications directly from the DS.
1824 | 
1825 | Even though they can't necessarily access the content, which is typically
1826 | encrypted MLS messages, the service provider and the push notification provider
1827 | have to be trusted to avoid correlating which devices are recipients
1828 | of the same message.
1829 | 
1830 | For secure messaging systems, push notifications are often sent real-time as it
1831 | is not acceptable to create artificial delays for message retrieval.
1832 | 
1833 | > **RECOMMENDATION:** If real time notifications are not necessary, one can
1834 | > delay notifications randomly across recipient devices using a mixnet or other
1835 | > techniques.
1836 | 
1837 | Note that with a legal request to ask the service provider for the push-token
1838 | associated with an identifier, it is easy to correlate the token with a second
1839 | request to the company operating the push-notification system to get information
1840 | about the device, which is often linked with a real identity via a cloud
1841 | account, a credit card or other information.
1842 | 
1843 | > **RECOMMENDATION:** If stronger privacy guarantees are needed with regard to
1844 | > the push notification provider, the client can choose to periodically connect
1845 | > to the Delivery Service without the need of a dedicated push notification
1846 | > infrastructure.
1847 | 
1848 | Applications can also consider anonymous systems for server fanout (for
1849 | example {{Loopix}}).
1850 | 
1851 | ### Authentication Service Compromise {#as-compromise}
1852 | 
1853 | The Authentication Service design is left to the infrastructure designers. In
1854 | most designs, a compromised AS is a serious matter, as the AS can serve
1855 | incorrect or attacker-provided identities to clients.
1856 | 
1857 | - The attacker can link an identity to a credential
1858 | 
1859 | - The attacker can generate new credentials
1860 | 
1861 | - The attacker can sign new credentials
1862 | 
1863 | - The attacker can publish or distribute credentials
1864 | 
1865 | An attacker that can generate or sign new credentials may or may not have access
1866 | to the underlying cryptographic material necessary to perform such
1867 | operations. In that last case, it results in windows of time for which all
1868 | emitted credentials might be compromised.
1869 | 
1870 | > **RECOMMENDATION:** Use HSMs to store the root signature keys to limit the
1871 | > ability of an adversary with no physical access to extract the top-level
1872 | > signature private key.
1873 | 
1874 | Note that historically some systems generate signature keys on the
1875 | Authentication Service and distribute the private keys to clients along with
1876 | their credential. This is a dangerous practice because it allows the AS or an
1877 | attacker who has compromised the AS to silently impersonate the client.
1878 | 
1879 | #### Authentication compromise: Ghost users and impersonations
1880 | 
1881 | One important property of MLS is that all Members know which other members are
1882 | in the group at all times. If all Members of the group and the Authentication
1883 | Service are honest, no parties other than the members of the current group can
1884 | read and write messages protected by the protocol for that Group.
1885 | 
1886 | This guarantee applies to the cryptographic identities of the members.
1887 | Details about how to verify the identity of a client depend on the MLS
1888 | Credential type used. For example, cryptographic verification of credentials can
1889 | be largely performed autonomously (e.g., without user interaction) by the
1890 | clients themselves for the `x509` Credential type.
1891 | 
1892 | In contrast, when MLS clients use the `basic` Credential type, then some other
1893 | mechanism must be used to verify identities. For instance the Authentication
1894 | Service could operate some sort of directory server to provide keys, or users
1895 | could verify keys via an out-of-band mechanism.
1896 | 
1897 | > **RECOMMENDATION:** Select the MLS Credential type with the strongest security
1898 | > which is supported by all target members of an MLS group.
1899 | 
1900 | > **RECOMMENDATION:** Do not use the same signature keypair across
1901 | > groups. Update all keys for all groups on a regular basis. Do not preserve
1902 | > keys in different groups when suspecting a compromise.
1903 | 
1904 | If the AS is compromised, it could validate a (or generate a new) signature
1905 | keypair for an attacker. The attacker could then use this keypair to join a
1906 | group as if it were another of the user's clients.  Because a user can have many
1907 | MLS clients running the MLS protocol, the user possibly has many signature keypairs
1908 | for multiple devices. These attacks could be very difficult to detect,
1909 | especially in large groups where the UI might not reflect all the changes back
1910 | to the users. If the application participates in a key transparency mechanism in
1911 | which it is possible to determine every key for a given user, then this
1912 | would allow for detection of surreptitiously created false credentials.
1913 | 
1914 | > **RECOMMENDATION:** Make sure that MLS clients reflect all the membership
1915 | > changes to the users as they happen. If a choice has to be made because the
1916 | > number of notifications is too high, the client should provide a log of state
1917 | > of the device so that the user can examine it.
1918 | 
1919 | > **RECOMMENDATION:** Provide a key transparency mechanism for the
1920 | > Authentication Services to allow public verification of the credentials
1921 | > authenticated by this service.
1922 | 
1923 | While the ways to handle MLS credentials are not defined by the protocol or the
1924 | architecture documents, the MLS protocol has been designed with a mechanism that
1925 | can be used to provide out-of-band authentication to users. The
1926 | "authentication_secret" generated for each user at each epoch of the group is a
1927 | one-time, per client, authentication secret which can be exchanged between users
1928 | to prove their identity to each other. This can be done for instance using a QR
1929 | code that can be scanned by the other parties.
1930 | 
1931 | > **RECOMMENDATION:** Provide one or more out-of-band authentication mechanisms
1932 | > to limit the impact of an Authentication Service compromise.
1933 | 
1934 | We note, again, that the Authentication Service may not be a centralized
1935 | system, and could be realized by many mechanisms such as establishing prior
1936 | one-to-one deniable channels, gossiping, or using trust on first use (TOFU) for
1937 | credentials used by the MLS Protocol.
1938 | 
1939 | Another important consideration is the ease of redistributing new keys on client
1940 | compromise, which helps recover security faster in various cases.
1941 | 
1942 | #### Privacy of the Group Membership
1943 | 
1944 | Group membership is itself sensitive information and MLS is designed to limit
1945 | the amount of persistent metadata. However, large groups often require an
1946 | infrastructure which provides server fanout.  In the case of client fanout, the
1947 | destination of a message is known by all clients, hence the server usually does
1948 | not need this information.  However, they may learn this information through
1949 | traffic analysis.  Unfortunately, in a server-side fanout model, the Delivery
1950 | Service can learn that a given client is sending the same message to a set of
1951 | other clients. In addition, there may be applications of MLS in which the group
1952 | membership list is stored on some server associated with the Delivery Service.
1953 | 
1954 | While this knowledge is not a breach of the protocol's authentication or
1955 | confidentiality guarantees, it is a serious issue for privacy.
1956 | 
1957 | Some infrastructures keep a mapping between keys used in the MLS protocol and
1958 | user identities. An attacker with access to this information due to compromise
1959 | or regulation can associate unencrypted group messages (e.g., Commits and
1960 | Proposals) with the corresponding user identity.
1961 | 
1962 | > **RECOMMENDATION:** Use encrypted group operation messages to limit privacy
1963 | > risks whenever possible.
1964 | 
1965 | In certain cases, the adversary can access specific bindings between public keys
1966 | and identities. If the signature keys are reused across groups, the adversary
1967 | can get more information about the targeted user.
1968 | 
1969 | > **RECOMMENDATION:** Ensure that linking between public keys and identities
1970 | > only happens in expected scenarios. Otherwise, prefer a stronger separation.
1971 | 
1972 | ## Considerations for attacks outside of the threat model
1973 | 
1974 | Physical attacks on devices storing and executing MLS principals are not
1975 | considered in depth in the threat model of the MLS protocol.  While
1976 | non-permanent, non-invasive attacks can sometimes be equivalent to software
1977 | attacks, physical attacks are considered outside of the MLS threat model.
1978 | 
1979 | Compromise scenarios typically consist of a software adversary, which can
1980 | maintain active adaptive compromise and arbitrarily change the behavior of the
1981 | client or service.
1982 | 
1983 | On the other hand, security goals consider that honest clients will always run
1984 | the protocol according to its specification. This relies on implementations of
1985 | the protocol to securely implement the specification, which remains non-trivial.
1986 | 
1987 | > **RECOMMENDATION:** Additional steps should be taken to protect the device and
1988 | > the MLS clients from physical compromise. In such settings, HSMs and secure
1989 | > enclaves can be used to protect signature keys.
1990 | 
1991 | ## Cryptographic Analysis of the MLS Protocol
1992 | 
1993 | Various academic works have analyzed MLS and the different security guarantees
1994 | it aims to provide. The security of large parts of the protocol has been
1995 | analyzed by {{BBN19}} (draft 7), {{ACDT21}} (draft 11) and {{AJM20}} (draft 12).
1996 | 
1997 | Individual components of various drafts of the MLS protocol have been analyzed
1998 | in isolation and with differing adversarial models, for example, {{BBR18}},
1999 | {{ACDT19}}, {{ACCKKMPPWY19}}, {{AJM20}}, {{ACJM20}}, and {{AHKM21}} analyze the
2000 | ratcheting tree sub-protocol of MLS that facilitates key agreement, {{WPBB22}}
2001 | analyzes the sub-protocol of MLS for group state agreement and authentication,
2002 | while {{BCK21}} analyzes the key derivation paths in the ratchet tree and key
2003 | schedule. Finally, {{CHK21}} analyzes the authentication and cross-group healing
2004 | guarantees provided by MLS.
2005 | 
2006 | # IANA Considerations
2007 | 
2008 | This document makes no requests of IANA.
2009 | 


--------------------------------------------------------------------------------
/draft-ietf-mls-architecture.md:
--------------------------------------------------------------------------------
   1 | ---
   2 | title: The Messaging Layer Security (MLS) Architecture
   3 | abbrev: MLS Architecture
   4 | docname: draft-ietf-mls-architecture-latest
   5 | category: info
   6 | 
   7 | ipr: trust200902
   8 | area: Security
   9 | keyword: Internet-Draft
  10 | 
  11 | stand_alone: yes
  12 | pi: [toc, sortrefs, symrefs]
  13 | 
  14 | author:
  15 |  -
  16 |     ins: B. Beurdouche
  17 |     name: Benjamin Beurdouche
  18 |     organization: Inria & Mozilla
  19 |     email: ietf@beurdouche.com
  20 |  -
  21 |     ins: E. Rescorla
  22 |     name: Eric Rescorla
  23 |     email: ekr@rtfm.com
  24 |  -
  25 |     ins: E. Omara
  26 |     name: Emad Omara
  27 |     email: emad.omara@gmail.com
  28 |  -
  29 |     ins: S. Inguva
  30 |     name: Srinivas Inguva
  31 |     email: singuva@yahoo.com
  32 |  -
  33 |     ins: A. Duric
  34 |     name: Alan Duric
  35 |     email: alan@duric.net
  36 | 
  37 | contributor:
  38 | - name: Richard Barnes
  39 |   org: Cisco
  40 |   email: rlb@ipv.sx
  41 | - name: Katriel Cohn-Gordon
  42 |   org: Meta Platforms
  43 |   email: me@katriel.co.uk
  44 | - name: Cas Cremers
  45 |   org: CISPA Helmholtz Center for Information Security
  46 |   email: cremers@cispa.de
  47 | - name: Britta Hale
  48 |   org: Naval Postgraduate School
  49 |   email: britta.hale@nps.edu
  50 | - name: Albert Kwon
  51 |   org: Badge Inc.
  52 |   email: kwonalbert@badgeinc.com
  53 | - name: Konrad Kohbrok
  54 |   org: Phoenix R&D
  55 |   email: konrad.kohbrok@datashrine.de
  56 | - name: Rohan Mahy
  57 |   org: Wire
  58 |   email: rohan.mahy@wire.com
  59 | - name: Brendan McMillion
  60 |   email: brendanmcmillion@gmail.com
  61 | - name: Thyla van der Merwe
  62 |   email: tjvdmerwe@gmail.com
  63 | - name: Jon Millican
  64 |   org: Meta Platforms
  65 |   email: jmillican@meta.com
  66 | - name: Raphael Robert
  67 |   org: Phoenix R&D
  68 |   email: ietf@raphaelrobert.com
  69 | 
  70 | informative:
  71 | 
  72 |   KT: I-D.draft-ietf-keytrans-architecture
  73 | 
  74 |   CONIKS:
  75 |        target: https://www.usenix.org/system/files/conference/usenixsecurity15/sec15-paper-melara.pdf
  76 |        title: "CONIKS: Bringing Key Transparency to End Users"
  77 |        date: 2015
  78 |        author:
  79 |          - name: Marcela Melara
  80 |          - name: Aaron Blankstein
  81 |          - name: Joseph Bonneau
  82 |          - name: Edward Felten
  83 |          - name: Michael Freedman
  84 | 
  85 |   CAPBR: DOI.10.1145/343477.343502
  86 | 
  87 |   ACCKKMPPWY19:
  88 |     title: "Keep the Dirt: Tainted TreeKEM, Adaptively and Actively Secure Continuous Group Key Agreement"
  89 |     date: 2019
  90 |     author:
  91 |       - name: Joel Alwen
  92 |       - name: Margarita Capretto
  93 |       - name: Miguel Cueto
  94 |       - name: Chethan Kamath
  95 |       - name: Karen Klein
  96 |       - name: Ilia Markov
  97 |       - name: Guillermo Pascual-Perez
  98 |       - name: Krzysztof Pietrzak
  99 |       - name: Michael Walter
 100 |       - name: Michelle Yeo
 101 |     target: https://eprint.iacr.org/2019/1489
 102 | 
 103 |   ACDT19:
 104 |     title: "Security Analysis and Improvements for the IETF MLS Standard for Group Messaging"
 105 |     date: 2019
 106 |     author:
 107 |       - name: Joel Alwen
 108 |       - name: Sandro Coretti
 109 |       - name: Yevgeniy Dodis
 110 |       - name: Yiannis Tselekounis
 111 |     target: https://eprint.iacr.org/2019/1189.pdf
 112 | 
 113 |   ACDT21:
 114 |     title: "Modular Design of Secure Group Messaging Protocols and the Security of MLS"
 115 |     date: 2021
 116 |     author:
 117 |       - name: Joel Alwen
 118 |       - name: Sandro Coretti
 119 |       - name: Yevgeniy Dodis
 120 |       - name: Yiannis Tselekounis
 121 |     target: https://eprint.iacr.org/2021/1083.pdf
 122 | 
 123 |   ACJM20:
 124 |     title: "Continuous Group Key Agreement with Active Security"
 125 |     date: 2020
 126 |     author:
 127 |       - name: Joel Alwen
 128 |       - name: Sandro Coretti
 129 |       - name: Daniel Jost
 130 |       - name: Marta Mularczyk
 131 |     target: https://eprint.iacr.org/2020/752.pdf
 132 | 
 133 |   AHKM21:
 134 |     title: "Server-Aided Continuous Group Key Agreement"
 135 |     date: 2021
 136 |     author:
 137 |       - name: Joel Alwen
 138 |       - name: Dominik Hartmann
 139 |       - name: Eike Kiltz
 140 |       - name: Marta Mularczyk
 141 |     target: https://eprint.iacr.org/2021/1456.pdf
 142 | 
 143 |   AJM20:
 144 |     title: "On The Insider Security of MLS"
 145 |     date: 2020
 146 |     author:
 147 |       - name: Joel Alwen
 148 |       - name: Daniel Jost
 149 |       - name: Marta Mularczyk
 150 |     target: https://eprint.iacr.org/2020/1327.pdf
 151 | 
 152 |   BBN19:
 153 |     title: "Formal Models and Verified Protocols for Group Messaging: Attacks and Proofs for IETF MLS"
 154 |     date: 2019
 155 |     author:
 156 |       - name: Karthikeyan Bhargavan
 157 |       - name: Benjamin Beurdouche
 158 |       - name: Prasad Naldurg
 159 |     target: https://inria.hal.science/hal-02425229/document
 160 | 
 161 |   BBR18:
 162 |     title: "TreeKEM: Asynchronous Decentralized Key Management for Large Dynamic Groups A protocol proposal for Messaging Layer Security (MLS)"
 163 |     date: 2018
 164 |     author:
 165 |       - name: Karthikeyan Bhargavan
 166 |       - name: Richard Barnes
 167 |       - name: Eric Rescorla
 168 |     target: "https://hal.inria.fr/hal-02425247/file/treekem+%281%29.pdf"
 169 | 
 170 |   BCK21:
 171 |     title: "Cryptographic Security of the MLS RFC, Draft 11"
 172 |     date: 2021
 173 |     author:
 174 |       - name: Chris Brzuska
 175 |       - name: Eric Cornelissen
 176 |       - name: Konrad Kohbrok
 177 |     target: https://eprint.iacr.org/2021/137.pdf
 178 | 
 179 |   CHK21:
 180 |     title: "The Complexities of Healing in Secure Group Messaging: Why Cross-Group Effects Matter"
 181 |     date: 2021
 182 |     author:
 183 |       - name: Cas Cremers
 184 |       - name: Britta Hale
 185 |       - name: Konrad Kohbrok
 186 |     target: https://www.usenix.org/system/files/sec21-cremers.pdf
 187 | 
 188 |   WPBB22:
 189 |     title: "TreeSync: Authenticated Group Management for Messaging Layer Security"
 190 |     date: 2022
 191 |     author:
 192 |       - name: Théophile Wallez
 193 |       - name: Jonathan Protzenko
 194 |       - name: Benjamin Beurdouche
 195 |       - name: Karthikeyan Bhargavan
 196 |     target: https://eprint.iacr.org/2022/1732.pdf
 197 | 
 198 | 
 199 |   CGWZ25:
 200 |     title: "ETK: External-Operations TreeKEM and the Security of MLS in RFC 9420"
 201 |     date: 2025
 202 |     author:
 203 |       - name: Cas Cremers
 204 |       - name: Esra Günsay
 205 |       - name: Vera Wesselkamp
 206 |       - name: Mang Zhao
 207 |     target: https://eprint.iacr.org/2025/229.pdf
 208 | 
 209 |   WPB25:
 210 |     title: "TreeKEM: A Modular Machine-Checked Symbolic Security Analysis of Group Key Agreement in Messaging Layer Security"
 211 |     date: 2025
 212 |     author:
 213 |       - name: Théophile Wallez
 214 |       - name: Jonathan Protzenko
 215 |       - name: Karthikeyan Bhargavan
 216 |     target: https://eprint.iacr.org/2025/410.pdf
 217 | 
 218 |   Loopix:
 219 |     title: "The Loopix Anonymity System"
 220 |     date: 2017
 221 |     author:
 222 |       -
 223 |         ins: A.M. Piotrowska
 224 |         name: Ania M. Piotrowska
 225 |       -
 226 |         ins: J. Hayes
 227 |         name: Jamie Hayes
 228 |       -
 229 |         ins: T. Elahi
 230 |         name: Tariq Elahi
 231 |       -
 232 |         ins: S. Meiser
 233 |         name: Sebastian Meiser
 234 |       -
 235 |         ins: G. Danezis
 236 |         name: George Danezis
 237 | 
 238 |   Tor:
 239 |     title: "The Tor Project"
 240 |     target: https://torproject.org/
 241 | 
 242 | --- abstract
 243 | 
 244 | The Messaging Layer Security (MLS) protocol (RFC 9420)
 245 | provides a group key agreement protocol for messaging applications.
 246 | MLS is designed to protect against eavesdropping, tampering, and message
 247 | forgery, and to provide forward secrecy (FS) and post-compromise security
 248 | (PCS).
 249 | 
 250 | This document describes the architecture for using MLS in a general
 251 | secure group messaging infrastructure and defines the security goals
 252 | for MLS.  It provides guidance on building a group messaging system
 253 | and discusses security and privacy trade-offs offered by multiple
 254 | security mechanisms that are part of the MLS protocol (e.g., frequency
 255 | of public encryption key rotation). The document also provides
 256 | guidance for parts of the infrastructure that are not standardized by
 257 | MLS and are instead left to the application.
 258 | 
 259 | While the recommendations of this document are not mandatory to follow in order
 260 | to interoperate at the protocol level, they affect the overall security
 261 | guarantees that are achieved by a messaging application. This is especially true
 262 | in the case of active adversaries that are able to compromise clients, the
 263 | Delivery Service (DS), or the Authentication Service (AS).
 264 | 
 265 | --- middle
 266 | 
 267 | # Introduction
 268 | 
 269 | End-to-end security is used in the vast majority of instant messaging systems
 270 | and is also deployed in systems for other purposes such as calling and conferencing.
 271 | In this context, "end-to-end" captures
 272 | the notion that users of the system enjoy some level of security -- with the
 273 | precise level depending on the system design -- even in the face of malicious
 274 | actions by the operator of the messaging system.
 275 | 
 276 | Messaging Layer Security (MLS) specifies an architecture (this document) and a
 277 | protocol {{RFC9420}} for providing end-to-end security in this
 278 | setting. MLS is not intended as a full instant messaging protocol but rather is
 279 | intended to be embedded in concrete protocols, such as the Extensible Messaging and Presence Protocol (XMPP) {{?RFC6120}}.
 280 | Implementations of the MLS protocol will interoperate at the cryptographic
 281 | level, though they may have incompatibilities in terms of how protected messages
 282 | are delivered, contents of protected messages, and identity/authentication
 283 | infrastructures.
 284 | The MLS protocol has been designed to provide the same security guarantees to
 285 | all users, for all group sizes, including groups of only two clients.
 286 | 
 287 | # General Setting
 288 | 
 289 | ## Protocol Overview
 290 | 
 291 | MLS provides a way for _clients_ to form _groups_ within which they can
 292 | communicate securely.  For example, a set of users might use clients on their
 293 | phones or laptops to join a group and communicate with each other. A group may
 294 | be as small as two clients (e.g., for simple person-to-person messaging) or as
 295 | large as hundreds of thousands.  A client that is part of a group is a _member_
 296 | of that group. As groups change membership and group or member properties, they
 297 | advance from one _epoch_ to another and the cryptographic state of the group
 298 | evolves.
 299 | 
 300 | The group is represented as a tree in which the members are the leaves. The
 301 | tree is used to efficiently encrypt to subsets of the members. Each
 302 | member has a state called a _LeafNode_ object holding the client's identity,
 303 | credentials, and capabilities.
 304 | 
 305 | Various messages are used in the evolution from epoch to epoch.
 306 | A _Proposal_ message proposes
 307 | a change to be made in the next epoch, such as adding or removing a member.
 308 | A _Commit_ message initiates a new epoch by instructing members of the group to
 309 | implement a collection of proposals. Proposals and Commits are collectively
 310 | called _handshake messages_.
 311 | A _KeyPackage_ provides keys that can be used to add the client to a group,
 312 | including a public encryption key and a signature key (both stored in
 313 | the KeyPackage's `LeafNode` object).
 314 | A _Welcome_ message provides a new member to the group with the information to
 315 | initialize their state for the epoch in which they were added.
 316 | 
 317 | Of course, most (but not all) applications use MLS to send encrypted group messages.
 318 | An _application message_ is an MLS message with an arbitrary application payload.
 319 | 
 320 | Finally, a _PublicMessage_ contains an integrity-protected MLS handshake message,
 321 | while a _PrivateMessage_ contains a confidential, integrity-protected handshake
 322 | or application message.
 323 | 
 324 | For a more detailed explanation of these terms, please consult the MLS protocol
 325 | specification {{?RFC9420}}.
 326 | 
 327 | ## Abstract Services
 328 | 
 329 | MLS is designed to operate within the context of a messaging service, which
 330 | may be a single service provider, a federated system, or some kind of
 331 | peer-to-peer system. The service needs to provide two services that
 332 | facilitate client communication using MLS:
 333 | 
 334 | - An Authentication Service (AS), which is responsible for
 335 |   attesting to bindings between application-meaningful identifiers and the
 336 |   public key material used for authentication in the MLS protocol. The
 337 |   AS must also be able to generate credentials that encode these
 338 |   bindings and validate credentials provided by MLS clients.
 339 | 
 340 | - A Delivery Service (DS), which can receive and distribute
 341 |   messages between group members. In the case of group messaging, the DS
 342 |   may also be responsible for acting as a "broadcaster" where the sender
 343 |   sends a single message which is then forwarded to each recipient in the group
 344 |   by the DS. The DS is also responsible for storing and delivering initial
 345 |   public key material required by MLS clients in order to proceed with the group
 346 |   secret key establishment that is part of the MLS protocol.
 347 | 
 348 | For presentation purposes, this document treats the AS and DS as conventional
 349 | network services. However, MLS does not require a specific implementation
 350 | for the AS or DS. These services may reside on the same server or different
 351 | servers, they may be distributed between server and client components, and they
 352 | may even involve some action by users.  For example:
 353 | 
 354 | * Several secure messaging services today provide a centralized DS and rely on
 355 |   manual comparison of clients' public keys as the AS.
 356 | 
 357 | * MLS clients connected to a peer-to-peer network could instantiate a
 358 |   decentralized DS by transmitting MLS messages over that network.
 359 | 
 360 | * In an MLS group using a Public Key Infrastructure (PKI) for authentication,
 361 |   the AS would comprise the certificate issuance and validation processes,
 362 |   both of which involve logic inside MLS clients as well as various
 363 |   existing PKI roles (e.g., Certification Authorities).
 364 | 
 365 | It is important to note that the AS can be
 366 | completely abstract in the case of a service provider which allows MLS
 367 | clients to generate, distribute, and validate credentials themselves.
 368 | As with the AS, the DS can be completely abstract if
 369 | users are able to distribute credentials and messages without relying
 370 | on a central DS (as in a peer-to-peer system).  Note,
 371 | though, that in such scenarios, clients will need to implement logic
 372 | that assures the delivery properties required of the DS (see
 373 | {{delivery-guarantees}}).
 374 | 
 375 | {{fig-mls-overview}} shows the relationship of these concepts,
 376 | with three clients and one group: Clients 2 and 3 are
 377 | part of the group, while Client 1 is not part of any group.
 378 | 
 379 | ~~~ aasvg
 380 |      +----------------+    +--------------+
 381 |      | Authentication |    |   Delivery   |
 382 |      |  Service (AS)  |    | Service (DS) |
 383 |      +----------------+    +-------+------+
 384 |                           /        |       \            Group
 385 |                          / ........|........\................
 386 |                         /  .       |         \              .
 387 |               +--------+-+ .  +----+-----+    +----------+  .
 388 |               | Client 1 | .  | Client 2 |    | Client 3 |  .
 389 |               +----------+ .  +----------+    +----------+  .
 390 |                            .   Member 1        Member 2     .
 391 |                            .                                .
 392 |                            ..................................
 393 | ~~~
 394 | {: #fig-mls-overview title="A Simplified Messaging System"}
 395 | 
 396 | 
 397 | 
 398 | # Overview of Operation
 399 | 
 400 | {{fig-group-formation-example}} shows the formation of an example
 401 | group consisting of Alice, Bob, and Charlie, with Alice
 402 | driving the creation of the group.
 403 | 
 404 | ~~~ aasvg
 405 | Alice     Bob       Charlie                     AS        DS
 406 | 
 407 | Create account --------------------------------->                |
 408 | <------------------------------------- Credential                |
 409 |           Create account ----------------------->                | Step 1
 410 |           <--------------------------- Credential                |
 411 |                     Create account ------------->                |
 412 |                     <----------------- Credential                |
 413 | 
 414 | Initial Keying Material ----------------------------------->     |
 415 |           Initial Keying Material ------------------------->     | Step 2
 416 |                     Initial Keying Material --------------->     |
 417 | 
 418 | Get Bob Initial Keying Material --------------------------->     |
 419 | <------------------------------- Bob Initial Keying Material     |
 420 | Add Bob to group ------------------------------------------>     | Step 3
 421 | Welcome(Bob) ---------------------------------------------->     |
 422 |           <-------------------------------- Add Bob to group     |
 423 |           <------------------------------------ Welcome(Bob)     |
 424 | 
 425 | Get Charlie Initial Keying Material ----------------------->     |
 426 | <--------------------------- Charlie Initial Keying Material     |
 427 | Add Charlie to group -------------------------------------->     |
 428 | Welcome(Charlie) ------------------------------------------>     | Step 4
 429 |           <---------------------------- Add Charlie to group     |
 430 |                      <----------------- Add Charlie to group     |
 431 |                      <--------------------- Welcome(Charlie)     |
 432 | ~~~
 433 | {: #fig-group-formation-example title="Group Formation Example"}
 434 | 
 435 | This process proceeds as follows.
 436 | 
 437 | ## Step 1: Account Creation
 438 | 
 439 | Alice, Bob, and Charlie create accounts with a service provider and obtain
 440 | credentials from the AS. This is a one-time setup phase.
 441 | 
 442 | ## Step 2: Initial Keying Material
 443 | 
 444 | Alice, Bob, and Charlie authenticate to the DS and store some initial
 445 | keying material which is used to send encrypted messages to them
 446 | for the first time. This keying material is authenticated with their
 447 | long-term credentials. Although in principle this keying material
 448 | can be reused for multiple senders, in order to provide forward secrecy
 449 | it is better for this material to be regularly refreshed so that each
 450 | sender can use a new key and delete older keys.
 451 | 
 452 | ## Step 3: Adding Bob to the Group
 453 | 
 454 | When Alice wants to create a group including Bob, she first uses the DS to look
 455 | up his initial keying material. She then generates two messages:
 456 | 
 457 | * A message to the entire group (which at this point is just her and Bob)
 458 |   that adds Bob to the group.
 459 | 
 460 | * A Welcome message just to Bob encrypted with his initial keying material that
 461 |   includes the secret keying information necessary to join the group.
 462 | 
 463 | She sends both of these messages to the DS, which is responsible
 464 | for sending them to the appropriate people. Note that the security of MLS
 465 | does not depend on the DS forwarding the Welcome message only to Bob, as it
 466 | is encrypted for him; it is simply not necessary for other group members
 467 | to receive it.
 468 | 
 469 | ## Step 4: Adding Charlie to the Group
 470 | 
 471 | If Alice then wants to add Charlie to the group, she follows a similar procedure
 472 | as with Bob. She first uses the DS to look
 473 | up his initial keying material and then generates two messages:
 474 | 
 475 | * A message to the entire group (consisting of her, Bob, and Charlie) adding
 476 |   Charlie to the group.
 477 | 
 478 | * A Welcome message just to Charlie encrypted with his initial keying material that
 479 |   includes the secret keying information necessary to join the group.
 480 | 
 481 | At the completion of this process, we have a group with Alice, Bob, and Charlie,
 482 | which means that they share a single encryption key which can be used to
 483 | send messages or to key other protocols.
 484 | 
 485 | ## Other Group Operations
 486 | 
 487 | Once the group has been created, clients can perform other actions,
 488 | such as:
 489 | 
 490 |  -  sending a message to everyone in the group
 491 | 
 492 |  -  receiving a message from someone in the group
 493 | 
 494 |  -  adding one or more clients to an existing group
 495 | 
 496 |  -  removing one or more members from an existing group
 497 | 
 498 |  -  updating their own key material
 499 | 
 500 |  -  leaving a group (by asking to be removed)
 501 | 
 502 | Importantly, MLS does not itself enforce any access control on group
 503 | operations. For instance, any member of the group can send a message
 504 | to add a new member or to evict an existing member.
 505 | This is in contrast to some designs in which there is a single group
 506 | controller who can modify the group. MLS-using applications are
 507 | responsible for setting their own access control policies. For instance,
 508 | if only the group administrator is allowed to change group members,
 509 | then it is the responsibility of the application to inform members
 510 | of this policy and who the administrator is.
 511 | 
 512 | ## Proposals and Commits
 513 | 
 514 | The general pattern for any change in the group state (e.g., to add or remove
 515 | a user) is that it consists of two messages:
 516 | 
 517 | Proposal:
 518 | : This message describes the change to be made (e.g., add Bob to the group)
 519 | but does not effect a change.
 520 | 
 521 | Commit:
 522 | : This message changes the group state to include the changes described in
 523 | a set of proposals.
 524 | 
 525 | The simplest pattern is for a client to just send a Commit which contains one or
 526 | more Proposals. For instance, Alice could send a Commit with the Proposal
 527 | Add(Bob) embedded to add Bob to the group. However, there are situations in
 528 | which one client might send a Proposal and another might send the corresponding Commit. For
 529 | instance, Bob might wish to remove himself from the group and send a Remove
 530 | proposal to do so (see {{Section 12.1.3 of ?RFC9420}}). Because Bob cannot send
 531 | the Commit that removes himself, another member must do so.  Commits can apply to multiple valid
 532 | Proposals, in which case all the listed changes are applied.
 533 | 
 534 | It is also possible for a Commit to apply to an empty set of Proposals,
 535 | in which case it just updates the cryptographic state of the group
 536 | without changing its membership.
 537 | 
 538 | ## Users, Clients, and Groups {#group-members}
 539 | 
 540 | While it's natural to think of a messaging system as consisting of groups of
 541 | users, possibly using different devices, in MLS the basic unit of operation is
 542 | not the user but rather the "client".  Formally, a client is a set of
 543 | cryptographic objects composed of public values such as a name (an identity), a
 544 | public encryption key, and a public signature key. As usual, a user demonstrates
 545 | ownership of the client by demonstrating knowledge of the associated secret
 546 | values.
 547 | 
 548 | In some messaging systems, clients belonging to the same user must all share the
 549 | same signature key pair, but MLS does not assume this; instead, a user may have
 550 | multiple clients with the same identity and different keys. In this case, each
 551 | client will have its own cryptographic state, and it is up to the application to
 552 | determine how to present this situation to users. For instance, it may render
 553 | messages to and from a given user identically regardless of which client they
 554 | are associated with, or it may choose to distinguish them. It is also possible
 555 | to have multiple clients associated with the same user share state, as
 556 | described in {{associating-a-users-clients}}.
 557 | 
 558 | When a client is part of a group, it is called a member.  A group in MLS is
 559 | defined as the set of clients that have knowledge of the shared group secret
 560 | established in the group key establishment phase.  Note that until a client has
 561 | been added to the group and contributed to the group secret in a manner
 562 | verifiable by other members of the group, other members cannot assume that the
 563 | client is a member of the group; for instance, the newly added member might not
 564 | have received the Welcome message or might have been unable to decrypt it for some reason.
 565 | 
 566 | 
 567 | # Authentication Service
 568 | 
 569 | The Authentication Service (AS) has to provide three services:
 570 | 
 571 | 1. Issue credentials to clients that attest to bindings between identities and
 572 |    signature key pairs.
 573 | 
 574 | 2. Enable a client to verify that a credential presented by another client is
 575 |    valid with respect to a reference identifier.
 576 | 
 577 | 3. Enable a group member to verify that a credential represents the same client
 578 |    as another credential.
 579 | 
 580 | A member with a valid credential authenticates its MLS messages by signing them
 581 | with the private key corresponding to the public key bound by its credential.
 582 | 
 583 | The AS is considered an abstract layer by the MLS specification; part of this
 584 | service could be, for instance, running on the members' devices, while another
 585 | part is a separate entity entirely.  The following examples illustrate the
 586 | breadth of this concept:
 587 | 
 588 | * A PKI could be used as an AS {{?RFC5280}}.  The issuance function would be
 589 |   provided by the certificate authorities in the PKI, and the verification
 590 |   function would correspond to certificate verification by clients.
 591 | 
 592 | * Several current messaging applications rely on users verifying each other's
 593 |   key fingerprints for authentication.  In this scenario, the issuance function
 594 |   is simply the generation of a key pair (i.e., a credential is just an
 595 |   identifier and public key, with no information to assist in verification).
 596 |   The verification function is the application function that enables users
 597 |   to verify keys.
 598 | 
 599 | * In a system based on end-user Key Transparency (KT) {{KT}}, the
 600 |   issuance function would correspond to the insertion of a key in a KT log under
 601 |   a user's identity. The verification function would correspond to verifying a
 602 |   key's inclusion in the log for a claimed identity, together with the KT log's
 603 |   mechanisms for a user to monitor and control which keys are associated with
 604 |   their identity.
 605 | 
 606 | By the nature of its role in MLS authentication, the AS is invested with a
 607 | large amount of trust and the compromise of the AS could
 608 | allow an adversary to, among other things, impersonate group members. We discuss
 609 | security considerations regarding the compromise of the different AS
 610 | functions in detail in {{as-compromise}}.
 611 | 
 612 | The association between members' identities and their signature keys is fairly
 613 | flexible in MLS.  As noted above, there is no requirement that all clients
 614 | belonging to a given user have the same signature key (in fact, having duplicate
 615 | signature keys in a group is forbidden). A member can
 616 | also rotate the signature key they use within a group.  These mechanisms allow
 617 | clients to use different signature keys in different contexts and at different
 618 | points in time, providing unlinkability and post-compromise security benefits.
 619 | Some security trade-offs related to this flexibility are discussed in
 620 | {{security-and-privacy-considerations}}.
 621 | 
 622 | In many applications, there are multiple MLS clients that represent a single
 623 | entity, such as a human user with a mobile and desktop version of an
 624 | application. Often, the same set of clients is represented in exactly the same
 625 | list of groups. In applications where this is the intended situation, other
 626 | clients can check that a user is consistently represented by the same set of
 627 | clients.  This would make it more difficult for a malicious AS to issue fake
 628 | credentials for a particular user because clients would expect the credential to
 629 | appear in all groups of which the user is a member. If a client credential does
 630 | not appear in all groups after some relatively short period of time, clients
 631 | have an indication that the credential might have been created without the
 632 | user's knowledge. Due to the asynchronous nature of MLS, however, there may be
 633 | transient inconsistencies in a user's client set, so correlating users' clients
 634 | across groups is more of a detection mechanism than a prevention mechanism.
 635 | 
 636 | # Delivery Service
 637 | 
 638 | The Delivery Service (DS) plays two major roles in MLS:
 639 | 
 640 | * Acting as a directory service that provides the initial keying material for
 641 |   clients to use. This allows a client to establish a shared key and send
 642 |   encrypted messages to other clients even if they're offline.
 643 | 
 644 | * Routing MLS messages among clients.
 645 | 
 646 | While MLS depends on correct behavior by the AS in
 647 | order to provide endpoint authentication and hence confidentiality of
 648 | the group key, these properties do not depend on correct behavior by
 649 | the DS; even a malicious DS cannot add itself to groups or recover
 650 | the group key. However, depending precisely on how MLS is used, the DS may
 651 | be able to determine group membership or prevent changes to the
 652 | group from taking place (e.g., by blocking group change messages).
 653 | 
 654 | ## Key Storage and Retrieval
 655 | 
 656 | Upon joining the system, each client stores its initial cryptographic key
 657 | material with the DS. This key material, called a KeyPackage,
 658 | advertises the functional abilities of the client (e.g., supported protocol
 659 | versions and supported extensions) and the following cryptographic information:
 660 | 
 661 | * A credential from the AS attesting to the binding between
 662 |   the identity and the client's signature key.
 663 | 
 664 | * The client's asymmetric encryption public key.
 665 | 
 666 | All the parameters in the KeyPackage are signed with the signature
 667 | private key corresponding to the credential.
 668 | As noted in {{group-members}}, users may own multiple clients, each
 669 | with their own keying material. Each KeyPackage is specific to an MLS version
 670 | and cipher suite, but a client may want to offer support for multiple protocol
 671 | versions and cipher suites. As such, there may be multiple KeyPackages stored by
 672 | each user for a mix of protocol versions, cipher suites, and end-user devices.
 673 | 
 674 | When a client wishes to establish a group or add clients to a group, it first
 675 | contacts the DS to request KeyPackages for each of the other clients,
 676 | authenticates the KeyPackages using the signature keys, includes the KeyPackages
 677 | in Add proposals, and encrypts the information needed to join the group
 678 | (the _GroupInfo_ object) with an ephemeral key; it then separately encrypts the
 679 | ephemeral key with the public encryption key (`init_key`) from each KeyPackage.
 680 | When a client requests a KeyPackage in order to add a user to a group, the
 681 | DS should provide the minimum number of KeyPackages necessary to
 682 | satisfy the request.  For example, if the request specifies the MLS version, the
 683 | DS might provide one KeyPackage per supported cipher suite, even if it has
 684 | multiple such KeyPackages to enable the corresponding client to be added to
 685 | multiple groups before needing to upload more fresh KeyPackages.
 686 | 
 687 | In order to avoid replay attacks and provide forward secrecy for messages sent
 688 | using the initial keying material, KeyPackages are intended to be used only
 689 | once, and `init_key` is intended to be deleted by the client after decryption
 690 | of the Welcome message. The DS is responsible for ensuring that
 691 | each KeyPackage is only used to add its client to a single group, with the
 692 | possible exception of a "last resort" KeyPackage that is specially designated
 693 | by the client to be used multiple times. Clients are responsible for providing
 694 | new KeyPackages as necessary in order to minimize the chance that the "last
 695 | resort" KeyPackage will be used.
 696 | 
 697 | > **Recommendation:** Ensure that "last resort" KeyPackages don't get used by
 698 | > provisioning enough standard KeyPackages.
 699 | 
 700 | > **Recommendation:** Rotate "last resort" KeyPackages as soon as possible
 701 | > after being used or if they have been stored for a prolonged period of time.
 702 | > Overall, avoid reusing "last resort" KeyPackages as much as possible.
 703 | 
 704 | > **Recommendation:** Ensure that the client for which a "last resort" KeyPackage
 705 | > has been used updates its leaf keys as early as possible.
 706 | 
 707 | > **Recommendation:** Ensure that clients delete the private component
 708 | > of their `init_key` after processing a Welcome message, or after the
 709 | > rotation of the "last resort" KeyPackage.
 710 | 
 711 | Note that KeyPackages need to be updated whenever the corresponding
 712 | signature keys are changed.
 713 | 
 714 | ## Delivery of Messages {#delivery-guarantees}
 715 | 
 716 | The main responsibility of the DS is to ensure delivery of
 717 | messages. Some MLS messages need only be delivered to specific clients (e.g., a
 718 | Welcome message initializing a new member's state), while others need to be
 719 | delivered to all the members of a group.  The DS may enable the
 720 | latter delivery pattern via unicast channels (sometimes known as "client
 721 | fanout"), broadcast channels ("server fanout"), or a mix of both.
 722 | 
 723 | For the most part, MLS does not require the DS to deliver messages
 724 | in any particular order. Applications can set policies that control their
 725 | tolerance for out-of-order messages (see {{operational-requirements}}), and
 726 | messages that arrive significantly out of order can be dropped without otherwise
 727 | affecting the protocol. There are two exceptions to this. First, Proposal
 728 | messages should all arrive before the Commit that references them.  Second,
 729 | because an MLS group has a linear history of epochs, the members of the group
 730 | must agree on the order in which changes are applied.  Concretely, the group
 731 | must agree on a single MLS Commit message that ends each epoch and begins the
 732 | next one.
 733 | 
 734 | In practice, there's a realistic risk of two members generating Commit messages
 735 | at the same time, based on the same epoch, and both attempting to send them to
 736 | the group at the same time. The extent to which this is a problem, and the
 737 | appropriate solution, depend on the design of the DS. Per the CAP
 738 | theorem {{CAPBR}}, there are two general classes of distributed systems that the
 739 | DS might fall into:
 740 | 
 741 | * Consistent and Partition-tolerant, or Strongly Consistent, systems, which can provide
 742 |   a globally consistent view of data but have the inconvenience of clients needing
 743 |   to handle rejected messages.
 744 | 
 745 | * Available and Partition-tolerant, or Eventually Consistent, systems, which continue
 746 |   working despite network issues but may return different views of data to
 747 |   different users.
 748 | 
 749 | Strategies for sequencing messages in strongly and eventually consistent systems
 750 | are described in the next two subsections. Most DSs will use the
 751 | strongly consistent paradigm, but this remains a choice that can be handled in
 752 | coordination with the client and advertised in the KeyPackages.
 753 | 
 754 | However, note that a malicious DS could also reorder messages or
 755 | provide an inconsistent view to different users.  The "generation" counter in
 756 | MLS messages provides per-sender loss detection and ordering that cannot be
 757 | manipulated by the DS, but this does not provide complete protection against
 758 | partitioning.  A DS can cause a partition in the group by partitioning key
 759 | exchange messages; this can be detected only by out-of-band comparison (e.g.,
 760 | confirming that all clients have the same `epoch_authenticator` value). A
 761 | mechanism for more robust protections is discussed in
 762 | {{?EXTENSIONS=I-D.ietf-mls-extensions}}.
 763 | 
 764 | Other forms of DS misbehavior are still possible that are not easy
 765 | to detect. For instance, a DS can simply refuse to relay messages
 766 | to and from a given client. Without some sort of side information, other clients
 767 | cannot generally detect this form of Denial-of-Service (DoS) attack.
 768 | 
 769 | ### Strongly Consistent
 770 | 
 771 | With this approach, the DS ensures that some types of incoming
 772 | messages have a linear order and all members agree on that order.  The Delivery
 773 | Service is trusted to break ties when two members send a Commit message at the
 774 | same time.
 775 | 
 776 | As an example, there could be an "ordering server" DS that
 777 | broadcasts all messages received to all users and ensures that all clients see
 778 | messages in the same order. This would allow clients to only apply the first
 779 | valid Commit for an epoch and ignore subsequent Commits. Clients that send a Commit
 780 | would then wait to apply it until it is broadcast back to them by the Delivery
 781 | Service, assuming that they do not receive another Commit first.
 782 | 
 783 | Alternatively, the DS can rely on the `epoch` and `content_type`
 784 | fields of an MLSMessage to provide an order only to handshake messages, and
 785 | possibly even filter or reject redundant Commit messages proactively to prevent
 786 | them from being broadcast. There is some risk associated with filtering; this
 787 | is discussed further in {{invalid-commits}}.
 788 | 
 789 | ### Eventually Consistent
 790 | 
 791 | With this approach, the DS is built in a way that may be
 792 | significantly more available or performant than a strongly consistent
 793 | system, but it offers weaker consistency guarantees. Messages
 794 | may arrive at different
 795 | clients in different orders and with varying amounts of latency, which means
 796 | clients are responsible for reconciliation.
 797 | 
 798 | This type of DS might arise, for example, when group members are
 799 | sending each message to each other member individually or when a distributed
 800 | peer-to-peer network is used to broadcast messages.
 801 | 
 802 | Upon receiving a Commit from the DS, clients can either:
 803 | 
 804 | 1. Pause sending new messages for a short amount of time to account for a
 805 |    reasonable degree of network latency and see if any other Commits are
 806 |    received for the same epoch. If multiple Commits are received, the clients
 807 |    can use a deterministic tie-breaking policy to decide which to accept, and
 808 |    then resume sending messages as normal.
 809 | 
 810 | 2. Accept the Commit immediately but keep a copy of the previous group state for
 811 |    a short period of time. If another Commit for a past epoch is received,
 812 |    clients use a deterministic tie-breaking policy to decide if they should
 813 |    continue using the Commit they originally accepted or revert and use the
 814 |    later one. Note that any copies of previous or forked group states must be
 815 |    deleted within a reasonable amount of time to ensure that the protocol provides
 816 |    forward secrecy.
 817 | 
 818 | If the Commit references an unknown proposal, group members may need to solicit
 819 | the DS or other group members individually for the contents of the
 820 | proposal.
 821 | 
 822 | ### Welcome Messages
 823 | 
 824 | Whenever a Commit adds new members to a group, MLS requires the committer to
 825 | send a Welcome message to the new members. Applications should ensure that
 826 | Welcome messages are coupled with the tie-breaking logic for Commits (see
 827 | {{strongly-consistent}} and {{eventually-consistent}}). That is, when multiple
 828 | Commits are sent for the same epoch, applications need to ensure that only
 829 | Welcome messages corresponding to the Commit that "succeeded" are processed by
 830 | new members.
 831 | 
 832 | This is particularly important when groups are being reinitialized. When a group
 833 | is reinitialized, it is restarted with a different protocol version and/or
 834 | cipher suite but identical membership. Whenever an authorized member sends and
 835 | commits a ReInit proposal, this immediately freezes the existing group and
 836 | triggers the creation of a new group with a new `group_id`.
 837 | 
 838 | Ideally, the new group would be created by the same member that committed the
 839 | `ReInit` proposal (including sending Welcome messages for the new group to all
 840 | of the previous group's members). However, this operation is not always atomic,
 841 | so it's possible for a member to go offline after committing a ReInit proposal
 842 | but before creating the new group. If this happens, it's necessary for another
 843 | member to continue the reinitialization by creating the new group and sending
 844 | out Welcome messages.
 845 | 
 846 | This has the potential to create a race condition, where multiple members try to
 847 | continue the reinitialization at the same time, and members receive multiple
 848 | Welcome messages for each attempt at reinitializing the same group. Ensuring
 849 | that all members agree on which reinitialization attempt is "correct" is key to
 850 | preventing this from causing forks.
 851 | 
 852 | ## Invalid Commits
 853 | 
 854 | Situations can arise where a malicious or buggy client sends a Commit that is
 855 | not accepted by all members of the group, and the DS is not able to detect this
 856 | and reject the Commit.  For example, a buggy client might send an encrypted
 857 | Commit with an invalid set of proposals, or a malicious client might send a
 858 | malformed Commit of the form described in {{Section 16.12 of RFC9420}}.
 859 | 
 860 | In situations where the DS is attempting to filter redundant Commits, the DS
 861 | might update its internal state under the assumption that a Commit has succeeded
 862 | and thus end up in a state inconsistent with the members of the group.  For
 863 | example, the DS might think that the current epoch is now `n+1` and reject any
 864 | commits from other epochs, while the members think the epoch is `n`, and as a
 865 | result, the group is stuck -- no member can send a Commit that the DS will
 866 | accept.
 867 | 
 868 | Such "desynchronization" problems can arise even when the DS takes
 869 | no stance on which Commit is "correct" for an epoch. The DS can enable clients
 870 | to choose between Commits, for example by providing Commits in the order
 871 | received and allowing clients to reject any Commits that
 872 | violate their view of the group's policies. As such, all honest and
 873 | correctly implemented clients will arrive at the same "first valid Commit" and
 874 | choose to process it. Malicious or buggy clients that process a different Commit
 875 | will end up in a forked view of the group.
 876 | 
 877 | When these desynchronizations happen, the application may choose to take action
 878 | to restore the functionality of the group.  These actions themselves can have
 879 | security implications.  For example, a client developer might have a client
 880 | automatically rejoin a group, using an external join, when it processes an
 881 | invalid Commit.  In this operation, however, the client trusts that the
 882 | GroupInfo provided by the DS faithfully represents the state of the group, and
 883 | not, say, an earlier state containing a compromised leaf node. In addition, the
 884 | DS may be able to trigger this condition by deliberately sending the victim an
 885 | invalid Commit. In certain scenarios, this trust can enable the DS or a
 886 | malicious insider to undermine the post-compromise security guarantees provided
 887 | by MLS.
 888 | 
 889 | Actions to recover from desynchronization can also have availability and DoS
 890 | implications.  For example, if a recovery mechanism relies on external joins, a
 891 | malicious member that deliberately posts an invalid Commit could also post a
 892 | corrupted GroupInfo object in order to prevent victims from rejoining the group.
 893 | Thus, careful analysis of security implications should be made for any system
 894 | for recovering from desynchronization.
 895 | 
 896 | # Functional Requirements
 897 | 
 898 | MLS is designed as a large-scale group messaging protocol and hence aims to
 899 | provide both performance and security (e.g., integrity and confidentiality)
 900 | to its users. Messaging systems that implement MLS provide support for
 901 | conversations involving two or more members, and aim to scale to groups with
 902 | tens of thousands of members, typically including many users using multiple devices.
 903 | 
 904 | ## Membership Changes
 905 | 
 906 | MLS aims to provide agreement on group membership, meaning that all group
 907 | members have agreed on the list of current group members.
 908 | 
 909 | Some applications may wish to enforce Access Control Lists (ACLs) to limit addition or removal of group
 910 | members to privileged clients or users. Others may wish to require
 911 | authorization from the current group members or a subset thereof.  Such policies
 912 | can be implemented at the application layer, on top of MLS. Regardless, MLS does
 913 | not allow for or support addition or removal of group members without informing
 914 | all other members.
 915 | 
 916 | Membership of an MLS group is managed at the level of individual clients.  In
 917 | most cases, a client corresponds to a specific device used by a user. If a user
 918 | has multiple devices, the user will generally be represented in a group by
 919 | multiple clients (although applications could choose to have devices share
 920 | keying material).  If an application wishes to implement operations at the level
 921 | of users, it is up to the application to track which clients belong to a given
 922 | user and ensure that they are added/removed consistently.
 923 | 
 924 | MLS provides two mechanisms for changing the membership of a group.  The primary
 925 | mechanism is for an authorized member of the group to send a Commit that adds or
 926 | removes other members.  A secondary mechanism is an "external join": A member of
 927 | the group publishes certain information about the group, which a new member can
 928 | use to construct an "external" Commit message that adds the new member to the
 929 | group.  (There is no similarly unilateral way for a member to leave the group;
 930 | they must be removed by a remaining member.)
 931 | 
 932 | With both mechanisms, changes to the membership are initiated from inside the
 933 | group.  When members perform changes directly, this is clearly the case.
 934 | External joins are authorized indirectly, in the sense that a member publishing
 935 | a GroupInfo object authorizes anyone to join who has access to the GroupInfo
 936 | object, subject to whatever access control policies the application applies
 937 | for external joins.
 938 | 
 939 | Both types of joins are done via a Commit message, which could be
 940 | blocked by the DS or rejected by clients if the join is not authorized.  The
 941 | former approach requires that Commits be visible to the DS; the latter approach
 942 | requires that clients all share a consistent policy. In the unfortunate event
 943 | that an unauthorized member is able to join, MLS enables any member to remove
 944 | them.
 945 | 
 946 | Application setup may also determine other criteria for membership validity. For
 947 | example, per-device signature keys can be signed by an identity key recognized
 948 | by other participants. If a certificate chain is used to authenticate device
 949 | signature keys, then revocation by the owner adds an alternative mechanism to prompt
 950 | membership removal.
 951 | 
 952 | An MLS group's secrets change on every change of membership, so each client only
 953 | has access to the secrets used by the group while they are a member.  Messages
 954 | sent before a client joins or after they are removed are protected with keys
 955 | that are not accessible to the client.  Compromise of a member removed from a
 956 | group does not affect the security of messages sent after their removal.
 957 | Messages sent during the client's membership are also secure as long as the
 958 | client has properly implemented the MLS deletion schedule, which calls for the
 959 | secrets used to encrypt or decrypt a message to be deleted after use, along with
 960 | any secrets that could be used to derive them.
 961 | 
 962 | ## Parallel Groups
 963 | 
 964 | Any user or client may have membership in several groups simultaneously.  The
 965 | set of members of any group may or may not overlap with the members of
 966 | another group. MLS guarantees that the FS and PCS goals within a given group are
 967 | maintained and not weakened by user membership in multiple groups. However,
 968 | actions in other groups likewise do not strengthen the FS and PCS guarantees
 969 | within a given group, e.g., key updates within a given group following a device
 970 | compromise do not provide PCS healing in other groups; each group must be
 971 | updated separately to achieve these security objectives.  This also applies to
 972 | future groups that a member has yet to join, which are likewise unaffected by
 973 | updates performed in current groups.
 974 | 
 975 | Applications can strengthen connectivity among parallel groups by requiring
 976 | periodic key updates from a user across all groups in which they have
 977 | membership.
 978 | 
 979 | MLS provides a pre-shared key (PSK) mechanism that can be used to link healing properties
 980 | among parallel groups.  For example, suppose a common member M of two groups A
 981 | and B has performed a key update in group A but not in group B.  The key update
 982 | provides PCS with regard to M in group A.  If a PSK is exported from group A and
 983 | injected into group B, then some of these PCS properties carry over to group B,
 984 | since the PSK and secrets derived from it are only known to the new, updated
 985 | version of M, not to the old, possibly compromised version of M.
 986 | 
 987 | ## Asynchronous Usage
 988 | 
 989 | No operation in MLS requires two distinct clients or members to be online
 990 | simultaneously. In particular, members participating in conversations protected
 991 | using MLS can update the group's keys, add new members or remove existing ones,
 992 | and send messages without waiting for another user's reply.
 993 | 
 994 | Messaging systems that implement MLS have to provide a transport layer for
 995 | delivering messages asynchronously and reliably.
 996 | 
 997 | ## Access Control
 998 | 
 999 | Because all clients within a group (members) have access to the shared
1000 | cryptographic material, the MLS protocol allows each member of the messaging group
1001 | to perform operations. However, every service/infrastructure has control over
1002 | policies applied to its own clients. Applications managing MLS clients can be
1003 | configured to allow for specific group operations. On the one hand, an
1004 | application could decide that a group administrator will be the only member to
1005 | perform Add and Remove operations. On the other hand, in many settings such as
1006 | open discussion forums, joining can be allowed for anyone.
1007 | 
1008 | While MLS application messages are always encrypted,
1009 | MLS handshake messages can be sent either encrypted (in an MLS
1010 | PrivateMessage) or unencrypted (in an MLS PublicMessage). Applications
1011 | may be designed such that intermediaries need to see handshake
1012 | messages, for example to enforce policy on which commits are allowed,
1013 | or to provide MLS ratchet tree data in a central location. If
1014 | handshake messages are unencrypted, it is especially important that
1015 | they be sent over a channel with strong transport encryption
1016 | (see {{security-and-privacy-considerations}}) in order to prevent external
1017 | attackers from monitoring the status of the group. Applications that
1018 | use unencrypted handshake messages may take additional steps to reduce
1019 | the amount of metadata that is exposed to the intermediary. Everything
1020 | else being equal, using encrypted handshake messages provides stronger
1021 | privacy properties than using unencrypted handshake messages,
1022 | as it prevents intermediaries from learning about the structure
1023 | of the group.
1024 | 
1025 | If handshake messages are encrypted, any access
1026 | control policies must be applied at the client, so the application must ensure
1027 | that the access control policies are consistent across all clients to make sure
1028 | that they remain in sync.  If two different policies were applied, the clients
1029 | might disagree on whether to accept or reject a group operation and thus end
1030 | up in different cryptographic states, breaking their ability to communicate.
1031 | 
1032 | > **Recommendation:** Avoid using inconsistent access control policies,
1033 | > especially when using encrypted group operations.
1034 | 
1035 | MLS allows actors outside the group to influence the group in two ways: External
1036 | signers can submit proposals for changes to the group, and new joiners can use
1037 | an external join to add themselves to the group.  The `external_senders`
1038 | extension ensures that all members agree on which signers are allowed to send
1039 | proposals, but any other policies must be assured to be consistent, as noted above.
1040 | 
1041 | > **Recommendation:** Have an explicit group policy setting the conditions under
1042 | > which external joins are allowed.
1043 | 
1044 | ## Handling Authentication Failures
1045 | 
1046 | Within an MLS group, every member is authenticated to every other member by
1047 | means of credentials issued and verified by the AS.  MLS
1048 | does not prescribe what actions, if any, an application should take in the event
1049 | that a group member presents an invalid credential.  For example, an application
1050 | may require such a member to be immediately evicted or may allow some grace
1051 | period for the problem to be remediated. To avoid operational problems, it is
1052 | important for all clients in a group to have a consistent view of which
1053 | credentials in a group are valid, and how to respond to invalid credentials.
1054 | 
1055 | > **Recommendation:** Have a uniform credential validation process to ensure
1056 | > that all group members evaluate other members' credentials in the same way.
1057 | 
1058 | > **Recommendation:** Have a uniform policy for how invalid credentials are
1059 | > handled.
1060 | 
1061 | In some authentication systems, it is possible for a previously valid credential
1062 | to become invalid over time.  For example, in a system based on X.509
1063 | certificates, credentials can expire or be revoked.  The MLS update mechanisms
1064 | allow a client to replace an old credential with a new one. This is best done
1065 | before the old credential becomes invalid.
1066 | 
1067 | > **Recommendation:** Proactively rotate credentials, especially if a credential
1068 | > is about to become invalid.
1069 | 
1070 | ## Recovery After State Loss {#state-loss}
1071 | 
1072 | Group members whose local MLS state is lost or corrupted can reinitialize their
1073 | state by rejoining the group as a new member and removing the member
1074 | representing their earlier state.  An application can require that a client
1075 | performing such a reinitialization prove its prior membership with a PSK that
1076 | was exported from the previous state.
1077 | 
1078 | There are a few practical challenges to this approach.  For example, the
1079 | application will need to ensure that all members have the required PSK,
1080 | including any new members that have joined the group since the epoch in which
1081 | the PSK was issued.  And of course, if the PSK is lost or corrupted along with
1082 | the member's other state, then it cannot be used to recover.
1083 | 
1084 | Reinitializing in this way does not provide the member with access to group
1085 | messages exchanged during the state loss window, but enables proof of prior
1086 | membership in the group. Applications may choose various configurations for
1087 | providing lost messages to valid group members that are able to prove prior
1088 | membership.
1089 | 
1090 | ## Support for Multiple Devices
1091 | 
1092 | It is common for users within a group to own multiple devices. A new
1093 | device can be added to a group and be considered as a new client by the
1094 | protocol. This client will not gain access to the history even if it is owned by
1095 | someone who owns another member of the group.  MLS does not provide direct
1096 | support for restoring history in this case, but applications can elect to
1097 | provide such a mechanism outside of MLS.  Such mechanisms, if used, may reduce
1098 | the FS and PCS guarantees provided by MLS.
1099 | 
1100 | ## Extensibility
1101 | 
1102 | The MLS protocol provides several extension points where additional information
1103 | can be provided.  Extensions to KeyPackages allow clients to disclose additional
1104 | information about their capabilities.  Groups can also have extension data
1105 | associated with them, and the group agreement properties of MLS will confirm
1106 | that all members of the group agree on the content of these extensions.
1107 | 
1108 | ## Application Data Framing and Type Advertisements
1109 | 
1110 | Application messages carried by MLS are opaque to the protocol and can contain
1111 | arbitrary data. Each application that uses MLS needs to define the format of
1112 | its `application_data` and any mechanism necessary to determine the format of
1113 | that content over the lifetime of an MLS group. In many applications, this means
1114 | managing format migrations for groups with multiple members who may each be
1115 | offline at unpredictable times.
1116 | 
1117 | > **Recommendation:** Use the content mechanism defined in
1118 | > {{EXTENSIONS}}, unless the specific application defines another
1119 | > mechanism that more appropriately addresses the same requirements for that
1120 | > application of MLS.
1121 | 
1122 | The MLS framing for application messages also provides a field where clients can
1123 | send information that is authenticated but not encrypted.  Such information can
1124 | be used by servers that handle the message, but group members are assured that
1125 | it has not been tampered with.
1126 | 
1127 | ## Federation
1128 | 
1129 | The protocol aims to be compatible with federated environments. While this
1130 | document does not specify all necessary mechanisms required for federation,
1131 | multiple MLS implementations can interoperate to form federated systems if they
1132 | use compatible authentication mechanisms, cipher suites, application content, and
1133 | infrastructure functionalities. Federation is described in more detail in
1134 | {{?FEDERATION=I-D.ietf-mls-federation}}.
1135 | 
1136 | ## Compatibility with Future Versions of MLS
1137 | 
1138 | It is important that multiple versions of MLS be able to coexist in the
1139 | future. Thus, MLS offers a version negotiation mechanism; this mechanism
1140 | prevents version downgrade attacks where an attacker would actively rewrite
1141 | messages with a lower protocol version than the messages originally offered by the
1142 | endpoints. When multiple versions of MLS are available, the negotiation protocol
1143 | guarantees that the creator is able to select the best version out of those
1144 | supported in common by the group.
1145 | 
1146 | In MLS 1.0, the creator of the group is responsible for selecting the best
1147 | cipher suite supported across clients. Each client is able to verify availability
1148 | of protocol version, cipher suites, and extensions at all times once it has at
1149 | least received the first group operation message.
1150 | 
1151 | Each member of an MLS group advertises the protocol functionality they support.
1152 | These capability advertisements can be updated over time, e.g., if client
1153 | software is updated while the client is a member of a group. Thus, in addition
1154 | to preventing downgrade attacks, the members of a group can also observe when it
1155 | is safe to upgrade to a new cipher suite or protocol version.
1156 | 
1157 | # Operational Requirements
1158 | 
1159 | MLS is a security layer that needs to be integrated with an application. A
1160 | fully functional deployment of MLS will have to make a number of decisions about
1161 | how MLS is configured and operated.  Deployments that wish to interoperate will
1162 | need to make compatible decisions. This section lists all of the dependencies of
1163 | an MLS deployment that are external to the protocol specification, but would
1164 | still need to be aligned within a given MLS deployment, or for two deployments
1165 | to potentially interoperate.
1166 | 
1167 | The protocol has a built-in ability to negotiate protocol versions,
1168 | cipher suites, extensions, credential types, and additional proposal types. For
1169 | two deployments to interoperate, they must have overlapping support in each of
1170 | these categories. The `required_capabilities` extension (Section 7.2 of
1171 | {{!RFC9420}}) can promote interoperability with a wider set of clients by
1172 | ensuring that certain functionality continues to be supported by a group, even
1173 | if the clients in the group aren't currently relying on it.
1174 | 
1175 | MLS relies on the following network services, which need to be compatible in
1176 | order for two different deployments based on them to interoperate.
1177 | 
1178 | - An **Authentication Service**, described fully in {{authentication-service}},
1179 |   defines the types of credentials which may be used in a deployment and
1180 |   provides methods for:
1181 |   1. Issuing new credentials with a relevant credential lifetime,
1182 |   2. Validating a credential against a reference identifier,
1183 |   3. Validating whether or not two credentials represent the same client, and
1184 |   4. Optionally revoking credentials which are no longer authorized.
1185 | 
1186 | - A **Delivery Service**, described fully in {{delivery-service}}, provides
1187 |   methods for:
1188 |   1. Delivering messages for a group to all members in the group.
1189 |   2. Delivering Welcome messages to new members of a group.
1190 |   3. Uploading new KeyPackages for a user's own clients.
1191 |   4. Downloading KeyPackages for specific clients. Typically, KeyPackages are
1192 |      used once and consumed.
1193 | 
1194 | - Additional services may or may not be required, depending on the application
1195 |   design:
1196 | 
1197 |   - In cases where group operations are not encrypted, the DS has the ability to
1198 |     observe and maintain a copy of the public group state. In particular, this
1199 |     is useful for either (1) clients that do not have the ability to send the full public
1200 |     state in a Welcome message when inviting a user or (2) clients that need to
1201 |     recover from losing their state. Such public state can contain privacy-sensitive
1202 |     information such as group members' credentials and related public
1203 |     keys; hence, services need to carefully evaluate the privacy impact of
1204 |     storing this data on the DS.
1205 |   - If external joiners are allowed, there must be a method for publishing a
1206 |     serialized `GroupInfo` object (with an `external_pub` extension) that
1207 |     corresponds to a specific group and epoch, and for keeping that object in sync with
1208 |     the state of the group.
1209 |   - If an application chooses not to allow external joining, it may
1210 |     instead provide a method for external users to solicit group members (or a
1211 |     designated service) to add them to a group.
1212 |   - If the application uses PSKs that members of a group may not have access to
1213 |     (e.g., to control entry into the group or to prove membership in the group
1214 |     in the past, as discussed in {{state-loss}}), there must be a method for distributing
1215 |     these PSKs to group members who might not have them -- for instance, if they
1216 |     joined the group after the PSK was generated.
1217 |   - If an application wishes to detect and possibly discipline members that send
1218 |     malformed commits with the intention of corrupting a group's state, there
1219 |     must be a method for reporting and validating malformed commits.
1220 | 
1221 | MLS requires the following parameters to be defined, which must be the same for
1222 | two implementations to interoperate:
1223 | 
1224 | - The maximum total lifetime that is acceptable for a KeyPackage.
1225 | 
1226 | - How long to store the resumption PSK for past epochs of a group.
1227 | 
1228 | - The degree of tolerance that's allowed for out-of-order message delivery:
1229 |   - How long to keep unused nonce and key pairs for a sender.
1230 |   - A maximum number of unused key pairs to keep.
1231 |   - A maximum number of steps that clients will move a secret tree ratchet
1232 |     forward in response to a single message before rejecting it.
1233 |   - Whether to buffer messages that aren't yet able to be understood due to
1234 |     other messages not arriving first, and, if so, how many and for how long -- for
1235 |     example, Commit messages that arrive before a proposal they reference or
1236 |     application messages that arrive before the Commit starting an epoch.
1237 | 
1238 | If implementations differ in these parameters, they will interoperate to some
1239 | extent but may experience unexpected failures in certain situations, such as
1240 | extensive message reordering.
1241 | 
1242 | MLS provides the following locations where an application may store arbitrary
1243 | data. The format and intention of any data in these locations must align for two
1244 | deployments to interoperate:
1245 | 
1246 | - Application data, sent as the payload of an encrypted message.
1247 | 
1248 | - Additional authenticated data, sent unencrypted in an otherwise encrypted
1249 |   message.
1250 | 
1251 | - Group IDs, as decided by group creators and used to uniquely identify a group.
1252 | 
1253 | - Application-level identifiers of public key material (specifically,
1254 |   the `application_id` extension as defined in {{Section 5.3.3 of ?RFC9420}}).
1255 | 
1256 | MLS requires the following policies to be defined, which restrict the set of
1257 | acceptable behaviors in a group. These policies must be consistent between
1258 | deployments for them to interoperate:
1259 | 
1260 | - A policy on which cipher suites are acceptable.
1261 | 
1262 | - A policy on any mandatory or forbidden MLS extensions.
1263 | 
1264 | - A policy on when to send proposals and commits in plaintext instead of
1265 |   encrypted.
1266 | 
1267 | - A policy for which proposals are valid to have in a commit, including but not
1268 |   limited to:
1269 |   - When a member is allowed to add or remove other members of the group.
1270 |   - When, and under what circumstances, a reinitialization proposal is allowed.
1271 |   - When proposals from external senders are allowed and how to authorize
1272 |     those proposals.
1273 |   - When external joiners are allowed and how to authorize those external
1274 |     commits.
1275 |   - Which other proposal types are allowed.
1276 | 
1277 | - A policy on when members should commit pending proposals in a group.
1278 | 
1279 | - A policy on how to protect and share the GroupInfo objects needed for
1280 |   external joins.
1281 | 
1282 | - A policy for when two credentials represent the same client, distinguishing
1283 |   the following two cases:
1284 |   - When there are multiple devices for a given user.
1285 |   - When a single device has multiple signature keys -- for instance, if the
1286 |     device has keys corresponding to multiple overlapping time periods.
1287 | 
1288 | - A policy on how long to allow a member to stay in a group without updating its
1289 |   leaf keys before removing that member.
1290 | 
1291 | Finally, there are some additional application-defined behaviors that are
1292 | partially an individual application's decision but may overlap with
1293 | interoperability:
1294 | 
1295 | - When and how to pad messages.
1296 | 
1297 | - When to send a reinitialization proposal.
1298 | 
1299 | - How often clients should update their leaf keys.
1300 | 
1301 | - Whether to prefer sending full commits or partial/empty commits.
1302 | 
1303 | - Whether there should be a `required_capabilities` extension in groups.
1304 | 
1305 | 
1306 | # Security and Privacy Considerations
1307 | 
1308 | MLS adopts the Internet threat model {{?RFC3552}} and therefore assumes that the
1309 | attacker has complete control of the network. It is intended to provide the
1310 | security services described in {{intended-security-guarantees}} in the face of
1311 | attackers who can:
1312 | 
1313 | - Monitor the entire network.
1314 | 
1315 | - Read unprotected messages.
1316 | 
1317 | - Generate, inject, and delete any message in the unprotected
1318 |   transport layer.
1319 | 
1320 | While MLS should be run over a secure transport such as QUIC {{?RFC9000}} or TLS
1321 | {{?RFC8446}}, the security guarantees of MLS do not depend on the
1322 | transport. This departs from the usual design practice of trusting the transport
1323 | because MLS is designed to provide security even in the face of compromised
1324 | network elements, especially the DS.
1325 | 
1326 | Generally, MLS is designed under the assumption that the transport layer is
1327 | present to keep metadata private from network observers, while the MLS protocol
1328 | provides confidentiality, integrity, and authentication guarantees for the
1329 | application data (which could pass through multiple systems). Additional
1330 | properties such as partial anonymity or deniability could also be achieved in
1331 | specific architecture designs.
1332 | 
1333 | In addition, these guarantees are intended to degrade gracefully in the presence
1334 | of compromise of the transport security links as well as of both clients and
1335 | elements of the messaging system, as described in the remainder of this section.
1336 | 
1337 | 
1338 | ## Assumptions on Transport Security Links
1339 | 
1340 | As discussed above, MLS provides the highest level of security when its messages
1341 | are delivered over an encrypted transport, thus preventing attackers from
1342 | selectively interfering with MLS communications as well as
1343 | protecting the already limited amount of metadata. Very little
1344 | information is contained in the unencrypted header of the MLS protocol message
1345 | format for group operation messages, and application messages are always
1346 | encrypted in MLS.
1347 | 
1348 | > **Recommendation:** Use transports that provide reliability and metadata
1349 | > confidentiality whenever possible, e.g., by transmitting MLS messages over
1350 | > a protocol such as TLS {{?RFC8446}} or QUIC {{?RFC9000}}.
1351 | 
1352 | MLS avoids the need to send the full list of recipients to the server for
1353 | dispatching messages because that list could potentially contain tens of
1354 | thousands of recipients. Header metadata in MLS messages typically consists of
1355 | an opaque `group_id`, a numerical value to determine the epoch of the group (the
1356 | number of changes that have been made to the group), and whether the message is
1357 | an application message, a proposal, or a commit.
1358 | 
1359 | Even though some of this metadata is not sensitive on its own, a network
1360 | observer might be able to reconstruct sensitive information by correlating it
1361 | with other data. Using a secure channel to transfer this information will
1362 | prevent a network attacker from accessing this MLS protocol metadata unless
1363 | the attacker can compromise the secure channel.
1364 | 
1365 | ### Integrity and Authentication of Custom Metadata
1366 | 
1367 | MLS provides an authenticated "Additional Authenticated Data" (AAD) field for
1368 | applications to make data available outside a PrivateMessage, while
1369 | cryptographically binding it to the message.
1370 | 
1371 | > **Recommendation:** Use the "Additional Authenticated Data" field of the
1372 | > PrivateMessage instead of using other unauthenticated means of sending
1373 | > metadata throughout the infrastructure. If the data should be kept private, the
1374 | > infrastructure should use encrypted application messages instead.
1375 | 
1376 | ### Metadata Protection for Unencrypted Group Operations
1377 | 
1378 | Having no secure channel to exchange MLS messages can have a serious impact on
1379 | privacy when transmitting unencrypted group operation messages. Observing the
1380 | contents and signatures of the group operation messages may lead an adversary to
1381 | extract information about the group membership.
1382 | 
1383 | > **Recommendation:** Never use the unencrypted mode for group operations
1384 | > without using a secure channel for the transport layer.
1385 | 
1386 | ### DoS Protection
1387 | 
1388 | In general, we do not consider DoS resistance to be the
1389 | responsibility of the protocol. However, it should not be possible for anyone
1390 | aside from the DS to perform a trivial DoS attack from which it is
1391 | hard to recover. This can be achieved through the secure transport layer,
1392 | which prevents selective attacks on MLS communications by network
1393 | attackers.
1394 | 
1395 | In the centralized setting, DoS protection can typically be performed by using
1396 | tickets or cookies which identify users to a service for a certain number of
1397 | connections. Such a system helps in preventing anonymous clients from sending
1398 | arbitrary numbers of group operation messages to the DS or the MLS
1399 | clients.
1400 | 
1401 | > **Recommendation:** Use credentials uncorrelated with specific users to help
1402 | > prevent DoS attacks, in a privacy-preserving manner. Note that the privacy of
1403 | > these mechanisms has to be adjusted in accordance with the privacy expected
1404 | > from secure transport links. (See more discussion in the next section.)
1405 | 
1406 | ### Message Suppression and Error Correction
1407 | 
1408 | As noted above, MLS is designed to provide some robustness in the face of
1409 | tampering within the secure transport, e.g., tampering by the DS.
1410 | The confidentiality and authenticity properties of MLS prevent the DS from
1411 | reading or writing messages.  MLS also provides a few tools for detecting
1412 | message suppression, with the caveat that message suppression cannot always be
1413 | distinguished from transport failure.
1414 | 
1415 | Each encrypted MLS message carries a per-sender incrementing "generation" number.
1416 | If a group member observes a gap in the generation
1417 | sequence for a sender, then they know that they have missed a message from that
1418 | sender.  MLS also provides a facility for group members to send authenticated
1419 | acknowledgments of application messages received within a group.
1420 | 
1421 | As discussed in {{delivery-service}}, the DS is trusted to select
1422 | the single Commit message that is applied in each epoch from among the Commits sent
1423 | by group members.  Since only one Commit per epoch is meaningful, it's not
1424 | useful for the DS to transmit multiple Commits to clients.  The risk remains
1425 | that the DS will use this selection ability maliciously.
1426 | 
1427 | ## Intended Security Guarantees
1428 | 
1429 | MLS aims to provide a number of security guarantees, covering authentication, as
1430 | well as confidentiality guarantees to different degrees in different scenarios.
1431 | 
1432 | ### Message Secrecy and Authentication {#message-secrecy-authentication}
1433 | 
1434 | MLS enforces the encryption of application messages and thus generally
1435 | guarantees authentication and confidentiality of application messages sent in a
1436 | group.
1437 | 
1438 | In particular, this means that only other members of a given group can decrypt
1439 | the payload of a given application message, which includes information about the
1440 | sender of the message.
1441 | 
1442 | Similarly, group members receiving a message from another group member can
1443 | authenticate that group member as the sender of the message and verify the
1444 | message's integrity.
1445 | 
1446 | Message content can be deniable if the signature keys are exchanged over a
1447 | deniable channel prior to signing messages.
1448 | 
1449 | Depending on the group settings, handshake messages can be encrypted as well. If
1450 | that is the case, the same security guarantees apply.
1451 | 
1452 | MLS optionally allows the addition of padding to messages, mitigating the amount
1453 | of information leaked about the length of the plaintext to an observer on the
1454 | network.
1455 | 
1456 | ### Forward Secrecy and Post-Compromise Security {#fs-and-pcs}
1457 | 
1458 | MLS provides additional protection regarding secrecy of past messages and future
1459 | messages. These cryptographic security properties are forward secrecy (FS) and
1460 | post-compromise security (PCS).
1461 | 
1462 | FS means that access to all encrypted traffic history combined with
1463 | access to all current keying material on clients will not defeat the
1464 | secrecy properties of messages older than the oldest key of the
1465 | compromised client.  Note that this means that clients have to delete the appropriate
1466 | keys as soon as they have been used with the expected message;
1467 | otherwise, the secrecy of the messages and the security of MLS are
1468 | considerably weakened.
1469 | 
1470 | PCS means that if a group member's state is compromised at some time t1 but the
1471 | group member subsequently performs an update at some time t2, then all MLS
1472 | guarantees apply to messages sent by the member after time t2 and to messages
1473 | sent by other members after they have processed the update. For example, if an attacker learns
1474 | all secrets known to Alice at time t1, including both Alice's long-term secret
1475 | keys and all shared group keys, but Alice performs a key update at time t2, then
1476 | the attacker is unable to violate any of the MLS security properties after the
1477 | updates have been processed.
1478 | 
1479 | Both of these properties are satisfied even against compromised DSs and ASes in
1480 | the case where some other mechanism for verifying keys is in use, such as Key
1481 | Transparency {{KT}}.
1482 | 
1483 | Confidentiality is mainly ensured on the client side.  Because FS
1484 | and PCS rely on the active deletion and
1485 | replacement of keying material, any client which is persistently offline may
1486 | still be holding old keying material and thus be a threat to both FS and PCS if
1487 | it is later compromised.
1488 | 
1489 | MLS partially defends against this problem by having active members include
1490 | new keying material. However, not much can be done on the inactive side,
1491 | especially in the case where the client has not processed messages.
1492 | 
1493 | > **Recommendation:** Mandate key updates from clients that are not otherwise
1494 | > sending messages and evict clients that are idle for too long.
1495 | 
1496 | These measures will reduce the ability of idle compromised clients to
1497 | decrypt a potentially long set of messages that might have been sent
1498 | after the point of compromise.
1499 | 
1500 | The precise details of such mechanisms are a matter of local policy and beyond
1501 | the scope of this document.
1502 | 
1503 | ### Non-Repudiation vs. Deniability {#Non-Repudiation-vs-Deniability}
1504 | 
1505 | 
1506 | MLS provides strong authentication within a group, such that a group member
1507 | cannot send a message that appears to be from another group member.
1508 | Additionally, some services require that a recipient be able to prove to the
1509 | service provider that a message was sent by a given client, in order to report
1510 | abuse. MLS supports both of these use cases. In some deployments, these services
1511 | are provided by mechanisms which allow the receiver to prove a message's origin
1512 | to a third party. This is often called "non-repudiation".
1513 | 
1514 | Roughly speaking, "deniability" is the opposite of "non-repudiation", i.e., the
1515 | property that it is impossible to prove to a third party that a message was sent
1516 | by a given sender.  MLS does not make any claims with regard to deniability.  It
1517 | may be possible to operate MLS in ways that provide certain deniability
1518 | properties, but defining the specific requirements and resulting notions of
1519 | deniability requires further analysis.
1520 | 
1521 | 
1522 | ### Associating a User's Clients
1523 | 
1524 | When a user has multiple devices, the base MLS protocol only describes how to
1525 | operate each device as a distinct client in the MLS groups that the user is a
1526 | member of. As a result, the other members of the group will be able to identify
1527 | which of a user's devices sent each message and, therefore, which device the user
1528 | was using at the time. Group members would also be able to detect when the user
1529 | adds or removes authorized devices from their account. For some applications,
1530 | this may be an unacceptable breach of the user's privacy.
1531 | 
1532 | This risk only arises when the leaf nodes for the clients in question provide
1533 | data that can be used to correlate the clients.  One way to mitigate this
1534 | risk is by only doing client-level authentication within MLS. If user-level
1535 | authentication is still desirable, the application would have to provide it
1536 | through some other mechanism.
1537 | 
1538 | It is also possible to maintain user-level authentication while hiding
1539 | information about the clients that a user owns.  This can be done by having the
1540 | clients share cryptographic state, so that they appear as a single client within
1541 | the MLS group. Appearing as a single client has the privacy benefits of no
1542 | longer leaking which device was used to send a particular message and no longer
1543 | leaking the user's authorized devices. However, the application would need to
1544 | provide a synchronization mechanism so that the state of each client remains consistent
1545 | across changes to the MLS group. Flaws in this synchronization mechanism may
1546 | impair the ability of the user to recover from a compromise of one of their
1547 | devices. In particular, state synchronization may make it easier for an attacker
1548 | to use one compromised device to establish exclusive control of a user's
1549 | account, locking them out entirely and preventing them from recovering.
1550 | 
1551 | ## Endpoint Compromise
1552 | 
1553 | The MLS protocol adopts a threat model which includes multiple forms of
1554 | endpoint/client compromise. While adversaries are in a strong position if
1555 | they have compromised an MLS client, there are still situations where security
1556 | guarantees can be recovered thanks to the PCS properties achieved by the MLS
1557 | protocol.
1558 | 
1559 | In this section we will explore the consequences and recommendations regarding
1560 | the following compromise scenarios:
1561 | 
1562 | - The attacker has access to a symmetric encryption key.
1563 | 
1564 | - The attacker has access to an application ratchet secret.
1565 | 
1566 | - The attacker has access to the group secrets for one group.
1567 | 
1568 | - The attacker has access to a signature oracle for any group.
1569 | 
1570 | - The attacker has access to the signature key for one group.
1571 | 
1572 | - The attacker has access to all secrets of a user for all groups (full state
1573 |   compromise).
1574 | 
1575 | ### Compromise of Symmetric Keying Material {#symmetric-key-compromise}
1576 | 
1577 | As described above, each MLS epoch creates a new group secret.
1578 | 
1579 | These group secrets are then used to create a per-sender ratchet secret, which
1580 | in turn is used to create a per-sender
1581 | Authenticated Encryption with Associated Data (AEAD) {{!RFC5116}}
1582 | key that is then used to encrypt MLS plaintext messages.  Each time a message is
1583 | sent, the ratchet secret is used to create a new ratchet secret and a new
1584 | corresponding AEAD key.  Because of the properties of the key derivation
1585 | function, it is not possible to compute a ratchet secret from its corresponding
1586 | AEAD key or compute ratchet secret n-1 from ratchet secret n.
1587 | 
1588 | Below, we consider the compromise of each of these pieces of keying material in
1589 | turn, in ascending order of severity.  While this is a limited kind of
1590 | compromise, it can be realistic in cases of implementation vulnerabilities where
1591 | only part of the memory leaks to the adversary.
1592 | 
1593 | #### Compromise of AEAD Keys
1594 | 
1595 | In some circumstances, adversaries may have access to specific AEAD keys and
1596 | nonces which protect an application message or a group operation message. Compromise of
1597 | these keys allows the attacker to decrypt the specific message encrypted with
1598 | that key but no other; because the AEAD keys are derived one-way from the
1599 | ratchet secret, the attacker cannot generate the next ratchet secret and hence
1600 | cannot derive the next AEAD key.
1601 | 
1602 | In the case of an application message, an AEAD key compromise means that the
1603 | encrypted application message will be leaked as well as the signature over that
1604 | message. This means that the compromise has both confidentiality and privacy
1605 | implications on the future AEAD encryptions of that chain.  In the case of a
1606 | group operation message, only the privacy is affected, as the signature is
1607 | revealed, because the secrets themselves are protected by Hybrid Public Key Encryption
1608 | (HPKE).  Note
1609 | that under that compromise scenario, authentication is not affected in either of
1610 | these cases.  As every member of the group can compute the AEAD keys for all the
1611 | chains (they have access to the group secrets) in order to send and receive
1612 | messages, the authentication provided by the AEAD encryption layer of the common
1613 | framing mechanism is weak. Successful decryption of an AEAD encrypted message
1614 | only guarantees that some member of the group -- or in this case an attacker
1615 | who has compromised the AEAD keys -- sent the message.
1616 | 
1617 | Compromise of the AEAD keys allows the attacker to send an encrypted message
1618 | using that key, but the attacker cannot send a message to a group that appears to be from
1619 | any valid client because the attacker cannot forge the signature. This applies to all the
1620 | forms of symmetric key compromise described in {{symmetric-key-compromise}}.
1621 | 
1622 | #### Compromise of Ratchet Secret Material
1623 | 
1624 | When a ratchet secret is compromised, the adversary can compute both the current
1625 | AEAD keys for a given sender and any future keys for that sender in this
1626 | epoch. Thus, it can decrypt current and future messages by the corresponding
1627 | sender. However, because it does not have previous ratchet secrets, it cannot
1628 | decrypt past messages as long as those secrets and keys have been deleted.
1629 | 
1630 | Because of its forward secrecy guarantees, MLS will also retain secrecy of all
1631 | other AEAD keys generated for *other* MLS clients, outside this dedicated chain
1632 | of AEAD keys and nonces, even within the epoch of the compromise.  MLS provides
1633 | post-compromise security against an active adaptive attacker across epochs for
1634 | AEAD encryption, which means that as soon as the epoch is changed, if the
1635 | attacker does not have access to more secret material they won't be able to
1636 | access any protected messages from future epochs.
1637 | 
1638 | #### Compromise of the Group Secrets of a Single Group for One or More Group Epochs
1639 | 
1640 | An adversary who gains access to a set of group secrets -- as when a member of the
1641 | group is compromised -- is significantly more powerful. In this section, we
1642 | consider the case where the signature keys are not compromised. This can occur
1643 | if the attacker has access to part of the memory containing the group secrets
1644 | but not to the signature keys which might be stored in a secure enclave.
1645 | 
1646 | In this scenario, the adversary gains the ability to compute any number of
1647 | ratchet secrets for the epoch and their corresponding AEAD encryption keys and
1648 | thus can encrypt and decrypt all messages for the compromised epochs.
1649 | 
1650 | If the adversary is passive, it is expected from the PCS properties of the MLS
1651 | protocol that as soon as the compromised party remediates the compromise and
1652 | sends an honest Commit message, the next epochs will provide message secrecy.
1653 | 
1654 | If the adversary is active, the adversary can engage in the protocol itself and
1655 | perform updates on behalf of the compromised party with no ability for an honest
1656 | group to recover message secrecy. However, MLS provides PCS against active
1657 | adaptive attackers through its Remove group operation. This means that as long
1658 | as other members of the group are honest, the protocol will guarantee message
1659 | secrecy for all messages exchanged in the epochs after the compromised party has
1660 | been removed.
1661 | 
1662 | ### Compromise by an Active Adversary with the Ability to Sign Messages
1663 | 
1664 | If an active adversary has compromised an MLS client and can sign messages, two
1665 | different scenarios emerge. In the strongest compromise scenario, the attacker
1666 | has access to the signing key and can forge authenticated messages. In a weaker,
1667 | yet realistic scenario, the attacker has compromised a client but the client
1668 | signature keys are protected with dedicated hardware features which do not allow
1669 | direct access to the value of the private key and instead provide a signature
1670 | API.
1671 | 
1672 | When considering an active adaptive attacker with access to a signature oracle,
1673 | the compromise scenario implies a significant impact on both the secrecy and
1674 | authentication guarantees of the protocol, especially if the attacker also has
1675 | access to the group secrets. In that case, both secrecy and authentication are
1676 | broken.  The attacker can generate any message, for the current and future
1677 | epochs, until the compromise is remediated and the formerly compromised client
1678 | sends an honest update.
1679 | 
1680 | Note that under this compromise scenario, the attacker can perform all
1681 | operations which are available to a legitimate client even without access to the
1682 | actual value of the signature key.
1683 | 
1684 | ### Compromise of Authentication with Access to a Signature Key
1685 | 
1686 | The difference between having access to the value of the signature key and only
1687 | having access to a signing oracle is not about the ability of an active adaptive
1688 | network attacker to perform different operations during the time of the
1689 | compromise; the attacker can perform every operation available to a legitimate
1690 | client in both cases.
1691 | 
1692 | There is a significant difference, however, in terms of recovery after a
1693 | compromise.
1694 | 
1695 | Because of the PCS guarantees provided by the MLS protocol, when a previously
1696 | compromised client recovers from compromise and performs an honest Commit, both
1697 | secrecy and authentication of future messages can be recovered as long as the
1698 | attacker doesn't otherwise get access to the key. Because the adversary doesn't
1699 | have the signing key, they cannot authenticate messages on behalf of the
1700 | compromised party, even if they still have control over some group keys by
1701 | colluding with other members of the group.
1702 | 
1703 | This is in contrast with the case where the signature key is leaked. In that
1704 | case, the compromised endpoint needs to refresh its credentials and invalidate
1705 | the old credentials before the attacker will be unable to authenticate messages.
1706 | 
1707 | Beware that with either oracle or private key access, an active adaptive
1708 | attacker can follow the protocol and request to update its own credential. This
1709 | in turn induces a signature key rotation, which could provide the attacker with
1710 | part or the full value of the private key, depending on the architecture of the
1711 | service provider.
1712 | 
1713 | > **Recommendation:** Signature private keys should be compartmentalized from
1714 | > other secrets and preferably protected by a Hardware Security Module (HSM) or dedicated hardware
1715 | > features to allow recovery of the authentication for future messages after a
1716 | > compromise.
1717 | 
1718 | > **Recommendation:** When the credential type supports revocation, the users of
1719 | > a group should check for revoked keys.
1720 | 
1721 | ### Security Considerations in the Context of a Full State Compromise
1722 | 
1723 | In real-world compromise scenarios, it is often the case that adversaries target
1724 | specific devices to obtain parts of the memory or even the ability to execute
1725 | arbitrary code in the targeted device.
1726 | 
1727 | Also, recall that in this setting, the application will often retain the
1728 | unencrypted messages. If so, the adversary does not have to break encryption at
1729 | all to access sent and received messages. Messages may also be sent by using the
1730 | application to instruct the protocol implementation.
1731 | 
1732 | > **Recommendation:** If messages are stored on the device, they should be
1733 | > protected using encryption at rest, and the keys used should be stored
1734 | > securely using dedicated mechanisms on the device.
1735 | 
1736 | > **Recommendation:** If the threat model of the system includes an adversary
1737 | > that can access the messages on the device without even needing to attack
1738 | > MLS, the application should delete plaintext and ciphertext messages as soon
1739 | > as practical after encryption or decryption.
1740 | 
1741 | Note that this document makes a clear distinction between the way signature keys
1742 | and other group shared secrets must be handled.  In particular, a large set of
1743 | group secrets cannot necessarily be assumed to be protected by an HSM or secure
1744 | enclave features. This is especially true because these keys are frequently used
1745 | and changed with each message received by a client.
1746 | 
1747 | However, the signature private keys are mostly used by clients to send a
1748 | message. They also provide strong authentication guarantees to other clients;
1749 | hence, we consider that their protection by additional security mechanisms should
1750 | be a priority.
1751 | 
1752 | Overall, there is no way to detect or prevent these compromises, as discussed in
1753 | the previous sections. However, separating the application's secret states can
1754 | help recovery after compromise; this is the case for signature keys, but
1755 | similar concerns exist for a client's encryption private keys.
1756 | 
1757 | > **Recommendation:** The secret keys used for public key encryption should be
1758 | > stored similarly to the way the signature keys are stored, as these keys can be
1759 | > used to decrypt the group operation messages and contain the secret material
1760 | > used to compute all the group secrets.
1761 | 
1762 | Even if secure enclaves are not perfectly secure or are even completely broken,
1763 | adopting additional protections for these keys can ease recovery of the secrecy
1764 | and authentication guarantees after a compromise where, for instance, an
1765 | attacker can sign messages without having access to the key. In certain
1766 | contexts, the rotation of credentials might only be triggered by the AS through
1767 | ACLs and hence be beyond the capabilities of the attacker.
1768 | 
1769 | ## Service Node Compromise
1770 | 
1771 | ### General Considerations
1772 | 
1773 | #### Privacy of the Network Connections
1774 | 
1775 | There are many scenarios leading to communication between the application on a
1776 | device and the DS or the AS. In particular,
1777 | when:
1778 | 
1779 | - The application connects to the AS to generate or validate
1780 |   a new credential before distributing it.
1781 | 
1782 | - The application fetches credentials at the DS prior to creating
1783 |   a messaging group (one-to-one or more than two clients).
1784 | 
1785 | - The application fetches service provider information or messages on the
1786 |   DS.
1787 | 
1788 | - The application sends service provider information or messages to the Delivery
1789 |   Service.
1790 | 
1791 | In all these cases, the application will often connect to the service via a
1792 | secure transport which leaks information about the origin of the request, such as
1793 | the IP address and -- depending on the protocol -- the MAC address of the device.
1794 | 
1795 | Similar concerns exist in the peer-to-peer use cases for MLS.
1796 | 
1797 | > **Recommendation:** In the case where privacy or anonymity is
1798 | > important, using adequate protection such as Multiplexed
1799 | > Application Substrate over QUIC Encryption (MASQUE)
1800 | > {{?MASQUE-PROXY=I-D.schinazi-masque-proxy}}, Tor {{Tor}},
1801 | > or a VPN can improve metadata
1802 | > protection.
1803 | 
1804 | More generally, using anonymous credentials in an MLS-based architecture might
1805 | not be enough to provide strong privacy or anonymity properties.
1806 | 
1807 | #### Storage of Metadata and Encryption at Rest on the Servers
1808 | 
1809 | In the case where private data or metadata has to be persisted on the servers
1810 | for functionality (mappings between identities and push tokens, group
1811 | metadata, etc.), it should be stored encrypted at rest and only decrypted upon need
1812 | during the execution. Honest service providers can rely on such "encryption at
1813 | rest" mechanisms to be able to prevent access to the data when not using it.
1814 | 
1815 | > **Recommendation:** Store cryptographic material used for server-side
1816 | > decryption of sensitive metadata on the clients and only send it when needed.
1817 | > The server can use these keys to open and update encrypted data containers,
1818 | > after which it can delete them until the next time it needs them, at
1819 | > which point they can be provided by the client again.
1820 | 
1821 | > **Recommendation:** Rely on group secrets exported from the MLS session for
1822 | > server-side encryption at rest and update the key after each removal from the
1823 | > group. Otherwise, rotate those keys on a regular basis.
1824 | 
1825 | ### Delivery Service Compromise
1826 | 
1827 | MLS is intended to provide strong guarantees in the face of compromise of the
1828 | DS. Even a totally compromised DS should not be able to read messages or inject
1829 | messages that will be acceptable to legitimate clients. It should also not be
1830 | able to undetectably remove, reorder, or replay messages.
1831 | 
1832 | However, a malicious DS can mount a variety of DoS attacks on the system,
1833 | including total DoS attacks (where it simply refuses to forward any messages)
1834 | and partial DoS attacks (where it refuses to forward messages to and from
1835 | specific clients).  As noted in {{delivery-guarantees}}, these attacks are only
1836 | partially detectable by clients without an out-of-band channel. Ultimately,
1837 | failure of the DS to provide reasonable service must be dealt with as a customer
1838 | service matter, not via technology.
1839 | 
1840 | Because the DS is responsible for providing the initial keying material to
1841 | clients, it can provide stale keys. This does not inherently lead to compromise
1842 | of the message stream, but does allow the DS to attack post-compromise security to
1843 | a limited extent. This threat can be mitigated by having initial keys expire.
1844 | 
1845 | Initial keying material (KeyPackages) using the `basic` credential type is more
1846 | vulnerable to replacement by a malicious or compromised DS, as there is no
1847 | built-in cryptographic binding between the identity and the public key of the
1848 | client.
1849 | 
1850 | > **Recommendation:** Prefer a credential type in KeyPackages which includes a
1851 | > strong cryptographic binding between the identity and its key (for example, the
1852 | > `x509` credential type). When using the `basic` credential type, take extra
1853 | > care to verify the identity (typically out of band).
1854 | 
1855 | 
1856 | #### Privacy of Delivery and Push Notifications
1857 | 
1858 | Push tokens provide an important mechanism that is often ignored from
1859 | the standpoint of privacy considerations. In many modern messaging
1860 | architectures, applications are using push notification mechanisms
1861 | typically provided by OS vendors. This is to make sure that when
1862 | messages are available at the DS (or via other
1863 | mechanisms if the DS is not a central server), the recipient
1864 | application on a device knows about it. Sometimes the push
1865 | notification can contain the application message itself, which saves a
1866 | round trip with the DS.
1867 | 
1868 | To "push" this information to the device, the service provider and the OS
1869 | infrastructures use unique per-device, per-application identifiers called
1870 | push tokens. This means that the push notification provider and the service
1871 | provider have information on which devices receive information and at which
1872 | point in time. Alternatively, non-mobile applications could use a WebSocket or
1873 | persistent connection for notifications directly from the DS.
1874 | 
1875 | Even though the service provider and the push notification provider
1876 | can't necessarily access the content (typically encrypted MLS
1877 | messages), no technical mechanism in MLS prevents them from determining
1878 | which devices are recipients of the same message.
1879 | 
1880 | For secure messaging systems, push notifications are often sent in real time, as it
1881 | is not acceptable to create artificial delays for message retrieval.
1882 | 
1883 | > **Recommendation:** If real-time notifications are not necessary, one can
1884 | > delay notifications randomly across recipient devices using a mixnet or other
1885 | > techniques.
1886 | 
1887 | Note that with a legal request to ask the service provider for the push token
1888 | associated with an identifier, it is easy to correlate the token with a second
1889 | request to the company operating the push notification system to get information
1890 | about the device, which is often linked with a real identity via a cloud
1891 | account, a credit card, or other information.
1892 | 
1893 | > **Recommendation:** If stronger privacy guarantees are needed with regard to
1894 | > the push notification provider, the client can choose to periodically connect
1895 | > to the DS without the need for a dedicated push notification
1896 | > infrastructure.
1897 | 
1898 | Applications can also consider anonymous systems for server fanout (for
1899 | example, {{Loopix}}).
1900 | 
1901 | ### Authentication Service Compromise {#as-compromise}
1902 | 
1903 | The Authentication Service design is left to the infrastructure designers. In
1904 | most designs, a compromised AS is a serious matter, as the AS can serve
1905 | incorrect or attacker-provided identities to clients.
1906 | 
1907 | - The attacker can link an identity to a credential.
1908 | 
1909 | - The attacker can generate new credentials.
1910 | 
1911 | - The attacker can sign new credentials.
1912 | 
1913 | - The attacker can publish or distribute credentials.
1914 | 
1915 | An attacker that can generate or sign new credentials may or may not have access
1916 | to the underlying cryptographic material necessary to perform such
1917 | operations. In that last case, it results in windows of time for which all
1918 | emitted credentials might be compromised.
1919 | 
1920 | > **Recommendation:** Use HSMs to store the root signature keys to limit the
1921 | > ability of an adversary with no physical access to extract the top-level
1922 | > signature private key.
1923 | 
1924 | Note that historically some systems generate signature keys on the
1925 | AS and distribute the private keys to clients along with
1926 | their credential. This is a dangerous practice because it allows the AS or an
1927 | attacker who has compromised the AS to silently impersonate the client.
1928 | 
1929 | #### Authentication Compromise: Ghost Users and Impersonation
1930 | 
1931 | One important property of MLS is that all members know which other members are
1932 | in the group at all times. If all members of the group and the AS
1933 | are honest, no parties other than the members of the current group can
1934 | read and write messages protected by the protocol for that group.
1935 | 
1936 | This guarantee applies to the cryptographic identities of the members.
1937 | Details about how to verify the identity of a client depend on the MLS
1938 | credential type used. For example, cryptographic verification of credentials can
1939 | be largely performed autonomously (e.g., without user interaction) by the
1940 | clients themselves for the `x509` credential type.
1941 | 
1942 | In contrast, when MLS clients use the `basic` credential type, some other
1943 | mechanism must be used to verify identities. For instance, the Authentication
1944 | Service could operate some sort of directory server to provide keys, or users
1945 | could verify keys via an out-of-band mechanism.
1946 | 
1947 | > **Recommendation:** Select the MLS credential type with the strongest security
1948 | > which is supported by all target members of an MLS group.
1949 | 
1950 | > **Recommendation:** Do not use the same signature key pair across
1951 | > groups. Update all keys for all groups on a regular basis. Do not preserve
1952 | > keys in different groups when suspecting a compromise.
1953 | 
1954 | If the AS is compromised, it could validate a signature
1955 | key pair (or generate a new one) for an attacker. The attacker could then use this key pair to join a
1956 | group as if it were another of the user's clients.  Because a user can have many
1957 | MLS clients running the MLS protocol, that user may have many signature key pairs
1958 | for multiple devices. These attacks could be very difficult to detect,
1959 | especially in large groups where the UI might not reflect all the changes back
1960 | to the users. If the application participates in a key transparency mechanism in
1961 | which it is possible to determine every key for a given user, then this
1962 | would allow for detection of surreptitiously created false credentials.
1963 | 
1964 | > **Recommendation:** Make sure that MLS clients reflect all the membership
1965 | > changes to the users as they happen. If a choice has to be made because the
1966 | > number of notifications is too high, the client should provide a log of the
1967 | > state of the device so that the user can examine it.
1968 | 
1969 | > **Recommendation:** Provide a key transparency mechanism for the
1970 | > AS to allow public verification of the credentials
1971 | > authenticated by this service.
1972 | 
1973 | While the ways to handle MLS credentials are not defined by the protocol or the
1974 | architecture documents, the MLS protocol has been designed with a mechanism that
1975 | can be used to provide out-of-band authentication to users. The
1976 | `authentication_secret` generated for each user at each epoch of the group is a
1977 | one-time, per-client authentication secret which can be exchanged between users
1978 | to prove their identities to each other. This can be done, for instance, using a QR
1979 | code that can be scanned by the other parties.
1980 | 
1981 | > **Recommendation:** Provide one or more out-of-band authentication mechanisms
1982 | > to limit the impact of an AS compromise.
1983 | 
1984 | We note, again, that the AS may not be a centralized
1985 | system and could be realized by many mechanisms such as establishing prior
1986 | one-to-one deniable channels, gossiping, or using trust on first use (TOFU) for
1987 | credentials used by the MLS protocol.
1988 | 
1989 | Another important consideration is the ease of redistributing new keys on client
1990 | compromise, which helps recover security faster in various cases.
1991 | 
1992 | #### Privacy of the Group Membership
1993 | 
1994 | Group membership is itself sensitive information, and MLS is designed to limit
1995 | the amount of persistent metadata. However, large groups often require an
1996 | infrastructure that provides server fanout.  In the case of client fanout, the
1997 | destination of a message is known by all clients; hence, the server usually does
1998 | not need this information.  However, servers may learn this information through
1999 | traffic analysis.  Unfortunately, in a server-side fanout model, the Delivery
2000 | Service can learn that a given client is sending the same message to a set of
2001 | other clients. In addition, there may be applications of MLS in which the group
2002 | membership list is stored on some server associated with the DS.
2003 | 
2004 | While this knowledge is not a breach of the protocol's authentication or
2005 | confidentiality guarantees, it is a serious issue for privacy.
2006 | 
2007 | Some infrastructures keep a mapping between keys used in the MLS protocol and
2008 | user identities. An attacker with access to this information due to compromise
2009 | or regulation can associate unencrypted group messages (e.g., Commits and
2010 | Proposals) with the corresponding user identity.
2011 | 
2012 | > **Recommendation:** Use encrypted group operation messages to limit privacy
2013 | > risks whenever possible.
2014 | 
2015 | In certain cases, the adversary can access specific bindings between public keys
2016 | and identities. If the signature keys are reused across groups, the adversary
2017 | can get more information about the targeted user.
2018 | 
2019 | > **Recommendation:** Ensure that linking between public keys and identities
2020 | > only happens in expected scenarios.
2021 | 
2022 | ## Considerations for Attacks Outside of the Threat Model
2023 | 
2024 | Physical attacks on devices storing and executing MLS principals are not
2025 | considered in depth in the threat model of the MLS protocol.  While
2026 | non-permanent, non-invasive attacks can sometimes be equivalent to software
2027 | attacks, physical attacks are considered outside of the MLS threat model.
2028 | 
2029 | Compromise scenarios typically consist of a software adversary, which can
2030 | maintain active adaptive compromise and arbitrarily change the behavior of the
2031 | client or service.
2032 | 
2033 | On the other hand, security goals consider that honest clients will always run
2034 | the protocol according to its specification. This relies on implementations of
2035 | the protocol to securely implement the specification, which remains non-trivial.
2036 | 
2037 | > **Recommendation:** Additional steps should be taken to protect the device and
2038 | > the MLS clients from physical compromise. In such settings, HSMs and secure
2039 | > enclaves can be used to protect signature keys.
2040 | 
2041 | ## No Protection Against Replay by Insiders
2042 | 
2043 | MLS does not protect against one group member replaying a PrivateMessage sent by another
2044 | group member within the same epoch that the message was originally sent. Similarly, MLS
2045 | does not protect against the replay (by a group member or otherwise) of a PublicMessage
2046 | within the same epoch that the message was originally sent. Applications for
2047 | which replay is an important risk should apply mitigations at the application layer, as
2048 | discussed below.
2049 | 
2050 | In addition to the risks discussed in {{symmetric-key-compromise}}, an attacker
2051 | with access to the ratchet secrets for an endpoint can replay PrivateMessage
2052 | objects sent by other members of the group by taking the signed content of the
2053 | message and re-encrypting it with a new generation of the original sender's
2054 | ratchet.  If the other members of the group interpret a message with a new
2055 | generation as a fresh message, then this message will appear fresh.  (This is
2056 | possible because the message signature does not cover the `generation` field
2057 | of the message.)  Messages sent as PublicMessage objects similarly lack replay
2058 | protections.  There is no message counter comparable to the `generation` field
2059 | in PrivateMessage.
2060 | 
2061 | Applications can detect replay by including a unique identifier for the message
2062 | (e.g., a counter) in either the message payload or the `authenticated_data`
2063 | field, both of which are included in the signatures for
2064 | PublicMessage and PrivateMessage.
2065 | 
2066 | ## Cryptographic Analysis of the MLS Protocol
2067 | 
2068 | Various academic works have analyzed MLS and the different security guarantees
2069 | it aims to provide. The security of large parts of the protocol has been
2070 | analyzed by {{BBN19}} (for MLS Draft 7), {{ACDT21}} (for MLS Draft 11), and {{AJM20}} (for MLS
2071 | Draft 12).
2072 | 
2073 | Individual components of various drafts of the MLS protocol have been
2074 | analyzed in isolation and with differing adversarial models. For
2075 | example, {{BBR18}}, {{ACDT19}}, {{ACCKKMPPWY19}}, {{AJM20}},
2076 | {{ACJM20}}, {{AHKM21}}, {{CGWZ25}}, and {{WPB25}} analyze the
2077 | ratcheting tree sub-protocol of MLS that facilitates key agreement;
2078 | {{WPBB22}} analyzes the sub-protocol of MLS for group state agreement
2079 | and authentication; and {{BCK21}} analyzes the key derivation paths in
2080 | the ratchet tree and key schedule. Finally, {{CHK21}} analyzes the
2081 | authentication and cross-group healing guarantees provided by MLS.
2082 | 
2083 | # IANA Considerations
2084 | 
2085 | This document has no IANA actions.
2086 | 


--------------------------------------------------------------------------------