├── .flake8
├── .github
├── release.yml
└── workflows
│ ├── docs.yaml
│ └── mega-linter.yml
├── .gitignore
├── .mega-linter.yml
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── .spellcheck.yml
├── .yaml-lint.yml
├── README.md
├── docs
├── css
│ └── extra.css
├── getting_started
│ ├── background.md
│ ├── overview.md
│ └── quickstart.md
├── images
│ ├── logo.png
│ ├── logo.svg
│ ├── logo_color.png
│ └── opaque_prompts_logo.png
├── index.md
├── overrides
│ └── partials
│ │ └── integrations
│ │ └── analytics
│ │ └── custom.html
├── reference
│ └── library_api.md
├── requirements.in
├── requirements.txt
└── spelling_wordlist.txt
├── mkdocs.yml
├── pyproject.toml
├── python-package
├── LICENSE
├── README.md
├── pyproject.toml
├── setup.py
└── src
│ └── opaqueprompts
│ ├── __init__.py
│ ├── authentication.py
│ ├── configuration.py
│ └── opaqueprompts_service.py
└── scripts
└── install_pre_commit.sh
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore =
3 | # E203: Whitespace before ':'
4 | # Causing false positive on list slices
5 | # https://github.com/PyCQA/pycodestyle/issues/373
6 | E203,
7 | # E266: Too many leading '#' for block comment
8 | # This rule is too strict for comment blocks that we currently have
9 | E266,
10 | # W503: Line break occurred before a binary operator
11 | # PEP8 now recommend line breaks should occur before the binary operator
12 | W503
13 | exclude =
14 | .git,
15 | __pycache__,
16 | max-line-length = 79
17 | max-doc-length = 79
18 | max-complexity = 18
19 | select = B,C,E,F,W,T4,B9
20 |
--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
1 | ---
2 | changelog:
3 | exclude:
4 | labels: []
5 | authors: []
6 | categories:
7 | - title: User-facing features
8 | labels:
9 | - feat
10 | - title: Bug fixes
11 | labels:
12 | - fix
13 | - title: Performance improvements
14 | labels:
15 | - perf
16 | - title: Documentation updates
17 | labels:
18 | - docs
19 | - title: Formatting changes
20 | labels:
21 | - style
22 | - title: Refactoring
23 | labels:
24 | - refactor
25 | - title: Test updates
26 | labels:
27 | - test
28 | - title: Build system updates
29 | labels:
30 | - build
31 |
--------------------------------------------------------------------------------
/.github/workflows/docs.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | name: Spellcheck Documentation
3 |
4 | on:
5 | push:
6 | branches:
7 | - dev
8 | - master
9 | paths:
10 | - 'docs/**'
11 | pull_request:
12 | paths:
13 | - 'docs/**'
14 |
15 | # Only allow one run of this workflow per PR at a time.
16 | #
17 | # This will cancel any still-running workflows triggered by a previous commit to
18 | # this PR. Note this will not affect workflows triggered by a push (e.g. merging
19 | # a PR to `dev` or `master`).
20 | concurrency:
21 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
22 | cancel-in-progress: true
23 |
24 | jobs:
25 | spellcheck:
26 | runs-on: ubuntu-latest  # ubuntu-20.04 hosted runners have been retired
27 | steps:
28 | - name: Checkout repo
29 | uses: actions/checkout@v4
30 |
31 | # Spellcheck documentation
32 | - name: Spellcheck
33 | uses: rojopolis/spellcheck-github-actions@0.36.0
34 | with:
35 | config_path: .spellcheck.yml
36 |
--------------------------------------------------------------------------------
/.github/workflows/mega-linter.yml:
--------------------------------------------------------------------------------
1 | ---
2 | # MegaLinter GitHub Action configuration file
3 | # More info at https://oxsecurity.github.io/megalinter
4 | name: MegaLinter
5 |
6 | # Run this workflow every time a new commit pushed to your repository
7 | on:
8 | push:
9 | branches:
10 | - dev
11 | - master
12 | pull_request:
13 | branches:
14 | - dev
15 | - release-*
16 | - hotfix-*
17 | - master
18 |
19 | env:
20 | # Apply linter fixes configuration, see link for details
21 | # https://oxsecurity.github.io/megalinter/latest/configuration/#apply-fixes
22 | APPLY_FIXES: none # do not apply any fixes
23 | APPLY_FIXES_EVENT: pull_request
24 | APPLY_FIXES_MODE: commit
25 |
26 | # Only allow one run of this workflow per PR at a time.
27 | #
28 | # This will cancel any still-running workflows triggered by a previous commit to
29 | # this PR. Note this will not affect workflows triggered by a push (e.g. merging
30 | # a PR to `dev` or `master`).
31 | concurrency:
32 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
33 | cancel-in-progress: true
34 |
35 | jobs:
36 | build:
37 | name: MegaLinter
38 | runs-on: ubuntu-latest
39 | steps:
40 | # Git Checkout
41 | - name: Checkout Code
42 | uses: actions/checkout@v4
43 | with:
44 | token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }}
45 | # If you use VALIDATE_ALL_CODEBASE = true,
46 | # you can remove this line to improve performances
47 | # fetch-depth: 0
48 |
49 | # MegaLinter
50 | - name: MegaLinter
51 | id: ml
52 | # You can override MegaLinter flavor used to have faster performances
53 | # More info at https://oxsecurity.github.io/megalinter/flavors/
54 | uses: oxsecurity/megalinter/flavors/cupcake@beta
55 | env:
56 | # All available variables are described in documentation
57 | # https://oxsecurity.github.io/megalinter/configuration/
58 | # Set ${{ github.event_name ==
59 | # 'push' && github.ref == 'refs/heads/main' }}
60 | # to validate only diff with main branch
61 | VALIDATE_ALL_CODEBASE: true
62 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
63 | FILTER_REGEX_EXCLUDE: .*run-clang-tidy\.py
64 | # ADD YOUR CUSTOM ENV VARIABLES HERE TO OVERRIDE VALUES
65 | # OF .mega-linter.yml AT THE ROOT OF YOUR REPOSITORY
66 |
67 | # Upload MegaLinter artifacts
68 | - name: Archive production artifacts
69 | if: ${{ success() || failure() }}
70 | uses: actions/upload-artifact@v4
71 | with:
72 | name: MegaLinter reports
73 | path: |
74 | megalinter-reports
75 | mega-linter.log
76 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *__pycache__*
2 | *.egg-info*
3 |
--------------------------------------------------------------------------------
/.mega-linter.yml:
--------------------------------------------------------------------------------
1 | # Configuration file for MegaLinter
2 | # See all available variables at
3 | # https://oxsecurity.github.io/megalinter/configuration/
4 | # and in linters documentation
5 |
6 | ########## Mega Linter Settings ##########
7 | APPLY_FIXES: none # all, none, or list of linter keys
8 | # If you use ENABLE variable, all other languages/formats/tooling-formats
9 | # will be disabled by default
10 | # ENABLE:
11 | # If you use ENABLE_LINTERS variable,
12 | # all other linters will be disabled by default
13 | ENABLE_LINTERS:
14 | - ACTION_ACTIONLINT
15 | - PYTHON_BLACK
16 | - PYTHON_FLAKE8
17 | - PYTHON_ISORT
18 | - YAML_YAMLLINT
19 | # DISABLE:
20 | # - COPYPASTE # Uncomment to disable checks of excessive copy-pastes
21 | # - SPELL # Uncomment to disable checks of spelling mistakes
22 | SHOW_ELAPSED_TIME: true
23 | FILEIO_REPORTER: false
24 | # Uncomment if you want MegaLinter to detect errors but not block CI to pass
25 | # DISABLE_ERRORS: true
26 | # FILTER_REGEX_EXCLUDE: >
27 | LINTER_RULES_PATH: /
28 | ########## Individual Linter Settings ##########
29 |
30 | #===== Github Action =====#
31 | ACTION_ACTIONLINT_RULES_PATH: .github
32 |
33 | #===== Python =====#
34 | PYTHON_BLACK_CONFIG_FILE: pyproject.toml
35 | PYTHON_BLACK_DISABLE_ERRORS: false
36 | PYTHON_FLAKE8_CONFIG_FILE: .flake8
37 | PYTHON_ISORT_CONFIG_FILE: pyproject.toml
38 | PYTHON_ISORT_DISABLE_ERRORS: false
39 |
40 | #===== YAML =====#
41 | YAML_YAMLLINT_CONFIG_FILE: .yaml-lint.yml
42 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | repos:
3 | - repo: https://github.com/ambv/black
4 | rev: 23.1.0
5 | hooks:
6 | - id: black
7 | args: [--config=pyproject.toml]
8 | language_version: python3
9 | exclude: ^(sql/src/cpp/thirdparty/duckdb/.*)|(scripts/run-clang-tidy.py)
10 | - repo: https://github.com/pycqa/flake8
11 | rev: 5.0.4
12 | hooks:
13 | - id: flake8
14 | exclude: ^(sql/src/cpp/thirdparty/duckdb/.*)|(scripts/run-clang-tidy.py)
15 | - repo: https://github.com/pocc/pre-commit-hooks
16 | rev: v1.3.5
17 | hooks:
18 | - id: clang-format
19 | args:
20 | - -i
21 | - --style=file
22 | - --fallback-style=Chromium
23 | - repo: https://github.com/pycqa/isort
24 | rev: 5.12.0
25 | hooks:
26 | - id: isort
27 | files: "\\.(py)$"
28 | args: [--settings-path=pyproject.toml]
29 | - repo: https://github.com/pre-commit/mirrors-mypy
30 | rev: v0.991
31 | hooks:
32 | - id: mypy
33 | args:
34 | - --ignore-missing-imports
35 | - --follow-imports=silent
36 | additional_dependencies: ['types-waitress']
37 | - repo: https://github.com/rhysd/actionlint
38 | rev: v1.6.22
39 | hooks:
40 | - id: actionlint
41 | - repo: https://github.com/adrienverge/yamllint.git
42 | rev: v1.28.0
43 | hooks:
44 | - id: yamllint
45 | args: [-c=.yaml-lint.yml]
46 | - repo: https://github.com/getindata/py-pre-commit-hooks
47 | rev: v0.2.0
48 | hooks:
49 | - id: pyspelling-docker
50 | name: spellcheck
51 | files: ^docs/
52 | - repo: local
53 | hooks:
54 | - id: sort-spelling-wordlist
55 | name: sort-spelling-wordlist
56 | description: Sort spelling wordlist
57 | language: system
58 | entry: bash -c 'sort -f docs/spelling_wordlist.txt
59 | -o docs/spelling_wordlist.txt'
60 | files: ^docs/spelling_wordlist.txt$
61 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | # .readthedocs.yaml
3 | # Read the Docs configuration file
4 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
5 |
6 | # Required
7 | version: 2
8 |
9 | # Set the version of Python and other tools you might need
10 | build:
11 | os: ubuntu-20.04
12 | tools:
13 | python: "3.9"
14 |
15 | mkdocs:
16 | configuration: mkdocs.yml
17 |
18 | # Optionally declare the Python requirements required to build your docs
19 | python:
20 | install:
21 | - requirements: docs/requirements.txt
22 |
--------------------------------------------------------------------------------
/.spellcheck.yml:
--------------------------------------------------------------------------------
1 | ---
2 | matrix:
3 | - name: Markdown
4 | aspell:
5 | lang: en
6 | ignore-case: true
7 | dictionary:
8 | wordlists:
9 | - docs/spelling_wordlist.txt
10 | encoding: utf-8
11 | pipeline:
12 | - pyspelling.filters.markdown:
13 | markdown_extensions:
14 | - pymdownx.superfences:
15 | - pyspelling.filters.html:
16 | comments: false
17 | ignores:
18 | - code
19 | - pre
20 | sources:
21 | - 'docs/**/*.md'
22 | default_encoding: utf-8
23 |
--------------------------------------------------------------------------------
/.yaml-lint.yml:
--------------------------------------------------------------------------------
1 | ---
2 | extends: default
3 |
4 | rules:
5 | truthy:
6 | ignore: |
7 | # The truthy rule is not relevant for GitHub Actions workflows,
8 | # as the YAML syntax for it is a bit different from normal YAML
9 | # (e.g. the `on:` key). Cover both workflow file extensions.
10 | .github/*/*.yaml
11 | .github/*/*.yml
12 | # Set line length to warning because some bash commands are long
13 | line-length:
14 | max: 80
15 | level: warning
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | ---
8 |
9 | [](https://discord.gg/mVuCfxudrD)
10 | [](https://badge.fury.io/py/opaqueprompts)
11 |
12 | Opaque Gateway enables applications to leverage the power of language models while preserving user privacy. This repo contains the Opaque Gateway Python library, which provides a simple interface for interacting with the Opaque Gateway Service. More information about Opaque Gateway can be found in the [documentation](https://opaquegateway.readthedocs.io/).
13 |
14 | **API Stability:** This package is still in development. As such, its API may
15 | change until it is sufficiently mature.
16 |
17 | ## Installation
18 |
19 | The Opaque Gateway Python library can be installed via pip:
20 |
21 | ```bash
22 | pip install opaqueprompts
23 | ```
24 |
25 | ## Documentation
26 | For a quickstart, technical overview, and API reference, see the [Opaque Gateway documentation](https://opaquegateway.readthedocs.io/).
27 |
28 | ## Contact
29 | To contact us, join our [Discord server](https://discord.gg/mVuCfxudrD) or email us at [opaquegateway@opaque.co](mailto:opaquegateway@opaque.co).
30 |
--------------------------------------------------------------------------------
/docs/css/extra.css:
--------------------------------------------------------------------------------
1 | /* Taken from https://mkdocstrings.github.io/python/customization/ */
2 | /* Indentation. */
3 | div.doc-contents:not(.first) {
4 | padding-left: 25px;
5 | border-left: .05rem solid var(--md-typeset-table-color);
6 | }
7 |
8 | /* Mark external links as such. */
9 | a.autorefs-external::after {
10 | /* https://primer.style/octicons/arrow-up-right-24 */
11 | background-image: url('data:image/svg+xml,');
12 | content: ' ';
13 |
14 | display: inline-block;
15 | position: relative;
16 | top: 0.1em;
17 | margin-left: 0.2em;
18 | margin-right: 0.1em;
19 |
20 | height: 1em;
21 | width: 1em;
22 | border-radius: 100%;
23 | background-color: var(--md-typeset-a-color);
24 | }
25 |
26 | a.autorefs-external:hover::after {
27 | background-color: var(--md-accent-fg-color);
28 | }
29 |
30 | .hidden-warning {
31 | display: none
32 | }
33 |
34 | /* The Read the Docs flyout is formatted with a font-size that is 90% of the
35 | body's. Material for MkDocs has a body font-size that is 0.5rem. This body
36 | font-size will result in the flyout having a font-size of 0.7rem, consistent
37 | with the font-size of other elements in the theme.
38 | */
39 | body {
40 | font-size: 0.777778rem;
41 | }
42 |
43 | /* Increase h2, h3, and h4 font size and reduce h4 font weight */
44 | .md-typeset h2 {
45 | font-size: 1.7em;
46 | }
47 |
48 | .md-typeset h3 {
49 | font-size: 1.4em;
50 | }
51 |
52 | .md-typeset h4 {
53 | font-size: 1.2em;
54 | font-weight: 300;
55 | }
56 |
57 | /* h6 for glossary terms */
58 | .md-typeset h6 {
59 | font-size: 1em;
60 | font-weight: 600;
61 | color: #000000de;
62 | }
63 |
64 | /* Indent glossary definitions */
65 | .md-typeset h6~p,
66 | .md-typeset h6~ul {
67 | padding-left: 1.25rem;
68 | }
69 |
70 | .md-typeset h6~ol {
71 | padding-left: .75rem;
72 | }
73 |
74 | /* Unordered list symbols:
75 | * Level 2 is a hollow circle
76 | * Level 3 is a filled square
77 | */
78 | article ul ul {
79 | list-style-type: circle !important;
80 | }
81 |
82 | article ul ul ul {
83 | list-style-type: square !important;
84 | }
85 |
86 | /* Grids */
87 | .md-typeset .grid {
88 | grid-gap: .4rem;
89 | display: grid;
90 | grid-template-columns: repeat(auto-fit, minmax(16rem, 1fr));
91 | margin: 1em 0
92 | }
93 |
94 | .md-typeset .grid.cards>ol,
95 | .md-typeset .grid.cards>ul {
96 | display: contents
97 | }
98 |
99 | .md-typeset .grid.cards>ol>li,
100 | .md-typeset .grid.cards>ul>li,
101 | .md-typeset .grid>.card {
102 | border: .05rem solid var(--md-default-fg-color--lightest);
103 | border-radius: .1rem;
104 | display: block;
105 | margin: 0;
106 | padding: .8rem;
107 | transition: border .25s, box-shadow .25s
108 | }
109 |
110 | .md-typeset .grid.cards>ol>li:focus-within,
111 | .md-typeset .grid.cards>ol>li:hover,
112 | .md-typeset .grid.cards>ul>li:focus-within,
113 | .md-typeset .grid.cards>ul>li:hover,
114 | .md-typeset .grid>.card:focus-within,
115 | .md-typeset .grid>.card:hover {
116 | border-color: #0000;
117 | box-shadow: var(--md-shadow-z2)
118 | }
119 |
120 | .md-typeset .grid.cards>ol>li>hr,
121 | .md-typeset .grid.cards>ul>li>hr,
122 | .md-typeset .grid>.card>hr {
123 | margin-bottom: 1em;
124 | margin-top: 1em
125 | }
126 |
127 | .md-typeset .grid.cards>ol>li>:first-child,
128 | .md-typeset .grid.cards>ul>li>:first-child,
129 | .md-typeset .grid>.card>:first-child {
130 | margin-top: 0
131 | }
132 |
133 | .md-typeset .grid.cards>ol>li>:last-child,
134 | .md-typeset .grid.cards>ul>li>:last-child,
135 | .md-typeset .grid>.card>:last-child {
136 | margin-bottom: 0
137 | }
138 |
139 | .md-typeset .grid>*,
140 | .md-typeset .grid>.admonition,
141 | .md-typeset .grid>.highlight>*,
142 | .md-typeset .grid>.highlighttable,
143 | .md-typeset .grid>.md-typeset details,
144 | .md-typeset .grid>details,
145 | .md-typeset .grid>pre {
146 | margin-bottom: 0;
147 | margin-top: 0
148 | }
149 |
150 | .md-typeset .grid>.highlight>pre:only-child,
151 | .md-typeset .grid>.highlight>pre>code,
152 | .md-typeset .grid>.highlighttable,
153 | .md-typeset .grid>.highlighttable>tbody,
154 | .md-typeset .grid>.highlighttable>tbody>tr,
155 | .md-typeset .grid>.highlighttable>tbody>tr>.code,
156 | .md-typeset .grid>.highlighttable>tbody>tr>.code>.highlight,
157 | .md-typeset .grid>.highlighttable>tbody>tr>.code>.highlight>pre,
158 | .md-typeset .grid>.highlighttable>tbody>tr>.code>.highlight>pre>code {
159 | height: 100%
160 | }
161 |
162 | .md-typeset .grid>.tabbed-set {
163 | margin-bottom: 0;
164 | margin-top: 0
165 | }
166 |
167 | /* Right-align elements */
168 | .right {
169 | float: right;
170 | }
171 |
172 | /* Featured content */
173 | .md-typeset .featured-content ul {
174 | width: 100%;
175 | margin: 0;
176 | padding: 0 0 0 25px;
177 | }
178 |
179 | .md-typeset .featured-content ul li {
180 | width: 30%;
181 | float: left;
182 | list-style-type: disc;
183 | list-style-position: outside;
184 | white-space: nowrap;
185 | margin-right: 20px;
186 | }
187 |
188 | @media only screen and (max-width: 1599px) {
189 | .md-typeset .featured-content ul li {
190 | margin-right: 10px;
191 | }
192 |
193 | @media only screen and (max-width: 966px) {
194 | .md-typeset .featured-content ul li {
195 | width: 50%;
196 | margin: 0;
197 | }
198 |
199 | @media only screen and (max-width: 480px) {
200 | .md-typeset .featured-content ul li {
201 | width: 100%;
202 | }
203 | }
204 | }
205 | }
206 |
207 | /* Adjust permalink icon in headings */
208 | .md-typeset .headerlink {
209 | width: 1em;
210 | height: 0.8em;
211 | vertical-align: middle;
212 | background-color: var(--md-default-fg-color--lighter);
213 | background-size: 0.8em;
214 | mask-size: 0.8em;
215 | -webkit-mask-size: 0.8em;
216 | mask-repeat: no-repeat;
217 | -webkit-mask-repeat: no-repeat;
218 | visibility: visible;
219 | mask-image: var(--md-admonition-icon--link);
220 | -webkit-mask-image: var(--md-admonition-icon--link);
221 | }
222 |
223 | .md-typeset .headerlink:hover,
224 | .md-typeset [id]:target .headerlink {
225 | background-color: var(--md-accent-fg-color);
226 | }
227 |
228 | @media screen and (min-width: 76.25em) {
229 |
230 | .md-typeset h1,
231 | .md-typeset h2,
232 | .md-typeset h3,
233 | .md-typeset h4,
234 | .md-typeset h5,
235 | .md-typeset h6 {
236 | display: flex;
237 | align-items: center;
238 | flex-direction: row;
239 | }
240 |
241 | .md-typeset .headerlink {
242 | order: -1;
243 | margin-left: -1em !important;
244 | }
245 | }
246 |
247 | /* Move header title closer to logo */
248 | [dir=ltr] .md-header__title {
249 | margin-left: 0.2rem;
250 | }
251 |
252 | [dir=rtl] .md-header__title {
253 | margin-right: 0.2rem;
254 | }
255 |
256 | /* Customize colors: override the code highlight color on the document root and the default scheme */
257 | :root, [data-md-color-scheme=default] {
258 | --md-code-hl-color: #ccefff;
259 | }
260 |
--------------------------------------------------------------------------------
/docs/getting_started/background.md:
--------------------------------------------------------------------------------
1 | # Confidential computing background
2 | Computer security has traditionally been predicated on two tenets: the protection of data at rest and the protection of data in transit. The former is addressed by full-disk encryption (FDE) technologies such as [BitLocker](https://learn.microsoft.com/en-us/windows/security/operating-system-security/data-protection/bitlocker/), [FileVault](https://support.apple.com/guide/mac-help/protect-data-on-your-mac-with-filevault-mh11785/mac), and [LUKS](https://en.wikipedia.org/wiki/Linux_Unified_Key_Setup), while the latter is typically addressed by Transport Layer Security (TLS). These technologies have become ubiquitous in the last several decades and have greatly contributed to securing computer systems. They are, however, insufficient to fully ensure that only authorized parties have access to user data.
3 |
4 | While data is being processed, it is stored in memory in the clear and it is transformed by the processor in full view of the system’s hardware, firmware, and operating system. This comprises tens of millions of lines of code written by a wide variety of vendors, some trustworthy and some less so. In addition, despite the best efforts of their developers, bugs and vulnerabilities are bound to creep into such large codebases, thereby rendering them susceptible to malware. This malware may in turn silently exfiltrate data or tamper with its processing to produce incorrect results.
5 |
6 | Furthermore, system administrators have access, both remote and direct, to systems that handle confidential data. As a result, they can access or tamper with this data, either of their own volition, or by virtue of being compelled to do so by local authorities. Similarly, the service provider itself may misbehave or be compelled to interfere.
7 | To mitigate these threats, a new tenet of computer security is needed: the protection of data in use.
8 |
9 | ## Confidential Computing
10 | The term trusted computing base (TCB) refers to all the code and, by extension, all the entities that wrote that code, that are directly or transitively trusted by any given workload. In addition, the TCB of a workload encompasses all hardware that can peek into or alter that workload, such as devices with direct memory access (DMA) capability, as well as any person or organization that manages the infrastructure on which the workload runs and which could, if it so desired, gain access to or tamper with the workload, be it via remote administrative privileges or through direct physical access.
11 |
12 | Therefore, any avenue of access through its TCB can potentially render a workload insecure. This includes malware, system administrators, and government entities in whose jurisdiction the servers that execute the workload are located. As a result, for any workload that is deemed sensitive, it is desirable to render its TCB as small as it practically can be. Trusted execution environments (TEEs) provide a way to minimize and enforce a workload’s TCB.
13 |
14 | TEEs provide a guarantee of confidentiality and integrity to both code and data. For a workload running inside a TEE, this means that no entity outside of the workload’s TCB may read from or write to its memory or otherwise tamper with its execution. In addition, TEEs provide a mechanism via which their trustworthiness and that of the workload running within them may be appraised. This mechanism is known as remote attestation and it allows a remote party to establish trust in a TEE as well as in the workload within it, and subsequently share secrets and other sensitive information with it.
15 |
16 | In and of themselves, TEEs are an abstract notion, and there exist multiple and wildly different implementations of them. Each implementation may be deemed a TEE irrespective of its internal architectural details if it conforms to the definition of a TEE. Some implementations include a larger TCB by design while others carry only the minimum viable TCB.
17 |
18 | The [Confidential Computing Consortium (CCC)](https://confidentialcomputing.io/) defines confidential computing as “the protection of data in use by performing computation in a hardware-based, attested Trusted Execution Environment”. Opaque Gateway’s execution abides by this definition by leveraging AMD’s TEE implementation: AMD SEV-SNP.
19 |
20 | ## Remote Attestation
21 | This section introduces nomenclature pertaining to remote attestation procedures. This nomenclature is useful in providing a shared language for the remainder of this document.
22 |
23 | Trust is a choice an entity makes about another system, and trustworthiness is a quality about that system that can be used in deciding whether to trust it. Remote attestation is a process whereby an entity appraises the trustworthiness of a system and determines whether to trust it.
24 |
25 | The system whose trustworthiness is appraised is known as the attester, the entity performing the appraisal is known as the verifier, and the entity that determines whether to trust the attester is known as the relying party.
26 |
27 | The attester produces evidence containing a set of claims. Each claim is typically a key-value pair that describes some aspect of the attester that might be of interest to the verifier. Additionally, the evidence may be accompanied by a set of endorsements. An endorsement is a secure statement produced by an endorser that vouches for the integrity of the attester’s ability to collect claims and sign evidence. An endorser is typically a hardware manufacturer or software vendor whose endorsements help verifiers appraise the authenticity of some of the claims embedded in the evidence produced by the attester. The attester may provide its endorsements alongside the evidence or the verifier may obtain them on its own.
28 |
29 | The verifier compares the values of the claims contained in the evidence against a set of reference values usually provided by a hardware manufacturer or software vendor. Reference values are typically referred to as known-good or nominal values in other documents. The verifier performs this comparison in addition to the appraisal of any endorsements according to an attestation policy. The attestation policy for evidence appraisal set by the verifier owner determines which claims and which endorsements the verifier appraises and how. As a result of this process, the verifier produces an attestation result that vouches for the attester.
30 |
31 | The relying party obtains the attestation result from the verifier and applies to it a set of rules set by the relying party owner known as the appraisal policy for attestation results. This policy determines whether the attestation result satisfies the relying party and thus whether it is convinced that the attester is trustworthy.
32 |
33 | Once the relying party establishes that the attester can be trusted, the relying party may establish a secure communications channel between it and the attester to share secrets with it.
34 |
35 | Note that the nomenclature described does not prescribe an architecture. For instance, there is no requirement for the relying party and the verifier to be two separate entities or programs. Likewise, nothing prevents a verifier from also being an attester.
36 |
37 | ### Roots of Trust
38 | An entity in a trust hierarchy that is inherently trusted is said to be in the hierarchy’s root of trust. This is so because a root of trust cannot itself be attested to. This includes an entity that can assume the role of relying party during remote attestation but cannot also be an attester.
39 |
40 | Consider a simple deployment model where an entity is both a relying party and verifier. This entity can establish trust with an attester, but whoever ultimately owns and operates the entity must necessarily trust it implicitly.
41 |
42 | It follows that a component in the root of trust, be it hardware, firmware, or software, must provide a firm foundation from which to build security and trust.
43 |
44 | ## Attested TLS
45 | Two peers who wish to communicate privately over an insecure channel such as the Internet must first establish a secret that they can use to encrypt and decrypt data. The most prevalent method of doing this is Transport Layer Security (TLS), formerly known as Secure Sockets Layer (SSL). TLS implements a handshake protocol based on asymmetric cryptography whereby two peers can agree on a secret encryption key without ever transmitting the key over the channel.
46 |
47 | The two peers who wish to bootstrap a secure channel first each generate an asymmetric key. During the handshake, the peers exchange the public components of their corresponding asymmetric keys over the insecure channel. They then use each other’s public keys to compute an identical secret that they arrive at independently. The details of this computation vary depending on the key exchange method used and are beyond the scope of this document. An invariant is upheld by all cryptographic methods that guarantees that knowledge of the public keys does not result in an eavesdropper being able to also compute the secret.
48 |
49 | Once the peers have computed the shared secret, they use it to configure a symmetric cipher that allows them to protect their data both from unauthorized access as well as from tampering while it is in transit.
50 |
51 | In summary, TLS enables two peers to establish a secure communications tunnel over an initially insecure channel that provides them guarantees of both confidentiality and integrity.
52 |
53 | ### Authentication
54 | TLS offers facilities to enable one or both peers to authenticate the other. With this functionality, each peer can independently ascertain the identity of the other during the handshake. In so doing, each can determine whether the other is who they think they are and thus decide whether to share any sensitive data.
55 |
56 | Asymmetric cryptography is widely used for this type of authentication. More specifically, an asymmetric key pair can be regarded as a form of identity: if one entity knows the public key of another, the latter can prove their identity by providing proof of knowledge of the corresponding private component. In case an entity does not know the public key of another, it can attempt to ascertain its identity via a third-party known as a Certificate Authority (CA).
57 |
58 | Certificate authorities are (usually) reputable entities that other entities choose to trust. There are many CAs that offer their services to the public and are directly or indirectly trusted by millions, forming a fabric of trust across the Internet. Other CAs are localized and specific, such as one internal to a company and trusted only by devices and services within an Intranet. A system of CAs, their corresponding identities, the identities for whom they vouch, and the entities who trust them is known as a Public Key Infrastructure (PKI). In a PKI, CAs are each identified by their own asymmetric key, the private part of which they carefully safeguard, and entities that trust them have a copy of the corresponding public components.
59 |
60 | The role of a CA is to carefully evaluate evidence of identity produced by an entity, such as government-issued ID or company registration information. If the CA is satisfied, it produces a certificate for the entity’s public key. The certificate describes the entity, known as the subject; it names the CA, known as the issuer; it indicates what purposes the certificate may be used for; and it states the certificate’s period of validity. The certificate also carries a digest of the subject’s public key, a digest of the issuer’s public key, and a signature produced by the issuer over the contents of the certificate computed using the CA’s private key.
61 |
62 | Using a PKI, an entity that wants to authenticate another but whose public key it does not recognize can request from the latter one or more chains of certificates for that key. If at least one of these chains is ultimately rooted in a CA that the authenticating entity trusts, it can rest assured of the identity of the other. A simpler way to state this is that A may not know B, but knows C who either directly vouches for B, or who vouches for a series of intermediaries which ultimately vouches for B.
63 |
64 | A crucial and necessary aspect of the process of authentication in TLS is that the asymmetric key produced by the remote peer must be bound to the evidence of its identity. In other words, the public key that the remote peer sends through the insecure channel for the other peer to compute the shared secret with must be the same public key which the remote peer presents a certificate for and whose ultimate issuing authority the other peer trusts. Without this binding, the authentication serves no purpose as at least one of the peers need not prove knowledge of the private key for which the certificate presented was issued and can thus instead be anybody.
65 |
66 |
67 | ### Remote Attestation
68 | The process of authentication as described above in the context of client-server communication between a user and a service aids the user in establishing trust in the service provider. That is, a service provider launches one or more instances of a service and provides those instances with an asymmetric key and a corresponding certificate chain that identifies the provider as well as the hierarchy of entities that vouch for the provider’s identity. When a client conducts the TLS handshake with the service, the latter responds with these two pieces of information. With the asymmetric key and certificate chain in hand, the client can convince itself that the service is hosted by a known service provider.
69 |
70 | This form of authentication makes no guarantees as to the trustworthiness of the service itself. If the service provider deployed malicious instances, if these instances have been compromised by malware, or if they are being actively monitored by the cloud service provider be it by malice or by force, the client has no way of finding out and aborting the handshake. The trust terminates in the service provider regardless of the trustworthiness of the service.
71 |
72 | With confidential computing, the service provider is outside of the TCB. Thus, its identity is arguably irrelevant: it does not matter who hosts the service as long as the service itself and the hardware that it runs on can be proven to be trustworthy. Attested TLS (aTLS) binds the public key of the peer running in a TEE not to the identity of the service provider but to the attestation evidence produced by the TEE. In so doing, the authenticating peer (relying party) can ascertain that the secure communications channel being bootstrapped terminates within the confines of the peer being authenticated (attester). Additionally, if both sides of the TLS connection run in a TEE, attestation and key binding can be mutual. This may be used in scenarios where one or more service endpoints are backed by microservices that need to communicate with one another.
73 |
74 | While the specifics of how key binding is achieved vary across TEE and aTLS implementations, the fundamental requirement remains: when a relying party wishes to establish an aTLS connection with an attester, the latter must include in the evidence that it produces a claim that identifies the public key and must submit it during the handshake. With that in place, the relying party, after successful appraisal of the evidence via a verifier, can ascertain that the public key sent by the attester for use in the computation of the shared key is the same as that claimed in the evidence. This in turn binds the channel’s encryption key to the trustworthiness of the attester.
75 |
76 | ### Topological Patterns
77 | A TLS handshake begins with the client sending a ClientHello message to the server. Depending on the aTLS implementation, this message may include an explicit request for aTLS or the requirement may be assumed by both ends. The server in turn responds with a ServerHello message that can either include attestation evidence and possibly endorsements, or an attestation result readily produced by a verifier.
78 |
79 | The Background-Check Model is defined by the server returning evidence and optionally endorsements to the client. In this model, the burden of appraising the evidence falls on the relying party, which must submit this information to a verifier. In contrast, the Passport Model is defined by the server itself submitting its evidence and optional endorsements to a verifier and returning to the client the corresponding attestation result produced by the verifier.
80 |
81 | Regardless of which model is used, the client and server must agree on the model either ahead of time, during the handshake, or the client must be able to dynamically recognize and adapt to the server’s observed behavior.
82 |
83 | ## AMD SEV-SNP
84 | In an effort to remove the hypervisor and cloud fabric from the TCB, AMD has added over the last several years several instruction set extensions to their processors. With the introduction of the Zen 3 architecture, AMD EPYC processors now provide the ability to instantiate a TEE in the form of a Confidential Virtual Machine (CVM).
85 |
86 | The first extension that was added is Secure Memory Encryption (SME). SME was introduced in Zen 1 (Naples), allowing system software to mark individual pages of memory as private. In turn, the processor transparently encrypts and decrypts these pages as necessary using an ephemeral key known only to it that it generates during boot. A stricter version of SME called Transparent Secure Memory Encryption (TSME) was also added whereby the processor automatically encrypts all memory without intervention by system software. SME and TSME aid in thwarting a certain class of attacks where malicious sysadmins, hypervisor bugs, and physical access may leak user data.
87 |
88 | SME was later extended in Zen 1 with Secure Encrypted Virtualization (SEV). This addition augments the behavior of SME with a different ephemeral encryption key per virtual machine. With SEV enabled, each virtual machine is assigned a different memory encryption key managed by the processor. Since the hypervisor and the virtual machines that it manages do not share the same encryption key, hypervisors are effectively rendered unable to read and extract virtual machine data.
89 |
90 | SEV was subsequently extended with Encrypted State (SEV-ES) in Zen 2 (Rome). Whereas SEV protects VM memory, SEV-ES transparently encrypts and decrypts VM processor state. When SEV-ES is enabled, the hypervisor, in addition to being unable to read VM memory, cannot access the VM’s registers when control is transferred from the VM to the hypervisor.
91 |
92 | Finally, in Zen 3 (Milan), AMD introduced Secure Nested Paging (SEV-SNP). Whereas SEV and SEV-ES provide virtual machines with guarantees of confidentiality from the hypervisor, SEV-SNP provides guarantees of integrity, and additionally introduces support for remote attestation. Thus, the combination of SEV, SEV-ES, and SEV-SNP, collectively known as AMD SEV, allows for the creation and execution of TEE-compliant CVMs.
93 |
94 |
95 | ### Operational Overview
96 | In the SEV-SNP threat model, the only components in the TCB are the processor, AMD’s Platform Security Processor (PSP), and the CVM itself. All other hardware, firmware, the hypervisor, the operating system, all device drivers, host-side user-mode software, as well as other VMs and CVMs along with their users and administrators, including the cloud service provider, are strictly outside of the TCB.
97 |
98 | The core operating principle of SEV-SNP is that if a virtual machine writes data to a page in memory (a page is typically a 4KB chunk of byte-aligned memory), if and when the VM reads that page at any later time, it is guaranteed to read the same data that it previously wrote. If the data read differs from the data originally written, the processor alerts the VM to that effect. More specifically, SEV-SNP guards against replay attacks, data corruption, memory aliasing, and memory remapping by implementing additional access checks in hardware. The configuration of these checks is guarded by the hardware such that only authorized software may set up their behavior and only at the appropriate time of the lifecycle of a CVM. In addition, SEV-SNP introduces additional guardrails around interrupt delivery and hypervisor intercepts that cover other avenues by which a hypervisor may tamper with the integrity of a CVM.
99 |
100 | It is important to understand that CVMs are instantiated out of an untrusted system state by untrusted software. It is the cloud fabric that decides on the parameters of the CVM and it is a potentially malicious hypervisor that loads code into the CVM before setting it in motion. For this reason, CVMs carry no secrets with them at the start of their lifecycle. Instead, SEV-SNP includes facilities for the remote attestation of CVMs. Only after careful appraisal of the evidence produced by an SEV-SNP CVM can a user of the services provided by the CVM safely share secrets with it.
101 |
102 |
103 | ### Virtual Machine Startup
104 | When a hypervisor is first requested to launch a virtual machine, one of its tasks is to populate the latter’s initial memory contents. These contents include both virtual machine and virtual device configuration as well as the code that the VM will execute immediately upon start. This code is typically hypervisor-specific firmware that allows the VM to set itself up, and to find and start the operating system.
105 |
106 | Seeing as the hypervisor is untrusted, it could load into the CVM any code it desires. Similarly, since the cloud fabric and its sysadmins are also untrusted, they could tamper with the CVM’s contents before it has a chance to start. While the SEV-SNP hardware protects the integrity of the code once the CVM has launched, it is necessary to ensure during attestation that the code that initially seeded its execution is trustworthy.
107 |
108 | To that end, the hypervisor interacts with the PSP throughout the launch flow of a CVM. At first, the hypervisor informs the PSP of its intention to launch a new CVM. In so doing, the PSP initializes a new state tracking structure for the CVM and protects it from the hypervisor. Then, for every page of memory that the hypervisor loads into the CVM, it informs the PSP. The SEV-SNP hardware only allows pages added in this way to be part of the CVM.
109 |
110 | For each new page added, the PSP updates a running digest of the contents and metadata of every page loaded up to and including the last. This running digest consists of a series of hash extensions computed in the following manner:
111 |
112 | `DIGEST_NEW := SHA-384(PAGE_INFO)`
113 |
114 | where PAGE_INFO is a structure that holds the current running digest alongside a set of properties about the new page being added. These properties include a SHA-384 digest of the contents of the new page, its read-write-execute permissions, and the guest physical address where it will be mapped in the CVM’s memory layout.
115 |
116 | Once all pages are loaded, the hypervisor once again informs the PSP. At that point, the PSP performs final sanity checks on the CVM, transitions it into the running state, and protects it from further modification.
117 |
118 | The final value of the running digest represents a cryptographic log, expressed as a single hash value, of the ordered sequence of steps that the hypervisor took while loading the virtual machine as well as of the contents, and properties of that content, that the hypervisor loaded into it. This value is known as the CVM’s launch digest, sometimes also referred to as its launch measurement, and is reflected during attestation.
119 |
120 | Lastly, as part of the final message the hypervisor sends to the PSP, it may submit an arbitrary sequence of up to 32 bytes that the code running in the CVM can later retrieve. This array is known as host data, and is also included during attestation.
121 |
122 | ### Remote Attestation
123 | Every processor that supports SEV-SNP carries a secret that is unique to it. In addition, each core in every processor ships with a given version of its microcode, and every PSP, with a given version of its firmware. Should an issue or security vulnerability be found after shipping, both the processor microcode and the PSP firmware can be upgraded at runtime provided the updates carry a signature the processor and PSP recognize, respectively.
124 |
125 | The combination of the lowest microcode version across all processor cores and the version of the PSP firmware forms the TCB version. From a combination of the unique secret and the TCB version, the hardware derives an attestation key known as the Versioned Chip Endorsement Key (VCEK). For each VCEK, which is unique for each processor, AMD issues a certificate signed by the AMD SEV Signing Key (ASK). For the ASK in turn AMD issues a certificate signed by the AMD Root Key (ARK), one of which exists for every product (e.g., Milan, Rome, Naples, etc.). For the ARK, AMD issues a self-signed certificate and publishes it.
126 |
127 | When a workload running in a CVM is challenged to produce evidence for attestation, it issues a request to the PSP to generate an attestation report. In a cloud setting, this request is routed from the workload in user-mode to the kernel-mode SEV guest driver, out to the hypervisor, and finally to the PSP of the physical server where the cloud fabric scheduled the workload.
128 |
129 | The PSP attestation report contains a variety of fields including the CVM’s launch digest, a workload-provided field known as report data, and the value of host data. The entirety of the report is in turn signed by the VCEK.
130 |
131 | The report data field contains any data that the workload wishes to include in the report. Typically, a workload uses this field to encode a combination of a nonce and a public key that helps ensure the freshness of the evidence and to establish a secure channel of communication with the workload, respectively.
132 |
133 | During attestation, the verifier appraises the report by first verifying that the signature around it was produced by a VCEK that is rooted to a well-known, non-revoked ARK certificate. If this is true, the verifier knows that the report was generated by a genuine AMD PSP on a real SEV-SNP platform. Thereafter, the verifier ensures that the TCB version is sufficiently recent as a means to ensure that the issuing platform does not have any known security vulnerabilities that could render the report untrustworthy. Finally, the verifier ensures that fields such as the launch digest, report data, host data, and potentially others depending on its evidence appraisal policy, are acceptable in accordance with its reference values.
134 |
135 | To aid verifiers in validating VCEK signatures, AMD hosts a Key Distribution Service (KDS). The KDS is a Web service that retrieves VCEK certificates signed by the appropriate ASK for each unique processor and TCB version. Additionally, the KDS serves ASK and ARK certificates for each supported product as well as a corresponding Certificate Revocation List (CRL). With the KDS, verifiers can build full certificate chains that help ensure that signatures around attestation reports were generated by genuine SEV-SNP platforms.
136 |
--------------------------------------------------------------------------------
/docs/getting_started/overview.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | ## Technical overview
4 |
5 | A technical overview of how Opaque Gateway builds on top of [confidential computing](https://en.wikipedia.org/wiki/Confidential_computing) and [remote attestation](https://www.redhat.com/en/blog/attestation-confidential-computing) is coming soon.
6 |
7 | ### Attested communication with Opaque Gateway
8 |
9 | All communication from a client to Opaque Gateway occurs over an attested TLS channel. In short, an attested TLS channel enables a client to 1) verify the identity of the server, 2) ensure that the server is running the correct software, and 3) establish an encrypted channel for communication. Attested TLS capabilities rely on the server running on confidential computing hardware. More on confidential computing, remote attestation, and attested TLS can be found [here](background.md).
10 |
11 | ## Supported entities
12 |
13 | Opaque Gateway, for now, supports only the English language. The service identifies and sanitizes the following entity types:
14 |
15 | | **Type** | **Notes** |
16 | |---------------------------------------------------------------------------------------------------------------------------------|-------------------------------------|
17 | | Bank account numbers | |
18 | | Credit card | |
19 | | Crypto wallet numbers | Supports BTC wallets |
20 | | Dates | |
21 | | Driver's license numbers | Supports US drivers' licenses |
22 | | Email addresses | |
23 | | Geographic locations | |
24 | | [Individual Taxpayer Identification Numbers (ITINs)](https://www.irs.gov/individuals/individual-taxpayer-identification-number) | |
25 | | [International Bank Account Numbers (IBANs)](https://n26.com/en-eu/iban-number) | |
26 | | IP addresses | Supports both IPv4 and IPv6 |
27 | | Medical license numbers | |
28 | | Names | |
29 | | Passport numbers | Supports US passports |
30 | | Phone numbers | |
31 | | Social security numbers (SSNs) | Supports US SSNs |
32 | | URLs | |
33 |
34 | For custom entity types, contact us at `hello@opaque.co`.
--------------------------------------------------------------------------------
/docs/getting_started/quickstart.md:
--------------------------------------------------------------------------------
1 | # Quickstart
2 |
3 | ## Installation
4 | To install Opaque Gateway, run:
5 |
6 | ```bash
7 | pip install opaqueprompts
8 | ```
9 |
10 | ## Environment setup
11 | Accessing the Opaque Gateway API requires an API key, which you can get by either
12 |
13 | 1. Creating an account on the [Opaque Gateway](https://opaquegateway.opaque.co) website. Once you have an account, you can find your API key on the [API Key](https://opaquegateway.opaque.co/#/main/api/key) page.
14 |
15 | 1. Contacting us at `hello@opaque.co`.
16 |
17 | Once you have your key, set it as an environment variable:
18 |
19 | ```bash
20 | export OPAQUEPROMPTS_API_KEY="..."
21 | ```
22 |
23 | For a custom or private deployment of Opaque Gateway, you can set the `OPAQUEPROMPTS_SERVER_HOSTNAME` environment variable to the domain (i.e., without the protocol, e.g. `https`) or IP address of the deployment.
24 |
25 | ```bash
26 | export OPAQUEPROMPTS_SERVER_HOSTNAME="..."
27 | ```
28 |
29 | ## Using Opaque Gateway standalone
30 | Opaque Gateway offers two main functions: `sanitize()` and `desanitize()`. The `sanitize()` function takes a string and returns a sanitized (in other words, encrypted and redacted) version of it, while the `desanitize()` function takes a sanitized string and returns the original string.
31 |
32 | ### Sanitization
33 |
34 | ```python
35 | import opaqueprompts
36 |
37 | input_text_with_pii = "John Smith called 213-456-7098 (the phone number of his friend Sarah Jane) and asked her to meet him in San Francisco."
38 | sanitized_response = opaqueprompts.sanitize(input_texts = [input_text_with_pii])
39 | ```
40 | As shown below, `sanitized_response` contains both the `sanitized_texts` and `secure_context` fields, which must be passed to the followup call to `desanitize()`.
41 |
42 | ```python
43 | > print(sanitized_response)
44 | SanitizeResponse(sanitized_texts=['PERSON_2 called PHONE_NUMBER_1 (the phone number of his friend PERSON_1) and asked her to meet him in LOCATION_1.'], secure_context='eyJzZWNyZXRfZW50cm9weSI6IjRocWZxb1VBUmJueWNYeU5JRjROa3VzNjdkSnZtY1ZPVFhYcnlPWDdmNzAxNVR4NVUraTM5c3VGRTJqS3oySjUzMkM4ckF6L0cyME5sWGloZ2hicWhzcFF6N2pVTUZIVVNvMVRGam1UU2tTcG5pR1Bob0s5RUR3N3JQZ2VkMklJNEhRTHh2dVZNUnJlY2h3WVhGbGhZYzhFOEI1VFJkWVl2Sm1QUG5Rbkp3WT0iLCJlbnRpdHlfbWFwIjp7IkxPQ0FUSU9OXzEiOiJibFpDVE1oMTR0OW1FQmZsejk5cWVlWVJSTjdyUzhkUTZRRVZOZHlKNkEwPSIsIlBFUlNPTl8xIjoiZDZiR3VjOEJVNUdPcG56cDJoV1FaUUIyaUtvRzA2U3dCdGlkSXo5WGxaUT0iLCJQRVJTT05fMiI6IkpKSyt6cmhtTENzWGVkTHhoNWxhTWFFSzlUVmw1bU55MkNGR3FZekRmZ3M9IiwiUEhPTkVfTlVNQkVSXzEiOiJhQU9GVmhoT0tqczVzT0Iwczh2dnZwNTBsVk9XcnNyODE3eEVVSnkrTzdRPSJ9fQ==')
45 | ```
46 | As you can see, the `sanitized_texts` field contains the initial message, but with the PII removed. The `secure_context` is just an opaque set of bytes,
47 | which should get passed to the `desanitize` call as shown below.
48 |
49 | ### Desanitization
50 |
51 | ```python
52 | # Assume that sanitized_response.sanitized_texts was passed into an LLM of your
53 | # choice, and the final output is saved to 'llm_output' such that
54 | # llm_output = "PERSON_1 and PERSON_2 will be meeting in LOCATION_1"
55 | desanitized_response = opaqueprompts.desanitize(sanitized_text = llm_output, secure_context = sanitized_response.secure_context)
56 | ```
57 |
58 | The `desanitized_response` contains only one field, `desanitized_text`, which contains the desanitized version of `llm_output`.
59 |
60 | ```python
61 | > print(desanitized_response)
62 | DesanitizeResponse(desanitized_text='Sarah Jane and John Smith will be meeting in San Francisco')
63 | ```
64 |
65 | ## Using Opaque Gateway with LangChain
66 |
67 | Opaque Gateway offers a [LangChain](https://python.langchain.com/docs/get_started/introduction.html) integration, enabling you to easily build privacy-preserving LLM applications. See the [OpaquePrompts page in the LangChain documentation](https://python.langchain.com/docs/integrations/llms/opaqueprompts) for more information.
68 |
69 | ## Troubleshooting
70 |
71 | ### Version mismatch
72 |
73 | We may make breaking changes and drop support for old versions of the Python package. If this happens, you should see an error message like this when making a `sanitize` or `desanitize` call:
74 |
75 | ```
76 | Request sent using package version 0.1.0, but minimum supported version is 0.2.0. Please update the opaqueprompts package to a supported version.
77 | ```
78 |
79 | If you see this, simply upgrade `opaqueprompts` with `pip install -U opaqueprompts` and then you should be able to continue using the package without issue.
80 |
81 | ### Missing version header
82 |
83 | The logic to gracefully handle version mismatch was not added to the `opaqueprompts` package until version 0.1.0. As such, if you are using an older version of `opaqueprompts`, you may see the following error:
84 |
85 | ```
86 | Client-Version header not set, please ensure this request was sent using opaqueprompts version >= 0.1.0
87 | ```
88 |
89 | If you see this, make sure to update your `opaqueprompts` package to the latest version per the instructions in the previous section.
90 |
--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opaque-systems/opaquegateway-python/f6639cef33fa206bd39aad67cec2dd92246da1d2/docs/images/logo.png
--------------------------------------------------------------------------------
/docs/images/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/images/logo_color.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opaque-systems/opaquegateway-python/f6639cef33fa206bd39aad67cec2dd92246da1d2/docs/images/logo_color.png
--------------------------------------------------------------------------------
/docs/images/opaque_prompts_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opaque-systems/opaquegateway-python/f6639cef33fa206bd39aad67cec2dd92246da1d2/docs/images/opaque_prompts_logo.png
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | hide:
3 | - navigation
4 | - toc
5 | ---
6 |
7 | # Introduction
8 | Opaque Gateway is a service that enables applications to leverage the power of language models without compromising user privacy. Designed for composability and ease of integration into existing applications and services, Opaque Gateway is consumable via [a simple Python library](getting_started/quickstart.md#installation) as well as through [LangChain](https://python.langchain.com/docs/integrations/llms/opaqueprompts). Perhaps more importantly, Opaque Gateway leverages the power of [confidential computing](https://en.wikipedia.org/wiki/Confidential_computing) to ensure that even the Opaque Gateway service itself cannot access the data it is protecting.
9 |
10 | Today's LLM application architectures often yield constructed prompts that may include retrieved context, conversation memory, and/or a user query, all of which may contain sensitive information. Opaque Gateway enables applications to protect this sensitive information by sanitizing prompts before they're sent to a language model. Opaque Gateway can then "de-sanitize" the model's response, ensuring that the application receives the same response it would have received had the prompt not been sanitized. You can think of Opaque Gateway as a privacy layer that wraps a language model, transparently sanitizing and de-sanitizing prompts and responses.
11 |
12 |
13 |
14 | * :material-rocket-launch: **Get Started**
15 |
16 | ---
17 |
18 | New to Opaque Gateway? Quickly get started here.
19 |
20 | [Learn more :octicons-arrow-right-24:](getting_started/quickstart.md){: .right}
21 |
22 | * :material-tools: **Technical Overview**
23 |
24 | ---
25 |
26 | Gain a better understanding of how Opaque Gateway protects sensitive data without seeing it.
27 |
28 | [Learn more :octicons-arrow-right-24:](getting_started/overview.md){: .right}
29 |
30 | * :material-code-tags: **API Reference**
31 |
32 | ---
33 |
34 | See the API reference for the Opaque Gateway Python library.
35 |
36 | [Learn more :octicons-arrow-right-24:](reference/library_api.md){: .right}
37 |
38 |