├── backend
├── app
│ ├── __init__.py
│ ├── logging.conf
│ ├── main.py
│ ├── schemas.py
│ └── endpoints.py
├── tests
│ ├── __init__.py
│ ├── data
│ │ ├── foo.bar
│ │ ├── test.txt
│ │ ├── test.pdf
│ │ └── cli
│ │ │ ├── expected
│ │ │ ├── nested
│ │ │ │ └── test.txt
│ │ │ └── test.html
│ │ │ ├── input
│ │ │ ├── nested
│ │ │ │ └── test.txt
│ │ │ └── test.html
│ │ │ ├── recognizer_config.json
│ │ │ └── anonymizer_config.json
│ ├── cli
│ │ ├── __init__.py
│ │ └── test_cli.py
│ ├── endpoints
│ │ ├── __init__.py
│ │ ├── test_tags.py
│ │ ├── test_supported_recognizers.py
│ │ ├── test_score.py
│ │ ├── test_anonymize_file.py
│ │ ├── test_find_piis.py
│ │ └── test_anonymize.py
│ └── conftest.py
├── pyproject.toml
├── .flake8
├── requirements-dev.txt
├── .dockerignore
├── cli
│ ├── recognizer_config.json
│ ├── anonymizer_config.json
│ └── redact.py
├── requirements.txt
├── README.md
└── Dockerfile
├── frontend
├── .dockerignore
├── src
│ ├── components
│ │ ├── scores
│ │ │ ├── ScoresDialog.sass
│ │ │ ├── ScoresDialogBody.test.jsx
│ │ │ ├── ScoresTable.test.jsx
│ │ │ ├── ScoresDialog.test.jsx
│ │ │ ├── ScoresDialog.jsx
│ │ │ ├── ScoresDialogBody.jsx
│ │ │ └── ScoresTable.jsx
│ │ ├── HelpDialog.sass
│ │ ├── ActiveRecognizerSettings.sass
│ │ ├── preview
│ │ │ ├── PdfPreview.sass
│ │ │ ├── TextPreview.sass
│ │ │ ├── PreviewControl.sass
│ │ │ ├── TextPreview.test.jsx
│ │ │ ├── TextPreview.jsx
│ │ │ ├── PreviewControl.test.jsx
│ │ │ ├── PdfPreview.jsx
│ │ │ └── PreviewControl.jsx
│ │ ├── annotation
│ │ │ ├── annotator
│ │ │ │ ├── Span.js
│ │ │ │ ├── Mark.jsx
│ │ │ │ ├── utils.js
│ │ │ │ └── TokenAnnotator.jsx
│ │ │ ├── AnnotationControl.sass
│ │ │ ├── Dropzone.test.jsx
│ │ │ ├── Dropzone.sass
│ │ │ ├── AnnotationControl.test.jsx
│ │ │ ├── AnnotationForm.test.jsx
│ │ │ ├── Dropzone.jsx
│ │ │ ├── AnnotationControl.jsx
│ │ │ ├── AnnotationForm.sass
│ │ │ └── AnnotationForm.jsx
│ │ ├── App.sass
│ │ ├── NavBar.sass
│ │ ├── AboutDialog.sass
│ │ ├── Disclaimer.jsx
│ │ ├── Main.sass
│ │ ├── anonymizationConfig
│ │ │ ├── RandomizedResponseMechanism.jsx
│ │ │ ├── AnonymizationConfigMenu.test.jsx
│ │ │ ├── Item.jsx
│ │ │ ├── AnonymizationConfigMenu.sass
│ │ │ ├── GeneralizationMechanism.jsx
│ │ │ ├── TagMechanismConfig.jsx
│ │ │ ├── DefaultMechanismConfig.jsx
│ │ │ ├── LaplaceNoiseMechanism.jsx
│ │ │ ├── anonymizationConfig.jsx
│ │ │ ├── PseudonymizationMechanism.jsx
│ │ │ ├── randomizedResponseCountryDemoValues.js
│ │ │ ├── SuppressionMechanism.jsx
│ │ │ └── AnonymizationConfigMenu.jsx
│ │ ├── LocalizationWrapper.jsx
│ │ ├── LocalizationWrapper.test.jsx
│ │ ├── MainMenu.test.jsx
│ │ ├── App.test.jsx
│ │ ├── MainMenu.sass
│ │ ├── Help.jsx
│ │ ├── About.jsx
│ │ ├── NavBar.test.jsx
│ │ ├── NavBar.jsx
│ │ ├── HelpDialog.jsx
│ │ ├── ErrorBoundary.jsx
│ │ ├── Settings.jsx
│ │ ├── AboutDialog.jsx
│ │ ├── ActiveRecognizerSettings.jsx
│ │ ├── SettingsDialog.jsx
│ │ ├── MainMenu.jsx
│ │ ├── App.jsx
│ │ ├── Main.test.jsx
│ │ └── Main.jsx
│ ├── js
│ │ ├── toaster.js
│ │ ├── polyglotContext.js
│ │ ├── anonymization.js
│ │ ├── annotation.js
│ │ ├── constants.js
│ │ ├── token.js
│ │ ├── useLocalStorage.js
│ │ ├── useCompile.js
│ │ └── useAnonymization.js
│ ├── setupTests.js
│ ├── index.jsx
│ ├── index.css
│ ├── translations
│ │ ├── utils.js
│ │ ├── en.js
│ │ └── de.js
│ └── api
│ │ └── routes.js
├── public
│ ├── robots.txt
│ ├── favicon.ico
│ └── index.html
├── Dockerfile.dev
├── Dockerfile
├── .gitignore
├── nginx.conf
├── package.json
└── README.md
├── docs
├── logo.png
├── scores.png
├── approach.png
├── annotation.png
├── end-to-end.gif
└── anonymization.gif
├── .gitignore
├── docker-compose.yml
├── .run
├── Dev Servers.run.xml
├── Lint, format and test project.run.xml
├── Frontend Dev Server.run.xml
├── Frontend Tests.run.xml
├── Backend Tests.run.xml
├── Pre-commit Hooks.run.xml
└── Backend Dev Server.run.xml
├── docker-compose.dev.yml
├── .github
└── workflows
│ ├── pre_commit.yml
│ ├── test_frontend.yml
│ └── test_backend.yml
├── .pre-commit-config.yaml
├── LICENSE
└── README.md
/backend/app/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backend/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backend/tests/data/foo.bar:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backend/tests/cli/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backend/tests/endpoints/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/backend/tests/data/test.txt:
--------------------------------------------------------------------------------
1 | Deutschland.
--------------------------------------------------------------------------------
/frontend/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/backend/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 127
3 |
--------------------------------------------------------------------------------
/backend/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E203, W503
3 | max-line-length = 127
4 |
--------------------------------------------------------------------------------
/frontend/src/components/scores/ScoresDialog.sass:
--------------------------------------------------------------------------------
1 | .dialog
2 | width: auto
3 |
--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/docs/logo.png
--------------------------------------------------------------------------------
/frontend/src/components/HelpDialog.sass:
--------------------------------------------------------------------------------
1 | .more-vertical-space
2 | padding-top: 20px
3 |
--------------------------------------------------------------------------------
/docs/scores.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/docs/scores.png
--------------------------------------------------------------------------------
/docs/approach.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/docs/approach.png
--------------------------------------------------------------------------------
/docs/annotation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/docs/annotation.png
--------------------------------------------------------------------------------
/docs/end-to-end.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/docs/end-to-end.gif
--------------------------------------------------------------------------------
/docs/anonymization.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/docs/anonymization.gif
--------------------------------------------------------------------------------
/frontend/public/robots.txt:
--------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | User-agent: *
3 | Disallow:
4 |
--------------------------------------------------------------------------------
/backend/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | pre-commit==2.16.0
2 | black==19.10b0
3 | pytest==5.4.1
4 | pytest-cov==2.8.1
5 |
--------------------------------------------------------------------------------
/backend/tests/data/test.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/backend/tests/data/test.pdf
--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openredact/openredact-app/HEAD/frontend/public/favicon.ico
--------------------------------------------------------------------------------
/backend/tests/data/cli/expected/nested/test.txt:
--------------------------------------------------------------------------------
1 | Meine E-Mail Adresse lautet Email 1.
2 | Sie ist in XXXXXXXXXXX registriert.
3 |
--------------------------------------------------------------------------------
/frontend/src/components/ActiveRecognizerSettings.sass:
--------------------------------------------------------------------------------
1 | .recognizer-list
2 | margin-right: 20px
3 | list-style-type: none
4 |
--------------------------------------------------------------------------------
/backend/tests/data/cli/input/nested/test.txt:
--------------------------------------------------------------------------------
1 | Meine E-Mail Adresse lautet test@mail.de.
2 | Sie ist in Deutschland registriert.
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # PyCharm
2 | .idea/
3 |
4 | # Mac
5 | .DS_Store
6 |
7 | # Virtualenv
8 | venv/
9 |
10 | # Tests
11 | __pycache__
12 |
--------------------------------------------------------------------------------
/backend/.dockerignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .pytest_cache
3 |
4 | # Mac
5 | .DS_Store
6 |
7 | # Virtualenv
8 | venv/
9 |
10 | # Tests
11 | __pycache__
12 |
--------------------------------------------------------------------------------
/backend/cli/recognizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "language": "de",
3 | "recognizer_paths": [],
4 | "use_statistical_ner": true,
5 | "load_integrated_recognizers": true
6 | }
7 |
--------------------------------------------------------------------------------
/frontend/src/components/preview/PdfPreview.sass:
--------------------------------------------------------------------------------
1 | .react-pdf__Page__canvas
2 | margin: 0 auto
3 |
4 | .pdf-outline
5 | border-radius: 0
6 | margin: 10px
7 | padding: 0
8 |
--------------------------------------------------------------------------------
/backend/tests/data/cli/recognizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "language": "de",
3 | "recognizer_paths": [],
4 | "use_statistical_ner": false,
5 | "load_integrated_recognizers": true
6 | }
7 |
--------------------------------------------------------------------------------
/backend/tests/endpoints/test_tags.py:
--------------------------------------------------------------------------------
1 | def test_get_tags(client):  # GET /api/tags must succeed and include the "PER" tag
2 | response = client.get("/api/tags")
3 | assert response.status_code == 200
4 | assert "PER" in response.json()  # membership check on the decoded JSON body
5 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/annotator/Span.js:
--------------------------------------------------------------------------------
1 | class Span { // plain value object that stores a start and an end position
2 | constructor(start, end) {
3 | this.start = start;
4 | this.end = end; // NOTE(review): inclusive vs. exclusive end is not visible here — confirm with callers
5 | }
6 | }
7 |
8 | export default Span;
9 |
--------------------------------------------------------------------------------
/frontend/src/components/App.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 |
3 | $config-width: 16vw
4 |
5 | .grid-container
6 | display: grid
7 | grid-template-columns: 0.16fr 0.84fr
8 |
--------------------------------------------------------------------------------
/frontend/src/js/toaster.js:
--------------------------------------------------------------------------------
1 | import { Position, Toaster } from "@blueprintjs/core";
2 |
3 | const AppToaster = Toaster.create({ // module-level toaster instance, exported as the default below
4 | position: Position.TOP_RIGHT, // toasts are positioned at the top right
5 | });
6 |
7 | export default AppToaster;
8 |
--------------------------------------------------------------------------------
/backend/cli/anonymizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "defaultMechanism": {
3 | "mechanism": "suppression",
4 | "config": {
5 | "suppressionChar": "X"
6 | }
7 | },
8 | "mechanismsByTag": {}
9 | }
10 |
--------------------------------------------------------------------------------
/frontend/src/components/preview/TextPreview.sass:
--------------------------------------------------------------------------------
1 | .preview-text
2 | cursor: default
3 | padding: 15px
4 | font-family: "Courier New", sans-serif
5 |
6 | .document-outline
7 | border-radius: 0
8 | margin: 10px
9 |
--------------------------------------------------------------------------------
/backend/tests/endpoints/test_supported_recognizers.py:
--------------------------------------------------------------------------------
1 | def test_get_supported_recognizers(client):  # GET /api/recognizers must succeed and include "email_recognizer"
2 | response = client.get("/api/recognizers")
3 | assert response.status_code == 200
4 | assert "email_recognizer" in response.json()  # membership check on the decoded JSON body
5 |
--------------------------------------------------------------------------------
/frontend/src/js/polyglotContext.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 |
3 | const PolyglotContext = React.createContext((key, options = {}) =>
4 | options === {} ? key : key + JSON.stringify(options)
5 | );
6 |
7 | export default PolyglotContext;
8 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/AnnotationControl.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 | @import "../Main"
3 |
4 | .annotation-card
5 | margin: $border-width 0.5*$border-width $border-width $border-width
6 | position: relative
7 | box-shadow: none
8 |
--------------------------------------------------------------------------------
/frontend/Dockerfile.dev:
--------------------------------------------------------------------------------
1 | # build and run the frontend
2 | FROM node:10
3 |
4 | WORKDIR /app
5 |
6 | COPY package*.json /app/
7 |
8 | RUN npm install
9 |
10 | ENV REACT_APP_API_BASE_URL=http://127.0.0.1:8000/api
11 |
12 | COPY public /app/public
13 | COPY src /app/src
14 |
--------------------------------------------------------------------------------
/frontend/src/components/NavBar.sass:
--------------------------------------------------------------------------------
1 | .logo
2 | width: 40px
3 | height: 40px
4 | margin-right: 10px
5 |
6 | .heading
7 | font-size: 20px
8 |
9 | .branding
10 | a
11 | color: inherit
12 | text-decoration: inherit
13 |
14 | &:hover
15 | color: inherit
16 |
--------------------------------------------------------------------------------
/backend/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi==0.65.2
2 | uvicorn==0.11.7
3 | python-multipart==0.0.5
4 | click==7.1.1
5 | git+https://github.com/openredact/expose-text.git#egg=expose-text
6 | git+https://github.com/openredact/nerwhal.git#egg=nerwhal
7 | git+https://github.com/openredact/anonymizer.git#egg=or-anonymizer
8 |
--------------------------------------------------------------------------------
/backend/tests/endpoints/test_score.py:
--------------------------------------------------------------------------------
1 | def test_score_computation(client):  # POST /api/score with no computed and one gold annotation
2 | response = client.post(
3 | "/api/score", json={"computedAnnotations": [], "goldAnnotations": [{"start": 0, "end": 1, "tag": "PER"}]}
4 | )
5 | assert response.status_code == 200
6 | assert response.json()["total"]["f1"] == 0.0  # nothing was computed, so the overall F1 is expected to be 0.0
7 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.3"
2 | services:
3 | backend:
4 | build:
5 | context: ./backend
6 | image: openredact/backend:${TAG:-latest}
7 | expose:
8 | - 8000
9 | frontend:
10 | build: ./frontend
11 | image: openredact/frontend:${TAG:-latest}
12 | ports:
13 | - "80:80"
14 |
--------------------------------------------------------------------------------
/frontend/src/js/anonymization.js:
--------------------------------------------------------------------------------
1 | class Anonymization { // record that copies five fields from a single options object
2 | constructor({ start, end, startChar, endChar, text }) { // destructured parameter: callers pass one object, not positional arguments
3 | this.start = start;
4 | this.end = end;
5 | this.startChar = startChar; // NOTE(review): presumably character offsets, as opposed to start/end — confirm with callers
6 | this.endChar = endChar;
7 | this.text = text;
8 | }
9 | }
10 |
11 | export default Anonymization;
12 |
--------------------------------------------------------------------------------
/frontend/src/js/annotation.js:
--------------------------------------------------------------------------------
1 | import { v4 as uuidv4 } from "uuid";
2 |
3 | class Annotation { // annotation record carrying a freshly generated id
4 | constructor(start, end, tag, text) { // parameter order: tag comes before text
5 | this.start = start;
6 | this.end = end;
7 | this.text = text;
8 | this.tag = tag;
9 | this.id = uuidv4(); // a new UUID v4 is generated for every instance
10 | }
11 | }
12 |
13 | export default Annotation;
14 |
--------------------------------------------------------------------------------
/frontend/src/js/constants.js:
--------------------------------------------------------------------------------
1 | const constants = {}; // settings object, filled in below and exported as the default
2 |
3 | constants.title = "OpenRedact";
4 | constants.tooltipHoverOpenDelay = 500; // presumably milliseconds — TODO confirm against the tooltip usage
5 | constants.maxTagColors = 12;
6 | constants.compileTimeout = 300; // NOTE(review): unit is not visible here — confirm against the consumer
7 |
8 | // Set a fixed value, or 0 for automatic width
9 | constants.previewPdfWidth = 0;
10 |
11 | export default constants;
12 |
--------------------------------------------------------------------------------
/backend/tests/data/cli/expected/test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Test
6 |
7 |
8 |
9 | Meine E-Mail Adresse lautet Email 1.
10 | Sie ist in XXXXXXXXXXX registriert.
11 |
12 |
13 |
--------------------------------------------------------------------------------
/backend/tests/data/cli/input/test.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Test
6 |
7 |
8 |
9 | Meine E-Mail Adresse lautet test@mail.de.
10 | Sie ist in Deutschland registriert.
11 |
12 |
13 |
--------------------------------------------------------------------------------
/frontend/src/js/token.js:
--------------------------------------------------------------------------------
1 | class Token { // record that stores the five constructor arguments unchanged
2 | constructor(startChar, endChar, text, hasWhitespace, linebreakCount) {
3 | this.startChar = startChar;
4 | this.endChar = endChar;
5 | this.text = text;
6 | this.hasWhitespace = hasWhitespace; // NOTE(review): presumably whether whitespace follows the token — confirm with the producer
7 | this.linebreakCount = linebreakCount;
8 | }
9 | }
10 |
11 | export default Token;
12 |
--------------------------------------------------------------------------------
/frontend/src/setupTests.js:
--------------------------------------------------------------------------------
1 | /* eslint-disable import/no-extraneous-dependencies */
2 | // jest-dom adds custom jest matchers for asserting on DOM nodes.
3 | // allows you to do things like:
4 | // expect(element).toHaveTextContent(/react/i)
5 | // learn more: https://github.com/testing-library/jest-dom
6 | import "@testing-library/jest-dom/extend-expect";
7 |
--------------------------------------------------------------------------------
/backend/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 | from fastapi.testclient import TestClient
5 |
6 | from app.main import app
7 |
8 |
9 | @pytest.fixture(scope="session")  # session scope: one instance is shared by all tests of a run
10 | def client():  # TestClient wrapping the application imported from app.main
11 | return TestClient(app)
12 |
13 |
14 | @pytest.fixture
15 | def test_data():  # path of the "data" directory located next to this conftest file
16 | return Path(__file__).parent / "data"
17 |
--------------------------------------------------------------------------------
/.run/Dev Servers.run.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/frontend/src/components/AboutDialog.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 |
3 | .logo-background
4 | background: $dark-gray3
5 |
6 | .logo
7 | width: 40px
8 | height: 40px
9 | display: block
10 | margin: 20px auto
11 |
12 | .copyright
13 | margin-top: 40px
14 | margin-bottom: 0
15 |
16 | .more-vertical-space
17 | padding-top: 20px
18 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/Dropzone.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import Dropzone from "./Dropzone";
4 |
5 | it("renders", () => { // smoke test; NOTE(review): the JSX on the next line looks truncated in this listing — confirm against the real file
6 | const { getByText } = render( {}} />);
7 | const dropzone = getByText(/\.txt/i); // the rendered output must contain text matching ".txt"
8 | expect(dropzone).toBeInTheDocument();
9 | });
10 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/Dropzone.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 |
3 | .dropzone
4 | display: flex
5 | flex-direction: column
6 | align-items: center
7 | padding: 20px
8 | border-width: 2px
9 | border-radius: 2px
10 | border-color: $gray3
11 | border-style: dashed
12 | background-color: $white
13 | color: $pt-text-color-muted
14 | outline: none
15 |
--------------------------------------------------------------------------------
/frontend/src/components/preview/PreviewControl.sass:
--------------------------------------------------------------------------------
1 | @import "../Main"
2 |
3 | .preview-card
4 | margin: $border-width $border-width $border-width 0.5*$border-width
5 | position: relative
6 | box-shadow: none
7 | padding: 0
8 |
9 | .cancel-warning
10 | top: 5px
11 | right: 5px
12 | position: absolute
13 | cursor: pointer
14 |
15 | .preview-html
16 | & > div
17 | margin: 0 auto
18 |
--------------------------------------------------------------------------------
/backend/tests/data/cli/anonymizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "defaultMechanism": {
3 | "mechanism": "suppression",
4 | "config": {
5 | "suppressionChar": "X"
6 | }
7 | },
8 | "mechanismsByTag": {
9 | "EMAIL": {
10 | "mechanism": "pseudonymization",
11 | "config": {
12 | "format_string": "Email {}",
13 | "stateful": true
14 | }
15 | }
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/frontend/Dockerfile:
--------------------------------------------------------------------------------
1 | # build and compile the frontend
2 | FROM node:10 as build-stage
3 |
4 | WORKDIR /app
5 |
6 | COPY package*.json /app/
7 |
8 | RUN npm install
9 |
10 | COPY ./ /app/
11 |
12 | ENV REACT_APP_API_BASE_URL=/api
13 |
14 | RUN npm run build
15 |
16 | # serve built frontend using nginx
17 | FROM nginx:1.15
18 |
19 | COPY --from=build-stage /app/build/ /assets
20 | COPY nginx.conf /etc/nginx/nginx.conf
--------------------------------------------------------------------------------
/frontend/src/components/preview/TextPreview.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import TextPreview from "./TextPreview";
4 |
5 | it("renders preview text", () => { // NOTE(review): the element passed to render() is missing from this listing — confirm against the real file
6 | const text = "This is a sample text.";
7 | const { getByText } = render();
8 | const element = getByText(text); // the sample text must appear in the rendered output
9 | expect(element).toBeInTheDocument();
10 | });
11 |
--------------------------------------------------------------------------------
/frontend/src/components/scores/ScoresDialogBody.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import ScoresDialogBody from "./ScoresDialogBody";
4 |
5 | it("renders", () => { // smoke test; NOTE(review): the rendered element (line 7) is missing from this listing — confirm against the real file
6 | const { getByText } = render(
7 |
8 | );
9 | const text = getByText(/annotation\.scores/); // looks for text matching the "annotation.scores" translation key
10 | expect(text).toBeInTheDocument();
11 | });
12 |
--------------------------------------------------------------------------------
/.run/Lint, format and test project.run.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/frontend/src/index.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import ReactDOM from "react-dom";
3 | import { FocusStyleManager } from "@blueprintjs/core";
4 | import "./index.css";
5 | import LocalizationWrapper from "./components/LocalizationWrapper";
6 |
7 | FocusStyleManager.onlyShowFocusOnTabs(); // Blueprint helper; presumably limits focus outlines to keyboard navigation — see the Blueprint docs
8 |
9 | ReactDOM.render( // NOTE(review): the element tree (lines 10-12) is missing from this listing; presumably it wraps the imported LocalizationWrapper — confirm
10 |
11 |
12 | ,
13 | document.getElementById("root") // mount point: the DOM element with id "root"
14 | );
15 |
--------------------------------------------------------------------------------
/frontend/src/components/Disclaimer.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import { Callout } from "@blueprintjs/core";
3 | import PolyglotContext from "../js/polyglotContext";
4 |
5 | const Disclaimer = () => { // component that shows the translated "disclaimer.text" message
6 | const t = useContext(PolyglotContext); // translation function taken from the context
7 |
8 | return ( // NOTE(review): the wrapping element (presumably the imported Callout) is missing from this listing — confirm
9 |
10 | {t("disclaimer.text")}
11 |
12 | );
13 | };
14 |
15 | export default Disclaimer;
16 |
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 |
8 | # testing
9 | /coverage
10 |
11 | # production
12 | /build
13 |
14 | # misc
15 | .DS_Store
16 | .env.local
17 | .env.development.local
18 | .env.test.local
19 | .env.production.local
20 |
21 | npm-debug.log*
22 | yarn-debug.log*
23 | yarn-error.log*
24 |
25 | public/pdf.worker.min.js
26 |
--------------------------------------------------------------------------------
/frontend/src/components/Main.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 |
3 | $border-width: 5px
4 | $main-menu-height: 40px
5 | $main-view-height: calc(100vh - #{$pt-navbar-height} - #{$main-menu-height})
6 |
7 | .main
8 | display: grid
9 | grid: $main-menu-height $main-view-height / 100%
10 |
11 | .main-view
12 | display: grid
13 | grid: auto / 50% 50%
14 | overflow-y: scroll
15 | position: relative
16 | background-color: $light-gray3
17 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/AnnotationControl.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import AnnotationControl from "./AnnotationControl";
4 |
5 | it("renders", () => { // smoke test without assertions; NOTE(review): the listing jumps from line 7 to line 11, so the element and some props are missing
6 | render(
7 | {}}
11 | onFileDrop={() => {}}
12 | isLoading={false}
13 | tags={[]}
14 | />
15 | );
16 | });
17 |
--------------------------------------------------------------------------------
/frontend/src/index.css:
--------------------------------------------------------------------------------
1 | @import "~normalize.css";
2 | @import "~@blueprintjs/core/lib/css/blueprint.css";
3 | @import "~@blueprintjs/icons/lib/css/blueprint-icons.css";
4 |
5 | body {
6 | margin: 0;
7 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen",
8 | "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue",
9 | sans-serif;
10 | -webkit-font-smoothing: antialiased;
11 | -moz-osx-font-smoothing: grayscale;
12 | }
13 |
--------------------------------------------------------------------------------
/.run/Frontend Dev Server.run.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.run/Frontend Tests.run.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docker-compose.dev.yml:
--------------------------------------------------------------------------------
1 | version: "3.3"
2 | services:
3 | backend:
4 | build:
5 | context: ./backend
6 | image: openredact/backend:${TAG:-latest}
7 | ports:
8 | - "8000:8000"
9 | volumes:
10 | - "./backend:/app"
11 | frontend-dev:
12 | build:
13 | context: ./frontend
14 | dockerfile: Dockerfile.dev
15 | image: openredact/frontend-dev:${TAG:-latest}
16 | command: npm run start
17 | ports:
18 | - "80:80"
19 | volumes:
20 | - "./frontend:/app"
21 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/RandomizedResponseMechanism.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PolyglotContext from "../../js/polyglotContext";
3 |
4 | const RandomizedResponseMechanism = () => { // component that only shows the translated "no_config" message
5 | const t = useContext(PolyglotContext); // translation function taken from the context
6 |
7 | // For now, this is only a demo for countries
8 |
9 | return ( // NOTE(review): the wrapping element is missing from this listing — confirm against the real file
10 |
11 | {t("anonymization.randomized_response.no_config")}
12 |
13 | );
14 | };
15 |
16 | export default RandomizedResponseMechanism;
17 |
--------------------------------------------------------------------------------
/frontend/src/components/LocalizationWrapper.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PolyglotContext from "../js/polyglotContext";
3 | import App from "./App";
4 | import { polyglot, updateLocale } from "../translations/utils";
5 |
6 | const t = (key, options) => polyglot.t(key, options);
7 |
8 | const LocalizationWrapper = () => {
9 | updateLocale(polyglot);
10 |
11 | return (
12 |
13 |
14 |
15 | );
16 | };
17 |
18 | export default LocalizationWrapper;
19 |
--------------------------------------------------------------------------------
/frontend/src/components/LocalizationWrapper.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import LocalizationWrapper from "./LocalizationWrapper";
4 |
5 | it("shows translations in the correct language", () => {
6 | // mock language
7 | Object.defineProperty(navigator, "language", {
8 | get() {
9 | return "de";
10 | },
11 | });
12 |
13 | const { getByTitle } = render();
14 | const help = getByTitle(/Hilfe/);
15 | expect(help).toBeInTheDocument();
16 | });
17 |
--------------------------------------------------------------------------------
/frontend/src/components/scores/ScoresTable.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import ScoresTable from "./ScoresTable";
4 |
5 | it("renders", () => {
6 | const { getByText } = render(
7 |
8 | );
9 | const tag = getByText(/total/i);
10 | expect(tag).toBeInTheDocument();
11 | const metric = getByText(/f2/i);
12 | expect(metric).toBeInTheDocument();
13 | const value = getByText("1.00");
14 | expect(value).toBeInTheDocument();
15 | });
16 |
--------------------------------------------------------------------------------
/.github/workflows/pre_commit.yml:
--------------------------------------------------------------------------------
1 | name: Black & Flake8
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches: [master]
7 |
8 | jobs:
9 | pre-commit:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 | - uses: actions/setup-python@v2
14 | - name: Install Node.js 12.x
15 | uses: actions/setup-node@v1
16 | with:
17 | node-version: 12.x
18 | - name: Install dependencies
19 | run: npm install
20 | working-directory: frontend
21 | - uses: pre-commit/action@v2.0.0
22 |
--------------------------------------------------------------------------------
/frontend/src/js/useLocalStorage.js:
--------------------------------------------------------------------------------
1 | import { useState } from "react";
2 |
/**
 * React hook that keeps a piece of state mirrored in `window.localStorage`.
 *
 * @param {string} key - localStorage key; the value is stored there as JSON.
 * @param {*} initialValue - value used when nothing usable is stored under `key`.
 * @returns {Array} `[storedValue, setValue]`, used like the pair from `useState`.
 */
function useLocalStorage(key, initialValue) {
  const [storedValue, setStoredValue] = useState(() => {
    const item = window.localStorage.getItem(key);
    if (item == null) return initialValue;

    try {
      return JSON.parse(item);
    } catch (error) {
      // A corrupt or non-JSON entry must not crash the first render;
      // behave as if nothing had been stored.
      return initialValue;
    }
  });

  // Update the React state and persist the same value as JSON.
  const setValue = (value) => {
    setStoredValue(value);
    window.localStorage.setItem(key, JSON.stringify(value));
  };

  return [storedValue, setValue];
}
18 |
19 | export default useLocalStorage;
20 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
# Hooks run by pre-commit: Python formatting/linting, prettier, and the
# frontend's own eslint script.
repos:
  - repo: https://github.com/psf/black
    rev: 21.12b0
    hooks:
      - id: black
  # flake8 is hosted on GitHub; the former GitLab location can no longer be
  # cloned by pre-commit.
  - repo: https://github.com/pycqa/flake8
    rev: 3.7.9
    hooks:
      - id: flake8
  - repo: https://github.com/pre-commit/mirrors-prettier
    rev: v2.5.1
    hooks:
      - id: prettier
  - repo: local
    hooks:
      # Runs the lint script of the frontend package once per commit
      # (pass_filenames is off, so no file list is handed to it).
      - id: eslint
        name: eslint
        entry: bash -c 'cd frontend && npm run lint -s'
        language: system
        pass_filenames: false
        types: [file]
22 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/AnonymizationConfigMenu.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import AnonymizationConfigMenu from "./AnonymizationConfigMenu";
4 |
5 | it("renders", () => {
6 | render(
7 | {}}
17 | />
18 | );
19 | });
20 |
--------------------------------------------------------------------------------
/frontend/src/components/scores/ScoresDialog.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import ScoresDialog from "./ScoresDialog";
4 |
5 | it("can open dialog", () => {
6 | const { getByRole, getByLabelText } = render(
7 | {}}
11 | showDialog
12 | />
13 | );
14 | const button = getByRole("button");
15 | button.click();
16 | const closeButton = getByLabelText(/close/i);
17 | expect(closeButton).toBeInTheDocument();
18 | });
19 |
--------------------------------------------------------------------------------
/frontend/src/components/MainMenu.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import MainMenu from "./MainMenu";
4 |
5 | it("renders button", () => {
6 | const { getByRole } = render(
7 | {}}
9 | onNewDocument={() => {}}
10 | showDownloadButton={false}
11 | showCompileButton={false}
12 | onCompile={() => {}}
13 | isCompiling={false}
14 | onShowScores={() => {}}
15 | />
16 | );
17 | const button = getByRole("button");
18 | expect(button).toBeInTheDocument();
19 | });
20 |
--------------------------------------------------------------------------------
/frontend/src/components/App.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import { fetchRecognizers, fetchTags } from "../api/routes";
4 | import App from "./App";
5 |
6 | jest.mock("../api/routes");
7 | fetchTags.mockResolvedValue({ data: ["PER"] });
8 | fetchRecognizers.mockResolvedValue({ data: ["DummyRecognizer"] });
9 |
10 | afterEach(() => {
11 | jest.clearAllMocks();
12 | });
13 |
14 | it("renders", () => {
15 | render();
16 | expect(fetchTags).toHaveBeenCalledTimes(1);
17 | expect(fetchRecognizers).toHaveBeenCalledTimes(1);
18 | });
19 |
--------------------------------------------------------------------------------
/frontend/src/components/MainMenu.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 | @import "./Main"
3 | @import "./App"
4 |
5 | .main-menu
6 | background-color: $gray5
7 | display: grid
8 | grid: auto / 50% 50%
9 |
10 | .annotation-menu, .preview-menu
11 | display: flex
12 | justify-content: space-between
13 |
14 | .new-document-button
15 | margin: $border-width
16 |
17 | .scores-button
18 | margin: $border-width 0.5*$border-width $border-width
19 |
20 | .download-button
21 | margin: $border-width
22 |
23 | .compile-button
24 | margin: $border-width $border-width $border-width 0.5*$border-width
25 |
26 |
--------------------------------------------------------------------------------
/frontend/src/components/preview/TextPreview.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { Card, EditableText, Elevation } from "@blueprintjs/core";
3 | import PropTypes from "prop-types";
4 | import "./TextPreview.sass";
5 |
6 | const TextPreview = ({ text }) => {
7 | return (
8 |
9 |
16 |
17 | );
18 | };
19 |
20 | TextPreview.propTypes = {
21 | text: PropTypes.string.isRequired,
22 | };
23 |
24 | export default TextPreview;
25 |
--------------------------------------------------------------------------------
/frontend/src/components/Help.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import { Button } from "@blueprintjs/core";
3 | import PolyglotContext from "../js/polyglotContext";
4 | import HelpDialog from "./HelpDialog";
5 |
6 | const Help = () => {
7 | const t = useContext(PolyglotContext);
8 |
9 | const [showHelp, setShowHelp] = useState(false);
10 |
11 | return (
12 |
13 |
21 | );
22 | };
23 |
24 | export default Help;
25 |
--------------------------------------------------------------------------------
/frontend/src/components/About.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import { Button } from "@blueprintjs/core";
3 | import PolyglotContext from "../js/polyglotContext";
4 | import AboutDialog from "./AboutDialog";
5 |
6 | const About = () => {
7 | const t = useContext(PolyglotContext);
8 |
9 | const [showAbout, setShowAbout] = useState(false);
10 |
11 | return (
12 |
13 |
21 | );
22 | };
23 |
24 | export default About;
25 |
--------------------------------------------------------------------------------
/backend/tests/endpoints/test_anonymize_file.py:
--------------------------------------------------------------------------------
def test_anonymization(client, test_data):
    """Replacing characters 0-12 of ``test.txt`` with "XXX" returns just "XXX"."""
    test_file_path = test_data / "test.txt"
    # The context manager closes the upload handle once the request is done,
    # instead of leaving it open for the rest of the test session.
    with open(test_file_path, "rb") as test_file:
        response = client.post(
            "/api/anonymize-file",
            files={"file": test_file},
            data={"anonymizations": '[{"startChar":0,"endChar":12,"text":"XXX"}]'},
        )
    assert response.status_code == 200
    assert response.content == b"XXX"
10 |
11 |
def test_without_anonymization(client, test_data):
    """An empty anonymization list returns the file content unchanged."""
    test_file_path = test_data / "test.txt"
    # The context manager closes the upload handle once the request is done.
    with open(test_file_path, "rb") as test_file:
        response = client.post("/api/anonymize-file", files={"file": test_file}, data={"anonymizations": "[]"})
    assert response.status_code == 200
    assert response.content == b"Deutschland."
17 |
--------------------------------------------------------------------------------
/frontend/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
11 |
12 |
13 |
14 |
15 | OpenRedact
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/backend/tests/cli/test_cli.py:
--------------------------------------------------------------------------------
1 | import filecmp
2 |
3 | from click.testing import CliRunner
4 |
5 | from cli.redact import redact
6 |
7 |
def test_redact_cli(tmp_path):
    """Run the ``redact`` command on the CLI fixtures and compare the files it writes with the expected ones."""
    cli_args = [
        "--input_dir",
        "tests/data/cli/input",
        "--output_dir",
        tmp_path,
        "--anonymizer_config",
        "tests/data/cli/anonymizer_config.json",
        "--recognizer_config",
        "tests/data/cli/recognizer_config.json",
    ]
    outcome = CliRunner().invoke(redact, cli_args)
    assert outcome.exit_code == 0

    # (expected fixture, file produced in the temporary output directory)
    expected_and_produced = (
        ("tests/data/cli/expected/test.html", tmp_path / "test.html"),
        ("tests/data/cli/expected/nested/test.txt", tmp_path / "nested/test.txt"),
    )
    for expected_file, produced_file in expected_and_produced:
        # shallow=False forces a byte-wise comparison of the contents.
        assert filecmp.cmp(expected_file, produced_file, shallow=False)
26 |
--------------------------------------------------------------------------------
/frontend/src/components/NavBar.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import NavBar from "./NavBar";
4 | import Settings from "./Settings";
5 | import About from "./About";
6 | import Help from "./Help";
7 |
8 | it("renders buttons", () => {
9 | const { getByTitle } = render(
10 | {}}
14 | availableRecognizers={[]}
15 | activatedRecognizers={[]}
16 | />
17 | }
18 | about={}
19 | help={}
20 | />
21 | );
22 | const settings = getByTitle(/settings/i);
23 | expect(settings).toBeInTheDocument();
24 | const about = getByTitle(/about/i);
25 | expect(about).toBeInTheDocument();
26 | const help = getByTitle(/help/i);
27 | expect(help).toBeInTheDocument();
28 | });
29 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/Item.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { H6 } from "@blueprintjs/core";
4 | import TagMechanismConfig from "./TagMechanismConfig";
5 | import PolyglotContext from "../../js/polyglotContext";
6 |
7 | const Item = ({ mechanism, updateMechanism, tag }) => {
8 | const t = useContext(PolyglotContext);
9 |
10 | return (
11 |
12 |
{t(`tags.${tag.toLowerCase()}`)}
13 |
18 |
19 | );
20 | };
21 |
22 | Item.propTypes = {
23 | mechanism: PropTypes.objectOf(PropTypes.any).isRequired,
24 | updateMechanism: PropTypes.func.isRequired,
25 | tag: PropTypes.string.isRequired,
26 | };
27 |
28 | export default Item;
29 |
--------------------------------------------------------------------------------
/frontend/src/translations/utils.js:
--------------------------------------------------------------------------------
1 | import Polyglot from "node-polyglot";
2 | import en from "./en";
3 | import de from "./de";
4 |
// First two characters of the browser language, e.g. "de" for "de-DE".
const getLocale = () => {
  const { language } = navigator;
  return language.slice(0, 2);
};
6 |
// Phrase table for a locale: German for "de", English for everything else.
const getPhrases = (locale) => (locale === "de" ? de : en);
19 |
// Build a Polyglot instance for the locale reported by the browser.
const createDefaultPolyglot = () => {
  const locale = getLocale();
  return new Polyglot({ phrases: getPhrases(locale), locale });
};
25 |
// Switch an existing Polyglot instance to `newLocale`; with no (or an empty)
// `newLocale` the browser locale is used instead.
const updateLocale = (myPolyglot, newLocale = "") => {
  const locale = newLocale === "" ? getLocale() : newLocale;
  myPolyglot.extend(getPhrases(locale));
  myPolyglot.locale(locale);
};
32 |
33 | const polyglot = createDefaultPolyglot();
34 |
35 | export { polyglot, updateLocale };
36 |
--------------------------------------------------------------------------------
/backend/app/logging.conf:
--------------------------------------------------------------------------------
1 | [loggers]
2 | keys=root,app
3 |
4 | [handlers]
5 | keys=consoleHandler,detailedConsoleHandler
6 |
7 | [formatters]
8 | keys=normalFormatter,detailedFormatter
9 |
10 | [logger_root]
11 | level=DEBUG
12 | handlers=consoleHandler
13 |
14 | [logger_app]
15 | level=DEBUG
16 | handlers=detailedConsoleHandler
17 | qualname=app
18 | propagate=0
19 |
20 | [handler_consoleHandler]
21 | class=StreamHandler
22 | level=DEBUG
23 | formatter=normalFormatter
24 | args=(sys.stdout,)
25 |
26 | [handler_detailedConsoleHandler]
27 | class=StreamHandler
28 | level=DEBUG
29 | formatter=detailedFormatter
30 | args=(sys.stdout,)
31 |
32 | [formatter_normalFormatter]
33 | format=%(asctime)s loglevel=%(levelname)-6s logger=%(name)s %(funcName)s() L%(lineno)-4d %(message)s
34 |
35 | [formatter_detailedFormatter]
36 | format=%(asctime)s loglevel=%(levelname)-6s logger=%(name)s %(funcName)s() L%(lineno)-4d %(message)s call_trace=%(pathname)s L%(lineno)-4d
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/AnonymizationConfigMenu.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 |
3 | .config-menu
4 | background: $gray4
5 | border-radius: 0
6 | overflow-y: auto
7 | height: calc(100vh - #{$pt-navbar-height})
8 | box-shadow: none
9 |
10 | ul
11 | padding-left: 0
12 | list-style-type: none
13 |
14 | select
15 | margin-bottom: 10px
16 |
17 | .vertical-space
18 | padding-top: 20px
19 |
20 | .show-advanced-button
21 | display: table
22 | margin: 0 auto
23 | text-align: center
24 |
25 | .more-top-padding
26 | padding-top: 10px
27 |
28 | .advanced-mechanism-options
29 | padding-left: 20px
30 | padding-bottom: 10px
31 |
32 | .mechanism-options
33 | padding-left: 0
34 |
35 | .mechanism-options
36 | padding-left: 20px
37 |
38 |
39 | #default-mechanism-info
40 | color: $gray1
41 | padding-left: 5px
42 | padding-bottom: 5px
43 |
44 | #default-mechanism-info-tooltip-content
45 | width: 350px
46 |
--------------------------------------------------------------------------------
/.run/Backend Tests.run.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/annotator/Mark.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 |
4 | const Mark = ({ start, end, content, tag, color, onClick }) => {
5 | return (
6 | onClick({ start, end })}
12 | onKeyPress={() => onClick({ start, end })}
13 | tabIndex={start}
14 | >
15 | {content}
16 | {tag && (
17 |
18 | {tag}
19 |
20 | )}
21 |
22 | );
23 | };
24 |
25 | Mark.defaultProps = {
26 | color: "#84d2ff",
27 | };
28 |
29 | Mark.propTypes = {
30 | start: PropTypes.string.isRequired,
31 | end: PropTypes.string.isRequired,
32 | content: PropTypes.string.isRequired,
33 | tag: PropTypes.string.isRequired,
34 | color: PropTypes.string,
35 | onClick: PropTypes.func.isRequired,
36 | };
37 |
38 | export default Mark;
39 |
--------------------------------------------------------------------------------
/frontend/src/components/preview/PreviewControl.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import PreviewControl from "./PreviewControl";
4 | import Token from "../../js/token";
5 | import Anonymization from "../../js/anonymization";
6 |
7 | it("renders text preview", () => {
8 | const tokens = [
9 | new Token(0, 2, "My", true),
10 | new Token(3, 7, "name", true),
11 | new Token(8, 10, "is", true),
12 | new Token(11, 15, "Khan", false),
13 | new Token(15, 16, ".", false),
14 | ];
15 | const anonymizations = [
16 | new Anonymization({
17 | start: 3,
18 | end: 4,
19 | startChar: 11,
20 | endChar: 15,
21 | text: "XXXX",
22 | }),
23 | ];
24 | const anonymizedText = "My name is XXXX.";
25 | const { getByText } = render(
26 | false}
29 | paragraphs={[{ tokens }]}
30 | />
31 | );
32 | const element = getByText(anonymizedText);
33 | expect(element).toBeInTheDocument();
34 | });
35 |
--------------------------------------------------------------------------------
/backend/README.md:
--------------------------------------------------------------------------------
1 | # OpenRedact Backend
2 |
3 | ## Usage
4 |
5 | Before you begin, make sure you are in the backend directory.
6 |
7 | ### Install the backend dependencies
8 |
9 | Install the dependencies for production using:
10 |
11 | ```
12 | pip install -r requirements.txt
13 | ```
14 |
15 | ### Running the server
16 |
17 | ```
18 | uvicorn app.main:app --reload
19 | ```
20 |
21 | ## Development
22 |
23 | ### Install dev requirements
24 |
25 | ```
26 | pip install -r requirements-dev.txt
27 | ```
28 |
29 | To install our own dependencies in editable mode (aka setuptools develop mode), use the `-e` flag.
30 | Changes made in the locally checked-out projects then take effect immediately.
31 |
32 | ```
33 | pip install -e path/to/nerwhal
34 | pip install -e path/to/expose-text
35 | pip install -e path/to/anonymizer
36 | ```
37 |
38 | ### PyCharm Setup
39 |
40 | To have imports resolve correctly mark the backend folder as a Sources Root.
41 |
42 | ### Testing
43 |
44 | The tests can be executed with
45 |
46 | ```
47 | pytest --cov-report term --cov=app
48 | ```
49 |
50 | or by simply running `pytest`.
51 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Jonas Langhabel
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/AnnotationForm.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { render } from "@testing-library/react";
3 | import AnnotationForm from "./AnnotationForm";
4 |
5 | it("shows a token", () => {
6 | const { getByText } = render(
7 | {}}
24 | tags={[]}
25 | />
26 | );
27 | const token = getByText("MyToken");
28 | expect(token).toBeInTheDocument();
29 | });
30 |
31 | it("shows a tag", () => {
32 | const { getByText } = render(
33 | {}}
37 | tags={["MyTag"]}
38 | />
39 | );
40 | const tag = getByText("MyTag");
41 | expect(tag).toBeInTheDocument();
42 | });
43 |
--------------------------------------------------------------------------------
/frontend/src/components/NavBar.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import PropTypes from "prop-types";
3 | import "./NavBar.sass";
4 | import {
5 | NavbarGroup,
6 | Alignment,
7 | NavbarHeading,
8 | Classes,
9 | } from "@blueprintjs/core";
10 | import { ReactComponent as LogoSvg } from "../logo.svg";
11 |
12 | const NavBar = ({ settings, about, help }) => {
13 | return (
14 |
33 | );
34 | };
35 |
36 | NavBar.propTypes = {
37 | settings: PropTypes.element.isRequired,
38 | about: PropTypes.element.isRequired,
39 | help: PropTypes.element.isRequired,
40 | };
41 |
42 | export default NavBar;
43 |
--------------------------------------------------------------------------------
/backend/app/main.py:
--------------------------------------------------------------------------------
"""Application module of the OpenRedact backend: configures logging, CORS and the API router."""
import logging
import logging.config  # fileConfig lives in this submodule; "import logging" alone does not load it
import os

import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from app.endpoints import router

# setup loggers from the logging.conf that sits next to this file
logging.config.fileConfig(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "logging.conf"), disable_existing_loggers=False
)

logger = logging.getLogger(__name__)

app = FastAPI(title="OpenRedact API", description="Anonymize German documents using automatic PII detection.", version="0.1.0")


# Origins that are allowed to call the API from a browser
origins = [
    # for dev server
    "http://localhost:3000",
    "http://127.0.0.1:3000",
    "http://localhost:5000",
    "http://127.0.0.1:5000",
    # for docker-compose
    "http://localhost",
    "http://127.0.0.1",
    "http://localhost:80",
    "http://127.0.0.1:80",
]

app.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"])


# All endpoints are served below /api
app.include_router(router, prefix="/api")

if __name__ == "__main__":
    # Local development entry point: auto-reloading server on localhost:8000
    uvicorn.run("app.main:app", host="127.0.0.1", port=8000, reload=True)
39 |
--------------------------------------------------------------------------------
/frontend/src/components/scores/ScoresDialog.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { Dialog } from "@blueprintjs/core";
4 | import "./ScoresDialog.sass";
5 | import PolyglotContext from "../../js/polyglotContext";
6 | import ScoresDialogBody from "./ScoresDialogBody";
7 |
8 | const ScoresDialog = ({
9 | showDialog,
10 | onClose,
11 | annotations,
12 | goldAnnotations,
13 | }) => {
14 | const t = useContext(PolyglotContext);
15 |
16 | return (
17 |
32 | );
33 | };
34 |
35 | ScoresDialog.propTypes = {
36 | showDialog: PropTypes.bool.isRequired,
37 | onClose: PropTypes.func.isRequired,
38 | annotations: PropTypes.arrayOf(PropTypes.object).isRequired,
39 | goldAnnotations: PropTypes.arrayOf(PropTypes.object).isRequired,
40 | };
41 |
42 | export default ScoresDialog;
43 |
--------------------------------------------------------------------------------
/frontend/src/components/HelpDialog.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { Classes, Dialog, OL } from "@blueprintjs/core";
4 | import PolyglotContext from "../js/polyglotContext";
5 | import Disclaimer from "./Disclaimer";
6 | import "./HelpDialog.sass";
7 |
8 | const HelpDialog = ({ showHelp, setShowHelp }) => {
9 | const t = useContext(PolyglotContext);
10 | return (
11 |
32 | );
33 | };
34 |
35 | HelpDialog.propTypes = {
36 | showHelp: PropTypes.bool.isRequired,
37 | setShowHelp: PropTypes.func.isRequired,
38 | };
39 |
40 | export default HelpDialog;
41 |
--------------------------------------------------------------------------------
/frontend/src/components/ErrorBoundary.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { NonIdealState } from "@blueprintjs/core";
3 | import { IconNames } from "@blueprintjs/icons";
4 | import PropTypes from "prop-types";
5 | import PolyglotContext from "../js/polyglotContext";
6 |
7 | class ErrorBoundary extends React.Component {
8 | constructor(props) {
9 | super(props);
10 | this.state = { hasError: false };
11 | }
12 |
13 | static getDerivedStateFromError() {
14 | // Update state so the next render will show the fallback UI.
15 | return { hasError: true };
16 | }
17 |
18 | render() {
19 | const { hasError } = this.state;
20 | const { children } = this.props;
21 | const t = this.context;
22 |
23 | if (hasError) {
24 | // You can render any custom fallback UI
25 | return (
26 |
31 | );
32 | }
33 |
34 | return children;
35 | }
36 | }
37 |
38 | ErrorBoundary.contextType = PolyglotContext;
39 |
ErrorBoundary.propTypes = {
  // `node` accepts anything React can render (elements, strings, arrays of
  // those); `objectOf` warned for text or multiple children.
  children: PropTypes.node.isRequired,
};
43 |
44 | export default ErrorBoundary;
45 |
--------------------------------------------------------------------------------
/.run/Pre-commit Hooks.run.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/.run/Backend Dev Server.run.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/frontend/src/components/Settings.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import PropTypes from "prop-types";
3 | import { Button } from "@blueprintjs/core";
4 | import SettingsDialog from "./SettingsDialog";
5 | import PolyglotContext from "../js/polyglotContext";
6 |
7 | const Settings = ({
8 | availableRecognizers,
9 | activatedRecognizers,
10 | setActivatedRecognizers,
11 | }) => {
12 | const t = useContext(PolyglotContext);
13 |
14 | const [showSettings, setShowSettings] = useState(false);
15 |
16 | return (
17 |
18 |
33 | );
34 | };
35 |
36 | Settings.propTypes = {
37 | availableRecognizers: PropTypes.arrayOf(PropTypes.string).isRequired,
38 | activatedRecognizers: PropTypes.arrayOf(PropTypes.string).isRequired,
39 | setActivatedRecognizers: PropTypes.func.isRequired,
40 | };
41 |
42 | export default Settings;
43 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/GeneralizationMechanism.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { InputGroup, FormGroup } from "@blueprintjs/core";
4 | import PolyglotContext from "../../js/polyglotContext";
5 |
6 | const GeneralizationMechanism = ({ mechanism, updateMechanism, tag }) => {
7 | const t = useContext(PolyglotContext);
8 |
// Pass a mechanism with the new replacement string to the parent.
// The spread of `mechanism` copies only the top level, so `config` gets its
// own copy as well; otherwise the object held by the prop would be mutated.
function onUpdateReplacement(value) {
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, replacement: value },
  });
}
14 |
15 | return (
16 |
17 |
21 | onUpdateReplacement(event.target.value)}
25 | fill
26 | />
27 |
28 |
29 | );
30 | };
31 |
32 | GeneralizationMechanism.propTypes = {
33 | mechanism: PropTypes.objectOf(PropTypes.any).isRequired,
34 | updateMechanism: PropTypes.func.isRequired,
35 | tag: PropTypes.string.isRequired,
36 | };
37 |
38 | export default GeneralizationMechanism;
39 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/Dropzone.jsx:
--------------------------------------------------------------------------------
1 | /* eslint-disable react/jsx-props-no-spreading */
2 | import React, { useContext } from "react";
3 | import { useDropzone } from "react-dropzone";
4 | import { Button } from "@blueprintjs/core";
5 | import "./Dropzone.sass";
6 | import PropTypes from "prop-types";
7 | import PolyglotContext from "../../js/polyglotContext";
8 |
9 | const ACCEPTED_FORMATS = [".txt", ".pdf", ".html", ".docx"];
10 |
11 | const Dropzone = ({ onFileDrop }) => {
12 | const t = useContext(PolyglotContext);
13 |
14 | const { getRootProps, getInputProps, open } = useDropzone({
15 | accept: ACCEPTED_FORMATS.join(","),
16 | noClick: true,
17 | noKeyboard: true,
18 | multiple: false,
19 | onDropAccepted: onFileDrop,
20 | });
21 |
// Human-readable list of the accepted extensions: all but the last joined by
// commas, then the translated "or" and the last one.
function computeFormatString() {
  const leadingFormats = ACCEPTED_FORMATS.slice(0, -1).join(", ");
  const lastFormat = ACCEPTED_FORMATS.slice(-1);
  return `${leadingFormats}, ${t("annotation.or")} ${lastFormat}`;
}
26 |
27 | return (
28 |
29 |
30 |
{t("annotation.drop", { formats: computeFormatString() })}
31 |
34 |
35 | );
36 | };
37 |
38 | Dropzone.propTypes = {
39 | onFileDrop: PropTypes.func.isRequired,
40 | };
41 |
42 | export default Dropzone;
43 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/AnnotationControl.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import { Card, Elevation, Spinner } from "@blueprintjs/core";
3 | import "./AnnotationControl.sass";
4 | import PropTypes from "prop-types";
5 | import Dropzone from "./Dropzone";
6 | import AnnotationForm from "./AnnotationForm";
7 |
8 | const AnnotationControl = ({
9 | paragraphs,
10 | annotations,
11 | onAnnotationsChange,
12 | onFileDrop,
13 | isLoading,
14 | tags,
15 | }) => {
16 | return (
17 |
18 | {isLoading && }
19 | {(paragraphs.length === 0 ||
20 | (paragraphs.length > 0 && paragraphs[0].tokens.length === 0)) &&
21 | !isLoading && }
22 | {paragraphs.length > 0 &&
23 | paragraphs[0].tokens.length > 0 &&
24 | tags.length > 0 && (
25 |
31 | )}
32 |
33 | );
34 | };
35 |
36 | AnnotationControl.propTypes = {
37 | paragraphs: PropTypes.arrayOf(PropTypes.object).isRequired,
38 | annotations: PropTypes.arrayOf(PropTypes.array).isRequired,
39 | onAnnotationsChange: PropTypes.func.isRequired,
40 | onFileDrop: PropTypes.func.isRequired,
41 | isLoading: PropTypes.bool.isRequired,
42 | tags: PropTypes.arrayOf(PropTypes.string).isRequired,
43 | };
44 |
45 | export default AnnotationControl;
46 |
--------------------------------------------------------------------------------
/frontend/src/components/AboutDialog.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { Classes, Dialog } from "@blueprintjs/core";
4 | import PolyglotContext from "../js/polyglotContext";
5 | import { ReactComponent as LogoSvg } from "../logo.svg";
6 | import "./AboutDialog.sass";
7 | import Disclaimer from "./Disclaimer";
8 |
9 | const AboutDialog = ({ showAbout, setShowAbout }) => {
10 | const t = useContext(PolyglotContext);
11 |
12 | return (
13 |
39 | );
40 | };
41 |
42 | AboutDialog.propTypes = {
43 | showAbout: PropTypes.bool.isRequired,
44 | setShowAbout: PropTypes.func.isRequired,
45 | };
46 |
47 | export default AboutDialog;
48 |
--------------------------------------------------------------------------------
/backend/tests/endpoints/test_find_piis.py:
--------------------------------------------------------------------------------
def test_finding_piis(client, test_data):
    """Upload the plain-text fixture to /api/find-piis and check the reported PII and tokens."""
    test_file_path = test_data / "test.txt"
    # Open the upload inside a context manager so the handle is closed even if the request raises.
    with open(test_file_path, "rb") as test_file:
        response = client.post(
            "/api/find-piis", files={"file": test_file}, data={"recognizers": '["de_country_recognizer"]'}
        )
    assert response.status_code == 200

    piis = response.json()["piis"]
    assert len(piis) == 1
    # Subset check: only the keys listed here are pinned, extra keys in the response are tolerated.
    assert all(
        item in piis[0].items()
        for item in {
            "startChar": 0,
            "endChar": 11,
            "tag": "COUNTRY",
            "text": "Deutschland",
            "startTok": 0,
            "endTok": 1,
        }.items()
    )

    tokens = response.json()["tokens"]
    assert len(tokens) == 2
    assert all(item in tokens[1].items() for item in {"text": ".", "hasWs": False, "startChar": 11, "endChar": 12}.items())
25 |
26 |
def test_unsupported_format(client, test_data):
    """Uploading a file with an unknown extension to /api/find-piis must answer 400."""
    test_file_path = test_data / "foo.bar"
    # Context manager closes the upload handle instead of leaking it until garbage collection.
    with open(test_file_path, "rb") as test_file:
        response = client.post(
            "/api/find-piis", files={"file": test_file}, data={"recognizers": '["de_country_recognizer"]'}
        )
    assert response.status_code == 400
33 |
34 |
def test_finding_piis_pdf(client, test_data):
    """Uploading the PDF fixture to /api/find-piis succeeds and reports the "autopdf" format."""
    test_file_path = test_data / "test.pdf"
    # Context manager closes the upload handle instead of leaking it until garbage collection.
    with open(test_file_path, "rb") as test_file:
        response = client.post(
            "/api/find-piis", files={"file": test_file}, data={"recognizers": '["de_country_recognizer"]'}
        )
    assert response.status_code == 200
    assert response.json()["format"] == "autopdf"
43 |
--------------------------------------------------------------------------------
/frontend/src/components/scores/ScoresDialogBody.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useEffect, useState } from "react";
2 | import { Classes } from "@blueprintjs/core";
3 | import PropTypes from "prop-types";
4 | import ScoresTable from "./ScoresTable";
5 | import PolyglotContext from "../../js/polyglotContext";
6 | import { computeScores } from "../../api/routes";
7 | import AppToaster from "../../js/toaster";
8 |
9 | const ScoresDialogBody = ({ annotations, goldAnnotations }) => {
10 | const t = useContext(PolyglotContext);
11 |
12 | const [scores, setScores] = useState(null);
13 |
14 | useEffect(() => {
15 | if (annotations.length === 0 || goldAnnotations === null) return;
16 |
17 | computeScores({
18 | computedAnnotations: goldAnnotations,
19 | goldAnnotations: annotations,
20 | })
21 | .then((response) => setScores(response.data))
22 | .catch(() => {
23 | AppToaster.show({
24 | message: t("main.computing_scores_failed_toast"),
25 | intent: "danger",
26 | });
27 | });
28 | }, [t, annotations, goldAnnotations]);
29 |
30 | return (
31 |
32 |
33 | {t("annotation.scores_description")}
34 |
35 | {t("annotation.scores_note")}
36 |
37 | {scores !== null &&
}
38 |
39 | );
40 | };
41 |
42 | ScoresDialogBody.propTypes = {
43 | annotations: PropTypes.arrayOf(PropTypes.object).isRequired,
44 | goldAnnotations: PropTypes.arrayOf(PropTypes.object).isRequired,
45 | };
46 |
47 | export default ScoresDialogBody;
48 |
--------------------------------------------------------------------------------
/backend/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.7
2 |
3 | WORKDIR /app
4 |
# Install PDF dependencies (expose-text).
# Refresh the package index and install in the same layer: with two separate RUN
# steps a cached `apt-get update` layer can serve a stale index to the install.
RUN apt-get update && apt-get install -y cmake autoconf
8 |
9 | # wkhtmltopdf
10 | RUN wget --quiet https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.3/wkhtmltox-0.12.3_linux-generic-amd64.tar.xz && \
11 | tar vxf wkhtmltox-0.12.3_linux-generic-amd64.tar.xz && \
12 | cp wkhtmltox/bin/wk* /usr/local/bin/ && \
13 | rm -rf wkhtmltox
14 |
15 | # Uninstall old version (latest version is not available over apt)
16 | RUN apt-get purge -y poppler-utils
17 |
18 | # Install new poppler-utils manually
19 | RUN wget poppler.freedesktop.org/poppler-0.90.1.tar.xz
20 | RUN tar -xvf poppler-0.90.1.tar.xz
21 | RUN cd poppler-0.90.1 && mkdir build && cd build && cmake .. && make install && ldconfig
22 | RUN ln -s /usr/local/bin/pdftohtml /usr/bin/pdftohtml
23 | RUN pdftohtml -v
24 |
25 | # Disable pip cache
26 | ENV PIP_DISABLE_PIP_VERSION_CHECK=1
27 | ENV PIP_NO_CACHE_DIR=1
28 |
29 | # Dev dependencies (for testing)
30 | COPY requirements-dev.txt .
31 | RUN pip install --no-cache-dir -r requirements-dev.txt
32 |
33 | # Install packages
34 | COPY requirements.txt .
35 | RUN pip install --no-cache-dir -r requirements.txt
36 |
37 | # Install optional packages
38 | RUN pip install chardet
39 |
40 | # Environment
41 | ENV STANZA_TEST_HOME=/app/stanza_test
42 |
43 | # Models
44 | RUN python -m spacy download de
45 | RUN python -c "import stanza; stanza.download('de')"
46 |
47 | COPY ./ /app/
48 |
49 | RUN pip install gunicorn
50 |
51 |
52 | CMD ["gunicorn", "-b", "0.0.0.0:8000", "-t", "600", "-w", "2", "-k", "uvicorn.workers.UvicornWorker", "app.main:app"]
53 |
54 | EXPOSE 8000
55 |
--------------------------------------------------------------------------------
/frontend/nginx.conf:
--------------------------------------------------------------------------------
1 | worker_processes 1;
2 |
3 | events {
4 | worker_connections 1024;
5 | }
6 |
7 | http {
8 | include mime.types;
9 | index index.html;
10 |
11 | sendfile on;
12 | client_max_body_size 2000g;
13 | client_body_timeout 84600s;
14 | proxy_connect_timeout 84600s;
15 | proxy_send_timeout 84600s;
16 | proxy_read_timeout 84600s;
17 | send_timeout 84600s;
18 | proxy_buffering off;
19 | proxy_request_buffering off;
20 |
21 | upstream docker-backend {
22 | server backend:8000;
23 | }
24 |
25 | server {
26 | listen 80;
27 | ignore_invalid_headers off;
28 | add_header Referrer-Policy "same-origin";
29 | add_header X-Clacks-Overhead "GNU Terry Pratchett";
30 | add_header X-Content-Type-Options "nosniff";
31 | add_header X-Frame-Options "SAMEORIGIN";
32 | add_header X-XSS-Protection "1; mode=block";
33 | add_header Feature-Policy "accelerometer 'none'; camera 'none'; geolocation 'none'; gyroscope 'none'; magnetometer 'none'; microphone 'none'; payment 'none'; usb 'none'";
34 |
35 | location / {
36 | root /assets;
37 | try_files $uri $uri/ /index.html;
38 |
39 | gzip_static on;
40 | gzip_types text/plain text/xml text/css
41 | text/javascript application/x-javascript;
42 | }
43 |
44 | location /api {
45 | proxy_pass http://docker-backend;
46 | proxy_redirect off;
47 | proxy_set_header Host $http_host;
48 | proxy_set_header X-Real-IP $remote_addr;
49 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
50 | }
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/frontend/src/api/routes.js:
--------------------------------------------------------------------------------
1 | import axios from "axios";
2 | import queryString from "query-string";
3 | import AppToaster from "../js/toaster";
4 | import { polyglot } from "../translations/utils";
5 |
6 | const API = axios.create({
7 | baseURL: process.env.REACT_APP_API_BASE_URL || "http://127.0.0.1:8000/api",
8 | headers: {},
9 | paramsSerializer: queryString.stringify,
10 | });
11 |
12 | API.interceptors.response.use(
13 | (response) => response,
14 | (error) => {
15 | if (error.message === "Network Error") {
16 | AppToaster.show({
17 | message: polyglot.t("app.network_error_toast"),
18 | intent: "danger",
19 | icon: "warning-sign",
20 | });
21 | }
22 |
23 | throw error;
24 | }
25 | );
26 |
/**
 * POST a multipart form to the `find-piis` endpoint.
 *
 * @param {FormData} formData form fields to upload
 * @returns {Promise} the pending axios request
 */
function findPiis(formData) {
  const requestConfig = {
    headers: { "Content-Type": "multipart/form-data" },
  };
  return API.post("find-piis", formData, requestConfig);
}
34 |
/**
 * POST the annotation sets to compare to the `score` endpoint.
 *
 * @param {Object} payload request body, sent as-is
 * @returns {Promise} the pending axios request
 */
function computeScores(payload) {
  const request = API.post("score", payload);
  return request;
}
38 |
/**
 * GET the `tags` endpoint.
 *
 * @returns {Promise} the pending axios request
 */
function fetchTags() {
  const request = API.get("tags");
  return request;
}
42 |
/**
 * POST a multipart form to `anonymize-file`, asking for a base64 response
 * (`return_base64=1`) instead of a file download.
 *
 * @param {FormData} formData form fields to upload
 * @returns {Promise} the pending axios request
 */
function compileFile(formData) {
  const requestConfig = {
    headers: { "Content-Type": "multipart/form-data" },
  };
  return API.post("anonymize-file?return_base64=1", formData, requestConfig);
}
50 |
/**
 * POST a multipart form to `anonymize-file` and receive the response body as a Blob.
 *
 * @param {FormData} formData form fields to upload
 * @returns {Promise} the pending axios request
 */
function anonymizeFile(formData) {
  const requestConfig = {
    headers: { "Content-Type": "multipart/form-data" },
    responseType: "blob",
  };
  return API.post("anonymize-file", formData, requestConfig);
}
59 |
/**
 * POST a payload to the `anonymize` endpoint.
 *
 * @param {Object} payload request body, sent as-is
 * @returns {Promise} the pending axios request
 */
function anonymizePiis(payload) {
  const request = API.post("anonymize", payload);
  return request;
}
63 |
/**
 * GET the `recognizers` endpoint.
 *
 * @returns {Promise} the pending axios request
 */
function fetchRecognizers() {
  const request = API.get("recognizers");
  return request;
}
67 |
68 | export {
69 | findPiis,
70 | computeScores,
71 | fetchTags,
72 | anonymizeFile,
73 | anonymizePiis,
74 | compileFile,
75 | fetchRecognizers,
76 | };
77 |
--------------------------------------------------------------------------------
/frontend/src/components/ActiveRecognizerSettings.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { Switch, UL } from "@blueprintjs/core";
4 | import PolyglotContext from "../js/polyglotContext";
5 | import "./ActiveRecognizerSettings.sass";
6 |
7 | const ActiveRecognizerSettings = ({
8 | availableRecognizers,
9 | activatedRecognizers,
10 | setActivatedRecognizers,
11 | }) => {
12 | const t = useContext(PolyglotContext);
13 |
14 | if (availableRecognizers.length === 0 || activatedRecognizers === null)
15 | return null;
16 |
// Toggle one recognizer: append it when it is not active yet, otherwise drop
// its first occurrence. Always hands a fresh array to the setter.
function onSwitch(recognizer) {
  const position = activatedRecognizers.indexOf(recognizer);
  const nextActivated =
    position === -1
      ? [...activatedRecognizers, recognizer]
      : [
          ...activatedRecognizers.slice(0, position),
          ...activatedRecognizers.slice(position + 1),
        ];
  setActivatedRecognizers(nextActivated);
}
27 |
28 | const recognizerSwitches = availableRecognizers.map((recognizer) => (
29 |
30 | onSwitch(recognizer)}
34 | />
35 |
36 | ));
37 | return (
38 |
39 |
{t("settings.recognizers.description")}
40 |
41 |
42 | );
43 | };
44 |
45 | ActiveRecognizerSettings.propTypes = {
46 | availableRecognizers: PropTypes.arrayOf(PropTypes.string).isRequired,
47 | activatedRecognizers: PropTypes.arrayOf(PropTypes.string).isRequired,
48 | setActivatedRecognizers: PropTypes.func.isRequired,
49 | };
50 |
51 | export default ActiveRecognizerSettings;
52 |
--------------------------------------------------------------------------------
/backend/app/schemas.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict
2 | from pydantic import BaseModel
3 |
4 |
def to_camel_case(snake_case: str) -> str:
    """Convert a snake_case identifier to camelCase.

    Each underscore-separated word is title-cased, the underscores are removed
    and the first character is lower-cased, e.g. ``start_char`` -> ``startChar``.
    An empty string is returned unchanged.
    """
    pascal_case = snake_case.title().replace("_", "")
    # Slice instead of indexing [0] so an empty input does not raise IndexError.
    return pascal_case[:1].lower() + pascal_case[1:]
8 |
9 |
10 | class CamelBaseModel(BaseModel):
11 | # Base model that gives every field a camelCase alias; this is the public representation used by API clients.
12 | # Note: docstrings are automatically used as description for JSON schemas (and they are inherited), so use comments here.
13 |
14 | class Config:
15 | alias_generator = to_camel_case  # derive each alias from the snake_case field name
16 | allow_population_by_field_name = True  # also accept the snake_case field names when constructing a model
17 |
18 |
19 | class Annotation(CamelBaseModel):  # One tagged span; element type of both lists in AnnotationsForEvaluation.
20 | start: int  # NOTE(review): unit (token vs. character index) is not visible here — confirm
21 | end: int  # NOTE(review): inclusive/exclusive is not visible here — confirm
22 | tag: str  # tag name of the span
23 |
24 |
25 | class AnnotationsForEvaluation(CamelBaseModel):  # Two annotation lists to score against each other.
26 | computed_annotations: List[Annotation]  # public alias: computedAnnotations
27 | gold_annotations: List[Annotation]  # public alias: goldAnnotations
28 |
29 |
30 | class Scores(CamelBaseModel):  # Metric set for one evaluation scope; used for `total` and for each per-tag entry.
31 | f1: float
32 | f2: float
33 | precision: float
34 | recall: float
35 | true_positives: float  # NOTE(review): the three counts are typed float, not int — confirm this is intentional
36 | false_positives: float
37 | false_negatives: float
38 |
39 |
40 | class EvaluationResponse(CamelBaseModel):  # Evaluation result: one Scores object overall plus one per tag.
41 | total: Scores
42 | tags: Dict[str, Scores]  # keyed by tag name
43 |
44 |
45 | class Pii(CamelBaseModel):  # One detected PII; serialized with camelCase keys (startChar, endTok, ...).
46 | start_char: int  # character offset where the PII starts
47 | end_char: int  # character offset just past the PII (the test expects 0..11 for "Deutschland")
48 | tag: str  # e.g. "COUNTRY"
49 | text: str  # the matched text
50 | score: float
51 | recognizer: str  # presumably the name of the recognizer that produced the match — confirm
52 | start_tok: int  # index of the first token of the PII
53 | end_tok: int  # token index just past the PII (the test expects 0..1 for a one-token match)
54 |
55 |
56 | class Token(CamelBaseModel):  # One token of the analysed document; serialized with camelCase keys (hasWs, brCount, ...).
57 | text: str
58 | has_ws: bool  # presumably whether whitespace follows the token — confirm
59 | br_count: int  # presumably the number of line breaks after the token — confirm
60 | start_char: int  # character offset where the token starts
61 | end_char: int  # character offset just past the token (the test expects 11..12 for ".")
62 |
63 |
64 | class FindPiisResponse(CamelBaseModel):  # Response body of the find-piis endpoint.
65 | piis: List[Pii]
66 | tokens: List[Token]
67 | format: str  # detected input format; the tests expect "autopdf" for a PDF upload
68 |
69 |
70 | class AnonymizedPii(CamelBaseModel):  # One entry of AnonymizedPiisResponse.anonymized_piis.
71 | text: str  # presumably the replacement text — confirm
72 | id: str  # presumably identifies the PII this entry belongs to — confirm
73 |
74 |
75 | class AnonymizedPiisResponse(CamelBaseModel):  # Wrapper around a list of AnonymizedPii entries.
76 | anonymized_piis: List[AnonymizedPii]  # public alias: anonymizedPiis
77 |
78 |
79 | class ErrorMessage(BaseModel):  # Error body; derives from BaseModel directly, so it gets no camelCase aliases.
80 | detail: str
81 |
--------------------------------------------------------------------------------
/frontend/src/js/useCompile.js:
--------------------------------------------------------------------------------
1 | import { useContext, useEffect, useState } from "react";
2 | import PolyglotContext from "./polyglotContext";
3 | import { compileFile } from "../api/routes";
4 | import AppToaster from "./toaster";
5 |
/**
 * Hook that sends the current document and anonymizations to the backend for
 * compilation and exposes the resulting base64 PDF.
 *
 * The effect re-runs whenever any of its inputs changes (including
 * `compileDate`, which is only used as a trigger). Responses belonging to an
 * outdated run, or arriving after unmount, are ignored so they can neither
 * overwrite a newer preview nor set state on an unmounted component.
 *
 * @returns {{isCompiling: boolean, base64pdf: (string|null)}}
 */
function useCompile({
  anonymizations,
  fileFormData,
  compileTimer,
  compileDate,
  isCompilable,
}) {
  const t = useContext(PolyglotContext);

  const [base64pdf, setBase64pdf] = useState(null);
  const [isCompiling, setIsCompiling] = useState(false);

  useEffect(() => {
    // Flipped by the cleanup below once this effect run has been superseded.
    let isStale = false;

    console.log("anonymizations changed...: ", JSON.stringify(anonymizations));
    if (isCompilable) {
      console.log("compiling... ");

      // unset myTimeout
      clearTimeout(compileTimer);
      setIsCompiling(true);

      if (fileFormData && fileFormData.current && anonymizations.length > 0) {
        const formData = fileFormData.current;
        formData.set("anonymizations", JSON.stringify(anonymizations));

        compileFile(formData)
          .then((response) => {
            if (isStale) return;
            setBase64pdf(response.data.base64);
            setIsCompiling(false);
          })
          .catch(() => {
            if (isStale) return;
            AppToaster.show({
              message: t("main.compile_anonymize_file_failed_toast"),
              intent: "danger",
            });
            setIsCompiling(false);
          });
      } else {
        console.log("Do not compile (no form data)");
        setIsCompiling(false);
      }
    } else {
      console.log("file type is not compilable");
      // A request from a previous run is now ignored, so it can no longer
      // clear the flag itself; reset it here to avoid a stuck spinner.
      setIsCompiling(false);
    }

    return () => {
      isStale = true;
    };
  }, [
    t,
    anonymizations,
    fileFormData,
    compileTimer,
    compileDate,
    isCompilable,
  ]);

  return { isCompiling, base64pdf };
}
62 |
63 | export default useCompile;
64 |
--------------------------------------------------------------------------------
/frontend/src/components/SettingsDialog.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import PropTypes from "prop-types";
3 | import { Classes, Dialog, Tab, Tabs } from "@blueprintjs/core";
4 | import PolyglotContext from "../js/polyglotContext";
5 | import ActiveRecognizerSettings from "./ActiveRecognizerSettings";
6 |
7 | const SettingsDialog = ({
8 | availableRecognizers,
9 | showSettings,
10 | setShowSettings,
11 | activatedRecognizers,
12 | setActivatedRecognizers,
13 | }) => {
14 | const t = useContext(PolyglotContext);
15 |
16 | const [selectedTabId, setSelectedTabId] = useState("recognizers");
17 |
18 | return (
19 |
48 | );
49 | };
50 |
51 | SettingsDialog.propTypes = {
52 | availableRecognizers: PropTypes.arrayOf(PropTypes.string).isRequired,
53 | showSettings: PropTypes.bool.isRequired,
54 | setShowSettings: PropTypes.func.isRequired,
55 | activatedRecognizers: PropTypes.arrayOf(PropTypes.string).isRequired,
56 | setActivatedRecognizers: PropTypes.func.isRequired,
57 | };
58 |
59 | export default SettingsDialog;
60 |
--------------------------------------------------------------------------------
/frontend/src/components/scores/ScoresTable.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { HTMLTable } from "@blueprintjs/core";
4 | import PolyglotContext from "../../js/polyglotContext";
5 |
6 | const ScoresTable = ({ scores }) => {
7 | const t = useContext(PolyglotContext);
8 |
9 | const tableValues = new Map();
10 | const metrics = new Set();
11 | const tags = [];
12 |
// Register `tag` as a table row and store each of its metrics under the key
// `${tag}-${metricName}`. f1, f2, precision and recall are formatted with two
// decimals; every other metric is stored unchanged. Each metric name seen is
// added to the set of table columns.
function extractTableValues(metricsObject, tag) {
  const roundedMetrics = ["f1", "f2", "precision", "recall"];

  tags.push(tag);
  Object.entries(metricsObject).forEach(([metricName, metricValue]) => {
    const cellValue = roundedMetrics.includes(metricName)
      ? metricValue.toFixed(2)
      : metricValue;
    tableValues.set(`${tag}-${metricName}`, cellValue);
    metrics.add(metricName);
  });
}
26 |
27 | extractTableValues(scores.total, "total");
28 |
29 | Object.entries(scores.tags).forEach((scoresEntry) => {
30 | const [tag, metricsObject] = scoresEntry;
31 | if (metricsObject === null) return;
32 | extractTableValues(metricsObject, tag);
33 | });
34 |
35 | const tableHeader = (
36 |
37 | {[ | ].concat(
38 | [...metrics].map((item) => (
39 | {t(`annotation.metric.${item}`)} |
40 | ))
41 | )}
42 |
43 | );
44 | const tableBody = tags.map((tag) => (
45 |
46 | | {tag.toUpperCase()} |
47 | {[...metrics].map((metricName) => (
48 |
49 | {tableValues.has(`${tag}-${metricName}`)
50 | ? tableValues.get(`${tag}-${metricName}`)
51 | : t("annotation.na")}
52 | |
53 | ))}
54 |
55 | ));
56 |
57 | return (
58 |
59 | {tableHeader}
60 | {tableBody}
61 |
62 | );
63 | };
64 |
65 | ScoresTable.propTypes = {
66 | scores: PropTypes.objectOf(PropTypes.any).isRequired,
67 | };
68 |
69 | export default ScoresTable;
70 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openredact-app",
3 | "version": "0.1.1",
4 | "private": true,
5 | "dependencies": {
6 | "@blueprintjs/core": "^3.30.1",
7 | "@blueprintjs/icons": "^3.20.0",
8 | "axios": "^0.24.0",
9 | "eslint-plugin-import": "^2.25.4",
10 | "eslint-plugin-react": "^7.20.3",
11 | "file-saver": "^2.0.2",
12 | "lodash.sortby": "^4.7.0",
13 | "node-polyglot": "^2.4.0",
14 | "prop-types": "^15.7.2",
15 | "query-string": "^6.12.1",
16 | "react": "^16.13.1",
17 | "react-dom": "^16.13.1",
18 | "react-dropzone": "^11.0.1",
19 | "react-hotkeys-hook": "^2.2.1",
20 | "react-pdf": "5.0.0-beta.4",
21 | "react-scripts": "3.4.1",
22 | "uuid": "^8.1.0"
23 | },
24 | "scripts": {
25 | "start": "./node_modules/.bin/react-scripts start",
26 | "build": "./node_modules/.bin/react-scripts build",
27 | "test": "./node_modules/.bin/react-scripts test",
28 | "eject": "./node_modules/.bin/react-scripts eject",
29 | "lint": "eslint --ext .jsx,.js src/",
30 | "format": "npx prettier . --write",
31 | "postinstall": "cp node_modules/pdfjs-dist/build/pdf.worker.min.js public/pdf.worker.min.js"
32 | },
33 | "eslintConfig": {
34 | "extends": [
35 | "airbnb",
36 | "plugin:jsx-a11y/recommended",
37 | "plugin:react-hooks/recommended",
38 | "plugin:@blueprintjs/recommended",
39 | "prettier",
40 | "prettier/react",
41 | "react-app"
42 | ],
43 | "rules": {
44 | "jsx-a11y/no-onchange": "off"
45 | }
46 | },
47 | "browserslist": [
48 | "last 1 Chrome version"
49 | ],
50 | "devDependencies": {
51 | "@blueprintjs/eslint-plugin": "^0.3.5",
52 | "@testing-library/jest-dom": "^4.2.4",
53 | "@testing-library/react": "^9.5.0",
54 | "@testing-library/user-event": "^7.2.1",
55 | "eslint-config-airbnb": "^18.1.0",
56 | "eslint-config-prettier": "^6.11.0",
57 | "eslint-plugin-jsx-a11y": "^6.2.3",
58 | "eslint-plugin-react-hooks": "^4.0.0",
59 | "node-sass": "^4.14.1",
60 | "prettier": "^2.0.5",
61 | "serve": "^11.3.2",
62 | "tslint": "^6.1.2",
63 | "typescript": "^3.8.3"
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/TagMechanismConfig.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { HTMLSelect } from "@blueprintjs/core";
4 | import PolyglotContext from "../../js/polyglotContext";
5 | import { getMechanismComponent } from "./anonymizationConfig";
6 |
7 | const TagMechanismConfig = ({ mechanism, updateMechanism, tag }) => {
8 | const t = useContext(PolyglotContext);
9 |
10 | const mechanismComponent = getMechanismComponent(
11 | mechanism,
12 | updateMechanism,
13 | tag
14 | );
15 |
16 | function onSelect(event) {
17 | updateMechanism({ mechanism: event.target.value });
18 | }
19 |
20 | return (
21 |
22 |
28 |
29 |
30 |
33 |
36 |
39 | {["DATE", "NUMBER", "MONEY", "PHONE"].includes(tag) && (
40 |
43 | )}
44 | {["COUNTRY"].includes(tag) && (
45 |
48 | )}
49 |
50 | {mechanismComponent}
51 |
52 | );
53 | };
54 |
55 | TagMechanismConfig.propTypes = {
56 | mechanism: PropTypes.objectOf(PropTypes.any).isRequired,
57 | updateMechanism: PropTypes.func.isRequired,
58 | tag: PropTypes.string.isRequired,
59 | };
60 |
61 | export default TagMechanismConfig;
62 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/DefaultMechanismConfig.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import { Radio, RadioGroup, Tooltip, Position, H6 } from "@blueprintjs/core";
4 | import PolyglotContext from "../../js/polyglotContext";
5 | import constants from "../../js/constants";
6 | import { getMechanismComponent } from "./anonymizationConfig";
7 |
8 | const DefaultMechanismConfig = ({ mechanism, updateMechanism }) => {
9 | const t = useContext(PolyglotContext);
10 |
11 | const mechanismComponent = getMechanismComponent(
12 | mechanism,
13 | updateMechanism,
14 | "default"
15 | );
16 |
17 | function onSelect(event) {
18 | updateMechanism({ mechanism: event.target.value });
19 | }
20 |
21 | return (
22 |
23 |
24 |
25 |
30 | {t("anonymization.generalization.name")}
31 |
32 |
33 |
34 |
39 | {t("anonymization.pseudonymization.name")}
40 |
41 |
42 |
43 |
48 | {t("anonymization.suppression.name")}
49 |
50 |
51 |
52 |
{t("anonymization.options")}
53 | {mechanismComponent}
54 |
55 | );
56 | };
57 |
58 | DefaultMechanismConfig.propTypes = {
59 | mechanism: PropTypes.objectOf(PropTypes.any).isRequired,
60 | updateMechanism: PropTypes.func.isRequired,
61 | };
62 |
63 | export default DefaultMechanismConfig;
64 |
--------------------------------------------------------------------------------
/frontend/src/components/preview/PdfPreview.jsx:
--------------------------------------------------------------------------------
1 | import React, { useLayoutEffect, useState } from "react";
2 | import { Card, Spinner, Elevation } from "@blueprintjs/core";
3 | import PropTypes from "prop-types";
4 | import "./PdfPreview.sass";
5 | import { Document, Page, pdfjs } from "react-pdf";
6 | import constants from "../../js/constants";
7 |
8 | // Set PDF worker (see https://github.com/wojtekmaj/react-pdf#enable-pdfjs-worker)
9 | pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.js`;
10 |
// Hook returning the current `window.innerWidth`. The value is read once when
// the layout effect runs and again on every window "resize" event; the
// listener is removed on cleanup.
function useWindowWidth() {
  const [width, setWidth] = useState(0);

  useLayoutEffect(() => {
    const handleResize = () => {
      setWidth(window.innerWidth);
    };

    window.addEventListener("resize", handleResize);
    handleResize();

    return () => {
      window.removeEventListener("resize", handleResize);
    };
  }, []);

  return width;
}
23 |
24 | const PdfPreview = ({ base64pdf }) => {
25 | const [numPages, setNumPages] = useState(0);
26 |
27 | const windowWidth = useWindowWidth();
28 |
29 | function onDocumentLoadSuccess(event) {
30 | console.log("Number of pages: ", event.numPages);
31 | setNumPages(event.numPages);
32 | }
33 |
34 | return (
35 | alert(error.message)}
44 | onSourceError={(error) => alert(error.message)}
45 | loading={}
46 | >
47 | {Array.from(new Array(numPages), (el, index) => (
48 |
53 | 0
57 | ? constants.previewPdfWidth
58 | : Math.floor(windowWidth * 0.4) - 50
59 | }
60 | />
61 |
62 | ))}
63 |
64 | );
65 | };
66 |
67 | PdfPreview.propTypes = {
68 | base64pdf: PropTypes.string.isRequired,
69 | };
70 |
71 | export default PdfPreview;
72 |
--------------------------------------------------------------------------------
/frontend/src/components/MainMenu.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext } from "react";
2 | import PropTypes from "prop-types";
3 | import "./MainMenu.sass";
4 | import { Button } from "@blueprintjs/core";
5 | import PolyglotContext from "../js/polyglotContext";
6 |
7 | const MainMenu = ({
8 | onNewDocument,
9 | showDownloadButton,
10 | onDownload,
11 | onShowScores,
12 | showCompileButton,
13 | isCompiling,
14 | onCompile,
15 | }) => {
16 | const t = useContext(PolyglotContext);
17 |
18 | return (
19 |
20 |
21 |
36 | {showDownloadButton && (
37 |
40 | )}
41 |
42 |
43 |
44 | {showCompileButton && (
45 |
54 | )}
55 |
56 | {showDownloadButton && (
57 |
65 | )}
66 |
67 |
68 | );
69 | };
70 |
71 | MainMenu.propTypes = {
72 | showDownloadButton: PropTypes.bool.isRequired,
73 | onNewDocument: PropTypes.func.isRequired,
74 | onDownload: PropTypes.func.isRequired,
75 | onShowScores: PropTypes.func.isRequired,
76 | showCompileButton: PropTypes.bool.isRequired,
77 | isCompiling: PropTypes.bool.isRequired,
78 | onCompile: PropTypes.func.isRequired,
79 | };
80 |
81 | export default MainMenu;
82 |
--------------------------------------------------------------------------------
/.github/workflows/test_frontend.yml:
--------------------------------------------------------------------------------
1 | name: Frontend Tests
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | test:
7 | runs-on: ubuntu-latest
8 | defaults:
9 | run:
10 | working-directory: frontend
11 | strategy:
12 | matrix:
13 | node-version: [12.x]
14 | steps:
15 | - uses: actions/checkout@v2
16 | - name: Use Node.js ${{ matrix.node-version }}
17 | uses: actions/setup-node@v1
18 | with:
19 | node-version: ${{ matrix.node-version }}
20 | - run: npm ci
21 | - run: npm run build --if-present
22 | - run: npm test
23 | docker-build:
24 | runs-on: ubuntu-latest
25 | needs:
26 | - test
27 | defaults:
28 | run:
29 | working-directory: frontend
30 | steps:
31 | - uses: actions/checkout@v1
32 | - name: Build docker image
33 | run: |
34 | docker build --cache-from openredact/frontend-dev:latest -t openredact/frontend-dev:${GITHUB_SHA} -f Dockerfile.dev .
35 | docker build --cache-from openredact/frontend:latest -t openredact/frontend:${GITHUB_SHA} .
36 | - name: Push docker image (hash)
37 | run: |
38 | docker login -u ${{ secrets.docker_user }} -p ${{ secrets.docker_password }}
39 | docker push openredact/frontend:${GITHUB_SHA}
40 | docker push openredact/frontend-dev:${GITHUB_SHA}
41 | docker-test-and-tag:
42 | runs-on: ubuntu-latest
43 | needs:
44 | - test
45 | - docker-build
46 | steps:
47 | - uses: actions/checkout@v1
48 | - name: Set up
49 | run: |
50 | sudo sysctl -w vm.max_map_count=262144
51 | docker --version
52 | docker-compose --version
53 | - name: Run tests in docker
54 | run: |
55 | TAG=${GITHUB_SHA} docker-compose pull frontend
56 | TAG=${GITHUB_SHA} docker-compose -f docker-compose.dev.yml pull frontend-dev
57 | docker run openredact/frontend-dev:${GITHUB_SHA} npm test -- --watchAll=false
58 | - name: Push docker images for tags (:latest)
59 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
60 | run: |
61 | export TAG=${GITHUB_REF/refs\/tags\//}
62 | docker login -u ${{ secrets.docker_user }} -p ${{ secrets.docker_password }}
63 | docker tag openredact/frontend-dev:${GITHUB_SHA} openredact/frontend-dev:latest
64 | docker tag openredact/frontend:${GITHUB_SHA} openredact/frontend:latest
65 | docker push openredact/frontend-dev:latest
66 | docker push openredact/frontend:latest
67 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/LaplaceNoiseMechanism.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import PropTypes from "prop-types";
3 | import {
4 | Checkbox,
5 | FormGroup,
6 | NumericInput,
7 | Position,
8 | Tooltip,
9 | } from "@blueprintjs/core";
10 | import PolyglotContext from "../../js/polyglotContext";
11 | import constants from "../../js/constants";
12 |
13 | const LaplaceNoiseMechanism = ({ mechanism, updateMechanism, tag }) => {
14 | const t = useContext(PolyglotContext);
15 |
16 | const [epsilonValid, setEpsilonValid] = useState(true);
17 |
18 | function validateEpsilon(value) {
19 | return value > 0;
20 | }
21 |
// Validate the new epsilon and, when it is valid, publish an updated mechanism.
// Invalid values only flip the validity flag and leave the mechanism untouched.
function onUpdateEpsilon(valueAsNumber) {
  if (!validateEpsilon(valueAsNumber)) {
    setEpsilonValid(false);
    return;
  }

  setEpsilonValid(true);
  // Copy the nested `config` too: spreading only `mechanism` shares the same
  // `config` object, so assigning to it would mutate the current state in place.
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, epsilon: valueAsNumber },
  });
}
33 |
// Toggle the `stateful` option and publish an updated mechanism.
function onUpdateStateful() {
  // Copy the nested `config` too: spreading only `mechanism` shares the same
  // `config` object, so assigning to it would mutate the current state in place.
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, stateful: !mechanism.config.stateful },
  });
}
39 |
40 | return (
41 |
42 |
47 |
52 |
62 |
72 |
73 |
74 |
75 | );
76 | };
77 |
78 | LaplaceNoiseMechanism.propTypes = {
79 | mechanism: PropTypes.objectOf(PropTypes.any).isRequired,
80 | updateMechanism: PropTypes.func.isRequired,
81 | tag: PropTypes.string.isRequired,
82 | };
83 |
84 | export default LaplaceNoiseMechanism;
85 |
--------------------------------------------------------------------------------
/.github/workflows/test_backend.yml:
--------------------------------------------------------------------------------
1 | name: Backend Tests
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | test:
7 | runs-on: ubuntu-latest
8 | defaults:
9 | run:
10 | working-directory: backend
11 | strategy:
12 | matrix:
13 | python-version: [3.7]
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python ${{ matrix.python-version }}
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: ${{ matrix.python-version }}
21 | - name: Install OS dependencies
22 | run: |
23 | sudo apt-get install -y poppler-utils
24 | wget --quiet https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.3/wkhtmltox-0.12.3_linux-generic-amd64.tar.xz && \
25 | tar vxf wkhtmltox-0.12.3_linux-generic-amd64.tar.xz && \
26 | sudo cp wkhtmltox/bin/wk* /usr/local/bin/ && \
27 | rm -rf wkhtmltox
28 | - name: Install Python dependencies
29 | run: |
30 | python -m pip install --upgrade pip
31 | pip install -r requirements.txt
32 | - name: Test with pytest
33 | run: |
34 | pip install pytest
35 | pip install pytest-cov
36 | pytest --cov-report term --cov=app
37 | docker-build:
38 | runs-on: ubuntu-latest
39 | needs:
40 | - test
41 | defaults:
42 | run:
43 | working-directory: backend
44 | steps:
45 | - uses: actions/checkout@v1
46 | - name: Build docker image
47 | run: |
48 | docker build --cache-from openredact/backend:latest -t openredact/backend:${GITHUB_SHA} .
49 | - name: Push docker image (hash)
50 | run: |
51 | docker login -u ${{ secrets.docker_user }} -p ${{ secrets.docker_password }}
52 | docker push openredact/backend:${GITHUB_SHA}
53 | docker-test-and-tag:
54 | runs-on: ubuntu-latest
55 | needs:
56 | - test
57 | - docker-build
58 | steps:
59 | - uses: actions/checkout@v1
60 | - name: Set up
61 | run: |
62 | sudo sysctl -w vm.max_map_count=262144
63 | docker --version
64 | docker-compose --version
65 | - name: Docker pull
66 | run: |
67 | echo "${GITHUB_REF}"
68 | TAG=${GITHUB_SHA} docker-compose pull backend
69 | - name: Run tests in docker
70 | run: |
71 | TAG=${GITHUB_SHA} docker-compose run backend pytest --cov-report term --cov=app
72 | - name: Push docker images for tags (:latest)
73 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
74 | run: |
75 | export TAG=${GITHUB_REF/refs\/tags\//}
76 | docker login -u ${{ secrets.docker_user }} -p ${{ secrets.docker_password }}
77 | docker tag openredact/backend:${GITHUB_SHA} openredact/backend:latest
78 | docker push openredact/backend:latest
79 |
--------------------------------------------------------------------------------
/frontend/src/components/preview/PreviewControl.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import { Callout, Card, Elevation, Icon } from "@blueprintjs/core";
3 | import "./PreviewControl.sass";
4 | import PropTypes from "prop-types";
5 | import TextPreview from "./TextPreview";
6 | import PolyglotContext from "../../js/polyglotContext";
7 | import PdfPreview from "./PdfPreview";
8 |
9 | const PreviewControl = ({ paragraphs, anonymizations, base64pdf }) => {
10 | const t = useContext(PolyglotContext);
11 |
12 | const [showWarning, setShowWarning] = useState(true);
13 |
/**
 * Build the preview text for a token list, substituting the anonymized text
 * for every annotated token span.
 *
 * @param {Array<{text: string, hasWhitespace: boolean}>} myTokens
 *   Tokens of the text to render.
 * @param {Array<{start: number, end: number, text: string}>} myAnonymizations
 *   Replacements; `start` and `end` are token indices, `end` being exclusive.
 * @returns {string} The text with every annotated span replaced.
 */
function anonymize(myTokens, myAnonymizations) {
  let skipTokens = 0;
  let anonymizedText = null;
  const anonymizedTokens = myTokens.map((token, idx) => {
    // Inside a multi-token span: emit nothing until its last token is reached.
    if (skipTokens > 0) {
      skipTokens -= 1;
      if (skipTokens === 0) {
        return anonymizedText;
      }
      return "";
    }

    const anonymization = myAnonymizations.find((anon) => anon.start === idx);

    if (!anonymization) {
      return token.text;
    }

    const nrOfTokens = anonymization.end - anonymization.start;
    if (nrOfTokens > 1) {
      // return anonymized text instead of last original token, to get hasWhitespace of last token
      skipTokens = nrOfTokens - 1;
      anonymizedText = anonymization.text;
      return "";
    }

    return anonymization.text;
  });

  // Take the whitespace flag from the tokens that were passed in rather than
  // from the enclosing component's props, so the result depends only on the
  // arguments.
  return anonymizedTokens.reduce(
    (acc, cur, idx) =>
      acc + cur + (cur !== "" && myTokens[idx].hasWhitespace ? " " : ""),
    ""
  );
}
51 |
52 | // TODO Use tokens of first paragraph only
53 | const text = anonymize(
54 | paragraphs.length > 0 ? paragraphs[0].tokens : [],
55 | anonymizations
56 | );
57 |
58 | return (
59 |
60 | {text !== "" && (
61 |
62 | {showWarning && (
63 |
64 | {t("preview.warning")}
65 | setShowWarning(false)}
70 | />
71 |
72 | )}
73 | {base64pdf ? (
74 |
75 | ) : (
76 |
77 | )}
78 |
79 | )}
80 |
81 | );
82 | };
83 |
84 | PreviewControl.defaultProps = {
85 | base64pdf: null,
86 | };
87 |
88 | PreviewControl.propTypes = {
89 | paragraphs: PropTypes.arrayOf(PropTypes.object).isRequired,
90 | anonymizations: PropTypes.arrayOf(PropTypes.object).isRequired,
91 | base64pdf: PropTypes.string,
92 | };
93 |
94 | export default PreviewControl;
95 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/annotator/utils.js:
--------------------------------------------------------------------------------
1 | import sortBy from "lodash.sortby";
2 |
/**
 * Cut `text` into consecutive segments along the given character offsets.
 *
 * Offsets are processed in ascending `start` order. Each offset yields a
 * segment carrying all of the offset's own properties plus `mark: true`;
 * the text before, between and after the offsets yields plain
 * `{ start, end, text }` segments.
 */
export const splitWithOffsets = (text, offsets) => {
  const segments = [];
  let cursor = 0;

  sortBy(offsets, (o) => o.start).forEach((offset) => {
    const { start, end } = offset;

    // Unmarked text between the previous offset and this one.
    if (cursor < start) {
      segments.push({
        start: cursor,
        end: start,
        text: text.slice(cursor, start),
      });
    }

    segments.push({
      ...offset,
      mark: true,
      text: text.slice(start, end),
    });
    cursor = end;
  });

  // Unmarked remainder after the last offset.
  if (cursor < text.length) {
    segments.push({
      start: cursor,
      end: text.length,
      text: text.slice(cursor, text.length),
    });
  }

  return segments;
};
34 |
/**
 * Split a token list into unmarked single tokens and marked token spans.
 *
 * @param tokens  objects with `text`, `hasWhitespace` and `linebreakCount`
 * @param offsets `{start: number; end: number}[]` token index ranges, with
 *                `end` exclusive; processed in ascending `start` order
 * @returns an array in which every unmarked token appears as
 *          `{ i, text, hasWhitespace, linebreakCount }` and every offset as
 *          its own properties plus `mark: true`, the space-joined text of its
 *          tokens and the whitespace/linebreak info of its last token
 */
export const splitTokensWithOffsets = (tokens, offsets) => {
  const pieces = [];

  // Copy the unmarked tokens in [from, to) into the result.
  const pushPlainTokens = (from, to) => {
    for (let i = from; i < to; i += 1) {
      const current = tokens[i];
      pieces.push({
        i,
        text: current.text,
        hasWhitespace: current.hasWhitespace,
        linebreakCount: current.linebreakCount,
      });
    }
  };

  let cursor = 0;
  sortBy(offsets, (o) => o.start).forEach((offset) => {
    const { start, end } = offset;
    pushPlainTokens(cursor, start);

    const markedText = tokens
      .slice(start, end)
      .map((t) => t.text)
      .join(" ");
    pieces.push({
      ...offset,
      mark: true,
      text: markedText,
      hasWhitespace: tokens[end - 1].hasWhitespace,
      linebreakCount: tokens[end - 1].linebreakCount,
    });
    cursor = end;
  });

  // Tokens after the last offset.
  pushPlainTokens(cursor, tokens.length);

  return pieces;
};
76 |
/**
 * Tell whether a selection covers nothing.
 *
 * @param selection a Selection-like object
 * @returns `true` when there is no anchor node, or when anchor and focus are
 *          the same node at the same offset; `false` otherwise
 */
export const selectionIsEmpty = (selection) => {
  if (!selection.anchorNode) {
    return true;
  }

  const sameNode =
    selection.anchorNode.compareDocumentPosition(selection.focusNode) === 0;

  return sameNode && selection.focusOffset === selection.anchorOffset;
};
88 |
/**
 * Tell whether a selection was made from its end towards its start.
 *
 * @param selection a Selection-like object
 * @returns `false` for an empty selection; otherwise `true` when anchor and
 *          focus share a node and the anchor offset is the larger one, or
 *          when the document position of the focus node relative to the
 *          anchor node equals `Node.DOCUMENT_POSITION_PRECEDING`
 */
export const selectionIsBackwards = (selection) => {
  if (selectionIsEmpty(selection)) {
    return false;
  }

  const position = selection.anchorNode.compareDocumentPosition(
    selection.focusNode
  );

  // Same node (position 0) with the anchor behind the focus offset.
  const reversedWithinNode =
    !position && selection.anchorOffset > selection.focusOffset;

  // NOTE(review): compareDocumentPosition returns a bitmask; the strict
  // equality only matches when PRECEDING is the sole flag set — confirm that
  // this is intended.
  return reversedWithinNode || position === Node.DOCUMENT_POSITION_PRECEDING;
};
106 |
--------------------------------------------------------------------------------
/frontend/src/components/App.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useEffect, useState } from "react";
2 | import "./App.sass";
3 | import NavBar from "./NavBar";
4 | import AnonymizationConfigMenu from "./anonymizationConfig/AnonymizationConfigMenu";
5 | import Main from "./Main";
6 | import PolyglotContext from "../js/polyglotContext";
7 | import { fetchRecognizers, fetchTags } from "../api/routes";
8 | import AppToaster from "../js/toaster";
9 | import useLocalStorage from "../js/useLocalStorage";
10 | import ErrorBoundary from "./ErrorBoundary";
11 | import Settings from "./Settings";
12 | import About from "./About";
13 | import Help from "./Help";
14 |
15 | const App = () => {
16 | const t = useContext(PolyglotContext);
17 |
18 | const [tags, setTags] = useState([]);
19 | const [availableRecognizers, setAvailableRecognizers] = useState([]);
20 | const [activatedRecognizers, setActivatedRecognizers] = useLocalStorage(
21 | "activatedRecognizers",
22 | null
23 | );
24 | const [anonymizationConfig, setAnonymizationConfig] = useLocalStorage(
25 | "anonymizationConfig",
26 | {
27 | defaultMechanism: {
28 | mechanism: "suppression",
29 | config: { suppressionChar: "X" },
30 | },
31 | mechanismsByTag: {},
32 | }
33 | );
34 |
35 | useEffect(() => {
36 | fetchRecognizers()
37 | .then((response) => {
38 | setAvailableRecognizers(response.data);
39 | if (activatedRecognizers === null)
40 | setActivatedRecognizers(response.data);
41 | })
42 | .catch(() => {
43 | AppToaster.show({
44 | message: t("app.fetching_recognizers_failed_toast"),
45 | intent: "danger",
46 | });
47 | });
48 | }, []); // eslint-disable-line react-hooks/exhaustive-deps
49 |
50 | useEffect(() => {
51 | fetchTags()
52 | .then((response) => {
53 | setTags(response.data);
54 | })
55 | .catch(() => {
56 | AppToaster.show({
57 | message: t("annotation.fetching_tags_failed_toast"),
58 | intent: "danger",
59 | });
60 | });
61 | }, []); // eslint-disable-line react-hooks/exhaustive-deps
62 |
63 | return (
64 |
65 |
72 | }
73 | about={
}
74 | help={
}
75 | />
76 |
77 |
78 |
83 |
84 |
85 |
90 |
91 |
92 |
93 | );
94 | };
95 |
96 | export default App;
97 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/anonymizationConfig.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import GeneralizationMechanism from "./GeneralizationMechanism";
3 | import PseudonymizationMechanism from "./PseudonymizationMechanism";
4 | import SuppressionMechanism from "./SuppressionMechanism";
5 | import LaplaceNoiseMechanism from "./LaplaceNoiseMechanism";
6 | import RandomizedResponseMechanism from "./RandomizedResponseMechanism";
7 | import randomizedResponseCountryDemoValues from "./randomizedResponseCountryDemoValues";
8 |
/**
 * Check whether `object` has `property` as an own (non-inherited) property.
 * Goes through Object.prototype so it also works for objects that shadow or
 * lack `hasOwnProperty`.
 */
export function hasProperty(object, property) {
  const { hasOwnProperty } = Object.prototype;
  return hasOwnProperty.call(object, property);
}
12 |
/**
 * Look up the stored config for a tag under a mechanism.
 *
 * @param configHistory object indexed first by mechanism name, then by tag
 * @returns the stored entry, or `undefined` when the tag has none
 */
export function getConfigHistoryForTag(configHistory, tag, mechanismName) {
  const historyOfMechanism = configHistory[mechanismName];
  return historyOfMechanism[tag];
}
16 |
/**
 * Tell whether the config history holds an entry for the given tag under the
 * given mechanism, i.e. whether the lookup yields anything but `undefined`.
 */
export function hasHistoryEntry(configHistory, tag, mechanismName) {
  const entry = getConfigHistoryForTag(configHistory, tag, mechanismName);
  return entry !== undefined;
}
22 |
// Default config per anonymization mechanism, keyed by mechanism name.
// setFromHistoryOrDefault copies the matching entry when the config history
// has no entry for the requested tag.
const defaultConfigs = {
  generalization: { replacement: "<>" },
  laplaceNoise: { epsilon: 0.1 },
  pseudonymization: { stateful: true, formatString: "{}", counter: 1 },
  randomizedResponse: {
    mode: "dp",
    epsilon: 0.1,
    values: randomizedResponseCountryDemoValues,
    defaultValue: "<>",
  },
  suppression: { suppressionChar: "X" },
};

// Tag-specific properties, keyed by tag, that setFromHistoryOrDefault merges
// into the laplaceNoise default config for the tags listed here.
const laplaceNoiseParameters = {
  DATE: { sensitivity: 1000, encoder: "datetime" },
  NUMBER: { sensitivity: 1, encoder: "delimitedNumber" },
  MONEY: { sensitivity: 1, encoder: "delimitedNumber" },
  PHONE: { sensitivity: 100000, encoder: "delimitedNumber" },
};
42 |
/**
 * Produce the config to use for a tag/mechanism pair.
 *
 * Returns a shallow copy of the history entry when one exists. Otherwise
 * returns a shallow copy of the mechanism's default config, extended with the
 * tag-specific laplaceNoise parameters when the mechanism is "laplaceNoise"
 * and the tag has such parameters.
 */
export function setFromHistoryOrDefault(configHistory, tag, mechanismName) {
  if (hasHistoryEntry(configHistory, tag, mechanismName)) {
    const remembered = getConfigHistoryForTag(configHistory, tag, mechanismName);
    return { ...remembered };
  }

  const defaults = defaultConfigs[mechanismName];

  // add tag-specific properties
  const hasTagSpecificParameters =
    mechanismName === "laplaceNoise" &&
    hasProperty(laplaceNoiseParameters, tag);

  return hasTagSpecificParameters
    ? { ...defaults, ...laplaceNoiseParameters[tag] }
    : { ...defaults };
}
59 |
60 | export function getMechanismComponent(mechanism, updateMechanism, tag) {
61 | const props = { mechanism, updateMechanism, tag };
62 |
63 | let mechanismComponent;
64 | /* eslint-disable react/jsx-props-no-spreading */
65 | switch (mechanism.mechanism) {
66 | case "generalization":
67 | mechanismComponent = ;
68 | break;
69 | case "pseudonymization":
70 | mechanismComponent = ;
71 | break;
72 | case "suppression":
73 | mechanismComponent = ;
74 | break;
75 | case "laplaceNoise":
76 | mechanismComponent = ;
77 | break;
78 | case "randomizedResponse":
79 | mechanismComponent = ;
80 | break;
81 |
82 | default:
83 | mechanismComponent = null;
84 | }
85 | /* eslint-enable react/jsx-props-no-spreading */
86 |
87 | return mechanismComponent;
88 | }
89 |
--------------------------------------------------------------------------------
/frontend/src/components/Main.test.jsx:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import {
3 | createEvent,
4 | fireEvent,
5 | render,
6 | screen,
7 | waitForElement,
8 | } from "@testing-library/react";
9 | import { v4 as uuidv4 } from "uuid";
10 | import Main from "./Main";
11 | import { anonymizePiis, computeScores, findPiis } from "../api/routes";
12 |
13 | jest.mock("../api/routes");
14 | jest.mock("uuid");
15 |
16 | afterEach(() => {
17 | jest.clearAllMocks();
18 | });
19 |
20 | const anonymizationConfig = {
21 | defaultMechanism: { mechanism: "suppression" },
22 | mechanismsByTag: {},
23 | };
24 |
25 | it("shows an upload button", () => {
26 | const { getByRole } = render(
27 |
32 | );
33 | const help = getByRole("button", { name: /browse/i });
34 |
35 | expect(help).toBeInTheDocument();
36 | });
37 |
38 | it("loads and displays the file as well as preview of the anonymization", async () => {
39 | findPiis.mockResolvedValue({
40 | data: {
41 | format: "txt",
42 | piis: [
43 | {
44 | startChar: 8,
45 | endChar: 15,
46 | tag: "GPE",
47 | text: "Germany",
48 | score: 1.0,
49 | model: "mock",
50 | startTok: 2,
51 | endTok: 3,
52 | },
53 | ],
54 | tokens: [
55 | { startChar: 0, endChar: 4, text: "Made", hasWs: true, brCount: 0 },
56 | { startChar: 5, endChar: 7, text: "in", hasWs: true, brCount: 0 },
57 | {
58 | startChar: 8,
59 | endChar: 15,
60 | text: "Germany",
61 | hasWs: false,
62 | brCount: 0,
63 | },
64 | { startChar: 15, endChar: 16, text: ".", hasWs: false, brCount: 0 },
65 | ],
66 | },
67 | });
68 | anonymizePiis.mockResolvedValue({
69 | data: { anonymizedPiis: [{ text: "XXX", id: "1" }] },
70 | });
71 | computeScores.mockResolvedValue({ data: {} });
72 | uuidv4.mockReturnValue("1");
73 |
74 | const { getByText } = render(
75 |
80 | );
81 |
82 | // fire a drop event - this will be easier in @testing-library/jest-dom 5.x, see
83 | // https://testing-library.com/docs/dom-testing-library/api-events#fireeventeventname
84 | const dropZone = getByText(/\.txt/i);
85 | const fileDropEvent = createEvent.drop(dropZone);
86 | Object.defineProperty(fileDropEvent, "dataTransfer", {
87 | value: {
88 | files: [
89 | new File(["Made in Germany."], "example.txt", { type: "image/png" }),
90 | ],
91 | types: ["Files"],
92 | },
93 | });
94 | fireEvent(dropZone, fileDropEvent);
95 |
96 | await waitForElement(() => screen.getByRole("button", { name: /download/i }));
97 | expect(findPiis).toHaveBeenCalledTimes(1);
98 | expect(anonymizePiis).toHaveBeenCalledTimes(1);
99 | expect(uuidv4).toHaveBeenCalledTimes(1);
100 | expect(screen.getAllByText("Germany")).toHaveLength(1);
101 | expect(screen.getAllByText("Made in XXX.")).toHaveLength(1);
102 | });
103 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/AnnotationForm.sass:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables"
2 |
3 | // Tag colors
4 | $tag-1-color: #FFADAD
5 | $tag-2-color: #FFD6A5
6 | $tag-3-color: #FDFFB6
7 | $tag-4-color: #CAFFBF
8 | $tag-5-color: #9BF6FF
9 | $tag-6-color: #A0C4FF
10 | $tag-7-color: #BDB2FF
11 | $tag-8-color: #FFC6FF
12 | $tag-9-color: #d9d9d9
13 | $tag-10-color: #e4b899
14 |
15 | $tag-selected-modifier: 0%
16 |
17 | .tag-colored
18 |
19 | &.tag-1
20 | background: $tag-1-color !important
21 | &.bp3-active,
22 | &.annotation-selected
23 | //background: lighten($tag-1-color, $tag-selected-modifier) !important
24 | background: $tag-1-color !important
25 |
26 | &.tag-2
27 | background: $tag-2-color !important
28 | &.bp3-active,
29 | &.annotation-selected
30 | background: lighten($tag-2-color, $tag-selected-modifier) !important
31 |
32 | &.tag-3
33 | background: $tag-3-color !important
34 | &.bp3-active,
35 | &.annotation-selected
36 | background: lighten($tag-3-color, $tag-selected-modifier) !important
37 |
38 | &.tag-4
39 | background: $tag-4-color !important
40 | &.bp3-active,
41 | &.annotation-selected
42 | background: lighten($tag-4-color, $tag-selected-modifier) !important
43 |
44 | &.tag-5
45 | background: $tag-5-color !important
46 | &.bp3-active,
47 | &.annotation-selected
48 | background: lighten($tag-5-color, $tag-selected-modifier) !important
49 |
50 | &.tag-6
51 | background: $tag-6-color !important
52 | &.bp3-active,
53 | &.annotation-selected
54 | background: lighten($tag-6-color, $tag-selected-modifier) !important
55 |
56 | &.tag-7
57 | background: $tag-7-color !important
58 | &.bp3-active,
59 | &.annotation-selected
60 | background: lighten($tag-7-color, $tag-selected-modifier) !important
61 |
62 | &.tag-8
63 | background: $tag-8-color !important
64 | &.bp3-active,
65 | &.annotation-selected
66 | background: lighten($tag-8-color, $tag-selected-modifier) !important
67 |
68 | &.tag-9
69 | background: $tag-9-color !important
70 | &.bp3-active,
71 | &.annotation-selected
72 | background: lighten($tag-9-color, $tag-selected-modifier) !important
73 |
74 | &.tag-10
75 | background: $tag-10-color !important
76 | &.bp3-active,
77 | &.annotation-selected
78 | background: lighten($tag-10-color, $tag-selected-modifier) !important
79 |
80 |
81 | .annotation-selected
82 | border: 2px dotted #000
83 | opacity: 1.0
84 |
85 | .annotation-mark
86 | cursor: pointer
87 | margin: 1px
88 | padding: 2.5px 5px
89 | font-weight: bold
90 | color: $black
91 | background: #ffe184
92 | opacity: 0.85
93 |
94 | .tag
95 | margin-left: 6px
96 | text-transform: uppercase
97 | vertical-align: middle
98 | font-size: 8px
99 | font-weight: bold
100 |
101 | .remove
102 | top: -8px
103 | left: -8px
104 | width: 16px
105 | height: 16px
106 | color: $white
107 | background: $dark-gray5
108 | opacity: 0
109 | position: absolute
110 | transition: opacity 0.1s ease
111 | border-radius: 50%
112 |
113 | &:hover
114 | background: #777
115 |
116 |
117 | &:hover
118 | .remove
119 | opacity: 100
120 |
121 | .annotation-header
122 | margin-bottom: 16px
123 |
124 | .label
125 | font-size: 11px
126 | text-transform: uppercase
127 | margin-right: 10px
128 |
129 | button
130 | > span > span
131 | margin-left: 8px
132 |
133 | .tag
134 | padding-right: 5px
135 | margin-right: 5px
136 | margin-bottom: 5px
137 | &.bp3-active
138 | box-shadow: inset 0 0 0 1px rgba(16, 22, 26, 0.2), inset 0 1px 2px rgba(16, 22, 26, 0.7)
139 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/PseudonymizationMechanism.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import PropTypes from "prop-types";
3 | import {
4 | Checkbox,
5 | FormGroup,
6 | InputGroup,
7 | NumericInput,
8 | } from "@blueprintjs/core";
9 | import PolyglotContext from "../../js/polyglotContext";
10 |
11 | const PseudonymizationMechanism = ({ mechanism, updateMechanism, tag }) => {
12 | const t = useContext(PolyglotContext);
13 |
14 | const [formatStringValid, setFormatStringValid] = useState(true);
15 | const [counterValid, setCounterValid] = useState(true);
16 |
/**
 * A format string is valid when it contains exactly one "{}" placeholder and
 * no other curly braces.
 */
function validateFormatString(string) {
  return /^[^{}]*{}[^{}]*$/.test(string);
}
21 |
/**
 * Handle an edit of the format string: record whether the new value is valid
 * and pass the updated mechanism on (the value is stored even when invalid).
 */
function onUpdateFormatString(value) {
  setFormatStringValid(validateFormatString(value));

  // Copy `config` as well: a shallow clone of `mechanism` alone would share
  // the nested config object and mutate the prop in place.
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, formatString: value },
  });
}
33 |
/** A counter value is valid when it is an integer of at least 1. */
function validateCounterValue(counterValue) {
  if (!Number.isInteger(counterValue)) {
    return false;
  }
  return counterValue >= 1;
}
37 |
/**
 * Handle an edit of the counter: an invalid value only flags the input as
 * invalid; a valid one is stored in the mechanism config.
 */
function onUpdateCounterValue(valueAsNumber) {
  if (!validateCounterValue(valueAsNumber)) {
    setCounterValid(false);
    return;
  }

  setCounterValid(true);
  // Copy `config` as well so the shared prop object is not mutated in place.
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, counter: valueAsNumber },
  });
}
49 |
/** Toggle the `stateful` flag of the mechanism config. */
function onUpdateStateful() {
  // Copy `config` as well so the shared prop object is not mutated in place.
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, stateful: !mechanism.config.stateful },
  });
}
55 |
56 | return (
57 |
58 |
63 |
73 | onUpdateFormatString(event.target.value)}
77 | intent={formatStringValid ? "default" : "danger"}
78 | fill
79 | />
80 |
81 |
91 |
100 |
101 |
102 | );
103 | };
104 |
105 | PseudonymizationMechanism.propTypes = {
106 | mechanism: PropTypes.objectOf(PropTypes.any).isRequired,
107 | updateMechanism: PropTypes.func.isRequired,
108 | tag: PropTypes.string.isRequired,
109 | };
110 |
111 | export default PseudonymizationMechanism;
112 |
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 | # OpenRedact Frontend
2 |
3 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
4 |
5 | ## Install dependencies
6 |
7 | Before anything else, make sure to install the dependencies (including development dependencies).
8 |
9 | ```
10 | cd frontend
11 | npm install
12 | ```
13 |
14 | ## Run the frontend using Docker
15 |
16 | ```bash
17 | docker build -t openredact/frontend-dev -f Dockerfile.dev .
18 | docker run -p 80:80 openredact/frontend-dev
19 | ```
20 |
21 | ## Available Scripts
22 |
23 | In the project directory, you can run:
24 |
25 | ### `npm start`
26 |
27 | Runs the app in development mode.
28 | Open [http://localhost:3000](http://localhost:3000) to view it in the browser.
29 |
30 | The page will reload if you make edits.
31 | You will also see any lint errors in the console.
32 |
33 | ### `npm test`
34 |
35 | Launches the test runner in the interactive watch mode.
36 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
37 |
38 | ### `npm run build`
39 |
40 | Builds the app for production to the `build` folder.
41 | It correctly bundles React in production mode and optimizes the build for the best performance.
42 |
43 | The build is minified and the filenames include the hashes.
44 | Your app is ready to be deployed!
45 |
46 | You can serve the build by running:
47 |
48 | ```
49 | ./node_modules/serve/bin/serve.js -s build
50 | ```
51 |
52 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
53 |
54 | ### `npm run lint`
55 |
56 | Lints the code using ESLint. ESLint is configured in `package.json`.
57 |
58 | ### `npm run format`
59 |
60 | Formats the code using Prettier.
61 |
62 | ## Learn More
63 |
64 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
65 |
66 | To learn React, check out the [React documentation](https://reactjs.org/).
67 |
68 | ### Code Splitting
69 |
70 | This section has moved here: https://facebook.github.io/create-react-app/docs/code-splitting
71 |
72 | ### Analyzing the Bundle Size
73 |
74 | This section has moved here: https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size
75 |
76 | ### Making a Progressive Web App
77 |
78 | This section has moved here: https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app
79 |
80 | ### Advanced Configuration
81 |
82 | This section has moved here: https://facebook.github.io/create-react-app/docs/advanced-configuration
83 |
84 | ### Deployment
85 |
86 | This section has moved here: https://facebook.github.io/create-react-app/docs/deployment
87 |
88 | ### `npm run build` fails to minify
89 |
90 | This section has moved here: https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify
91 |
92 | ### `npm run eject`
93 |
94 | **Note: this is a one-way operation. Once you `eject`, you can’t go back!**
95 |
96 | If you aren’t satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
97 |
98 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you’re on your own.
99 |
100 | You don’t have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn’t feel obligated to use this feature. However we understand that this tool wouldn’t be useful if you couldn’t customize it when you are ready for it.
101 |
--------------------------------------------------------------------------------
/backend/cli/redact.py:
--------------------------------------------------------------------------------
1 | #!python
2 | import os
3 | import json
4 | from pathlib import Path
5 |
6 | import click
7 | import nerwhal
8 | from anonymizer import Anonymizer, AnonymizerConfig, Pii
9 | from click import UsageError, progressbar
10 | from expose_text import FileWrapper, UnsupportedFormat
11 |
12 |
@click.command()
@click.option("--input_dir", type=Path, help="Path to the directory that contains the files to redact.")
@click.option("--output_dir", type=Path, help="Path to the directory that the redacted files will be stored in.")
@click.option("--anonymizer_config", type=Path, default="anonymizer_config.json", help="Path to the anonymizer config.")
@click.option("--recognizer_config", type=Path, default="recognizer_config.json", help="Path to the recognizer config.")
def redact(input_dir, output_dir, anonymizer_config, recognizer_config):
    """Redact the documents in a directory.

    This script tries to redact all documents in the given directory and its subdirectories.

    Note: The redaction is done in an unsupervised manner. You have to ensure that the chosen recognizers and
    configuration provide results of a sufficient quality on the given data. Do not use for anything critical."""
    # NOTE: the docstring above is the help text of the command, so it is kept free of developer documentation.
    # Raises UsageError when input_dir or output_dir is missing. Files whose format is unsupported or that
    # cannot be opened are reported and skipped; every other file is written to the same relative path
    # below output_dir.

    if input_dir is None or output_dir is None:
        raise UsageError("Please provide an input_dir and output_dir.")

    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    anonymizer_config = Path(anonymizer_config)
    recognizer_config = Path(recognizer_config)

    # Read the JSON configs with an explicit encoding instead of the platform default.
    with open(anonymizer_config, "r", encoding="utf-8") as f:
        config = AnonymizerConfig(**json.load(f))
    anonymizer = Anonymizer(config)

    with open(recognizer_config, "r", encoding="utf-8") as f:
        recognizer_config = nerwhal.Config(**json.load(f))

    click.echo(f'Start redacting files in "{input_dir}" ...')

    # Collect all files up front so that the progress bar knows the total.
    items_to_redact = [(root, file) for root, _dirs, files in os.walk(input_dir) for file in files]

    with progressbar(items_to_redact) as items:
        for root, file in items:
            relative_path = Path(os.path.relpath(root, start=input_dir)) / Path(file)
            in_path = input_dir / relative_path

            try:
                wrapper = FileWrapper(in_path)
            except UnsupportedFormat:
                click.echo(f"Warning: Unsupported format for file {relative_path}! This file was skipped!")
                continue
            except Exception:
                click.echo(f"Error while processing file {relative_path}! This file was skipped!", err=True)
                continue

            result = nerwhal.recognize(
                wrapper.text,
                config=recognizer_config,
                combination_strategy="smart-fusion",
                context_words=True,
                return_tokens=False,
            )
            # The ids link each anonymized result back to the recognized entity it came from.
            id_to_piis = {str(idx): pii for idx, pii in enumerate(result["ents"])}
            piis_for_anonymizer = [Pii(tag=pii.tag, text=pii.text, id=idx) for idx, pii in id_to_piis.items()]

            anonymized_piis = [
                anonymized_pii for anonymized_pii in anonymizer.anonymize(piis_for_anonymizer) if anonymized_pii.modified
            ]

            for anonymized_pii in anonymized_piis:
                unanonymized_pii = id_to_piis[anonymized_pii.id]
                wrapper.add_alter(unanonymized_pii.start_char, unanonymized_pii.end_char, anonymized_pii.text)
            wrapper.apply_alters()

            out_path = output_dir / relative_path
            out_path.parent.mkdir(parents=True, exist_ok=True)
            wrapper.save(out_path)

    click.echo(f'The redacted files have been written to "{output_dir}".')
86 |
87 |
88 | if __name__ == "__main__":
89 | redact()
90 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/randomizedResponseCountryDemoValues.js:
--------------------------------------------------------------------------------
1 | export default [
2 | "Afghanistan",
3 | "Ägypten",
4 | "Albanien",
5 | "Algerien",
6 | "Andorra",
7 | "Angola",
8 | "Antigua und Barbuda",
9 | "Äquatorialguinea",
10 | "Argentinien",
11 | "Armenien",
12 | "Aserbaidschan",
13 | "Äthiopien",
14 | "Australien",
15 | "Bahamas",
16 | "Bahrain",
17 | "Bangladesch",
18 | "Barbados",
19 | "Belarus",
20 | "Belgien",
21 | "Belize",
22 | "Benin",
23 | "Bhutan",
24 | "Bolivien",
25 | "Bosnien und Herzegowina",
26 | "Botsuana",
27 | "Brasilien",
28 | "Brunei Darussalam",
29 | "Bulgarien",
30 | "Burkina Faso",
31 | "Burundi",
32 | "Cabo Verde",
33 | "Chile",
34 | "China",
35 | "Cookinseln",
36 | "Costa Rica",
37 | "Côte d'Ivoire",
38 | "Dänemark",
39 | "Deutschland",
40 | "Dominica",
41 | "Dominikanische Republik",
42 | "Dschibuti",
43 | "Ecuador",
44 | "El Salvador",
45 | "Eritrea",
46 | "Estland",
47 | "Eswatini",
48 | "Fidschi",
49 | "Finnland",
50 | "Frankreich",
51 | "Gabun",
52 | "Gambia",
53 | "Georgien",
54 | "Ghana",
55 | "Grenada",
56 | "Griechenland",
57 | "Guatemala",
58 | "Guinea",
59 | "Guinea-Bissau",
60 | "Guyana",
61 | "Haiti",
62 | "Heiliger Stuhl",
63 | "Honduras",
64 | "Indien",
65 | "Indonesien",
66 | "Irak",
67 | "Iran",
68 | "Irland",
69 | "Island",
70 | "Israel",
71 | "Italien",
72 | "Jamaika",
73 | "Japan",
74 | "Jemen",
75 | "Jordanien",
76 | "Kambodscha",
77 | "Kamerun",
78 | "Kanada",
79 | "Kasachstan",
80 | "Katar",
81 | "Kenia",
82 | "Kirgisistan",
83 | "Kiribati",
84 | "Kolumbien",
85 | "Komoren",
86 | "Kongo",
87 | "Korea",
88 | "Kosovo",
89 | "Kroatien",
90 | "Kuba",
91 | "Kuwait",
92 | "Laos",
93 | "Lesotho",
94 | "Lettland",
95 | "Libanon",
96 | "Liberia",
97 | "Libyen",
98 | "Liechtenstein",
99 | "Litauen",
100 | "Luxemburg",
101 | "Madagaskar",
102 | "Malawi",
103 | "Malaysia",
104 | "Malediven",
105 | "Mali",
106 | "Malta",
107 | "Marokko",
108 | "Marshallinseln",
109 | "Mauretanien",
110 | "Mauritius",
111 | "Mexiko",
112 | "Mikronesien",
113 | "Moldau",
114 | "Monaco",
115 | "Mongolei",
116 | "Montenegro",
117 | "Mosambik",
118 | "Myanmar",
119 | "Namibia",
120 | "Nauru",
121 | "Nepal",
122 | "Neuseeland",
123 | "Nicaragua",
124 | "Niederlande",
125 | "Niger",
126 | "Nigeria",
127 | "Niue",
128 | "Nordmazedonien",
129 | "Norwegen",
130 | "Oman",
131 | "Österreich",
132 | "Pakistan",
133 | "Palau",
134 | "Panama",
135 | "Papua-Neuguinea",
136 | "Paraguay",
137 | "Peru",
138 | "Philippinen",
139 | "Polen",
140 | "Portugal",
141 | "Ruanda",
142 | "Rumänien",
143 | "Russische Föderation",
144 | "Salomonen",
145 | "Sambia",
146 | "Samoa",
147 | "San Marino",
148 | "São Tomé und Príncipe",
149 | "Saudi-Arabien",
150 | "Schweden",
151 | "Schweiz",
152 | "Senegal",
153 | "Serbien",
154 | "Seychellen",
155 | "Sierra Leone",
156 | "Simbabwe",
157 | "Singapur",
158 | "Slowakei",
159 | "Slowenien",
160 | "Somalia",
161 | "Spanien",
162 | "Sri Lanka",
163 | "St. Kitts und Nevis",
164 | "St. Lucia",
165 | "St. Vincent und die Grenadinen",
166 | "Südafrika",
167 | "Sudan",
168 | "Südsudan",
169 | "Suriname",
170 | "Syrien",
171 | "Tadschikistan",
172 | "Tansania",
173 | "Thailand",
174 | "Timor-Leste",
175 | "Togo",
176 | "Tonga",
177 | "Trinidad und Tobago",
178 | "Tschad",
179 | "Tschechien",
180 | "Tunesien",
181 | "Türkei",
182 | "Turkmenistan",
183 | "Tuvalu",
184 | "Uganda",
185 | "Ukraine",
186 | "Ungarn",
187 | "Uruguay",
188 | "Usbekistan",
189 | "Vanuatu",
190 | "Vatikanstadt",
191 | "Venezuela",
192 | "Vereinigte Arabische Emirate",
193 | "Vereinigte Staaten",
194 | "Vereinigtes Königreich",
195 | "Vietnam",
196 | "Weißrussland",
197 | "Zentralafrikanische Republik",
198 | "Zypern",
199 | ];
200 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/SuppressionMechanism.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import PropTypes from "prop-types";
3 | import {
4 | InputGroup,
5 | FormGroup,
6 | NumericInput,
7 | Tooltip,
8 | Button,
9 | Position,
10 | } from "@blueprintjs/core";
11 | import PolyglotContext from "../../js/polyglotContext";
12 | import constants from "../../js/constants";
13 |
14 | const SuppressionMechanism = ({ mechanism, updateMechanism, tag }) => {
15 | const t = useContext(PolyglotContext);
16 |
17 | const [customLengthValid, setCustomLengthValid] = useState(true);
18 |
/**
 * Store the most recently typed character as the suppression character.
 *
 * Only the last character of the input value is kept, so typing into the
 * field replaces the current character instead of appending to it.
 *
 * @param {string} value - current raw value of the input field
 */
function onUpdateSuppressionChar(value) {
  const suppressionChar = value.slice(-1);
  // Copy the nested config too: spreading `mechanism` alone is shallow, and
  // writing through such a copy would mutate the config object owned by the
  // parent component.
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, suppressionChar },
  });
}
25 |
/**
 * Check whether a custom suppression length is acceptable.
 *
 * `undefined` (no custom length set) is valid; otherwise the value has to be
 * an integer of at least 1. The empty string is always rejected.
 *
 * @param {number|string|undefined} customLength - value to check
 * @returns {boolean} true if the value may be used
 */
function validateCustomLength(customLength) {
  if (customLength === undefined) {
    return true;
  }
  if (customLength === "") {
    return false;
  }
  return Number.isInteger(customLength) && customLength >= 1;
}
34 |
/**
 * Handle a change of the custom length input.
 *
 * An empty or non-numeric input removes `customLength` from the config, an
 * invalid number only flags the input as invalid, and a valid number is
 * written to the config.
 *
 * @param {number} valueAsNumber - numeric interpretation of the input
 * @param {string} valueAsString - raw text of the input
 */
function onUpdateCustomLength(valueAsNumber, valueAsString) {
  if (valueAsString === "" || Number.isNaN(valueAsNumber)) {
    // Only an empty field counts as valid again; a NaN input keeps the
    // current validity flag.
    if (valueAsString === "") setCustomLengthValid(true);
    // Work on a copy of the nested config so the object owned by the parent
    // component is never mutated.
    const config = { ...mechanism.config };
    delete config.customLength;
    updateMechanism({ ...mechanism, config });
    return;
  }

  if (!validateCustomLength(valueAsNumber)) {
    setCustomLengthValid(false);
    return;
  }

  setCustomLengthValid(true);
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, customLength: valueAsNumber },
  });
}
54 |
/**
 * Set the suppression character to the block character used for redaction.
 */
function handleRedactClick() {
  // Build a fresh config object instead of writing through a shallow clone,
  // which would mutate the config owned by the parent component.
  updateMechanism({
    ...mechanism,
    config: { ...mechanism.config, suppressionChar: "▬" },
  });
}
60 |
61 | const redactButton = (
62 |
67 |
68 |
69 | );
70 |
71 | return (
72 |
73 |
77 | onUpdateSuppressionChar(event.target.value)}
81 | fill
82 | rightElement={redactButton}
83 | />
84 |
85 |
95 |
109 |
110 |
111 | );
112 | };
113 |
114 | SuppressionMechanism.propTypes = {
115 | mechanism: PropTypes.objectOf(PropTypes.any).isRequired,
116 | updateMechanism: PropTypes.func.isRequired,
117 | tag: PropTypes.string.isRequired,
118 | };
119 |
120 | export default SuppressionMechanism;
121 |
--------------------------------------------------------------------------------
/frontend/src/js/useAnonymization.js:
--------------------------------------------------------------------------------
1 | import { useCallback, useContext, useEffect, useState } from "react";
2 | import { anonymizePiis } from "../api/routes";
3 | import Anonymization from "./anonymization";
4 | import AppToaster from "./toaster";
5 | import PolyglotContext from "./polyglotContext";
6 |
/**
 * Build, for every paragraph, a lookup from annotation id to its position.
 *
 * Each entry holds the token range of the annotation (`start`, `end`) and the
 * matching character range, taken from the first and the last covered token.
 *
 * @param {Array} annotations - one array of annotations per paragraph
 * @param {Array} paragraphs - paragraphs, each with a `tokens` array
 * @returns {Array<Map>} one Map (annotation id -> position) per paragraph
 */
function computePositionsMap(annotations, paragraphs) {
  return Object.keys(paragraphs).map((_key, paragraphIndex) => {
    const positionsById = new Map();

    annotations[paragraphIndex].forEach(({ id, start, end }) => {
      const { tokens } = paragraphs[paragraphIndex];
      positionsById.set(id, {
        start,
        end,
        startChar: tokens[start].startChar,
        // `end` is exclusive, so the last covered token is at `end - 1`
        endChar: tokens[end - 1].endChar,
      });
    });

    return positionsById;
  });
}
28 |
/**
 * Collect the tags whose mechanism is "none" and those using "useDefault".
 *
 * @param {Object} anonymizationConfig - config with a `mechanismsByTag` object
 * @returns {Array<Array<string>>} `[tagsToNotAnonymize, tagsAnonymizedWithDefault]`
 */
function computeSpecialTags(anonymizationConfig) {
  const entries = Object.entries(anonymizationConfig.mechanismsByTag);

  const tagsWithMechanism = (mechanismName) =>
    entries
      .filter(([, entry]) => entry.mechanism === mechanismName)
      .map(([tag]) => tag);

  return [tagsWithMechanism("none"), tagsWithMechanism("useDefault")];
}
46 |
/**
 * React hook that requests anonymized replacements for the current annotations.
 *
 * Whenever the paragraphs, annotations or anonymization config change, the
 * annotated PIIs of the first paragraph are sent to the backend and the
 * response is turned into `Anonymization` objects. A failed request shows a
 * danger toast.
 *
 * @param {Object} params
 * @param {Array} params.paragraphs - paragraphs, each with a `tokens` array
 * @param {Array} params.annotations - one array of annotations per paragraph
 * @param {Object} params.anonymizationConfig - config holding `mechanismsByTag`
 * @returns {Array} anonymizations computed for the most recent inputs
 */
function useAnonymization({ paragraphs, annotations, anonymizationConfig }) {
  const t = useContext(PolyglotContext);

  const [anonymizations, setAnonymizations] = useState([]);

  const computeSpecialTagsCallback = useCallback(computeSpecialTags, [
    anonymizationConfig,
  ]);
  const computePositionsMapCallback = useCallback(computePositionsMap, [
    annotations,
    paragraphs,
  ]);

  useEffect(() => {
    if (annotations.length === 0 || annotations[0].length === 0) {
      setAnonymizations([]);
      return undefined;
    }

    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the `annotations` array owned by the caller.
    const sortedAnnotations = [...annotations[0]].sort(
      (a, b) => a.start - b.start
    );

    const piis = sortedAnnotations.map((annotation) => {
      return { tag: annotation.tag, text: annotation.text, id: annotation.id };
    });

    const [tagsToNotAnonymize, tagsAnonymizedWithDefault] =
      computeSpecialTagsCallback(anonymizationConfig);

    // The request config only carries explicit per-tag mechanisms: "none" and
    // "useDefault" entries are removed from a deep clone of the config.
    const configForRequest = JSON.parse(JSON.stringify(anonymizationConfig));
    tagsToNotAnonymize.forEach(
      (tag) => delete configForRequest.mechanismsByTag[tag]
    );
    tagsAnonymizedWithDefault.forEach(
      (tag) => delete configForRequest.mechanismsByTag[tag]
    );

    const piisToAnonymize = piis.filter(
      (pii) => !tagsToNotAnonymize.includes(pii.tag)
    );

    const positionsMap = computePositionsMapCallback(annotations, paragraphs);

    // Set by the cleanup function once the inputs changed or the component
    // unmounted, so that a late response cannot overwrite newer state.
    let isStale = false;

    anonymizePiis({
      piis: piisToAnonymize,
      config: configForRequest,
    })
      .then((response) => {
        if (isStale) return;

        const { anonymizedPiis } = response.data;

        const newAnonymizations = anonymizedPiis.map((anonymizedPii) => {
          return new Anonymization({
            // TODO paragraph support
            ...positionsMap[0].get(anonymizedPii.id),
            text: anonymizedPii.text,
          });
        });

        setAnonymizations(newAnonymizations);
      })
      .catch(() => {
        if (isStale) return;

        AppToaster.show({
          message: t("main.anonymizing_piis_failed_toast"),
          intent: "danger",
        });
      });

    return () => {
      isStale = true;
    };
  }, [
    t,
    paragraphs,
    annotations,
    anonymizationConfig,
    computePositionsMapCallback,
    computeSpecialTagsCallback,
  ]);

  return anonymizations;
}
127 |
128 | export default useAnonymization;
129 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/annotator/TokenAnnotator.jsx:
--------------------------------------------------------------------------------
1 | /**
2 | * Based on Martin Camacho's react-text-annotate
3 | * https://github.com/mcamac/react-text-annotate
4 | */
5 | import React from "react";
6 |
7 | import PropTypes from "prop-types";
8 |
9 | import {
10 | selectionIsEmpty,
11 | selectionIsBackwards,
12 | splitTokensWithOffsets,
13 | } from "./utils";
14 |
15 | const WrapperToken = ({ text, index, hasWhitespace, linebreakCount }) => {
16 | // return {props.content}
17 | return (
18 |
19 | {text}
20 | {hasWhitespace ? " " : ""}
21 | ").join(" "),
24 | }}
25 | />
26 |
27 | );
28 | };
29 |
30 | WrapperToken.defaultProps = {
31 | hasWhitespace: true,
32 | linebreakCount: 0,
33 | };
34 |
35 | WrapperToken.propTypes = {
36 | text: PropTypes.string.isRequired,
37 | index: PropTypes.number.isRequired,
38 | hasWhitespace: PropTypes.bool,
39 | linebreakCount: PropTypes.number,
40 | };
41 |
42 | // const TokenAnnotator = (props: TokenAnnotatorProps) => {
43 | const TokenAnnotator = ({ renderMark, getSpan, onChange, tokens, value }) => {
44 | // const renderMark = props.renderMark || ((props) => );
45 |
46 | // const getSpan = (span) => {
47 | // if (props.getSpan) return props.getSpan(span);
48 | //
49 | // return { start: span.start, end: span.end };
50 | // };
51 |
// Turn the current browser text selection into a new span and report the
// extended list of spans through `onChange`.
const handleMouseUp = () => {
  if (!onChange) return;

  const selection = window.getSelection();
  if (selectionIsEmpty(selection)) return;

  // Both ends of the selection have to sit inside an element carrying a
  // `data-i` attribute (presumably the rendered tokens); otherwise the
  // selection is discarded.
  const insideToken = (node) => node.parentElement.hasAttribute("data-i");
  if (!(insideToken(selection.anchorNode) && insideToken(selection.focusNode))) {
    window.getSelection().empty();
    return;
  }

  const tokenIndexOf = (node) =>
    parseInt(node.parentElement.getAttribute("data-i"), 10);
  const anchorIndex = tokenIndexOf(selection.anchorNode);
  const focusIndex = tokenIndexOf(selection.focusNode);

  // A selection dragged from right to left has anchor and focus swapped.
  const backwards = selectionIsBackwards(selection);
  const start = backwards ? focusIndex : anchorIndex;
  // `end` is exclusive, hence one past the last selected token.
  const end = (backwards ? anchorIndex : focusIndex) + 1;

  onChange([
    ...value,
    getSpan({ start, end, tokens: tokens.slice(start, end) }),
  ]);
  window.getSelection().empty();
};
88 |
// Remove the span matching the clicked split (same `start` and `end`) and
// report the shortened list through `onChange`; do nothing if none matches.
const handleSplitClick = ({ start, end }) => {
  const isClickedSplit = (span) => span.start === start && span.end === end;
  const splitIndex = value.findIndex(isClickedSplit);

  if (splitIndex < 0) return;

  // Work on a copy so that `value` itself stays untouched.
  const remaining = [...value];
  remaining.splice(splitIndex, 1);
  onChange(remaining);
};
99 |
100 | // const { tokens, value, onChange, getSpan: _, ...divProps } = props;
101 |
102 | const splits = splitTokensWithOffsets(tokens, value);
103 |
104 | return (
105 |
106 | {splits.map((split, idx) => {
107 | const { mark, start, end, i, text, hasWhitespace, linebreakCount } =
108 | split;
109 | return mark ? (
110 | renderMark({
111 | key: `${start}-${end}`,
112 | ...split,
113 | onClick: handleSplitClick,
114 | })
115 | ) : (
116 |
123 | );
124 | // return split.mark ? (
125 | // renderMark({
126 | // key: `${split.start}-${split.end}`,
127 | // ...split,
128 | // onClick: handleSplitClick,
129 | // })
130 | // ) : (
131 | //
132 | // )
133 | })}
134 |
135 | );
136 | };
137 |
138 | TokenAnnotator.propTypes = {
139 | tokens: PropTypes.arrayOf(PropTypes.object).isRequired,
140 | value: PropTypes.arrayOf(PropTypes.object).isRequired,
141 | onChange: PropTypes.func.isRequired,
142 | getSpan: PropTypes.func.isRequired,
143 | renderMark: PropTypes.func.isRequired,
144 | };
145 |
146 | export default TokenAnnotator;
147 |
--------------------------------------------------------------------------------
/backend/tests/endpoints/test_anonymize.py:
--------------------------------------------------------------------------------
def test_empty(client):
    """An empty PII list is accepted and yields an empty result list."""
    payload = {"piis": [], "config": {"mechanismsByTag": {}}}

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 200
    assert response.json()["anonymizedPiis"] == []
5 |
6 |
def test_anonymized_by_default(client):
    """A PII without a tag-specific mechanism is anonymized by the default mechanism."""
    default_mechanism = {"mechanism": "suppression", "config": {}}
    payload = {
        "piis": [{"tag": "PER", "text": "Smith", "id": "1"}],
        "config": {"defaultMechanism": default_mechanism, "mechanismsByTag": {}},
    }

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 200
    assert response.json()["anonymizedPiis"] == [{"text": "XXXXX", "id": "1"}]
17 |
18 |
def test_anonymized_by_tag_mechanism(client):
    """A mechanism configured for a tag is used instead of the default mechanism."""
    default_mechanism = {"mechanism": "suppression", "config": {}}
    per_mechanism = {"mechanism": "generalization", "config": {"replacement": "person"}}
    payload = {
        "piis": [{"tag": "PER", "text": "Smith", "id": "1"}],
        "config": {"defaultMechanism": default_mechanism, "mechanismsByTag": {"PER": per_mechanism}},
    }

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 200
    assert response.json()["anonymizedPiis"] == [{"text": "person", "id": "1"}]
32 |
33 |
def test_unconfigured_tag(client):
    """A PII with neither a tag mechanism nor a default mechanism is rejected with status 400."""
    payload = {
        "piis": [{"tag": "PER", "text": "Smith", "id": "1"}],
        "config": {"mechanismsByTag": {}},
    }

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 400
39 |
40 |
def test_laplace_noise_date(client):
    """Laplace noise with the datetime encoder is accepted for a date PII.

    The anonymized text is only printed, not compared against an expected value.
    """
    laplace_config = {"epsilon": 0.00001, "sensitivity": 10000, "encoder": "datetime"}
    payload = {
        "piis": [{"tag": "DATE", "text": "24.12.2020", "id": "1"}],
        "config": {
            "defaultMechanism": {"mechanism": "suppression", "config": {}},
            "mechanismsByTag": {"DATE": {"mechanism": "laplaceNoise", "config": laplace_config}},
        },
    }

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 200
    print("Anonymized date:", response.json()["anonymizedPiis"][0]["text"])
59 |
60 |
def test_laplace_noise_number(client):
    """Laplace noise with the delimitedNumber encoder is accepted for a number PII.

    The anonymized text is printed rather than compared; only the echoed id is checked.
    """
    laplace_config = {"epsilon": 0.1, "sensitivity": 1, "encoder": "delimitedNumber"}
    payload = {
        "piis": [{"tag": "NUMBER", "text": "7,2 kg", "id": "1"}],
        "config": {
            "defaultMechanism": {"mechanism": "suppression", "config": {}},
            "mechanismsByTag": {"NUMBER": {"mechanism": "laplaceNoise", "config": laplace_config}},
        },
    }

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 200
    anonymized_pii = response.json()["anonymizedPiis"][0]
    # The result has to refer back to the PII that was sent.
    assert anonymized_pii["id"] == "1"
    print("Anonymized number:", anonymized_pii["text"])
79 |
80 |
def test_laplace_noise_money(client):
    """Laplace noise with the delimitedNumber encoder is accepted for a money PII.

    The anonymized text is printed rather than compared; only the echoed id is checked.
    """
    laplace_config = {"epsilon": 0.1, "sensitivity": 1, "encoder": "delimitedNumber"}
    payload = {
        "piis": [{"tag": "MONEY", "text": "7,20 €", "id": "1"}],
        "config": {
            "defaultMechanism": {"mechanism": "suppression", "config": {}},
            "mechanismsByTag": {"MONEY": {"mechanism": "laplaceNoise", "config": laplace_config}},
        },
    }

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 200
    anonymized_pii = response.json()["anonymizedPiis"][0]
    # The result has to refer back to the PII that was sent.
    assert anonymized_pii["id"] == "1"
    print("Anonymized amount of money:", anonymized_pii["text"])
99 |
100 |
def test_laplace_noise_phone(client):
    """Laplace noise with the delimitedNumber encoder is accepted for a phone number PII.

    The anonymized text is printed rather than compared; only the echoed id is checked.
    """
    laplace_config = {"epsilon": 0.1, "sensitivity": 100000, "encoder": "delimitedNumber"}
    payload = {
        "piis": [{"tag": "PHONE", "text": "+49 1234 123 456", "id": "1"}],
        "config": {
            "defaultMechanism": {"mechanism": "suppression", "config": {}},
            "mechanismsByTag": {"PHONE": {"mechanism": "laplaceNoise", "config": laplace_config}},
        },
    }

    response = client.post("/api/anonymize", json=payload)

    assert response.status_code == 200
    anonymized_pii = response.json()["anonymizedPiis"][0]
    # The result has to refer back to the PII that was sent.
    assert anonymized_pii["id"] == "1"
    print("Anonymized phone number:", anonymized_pii["text"])
119 |
--------------------------------------------------------------------------------
/frontend/src/components/anonymizationConfig/AnonymizationConfigMenu.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useEffect, useState } from "react";
2 | import {
3 | Button,
4 | Card,
5 | Collapse,
6 | H3,
7 | H6,
8 | Icon,
9 | Tooltip,
10 | UL,
11 | } from "@blueprintjs/core";
12 | import { Position } from "@blueprintjs/core/lib/cjs/common/position";
13 | import "./AnonymizationConfigMenu.sass";
14 | import PropTypes from "prop-types";
15 | import Item from "./Item";
16 | import useLocalStorage from "../../js/useLocalStorage";
17 | import PolyglotContext from "../../js/polyglotContext";
18 | import { hasProperty, setFromHistoryOrDefault } from "./anonymizationConfig";
19 | import DefaultMechanismConfig from "./DefaultMechanismConfig";
20 |
21 | const AnonymizationConfigMenu = ({ tags, config, setConfig }) => {
22 | const t = useContext(PolyglotContext);
23 |
24 | const [configHistory, setConfigHistory] = useLocalStorage(
25 | "anonymizationConfigHistory",
26 | {
27 | suppression: {},
28 | generalization: {},
29 | pseudonymization: {},
30 | laplaceNoise: {},
31 | randomizedResponse: {},
32 | }
33 | );
34 | const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
35 |
useEffect(() => {
  // Initialize mechanism configs: every tag without an entry in
  // `config.mechanismsByTag` starts out with the "useDefault" mechanism.
  const mechanismsByTag = { ...config.mechanismsByTag };
  let changed = false;

  tags.forEach((tag) => {
    if (!hasProperty(mechanismsByTag, tag)) {
      mechanismsByTag[tag] = {
        mechanism: "useDefault",
        config: {},
      };
      changed = true;
    }
  });

  // Entries are added to a copy of `mechanismsByTag`; a shallow clone of
  // `config` alone would share the nested object and mutate the current state.
  if (changed) setConfig({ ...config, mechanismsByTag });
}, [tags, config, setConfig]);
53 |
/**
 * Remember the config last used for a mechanism/tag combination.
 *
 * @param {Object} mechanism - object with a `mechanism` name and its `config`
 * @param {string} tag - tag the config was used for
 */
function updateConfigHistory(mechanism, tag) {
  const { mechanism: mechanismName, config: mechanismConfig } = mechanism;
  // Copy both levels instead of writing into the nested history object. This
  // leaves the current state untouched and also works when the stored history
  // has no entry for this mechanism yet (spreading `undefined` adds nothing).
  setConfigHistory({
    ...configHistory,
    [mechanismName]: {
      ...configHistory[mechanismName],
      [tag]: mechanismConfig,
    },
  });
}
59 |
/**
 * Apply a mechanism to a tag, or to the default mechanism.
 *
 * When the mechanism arrives without a config (and is neither "none" nor
 * "useDefault"), its config is obtained from `setFromHistoryOrDefault`.
 * Non-empty configs are recorded in the config history.
 *
 * @param {Object} mechanism - object with a `mechanism` name and a `config`
 * @param {string} [tag="default"] - tag to configure; "default" addresses the
 *   default mechanism
 */
function updateConfig(mechanism, tag = "default") {
  const mechanismName = mechanism.mechanism;
  let mechanismConfig = mechanism.config;
  if (
    mechanismName !== "none" &&
    mechanismName !== "useDefault" &&
    (mechanismConfig === undefined ||
      Object.keys(mechanismConfig).length === 0)
  ) {
    mechanismConfig = setFromHistoryOrDefault(
      configHistory,
      tag,
      mechanismName
    );
  }

  const myMechanism = { mechanism: mechanismName, config: mechanismConfig };
  if (tag === "default") {
    setConfig({ ...config, defaultMechanism: myMechanism });
  } else {
    // Copy `mechanismsByTag` as well: a shallow clone of `config` shares the
    // nested object, so assigning into it would mutate the current state.
    setConfig({
      ...config,
      mechanismsByTag: { ...config.mechanismsByTag, [tag]: myMechanism },
    });
  }

  if (mechanismConfig && Object.keys(mechanismConfig).length > 0)
    updateConfigHistory(myMechanism, tag);
}
88 |
89 | const listItems = Object.entries(config.mechanismsByTag)
90 | .sort()
91 | .map(([tag, mechanism]) => {
92 | return (
93 |
94 | - updateConfig(myMechanism, tag)}
98 | />
99 |
100 | );
101 | });
102 |
103 | return (
104 |
105 | {t("anonymization.anonymization")}
106 |
119 |
123 |
124 |
132 |
133 |
134 |
135 |
136 |
137 | );
138 | };
139 |
140 | AnonymizationConfigMenu.propTypes = {
141 | tags: PropTypes.arrayOf(PropTypes.string).isRequired,
142 | config: PropTypes.objectOf(PropTypes.any).isRequired,
143 | setConfig: PropTypes.func.isRequired,
144 | };
145 |
146 | export default AnonymizationConfigMenu;
147 |
--------------------------------------------------------------------------------
/frontend/src/translations/en.js:
--------------------------------------------------------------------------------
1 | export default {
2 | about: {
3 | and: "and",
4 | description1: " is one of the projects supported by the ",
5 | description2: ".",
6 | },
7 | annotation: {
8 | tagsLabel: "Available tags:",
9 | browse: "Browse your computer",
10 | drop: "Drop a %{formats} document here.",
11 | fetching_tags_failed_toast:
12 | "Loading the categories for manual annotation failed.",
13 | metric: {
14 | f1: "F1-Score",
15 | f2: "F2-Score",
16 | falseNegatives: "False Negatives",
17 | falsePositives: "False Positives",
18 | precision: "Precision",
19 | recall: "Recall",
20 | truePositives: "True Positives",
21 | },
22 | na: "n/a",
23 | or: "or",
24 | scores: "Scores",
25 | scores_description:
26 | "The following metrics evaluate the automatic detection of personal data on the basis of your manual corrections.",
27 | scores_note:
28 | "Note that the metrics can only be as accurate as your corrections.",
29 | },
30 | anonymization: {
31 | anonymization: "Anonymization",
32 | advanced_settings: "Mechanism per category",
33 | do_not_anonymize: "Do not anonymize",
34 | default: "Default Mechanism",
35 | default_tooltip:
36 | "Choose the default mechanism to use for anonymizing personal data. You can hover over each mechanism to get more information.",
37 | use_default: "Use default",
38 | generalization: {
39 | name: "Generalization",
40 | replacement: "Replace with",
41 | tooltip: "Replace with a more general phrase",
42 | },
43 | laplace_noise: {
44 | epsilon: "Epsilon",
45 | epsilon_hint: "Epsilon has to be larger than 0",
46 | epsilon_tooltip:
47 | "Epsilon controls the level of noise added. A smaller epsilon increases privacy.",
48 | name: "Laplace Noise",
49 | },
50 | options: "Options",
51 | pseudonymization: {
52 | name: "Pseudonymization",
53 | format_string: "Replace with",
54 | format_string_hint: "use {} once as placeholder",
55 | counter_value: "Initial value",
56 | counter_value_hint: "enter a number larger than 0",
57 | tooltip: "Replace with a pseudonym",
58 | },
59 | randomized_response: {
60 | name: "Randomized Response",
61 | no_config: "This mechanism is configured to use differential privacy",
62 | },
63 | stateful: "Stateful",
64 | suppression: {
65 | as_original: "as original",
66 | name: "Suppression",
67 | custom_length: "Length",
68 | custom_length_hint: "leave empty or enter a number larger than 0",
69 | redact: "Redact",
70 | suppression_char: "Replace with",
71 | tooltip: "Replace each character with a different one",
72 | },
73 | },
74 | app: {
75 | fetching_recognizers_failed_toast:
76 | "Loading the list of available recognizers failed.",
77 | network_error_toast: "The server is not available.",
78 | rendering_error: "Unexpected Error",
79 | rendering_error_action: "Please try reloading the page.",
80 | },
81 | disclaimer: {
82 | text: "Do not use this software to anonymize documents. Do not further use the documents downloaded from this software.",
83 | title: "This is a prototype",
84 | },
85 | help: {
86 | intro: "The following steps briefly outline the anonymization process:",
87 | item1: "Upload the document that you want to anonymize.",
88 | item2:
89 | "Extend and correct the automatically detected personal data in the left document view.",
90 | item3: "Configure the anonymization according to your needs.",
91 | item4:
92 | "Preview the document on the right document view and download your anonymized document.",
93 | tip: "Tip: Many elements will show you further information if you hover the mouse pointer over them.",
94 | },
95 | main: {
96 | anonymize_file_failed_toast: "Creating the anonymized file failed.",
97 | compile_anonymize_file_failed_toast:
98 | "Compiling the anonymized file failed.",
99 | anonymizing_piis_failed_toast:
100 | "Anonymizing the personal data failed. Please check the annotations and anonymization config.",
101 | computing_scores_failed_toast:
102 | "Computing statistical measures for the automatic identification of personal data failed.",
103 | download: "Download",
104 | find_piis_failed_toast: "Processing of the document failed.",
105 | new_document: "New Document",
106 | new_document_confirm:
107 | "Create new document? All changes on the current document will be lost.",
108 | },
109 | nav: {
110 | about: "About OpenRedact",
111 | help: "Help",
112 | settings: "Settings",
113 | },
114 | preview: {
115 | warning:
116 | "This is just a preview. The actual output and formatting may look different.",
117 | },
118 | settings: {
119 | recognizers: {
120 | description: "Enable recognizers to search personal data for a category.",
121 | names: {
122 | number_recognizer: "Recognize numbers",
123 | de_country_recognizer: "Recognize country names",
124 | de_date_recognizer: "Recognize dates",
125 | email_recognizer: "Recognize e-mail addresses",
126 | phone_number_recognizer: "Recognize phone numbers",
127 | money_recognizer: "Recognize amounts of money",
128 | statistical_recognizer:
129 | "Use statistical models to recognize persons, locations, organizations and miscellaneous entities",
130 | },
131 | title: "Categories",
132 | },
133 | },
134 | tags: {
135 | number: "Numbers",
136 | country: "Countries",
137 | date: "Dates",
138 | email: "E-mail addresses",
139 | loc: "Locations",
140 | money: "Amounts of money",
141 | misc: "Miscellaneous",
142 | org: "Organizations",
143 | per: "Persons",
144 | phone: "Phone",
145 | },
146 | };
147 |
--------------------------------------------------------------------------------
/frontend/src/translations/de.js:
--------------------------------------------------------------------------------
1 | export default {
2 | about: {
3 | and: "und",
4 | description1: " ist eins der vom ",
5 | description2: " geförderten Projekte.",
6 | },
7 | annotation: {
8 | tagsLabel: "Verfügbare Kategorien:",
9 | browse: "Computer durchsuchen",
10 | drop: "Ziehe eine %{formats} Datei hierhin.",
11 | fetching_tags_failed_toast:
12 | "Das Laden der Klassennamen für die manuelle Annotation ist fehlgeschlagen.",
13 | metric: {
14 | f1: "F1-Maß",
15 | f2: "F2-Maß",
16 | falseNegatives: "Falsch Negativ",
17 | falsePositives: "Falsch Positiv",
18 | precision: "Genauigkeit",
19 | recall: "Trefferquote",
20 | truePositives: "Richtig Positiv",
21 | },
22 | na: "n.a.",
23 | or: "oder",
24 | scores: "Metriken",
25 | scores_description:
26 | "Die folgenden Metriken bewerten die automatische Erkennung von personenbezogenen Daten im Vergleich zu Ihren manuellen Anpassungen.",
27 | scores_note:
28 | "Beachte, dass die Aussagekraft der Metriken von deinen Anpassungen abhängt.",
29 | },
30 | anonymization: {
31 | anonymization: "Anonymisierung",
32 | advanced_settings: "Mechanismus pro Kategorie",
33 | do_not_anonymize: "Nicht anonymisieren",
34 | default: "Default Mechanismus",
35 | default_tooltip:
36 | "Wähle den Default Mechanismus zur Anonymisierung der personenbezogenen Daten aus. Gehe mit dem Mauszeiger über die Mechanismen um mehr Informationen zu erhalten.",
37 | use_default: "Default benutzen",
38 | generalization: {
39 | name: "Generalisierung",
40 | replacement: "Ersetzen mit",
41 | tooltip: "Ersetze mit einem allgemeineren Ausdruck",
42 | },
43 | laplace_noise: {
44 | epsilon: "Epsilon",
45 | epsilon_hint: "Epsilon muss größer als 0 sein",
46 | epsilon_tooltip:
47 | "Das Epsilon kontrolliert wie viel der Wert verrauscht wird. Ein kleineres Epsilon erhöht die Privatsphäre.",
48 | name: "Laplace Verrauschung",
49 | },
50 | options: "Optionen",
51 | pseudonymization: {
52 | name: "Pseudonymisierung",
53 | format_string: "Ersetzen mit",
54 | format_string_hint: "benutze {} einmal als Platzhalter",
55 | counter_value: "Initialer Wert",
56 | counter_value_hint: "gib eine Zahl größer 0 sein",
57 | tooltip: "Ersetze mit einem Pseudonym",
58 | },
59 | randomized_response: {
60 | name: "Zufällige Antwort",
61 | no_config:
62 | "Dieser Mechanismus ist für differentielle Privatsphäre vorkonfiguriert",
63 | },
64 | stateful: "Zustandsbehaftet",
65 | suppression: {
66 | as_original: "wie im Original",
67 | name: "Schwärzen",
68 | custom_length: "Länge",
69 | redact: "Schwärzen",
70 | suppression_char: "Ersetzen mit",
71 | custom_length_hint:
72 | "lasse das Feld leer oder gib eine Zahl größer 0 sein",
73 | tooltip: "Ersetze jedes Zeichen mit einem anderen",
74 | },
75 | },
76 | app: {
77 | fetching_recognizers_failed_toast:
78 | "Das Laden der verfügbaren Erkennungsmechanismen ist fehlgeschlagen.",
79 | network_error_toast: "Der Server ist nicht erreichbar.",
80 | rendering_error: "Unerwarteter Fehler",
81 | rendering_error_action: "Bitte versuche die Seite erneut zu laden.",
82 | },
83 | disclaimer: {
84 | text: "Bitte nutze diese Software nicht zur Anonymisierung von Dokumenten. Bitte benutze aus dieser Software heruntergeladene Dokumente nicht weiter.",
85 | title: "Dies ist ein Prototyp",
86 | },
87 | help: {
88 | intro:
89 | "Die folgenden Schritte skizzieren kurz den Anonymisierungs-Prozess:",
90 | item1: "Lade das Dokument hoch, dass Du anonymisieren willst.",
91 | item2:
92 | "Erweitere und korrigiere was automatisch als personenbezogene Daten erkannt wurde in der linken Dokumentenansicht.",
93 | item3: "Konfiguriere die Anonymisierung nach deinen Anforderungen.",
94 | item4:
95 | "Überprüfe die Vorschau der Anonymisierung in der rechten Dokumentenansicht und lade dein anonymisiertes Dokument herunter.",
96 | tip: "Tipp: Viele Elemente zeigen einen Hilfetext an, wenn Du mit dem Mauszeiger über sie gehst.",
97 | },
98 | main: {
99 | anonymize_file_failed_toast:
100 | "Das Erstellen der anonymisierten Datei ist fehlgeschlagen.",
101 | compile_anonymize_file_failed_toast:
102 | "Das Erstellen der anonymisierten Datei ist fehlgeschlagen.",
103 | anonymizing_piis_failed_toast:
104 | "Das Anonymisieren der personenbezogenen Daten ist fehlgeschlagen. Bitte überprüfe die Annotationen und Anonymisierungs Einstellungen.",
105 | computing_scores_failed_toast:
106 | "Die Berechnung statistischer Maße für die automatische Identifizierung persönlicher Daten ist fehlgeschlagen.",
107 | download: "Herunterladen",
108 | find_piis_failed_toast: "Die Verarbeitung der Datei ist fehlgeschlagen.",
109 | new_document: "Neues Dokument",
110 | new_document_confirm:
111 | "Neues Dokument erstellen? Alle Änderungen auf dem aktuellen Dokument gehen verloren",
112 | },
113 | nav: {
114 | about: "Über OpenRedact",
115 | help: "Hilfe",
116 | settings: "Einstellungen",
117 | },
118 | preview: {
119 | warning:
120 | "Dies ist nur eine Vorschau. Der tatsächliche Inhalt und die Formatierung können abweichen.",
121 | },
122 | settings: {
123 | recognizers: {
124 | description:
125 | "Aktiviere Erkenner um nach personenbezogene Daten für eine Kategorie zu suchen.",
126 | names: {
127 | number_recognizer: "Suche nach Zahlen",
128 | de_country_recognizer: "Suche nach Namen von Ländern",
129 | de_date_recognizer: "Suche nach Datumsangaben",
130 | email_recognizer: "Suche nach E-Mail Adressen",
131 | phone_number_recognizer: "Suche nach Telefonnummern",
132 | money_recognizer: "Suche nach Geldbeträgen",
133 | statistical_recognizer:
134 | "Nutze statistische Modelle um nach Personen, Orten, Organisationen, und anderen Entitäten zu suchen",
135 | },
136 | title: "Kategorien",
137 | },
138 | },
139 | tags: {
140 | number: "Zahlen",
141 | country: "Länder",
142 | date: "Datumsangaben",
143 | email: "E-mail Adressen",
144 | loc: "Orte",
145 | money: "Geldbeträge",
146 | misc: "Andere",
147 | org: "Organisationen",
148 | per: "Personen",
149 | phone: "Telefonnummern",
150 | },
151 | };
152 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # OpenRedact
4 |
5 | **Semi-automatic data anonymization for German documents.**
6 |
7 | ---
8 |
9 |
10 |
11 | [](http://opensource.org/licenses/MIT)
12 | [](https://github.com/ambv/black)
13 | [](https://github.com/prettier/prettier)
14 | 
15 | 
16 | 
17 |
18 | _**:warning: Disclaimer :warning::**_ This is a prototype. Do not use for anything critical.
19 |
20 | _**:warning: Note :warning::**_ This tool focuses on the text content. Metadata will not be anonymized.
21 |
22 | ## Description
23 |
24 | This repository is the home to the OpenRedact app, a webapp for semi-automatic anonymization of German language documents.
25 | [OpenRedact](https://openredact.org) is a [Prototype Fund](https://prototypefund.de) project, funded by the [Federal Ministry of Education and Research](https://www.bmbf.de).
26 | A detailed description of the project and prototype can be seen [here](https://openredact.org/prototypefund).
27 |
28 |
29 |
30 | ## CLI
31 |
32 | You can use the CLI script `backend/cli/redact.py` to anonymize a directory of documents in an unsupervised manner.
33 |
34 | ```shell script
35 | ./redact.py --input_dir "path/to/documents/" --output_dir "out/directory/"
36 | ```
37 |
38 | Call `./redact.py --help` for usage instructions and important notes.
39 |
40 | ## Webapp
41 |
42 | ### OpenRedact works with document file formats
43 |
44 | This screencast walks you through the anonymization of a document, from upload to download of the anonymized file.
45 |
46 | 
47 |
48 | ### OpenRedact supports different anonymization methods
49 |
50 | This screencast demonstrates the different anonymization methods that OpenRedact supports.
51 | The modifications on the left are immediately previewed on the right.
52 |
53 | 
54 |
55 | ### OpenRedact comes with an annotation tool
56 |
57 | The automatically detected and proposed personal data can be corrected and extended by the user using our annotation tool.
58 |
59 |
60 |
61 | ### OpenRedact tells you how good its automatic personal data detection is
62 |
63 | Based on the manual corrections and extensions, we can assess the mechanism for automatic detection of personal data.
64 |
65 |
66 |
67 | ## Deployment
68 |
69 | The app is best deployed using Docker.
70 |
71 | ### Run the full stack using Docker-Compose
72 |
73 | We have pre-built Docker images available at https://hub.docker.com/u/openredact.
74 |
75 | Pull and start the containers by running:
76 |
77 | ```bash
78 | # Clone the repo
79 | git clone https://github.com/openredact/openredact-app.git
80 | cd openredact-app
81 |
82 | # Pull images & start containers
83 | docker-compose pull
84 | docker-compose up
85 | ```
86 |
87 | This will host the backend at port 8000 (also reachable at http://localhost/api) and the frontend at port 80.
88 | Once started, you can access the webapp at http://localhost/.
89 |
90 | ### Run the frontend using Docker
91 |
92 | ```bash
93 | cd frontend
94 | docker build -t openredact/frontend .
95 | docker run -p 80:80 openredact/frontend
96 | ```
97 |
98 | This will build the frontend inside a node Docker container and deploy the result in an nginx container.
99 | For more details about this procedure see [React in Docker with Nginx, built with multi-stage Docker builds, including testing](https://medium.com/@tiangolo/react-in-docker-with-nginx-built-with-multi-stage-docker-builds-including-testing-8cc49d6ec305).
100 |
101 | ### Run the backend using Docker
102 |
103 | ```bash
104 | cd backend
105 | docker build -t openredact/backend .
106 | docker run -p 8000:8000 openredact/backend
107 | ```
108 |
109 | ## API Documentation
110 |
111 | Documentation of the API is available at the endpoints `/docs` ([Swagger UI](https://swagger.io/tools/swagger-ui/))
112 | and `/redoc` ([ReDoc](https://redocly.github.io/redoc/)), e.g. http://127.0.0.1:8000/redoc.
113 | The OpenAPI specification can be found [here](http://127.0.0.1:8000/openapi.json).
114 |
115 | ## Development
116 |
117 | First, follow the instructions in the [backend](backend/README.md) or [frontend](frontend/README.md) readme.
118 | Then, continue with the instructions below.
119 |
120 | ### Developing using Docker
121 |
122 | If you want to use our Docker setup for development, run:
123 |
124 | ```bash
125 | docker-compose -f docker-compose.dev.yml up
126 | ```
127 |
128 | Don't forget to add the project's directory to the list of allowed file sharing resources in the Docker Desktop preferences.
129 |
130 | ### Install the pre-commit hooks
131 |
132 | `pre-commit` is a Python tool to manage git pre-commit hooks.
133 | Running the following code requires the backend dev requirements to be set up as explained [here](backend/README.md).
134 | We have pre-commit hooks for formatting and linting Python and JavaScript code (black, flake8, prettier and eslint).
135 | Note that the tests, being slower than formatters and linters, are run by CI.
136 | So don't forget to run them manually before committing.
137 |
138 | ```bash
139 | pre-commit install
140 | git config --bool flake8.strict true # Makes the commit fail if flake8 reports an error
141 | ```
142 |
143 | To run the hooks:
144 |
145 | ```bash
146 | pre-commit run --all-files
147 | ```
148 |
149 | ## How to contact us
150 |
151 | For usage questions, bugs, or suggestions please file a GitHub issue.
152 | If you would like to contribute or have other questions please email hello@openredact.org.
153 |
154 | ## License
155 |
156 | [MIT License](https://github.com/openredact/openredact-app/blob/master/LICENSE)
157 |
--------------------------------------------------------------------------------
/backend/app/endpoints.py:
--------------------------------------------------------------------------------
1 | from dataclasses import asdict
2 | from typing import List
3 | import base64
4 | from pathlib import Path
5 |
6 | from fastapi import APIRouter, File, UploadFile, Form, HTTPException
7 | from starlette.responses import StreamingResponse, JSONResponse
8 | import io
9 | import json
10 | import os
11 | import logging
12 | from expose_text import BinaryWrapper, UnsupportedFormat
13 | import nerwhal
14 | from anonymizer import AnonymizerConfig, Anonymizer, Pii, ParserError
15 |
16 | from app.schemas import (
17 | Annotation,
18 | AnnotationsForEvaluation,
19 | EvaluationResponse,
20 | FindPiisResponse,
21 | ErrorMessage,
22 | AnonymizedPiisResponse,
23 | AnonymizedPii,
24 | )
25 |
# Logger named after this module.
logger = logging.getLogger(__name__)

# Router on which all endpoints of this module are registered.
router = APIRouter()

# Lookup from a recognizer's name (the stem of its path) to the path itself,
# built once from the recognizers that nerwhal reports as integrated.
recognizer_name_to_path_lookup = dict(
    (Path(recognizer_path).stem, recognizer_path) for recognizer_path in nerwhal.list_integrated_recognizers()
)
31 |
32 |
@router.post(
    "/anonymize",
    summary="Anonymize PIIs",
    description="Anonymize the given PIIs by replacing their text content according to the provided config.",
    response_model=AnonymizedPiisResponse,
    responses={400: {"model": ErrorMessage}},
)
async def anonymize(piis: List[Pii], config: AnonymizerConfig):
    """Anonymize the given PIIs with an anonymizer built from ``config``.

    Only PIIs that the anonymizer flags as ``modified`` are collected. If that
    leaves fewer results than inputs, the config is treated as invalid.

    Raises:
        HTTPException: 400 if the anonymizer raises ``ParserError`` or if one
            or more PIIs were not flagged as modified.
    """
    anonymizer = Anonymizer(config)
    try:
        anonymized_piis = [AnonymizedPii(text=pii.text, id=pii.id) for pii in anonymizer.anonymize(piis) if pii.modified]
    except ParserError as err:
        raise HTTPException(status_code=400, detail="Error parsing a pii") from err

    if len(anonymized_piis) != len(piis):
        # One or more piis were not flagged as `modified`.
        # Only the counts are logged: the piis carry personal data that must
        # not end up in the log files.
        logger.error(f"Invalid config ({len(anonymized_piis)} of {len(piis)} piis were anonymized)")
        raise HTTPException(status_code=400, detail="Invalid Config")

    return AnonymizedPiisResponse(anonymized_piis=anonymized_piis)
53 |
54 |
@router.post(
    "/anonymize-file",
    summary="Anonymize file",
    description="Anonymize the given file by replacing the text passages specified in anonymizations. The character indices "
    "in anonymizations refer to the file's plain text representation.",
    responses={200: {"content": {"application/octet-stream": {}}}, 400: {"model": ErrorMessage}},
)
async def anonymize_file(
    file: UploadFile = File(...),
    anonymizations: str = Form(
        ...,
        description="A json array of objects with fields startChar, endChar and text. E.g. "
        '[{"startChar":0,"endChar":10,"text":"XXX"}].',
    ),
    return_base64: bool = False,
):
    """Apply the given anonymizations to the uploaded file and return the result.

    Args:
        file: The uploaded document; its extension selects the file format.
        anonymizations: JSON array of objects with the fields ``startChar``,
            ``endChar`` and ``text``.
        return_base64: If true, respond with a JSON object holding the altered
            file base64 encoded instead of a binary download.

    Raises:
        HTTPException: 400 if the file cannot be wrapped, if ``anonymizations``
            is not valid JSON of the expected shape, or if base64 encoding fails.
    """
    from urllib.parse import quote

    filename = file.filename or ""
    _, extension = os.path.splitext(filename)
    content = await file.read()
    await file.close()

    try:
        wrapper = BinaryWrapper(content, extension)
    except UnsupportedFormat as e:
        logger.error(f"Unsupported File Format: {e}")
        raise HTTPException(status_code=400, detail="Unsupported File Format") from e
    except Exception as e:
        # Same fallback as in the find-piis endpoint: a file that cannot be
        # processed is a client error, not a server error.
        logger.error(f"File Handling Error: {e}")
        raise HTTPException(status_code=400, detail="File Handling Error") from e

    # Validate the whole payload first so that malformed input yields a 400
    # instead of an unhandled exception (HTTP 500).
    try:
        alterations = [
            (alteration["startChar"], alteration["endChar"], alteration["text"])
            for alteration in json.loads(anonymizations)
        ]
    except (ValueError, KeyError, TypeError) as e:
        logger.error(f"Invalid anonymizations: {e!r}")
        raise HTTPException(status_code=400, detail="Invalid anonymizations") from e

    for start_char, end_char, text in alterations:
        wrapper.add_alter(start_char, end_char, text)
    wrapper.apply_alters()

    if return_base64:
        try:
            # Send anonymized file as base64 encoding
            base64_bytes = base64.b64encode(wrapper.bytes)

            return JSONResponse({"base64": base64_bytes.decode()})
        except Exception as e:
            logger.error(f"base64 encoding failed: {e}")
            raise HTTPException(status_code=400, detail="File Handling Error (base64 encoding failed)") from e

    # Regular download. The header needs a `filename` parameter; names that
    # require escaping are sent percent-encoded in the extended `filename*` form.
    quoted_filename = quote(filename)
    if quoted_filename != filename:
        content_disposition = f"attachment; filename*=utf-8''{quoted_filename}"
    else:
        content_disposition = f'attachment; filename="{filename}"'

    return StreamingResponse(
        io.BytesIO(wrapper.bytes),
        media_type="application/octet-stream",
        headers={"Content-Disposition": content_disposition},
    )
102 |
103 |
@router.post(
    "/find-piis",
    summary="Find PIIs",
    description="Find personally identifiable information in the given file. The character and token indices refer to the "
    "file's plain text representation.",
    response_model=FindPiisResponse,
    responses={400: {"model": ErrorMessage}},
)
async def find_piis(recognizers: str = Form(...), file: UploadFile = File(...)):
    """Run the selected recognizers on the uploaded file's plain text.

    Args:
        recognizers: JSON array of recognizer names. The special name
            ``statistical_recognizer`` switches on nerwhal's statistical NER;
            every other name must be a key of ``recognizer_name_to_path_lookup``.
        file: The uploaded document; its extension selects the file format.

    Raises:
        HTTPException: 400 if the file cannot be wrapped or if ``recognizers``
            is not a JSON array of known recognizer names.
    """
    _, extension = os.path.splitext(file.filename or "")
    content = await file.read()
    await file.close()

    try:
        wrapper = BinaryWrapper(content, extension)
    except UnsupportedFormat as e:
        logger.error(f"Unsupported File Format: {e}")
        raise HTTPException(status_code=400, detail="Unsupported File Format") from e
    except Exception as e:
        logger.error(f"File Handling Error: {e}")
        raise HTTPException(status_code=400, detail="File Handling Error") from e

    # Malformed or unknown recognizer selections are client errors; answer
    # them with a 400 instead of letting the exception surface as a 500.
    try:
        recognizer_names = json.loads(recognizers)
    except ValueError as e:
        logger.error(f"Invalid recognizers: {e}")
        raise HTTPException(status_code=400, detail="Invalid recognizers") from e
    if not isinstance(recognizer_names, list):
        logger.error("Invalid recognizers: expected a json array")
        raise HTTPException(status_code=400, detail="Invalid recognizers")

    # The statistical recognizer is not path based; it is toggled by a flag.
    use_statistical_ner = "statistical_recognizer" in recognizer_names
    try:
        recognizer_paths = [
            recognizer_name_to_path_lookup[name] for name in recognizer_names if name != "statistical_recognizer"
        ]
    except (KeyError, TypeError) as e:
        logger.error(f"Unknown recognizer: {e}")
        raise HTTPException(status_code=400, detail="Unknown recognizer") from e

    nerwhal_config = nerwhal.Config(
        language="de", recognizer_paths=recognizer_paths, use_statistical_ner=use_statistical_ner
    )
    res = nerwhal.recognize(wrapper.text, config=nerwhal_config, combination_strategy="smart-fusion")

    return FindPiisResponse(
        piis=[asdict(pii) for pii in res["ents"]],
        tokens=[asdict(token) for token in res["tokens"]],
        # The format name is the wrapper's file class name, lowercased and
        # with "format" stripped.
        format=str(wrapper.file.__class__.__name__).lower().replace("format", ""),
    )
141 |
142 |
@router.post(
    "/score",
    summary="Compute scores",
    description="Compute common scoring metrics for the provided annotations data.",
    response_model=EvaluationResponse,
)
async def score(data: AnnotationsForEvaluation):
    """Evaluate the computed annotations against the gold annotations via nerwhal."""

    def _as_entities(annotations: List[Annotation]):
        # Annotation start/end are token based indices. For scoring the actual
        # values do not matter, so they are passed on as if they were
        # character based.
        return [
            nerwhal.NamedEntity(start_char=annotation.start, end_char=annotation.end, tag=annotation.tag)
            for annotation in annotations
        ]

    gold_entities = _as_entities(data.gold_annotations)
    computed_entities = _as_entities(data.computed_annotations)
    return nerwhal.evaluate(computed_entities, gold_entities)
158 |
159 |
@router.get(
    "/tags",
    summary="PII Tags",
    description="Fetch the types of personally identifiable information that the backend is looking for. The result is a "
    "list of tags, e.g. PER or LOC.",
    response_model=List[str],
)
async def tags():
    """Return the supported PII tags in alphabetical order."""
    return sorted(["PER", "LOC", "ORG", "MISC", "MONEY", "EMAIL", "PHONE", "NUMBER", "COUNTRY", "DATE"])
169 |
170 |
@router.get(
    "/recognizers",
    summary="PII Recognizers",
    description="Fetch the list of recognizers that are supported by the backend.",
    response_model=List[str],
)
async def supported_recognizers():
    """Return the names of all path based recognizers followed by the statistical one."""
    return [*recognizer_name_to_path_lookup, "statistical_recognizer"]
179 |
--------------------------------------------------------------------------------
/frontend/src/components/Main.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useRef, useState } from "react";
2 | import { saveAs } from "file-saver";
3 | import PropTypes from "prop-types";
4 | import AnnotationControl from "./annotation/AnnotationControl";
5 | import PreviewControl from "./preview/PreviewControl";
6 | import "./Main.sass";
7 | import { anonymizeFile, findPiis } from "../api/routes";
8 | import Token from "../js/token";
9 | import Annotation from "../js/annotation";
10 | import AppToaster from "../js/toaster";
11 | import PolyglotContext from "../js/polyglotContext";
12 | import MainMenu from "./MainMenu";
13 | import ScoresDialog from "./scores/ScoresDialog";
14 | import useAnonymization from "../js/useAnonymization";
15 | import useCompile from "../js/useCompile";
16 | import constants from "../js/constants";
17 |
18 | const Main = ({ tags, anonymizationConfig, activatedRecognizers }) => {
19 | const t = useContext(PolyglotContext);
20 |
21 | const [paragraphs, setParagraphs] = useState([]);
22 | const [annotations, setAnnotations] = useState([]);
23 | const [computedAnnotations, setComputedAnnotations] = useState([]);
24 | const [isLoading, setIsLoading] = useState(false);
25 | const [showScoresDialog, setShowScoresDialog] = useState(false);
26 | const fileFormData = useRef({});
27 | const [isCompilable, setIsCompilable] = useState(false);
28 | const [compileDate, setCompileDate] = useState(null);
29 | const compileTimer = null;
30 |
31 | const anonymizations = useAnonymization({
32 | paragraphs,
33 | annotations,
34 | anonymizationConfig,
35 | });
36 |
// Reset all document specific state so that a new document can be loaded.
function onNewDocument() {
  setParagraphs([]);
  setAnnotations([]);
  // Also drop the annotations computed for the previous document; they are
  // handed to the scores dialog and must not outlive their document.
  setComputedAnnotations([]);
  setIsCompilable(false);

  clearTimeout(compileTimer);

  document.title = constants.title;
}
46 |
// Send the first dropped file to the backend to find its PIIs, then
// initialize tokens, annotations and the document title from the response.
// Shows an error toast if the request (or the processing of its response) fails.
function onFileDrop(files) {
  // Nothing to upload, e.g. when the drop contained no usable file.
  if (!files || files.length === 0) {
    return;
  }

  setIsLoading(true);

  const formData = new FormData();
  formData.append("file", files[0]);
  formData.append("recognizers", JSON.stringify(activatedRecognizers));
  // Keep the form data around; it is reused when the file is downloaded.
  fileFormData.current = formData;

  findPiis(formData)
    .then((response) => {
      // Backend does not support paragraphs yet.
      const tokens = response.data.tokens.map(
        (token) =>
          new Token(
            token.startChar,
            token.endChar,
            token.text,
            token.hasWs,
            token.brCount
          )
      );
      // Use a single paragraph instead.
      setParagraphs([{ htmlProps: {}, tokens }]);

      // Annotations are as well not on paragraph-level
      const myAnnotations = response.data.piis.map(
        (pii) => new Annotation(pii.startTok, pii.endTok, pii.tag, pii.text)
      );
      setAnnotations([myAnnotations]);
      setComputedAnnotations([myAnnotations]);

      // Is this a compilable file, e.g., PDF? Set the flag in both
      // directions so that it never keeps the value of a previous document.
      // TODO find-piis should return a compiled version as well (current not efficient)
      // TODO automatic compilation after a timeout is not wired up yet
      setIsCompilable(response.data.format.indexOf("pdf") > -1);

      setIsLoading(false);
      document.title = `OpenRedact - ${
        fileFormData.current.get("file").name
      }`;
    })
    .catch(() => {
      AppToaster.show({
        message: t("main.find_piis_failed_toast"),
        intent: "danger",
      });
      setIsLoading(false);
    });
}
104 |
// Request the anonymized version of the current file from the backend and
// offer it for download under the original file name.
function onDownload() {
  const payload = fileFormData.current;
  payload.set("anonymizations", JSON.stringify(anonymizations));

  const saveResponse = (response) => {
    saveAs(new Blob([response.data]), payload.get("file").name);
  };

  const reportFailure = () => {
    AppToaster.show({
      message: t("main.anonymize_file_failed_toast"),
      intent: "danger",
    });
  };

  anonymizeFile(payload).then(saveResponse).catch(reportFailure);
}
120 |
// Store the changed annotations of one paragraph. Items that are not yet
// Annotation instances are converted, with their text rebuilt from the tokens.
function onAnnotationsChange(paragraphIndex, changedParagraphAnnotations) {
  const toAnnotation = (item) => {
    if (item instanceof Annotation) {
      return item;
    }

    // TODO paragraph support (tokens are always read from the first paragraph)
    const text = paragraphs[0].tokens
      .slice(item.start, item.end)
      .map((token, offset) => {
        // Add a separating blank unless this is the last token of the span.
        const hasFollower = item.start + offset + 1 < item.end;
        return token.text + (hasFollower && token.hasWhitespace ? " " : "");
      })
      .join("");

    return new Annotation(item.start, item.end, item.tag, text);
  };

  // Replace only the entry of the changed paragraph in a copy of the state.
  const updatedAnnotations = annotations.slice();
  updatedAnnotations[paragraphIndex] = changedParagraphAnnotations.map((item) =>
    toAnnotation(item)
  );
  setAnnotations(updatedAnnotations);

  // NOTE: re-arming a compile timer for compilable documents is not wired up here.
}
155 |
156 | const { isCompiling, base64pdf } = useCompile({
157 | anonymizations,
158 | fileFormData,
159 | compileTimer,
160 | compileDate,
161 | isCompilable,
162 | });
163 |
164 | return (
165 |
166 |
0}
170 | onShowScores={() => setShowScoresDialog(true)}
171 | showCompileButton={isCompilable}
172 | isCompiling={isCompiling}
173 | onCompile={() => {
174 | setCompileDate(new Date());
175 | }}
176 | />
177 |
178 |
187 |
192 |
setShowScoresDialog(false)}
195 | annotations={annotations.length > 0 ? annotations[0] : []}
196 | goldAnnotations={
197 | computedAnnotations.length > 0 ? computedAnnotations[0] : []
198 | }
199 | />
200 |
201 |
202 | );
203 | };
204 |
205 | Main.propTypes = {
206 | tags: PropTypes.arrayOf(PropTypes.string).isRequired,
207 | anonymizationConfig: PropTypes.objectOf(PropTypes.any).isRequired,
208 | activatedRecognizers: PropTypes.arrayOf(PropTypes.string).isRequired,
209 | };
210 |
211 | export default Main;
212 |
--------------------------------------------------------------------------------
/frontend/src/components/annotation/AnnotationForm.jsx:
--------------------------------------------------------------------------------
1 | import React, { useContext, useState } from "react";
2 | import { Button, Divider, Icon, Tag, Tooltip } from "@blueprintjs/core";
3 | import { IconNames } from "@blueprintjs/icons";
4 | import PropTypes from "prop-types";
5 | import { useHotkeys } from "react-hotkeys-hook";
6 |
7 | import "./AnnotationForm.sass";
8 | import PolyglotContext from "../../js/polyglotContext";
9 | import constants from "../../js/constants";
10 | import TokenAnnotator from "./annotator/TokenAnnotator";
11 |
12 | const AnnotationForm = ({
13 | paragraphs,
14 | annotations,
15 | onAnnotationsChange,
16 | tags,
17 | }) => {
18 | const [activeTag, setActiveTag] = useState(tags[0]);
19 | const t = useContext(PolyglotContext);
20 | const navButtons = false;
21 | const tagsLabel = false;
22 | const [selectedParagraph, setSelectedParagraph] = useState(-1);
23 | const [selectedStart, setSelectedStart] = useState(-1);
24 | const [selectedEnd, setSelectedEnd] = useState(-1);
25 |
// Remove the annotation behind `mark` from its paragraph. If it was the
// selected one, the selection moves to the previous annotation first.
function onAnnotationRemove(paragraphIndex, mark) {
  const isRemoved = (item) =>
    item.start === mark.start && item.end === mark.end;
  const remaining = annotations[paragraphIndex].filter(
    (item) => !isRemoved(item)
  );

  const removesSelection =
    selectedParagraph === paragraphIndex &&
    selectedStart === mark.start &&
    selectedEnd === mark.end;

  if (removesSelection) {
    // Uses the annotations as they were before the removal.
    selectPreviousAnnotation(
      annotations,
      selectedParagraph,
      selectedStart,
      selectedEnd
    );
  }

  onAnnotationsChange(paragraphIndex, remaining);
}
48 |
// Toggle the selection: clicking the selected annotation deselects it,
// clicking any other annotation selects that one.
function onAnnotationClick(paragraphIndex, mark) {
  const isAlreadySelected =
    paragraphIndex === selectedParagraph &&
    selectedStart === mark.start &&
    selectedEnd === mark.end;

  // -1 stands for "nothing selected".
  const [nextParagraph, nextStart, nextEnd] = isAlreadySelected
    ? [-1, -1, -1]
    : [paragraphIndex, mark.start, mark.end];

  setSelectedParagraph(nextParagraph);
  setSelectedStart(nextStart);
  setSelectedEnd(nextEnd);
}
66 |
// Move the selection to the annotation that precedes the given position:
// the closest annotation with a smaller start in the same paragraph, or else
// the last annotation of the nearest earlier paragraph that has any.
// If neither exists the selection stays as it is. Always returns false.
const selectPreviousAnnotation = (
  inputAnnotations,
  paragraph,
  start,
  // Not needed to find the predecessor; kept so that the signature matches
  // selectNextAnnotation and the existing callers.
  // eslint-disable-next-line no-unused-vars
  end
) => {
  // Paragraph must be set
  if (paragraph < 0) {
    return false;
  }

  // Sort copies only: the arrays belong to the component's props/state and
  // must not be reordered in place.
  const byStartDescending = (items) =>
    [...(items || [])].sort((a, b) => b.start - a.start);

  const paragraphAnnotations = byStartDescending(inputAnnotations[paragraph]);

  if (paragraphAnnotations.length < 1) {
    // Unset (may be replaced below by an annotation of an earlier paragraph)
    setSelectedParagraph(-1);
    setSelectedStart(-1);
    setSelectedEnd(-1);
  }

  // First annotation (in descending order) with start < selected start
  const previous = paragraphAnnotations.find((item) => item.start < start);

  if (previous) {
    // previous annotation found in current paragraph
    setSelectedStart(previous.start);
    setSelectedEnd(previous.end);
    return false;
  }

  // Otherwise take the last annotation of the nearest non-empty earlier paragraph
  for (let index = paragraph - 1; index >= 0; index -= 1) {
    const candidates = byStartDescending(inputAnnotations[index]);

    if (candidates.length > 0) {
      setSelectedParagraph(index);
      setSelectedStart(candidates[0].start);
      setSelectedEnd(candidates[0].end);
      break;
    }
  }

  return false;
};
127 |
// Move the selection to the annotation that follows the given position:
// the first annotation starting at or after the selected end in the same
// paragraph, or else the first annotation of the nearest later paragraph
// that has any. If neither exists the selection stays as it is.
// Always returns false.
const selectNextAnnotation = (inputAnnotations, paragraph, start, end) => {
  // Paragraph must be set
  if (paragraph < 0) {
    return false;
  }

  // Sort copies only: the arrays belong to the component's props/state and
  // must not be reordered in place.
  const byStartAscending = (items) =>
    [...(items || [])].sort((a, b) => a.start - b.start);

  // `end` is exclusive, so a directly adjacent annotation starts exactly at
  // the selected end and must be accepted (hence >= instead of >).
  const next = byStartAscending(inputAnnotations[paragraph]).find(
    (item) => item.start >= end
  );

  if (next) {
    setSelectedStart(next.start);
    setSelectedEnd(next.end);
    return false;
  }

  // find in next paragraphs, skipping empty ones
  for (let index = paragraph + 1; index < paragraphs.length; index += 1) {
    const candidates = byStartAscending(inputAnnotations[index]);

    if (candidates.length > 0) {
      // Take first annotation of that paragraph
      setSelectedParagraph(index);
      setSelectedStart(candidates[0].start);
      setSelectedEnd(candidates[0].end);
      break;
    }
  }
  // TODO jump to beginning?

  return false;
};
179 |
// Activate the tag with the given 1-based index and, if an annotation is
// selected, re-tag that annotation.
//   tagIndex         - 1-based index into `tags` (e.g. parsed from a hotkey)
//   tag              - the currently active tag
//   inputAnnotations - annotations per paragraph
//   paragraph/start/end - the selected annotation, -1 if nothing is selected
const selectTag = (
  tagIndex,
  tag,
  inputAnnotations,
  paragraph,
  start,
  end
) => {
  // Is the selected tag valid?
  const isValidIndex =
    Number.isInteger(tagIndex) && tagIndex > 0 && tagIndex <= tags.length;
  if (!isValidIndex) {
    return;
  }

  const newActiveTag = tags[tagIndex - 1];

  // If old tag is equal to new tag, no need to change anything
  if (tag === newActiveTag) {
    return;
  }

  // Update active tag
  setActiveTag(newActiveTag);

  // Is any annotation selected that we need to change?
  if (paragraph < 0 || start < 0 || end < 0) {
    return;
  }

  const paragraphAnnotations = inputAnnotations[paragraph] || [];
  const selectedIndex = paragraphAnnotations.findIndex(
    (item) => item.start === start && item.end === end
  );
  if (selectedIndex < 0) {
    return;
  }

  // Never mutate the annotation in place: the very same object may be
  // referenced from other state, which would then change silently as well.
  // Instead, replace it by a copy that keeps its prototype and properties
  // and only differs in the tag.
  const retagged = paragraphAnnotations.map((item, index) =>
    index === selectedIndex
      ? Object.assign(Object.create(Object.getPrototypeOf(item)), item, {
          tag: newActiveTag,
        })
      : item
  );

  // Send annotation change
  onAnnotationsChange(paragraph, retagged);
};
223 |
224 | // Hot keys
225 | useHotkeys(
226 | "left",
227 | () =>
228 | selectPreviousAnnotation(
229 | annotations,
230 | selectedParagraph,
231 | selectedStart,
232 | selectedEnd
233 | ),
234 | { keydown: false, keyup: true },
235 | [annotations, selectedParagraph, selectedStart, selectedEnd]
236 | );
237 | useHotkeys(
238 | "right",
239 | () =>
240 | selectNextAnnotation(
241 | annotations,
242 | selectedParagraph,
243 | selectedStart,
244 | selectedEnd
245 | ),
246 | { keydown: false, keyup: true },
247 | [annotations, selectedParagraph, selectedStart, selectedEnd]
248 | );
249 | useHotkeys(
250 | "0,1,2,3,4,5,6,7,8,9",
251 | (event) =>
252 | selectTag(
253 | parseInt(event.key, 10),
254 | activeTag,
255 | annotations,
256 | selectedParagraph,
257 | selectedStart,
258 | selectedEnd
259 | ),
260 | { keydown: false, keyup: true },
261 | [activeTag, annotations, selectedParagraph, selectedStart, selectedEnd]
262 | );
263 |
264 | console.log("Selected: ", [selectedParagraph, selectedStart, selectedEnd]);
265 |
266 | return (
267 |
268 |
269 | {tagsLabel && (
270 |
{t("annotation.tagsLabel")}
271 | )}
272 | {tags.map((tag, index) => (
273 |
278 |
297 |
298 | ))}
299 | {navButtons && (
300 |
301 |
305 |
318 |
319 |
323 |
336 |
337 |
338 | )}
339 |
340 |
341 |
342 |
343 | {paragraphs.map((paragraph, paragraphIndex) => (
344 |
345 |
{
351 | onAnnotationsChange(
352 | paragraphIndex,
353 | changedParagraphAnnotations
354 | );
355 | }}
356 | getSpan={(span) => ({
357 | ...span,
358 | tag: activeTag,
359 | })}
360 | renderMark={(mark) => (
361 | = constants.maxTagColors ? "" : "tag-colored"
372 | }`}
373 | key={mark.key}
374 | >
375 | onAnnotationClick(paragraphIndex, mark)}
380 | onKeyPress={() => onAnnotationClick(paragraphIndex, mark)}
381 | >
382 | {mark.text}
383 | {mark.tag}
384 |
385 | onAnnotationRemove(paragraphIndex, mark)}
390 | onKeyPress={() => onAnnotationClick(paragraphIndex, mark)}
391 | >
392 |
393 |
394 |
395 | )}
396 | />
397 |
398 | ))}
399 |
400 |
401 | );
402 | };
403 |
404 | AnnotationForm.propTypes = {
405 | paragraphs: PropTypes.arrayOf(PropTypes.object).isRequired,
406 | annotations: PropTypes.arrayOf(PropTypes.array).isRequired,
407 | onAnnotationsChange: PropTypes.func.isRequired,
408 | tags: PropTypes.arrayOf(PropTypes.string).isRequired,
409 | };
410 |
411 | export default AnnotationForm;
412 |
--------------------------------------------------------------------------------