├── .dockerignore
├── .editorconfig
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── docker.yml
├── .gitignore
├── DESIGN.md
├── Dockerfile
├── LICENSE
├── Makefile
├── QUERIES.md
├── README.md
├── RESEARCH.md
├── contrib
    ├── link_classification_experiments.ipynb
    ├── link_classification_experiments_2.ipynb
    ├── occrp-experiment
    │   ├── DH_2022_NLP_Workshop.ipynb
    │   └── crawl.py
    └── tagged_sentences_20230203.csv
├── docker-compose.yml
├── docs
    ├── CNAME
    ├── index.html
    └── scribble.png
├── frontend
    ├── .gitignore
    ├── package-lock.json
    ├── package.json
    ├── public
    │   ├── favicon.ico
    │   └── index.html
    ├── src
    │   ├── App.tsx
    │   ├── components
    │   │   ├── ArticleClusters.tsx
    │   │   ├── ArticleCorefList.tsx
    │   │   ├── ArticleDrawer.tsx
    │   │   ├── ArticlePreview.tsx
    │   │   ├── ArticleStoryEditor.tsx
    │   │   ├── ArticleText.tsx
    │   │   ├── ClusterArticles.tsx
    │   │   ├── ClusterButtonGroup.tsx
    │   │   ├── ClusterDrawer.tsx
    │   │   ├── Footer.tsx
    │   │   ├── Navbar.tsx
    │   │   ├── Pagination.tsx
    │   │   ├── PairLink.tsx
    │   │   ├── RelatedListing.tsx
    │   │   ├── ScreenContent.tsx
    │   │   ├── ScreenHeading.tsx
    │   │   ├── SettingsDialog.tsx
    │   │   ├── SimilarListing.tsx
    │   │   ├── StoryArticleImportDialog.tsx
    │   │   ├── StoryArticles.tsx
    │   │   ├── StoryCreateDialog.tsx
    │   │   ├── StoryDeleteDialog.tsx
    │   │   ├── StoryGraph.tsx
    │   │   ├── StoryLinkerBanner.tsx
    │   │   ├── StoryNomNom.tsx
    │   │   ├── StoryPairs.tsx
    │   │   ├── StoryUpdateDialog.tsx
    │   │   └── util.tsx
    │   ├── constants.ts
    │   ├── hooks.ts
    │   ├── index.tsx
    │   ├── logic.ts
    │   ├── react-app-env.d.ts
    │   ├── router.tsx
    │   ├── screens
    │   │   ├── ArticleIndex.tsx
    │   │   ├── ClusterIndex.tsx
    │   │   ├── ClusterView.tsx
    │   │   ├── Home.tsx
    │   │   ├── Layout.tsx
    │   │   ├── Linker.tsx
    │   │   ├── LinkerRelated.tsx
    │   │   ├── StoryIndex.tsx
    │   │   ├── StoryLinker.tsx
    │   │   └── StoryView.tsx
    │   ├── selectors.ts
    │   ├── services
    │   │   ├── articles.ts
    │   │   ├── clusters.ts
    │   │   ├── config.ts
    │   │   ├── links.ts
    │   │   ├── ontology.ts
    │   │   ├── sites.ts
    │   │   └── stories.ts
    │   ├── store.ts
    │   ├── styles
    │   │   ├── App.scss
    │   │   ├── Article.module.scss
    │   │   ├── Cluster.module.scss
    │   │   ├── Footer.module.scss
    │   │   ├── Layout.module.scss
    │   │   ├── Linker.module.scss
    │   │   ├── Navbar.module.scss
    │   │   ├── Story.module.scss
    │   │   ├── index.scss
    │   │   ├── util.module.scss
    │   │   └── variables.scss
    │   ├── types.ts
    │   └── util.ts
    └── tsconfig.json
├── setup.py
├── sources.json
├── stories.md
└── storyweb
    ├── __init__.py
    ├── app.py
    ├── clean.py
    ├── cli.py
    ├── db.py
    ├── logic
        ├── __init__.py
        ├── articles.py
        ├── clusters.py
        ├── graph.py
        ├── links.py
        ├── predict.py
        ├── stories.py
        └── util.py
    ├── models.py
    ├── ontology.py
    ├── ontology.yml
    ├── parse
        ├── __init__.py
        ├── extract.py
        ├── language.py
        ├── lid.176.ftz
        └── pipeline.py
    ├── routes
        ├── __init__.py
        ├── articles.py
        ├── clusters.py
        ├── links.py
        ├── stories.py
        ├── system.py
        └── util.py
    ├── server.py
    └── settings.py


/.dockerignore:
--------------------------------------------------------------------------------
1 | frontend/node_modules
2 | .mypy_cache
3 | __pycache__
4 | storyweb.egg-info
5 | .envrc


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | 
2 | [*.{ts,tsx,js,jsx}]
3 | indent_style = space
4 | indent_size = 2
5 | charset = utf-8


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates: []
 3 | #   - package-ecosystem: pip
 4 | #     open-pull-requests-limit: 99
 5 | #     directory: "/"
 6 | #     schedule:
 7 | #       interval: weekly
 8 | #   - package-ecosystem: npm
 9 | #     open-pull-requests-limit: 99
10 | #     directory: "/frontend"
11 | #     schedule:
12 | #       interval: weekly
13 | #   - package-ecosystem: docker
14 | #     open-pull-requests-limit: 99
15 | #     directory: "/"
16 | #     schedule:
17 | #       interval: weekly
18 | #   - package-ecosystem: "github-actions"
19 | #     open-pull-requests-limit: 99
20 | #     directory: "/"
21 | #     schedule:
22 | #       interval: weekly
23 | 


--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
 1 | name: docker
 2 | 
 3 | on: [push]
 4 | 
 5 | permissions:
 6 |   packages: write
 7 | 
 8 | jobs:
 9 |   docker-build:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v3
13 |       # - name: Set up QEMU
14 |       #   uses: docker/setup-qemu-action@v2
15 |       - name: Docker meta
16 |         id: meta
17 |         uses: docker/metadata-action@v4
18 |         with:
19 |           images: ghcr.io/opensanctions/storyweb
20 |           tags: |
21 |             type=ref,event=branch
22 |             type=semver,pattern={{version}}
23 |             type=sha
24 |       - name: Set up Docker Buildx
25 |         uses: docker/setup-buildx-action@v2
26 |         with:
27 |           install: true
28 |       - name: Debug information
29 |         run: |
30 |           docker --version
31 |           docker-compose --version
32 |           echo "${GITHUB_REF}"
33 |       - name: Login to GitHub Container Registry
34 |         uses: docker/login-action@v2
35 |         with:
36 |           registry: ghcr.io
37 |           username: ${{ github.actor }}
38 |           password: ${{ secrets.GITHUB_TOKEN }}
39 |       - name: Build and push release
40 |         uses: docker/build-push-action@v4
41 |         with:
42 |           context: .
43 |           # platforms: linux/amd64,linux/arm64
44 |           push: true
45 |           tags: ${{ steps.meta.outputs.tags }}
46 |           labels: ${{ steps.meta.outputs.labels }}
47 |           cache-from: type=gha
48 |           cache-to: type=gha,mode=max
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | *.code-workspace
  6 | *.gexf
  7 | .DS_Store
  8 | .envrc
  9 | data/
 10 | 
 11 | # C extensions
 12 | *.so
 13 | 
 14 | # Distribution / packaging
 15 | .Python
 16 | build/
 17 | develop-eggs/
 18 | dist/
 19 | downloads/
 20 | eggs/
 21 | .eggs/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | pip-wheel-metadata/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 


--------------------------------------------------------------------------------
/DESIGN.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Workflow ideas for StoryWeb
  3 | 
  4 | We want to go from a corpus of media reports to a knowledge graph for a specific set of journalistic stories (called a `Scandal` for now; I guess `StoryLine` also works).
  5 | 
  6 | 
  7 | ## Step 1: Crawling the reporting
  8 | 
  9 | * Contact a bunch of GIJN member orgs to see if I may. Maybe offer a formalised quid pro quo deal ("I can parse your stories, you get story graph data")
 10 | * Build a news crawler in async Python, store everything to a SQL database that allows for incremental crawls.
 11 | * Output articles with metadata (https://schema.org/Article) as a JSONL file.
 12 |     * Requires identifying which pages contain articles
 13 |     * Requires extracting article metadata and body (e.g. via `newspaper`, `trafilatura`)
 14 | 
 15 | 
 16 | ## Step 2: Extract named entities
 17 | 
 18 | * Run a competition between spaCy and SparkNLP, decide if we always want to run both or if it's a per-language decision.
 19 | * Find a disk format for annotated articles, probably going to need:
 20 |     * Every extracted entity and their tag type, span
 21 |     * Every sentence and their spans
 22 | 
 23 | 
 24 | ## Step 3: Build a co-occurrence matrix 
 25 | 
 26 | Get everything into a massive SQL table a la:
 27 | 
 28 | * `article_url`, `sentence_no`, `tag_type`, `tag_label`, `tag_normalised`
 29 | 
 30 | e.g.:
 31 | 
 32 | * `https://rise.md/...`,`6`,`PER`,`Vladimir Plahotniuc`,`vladimir-plahotniuc`
 33 | * `https://rise.md/...`,`16`,`PER`,`Vlad Plahotniuc`,`vlad-plahotniuc`
 34 | * `https://rise.md/...`,`4`,`LOC`,`Moldova`,`md`
 35 | * `https://rise.md/...`,`4`,`ORG`,`Democratic Party`,`democratic-party`
 36 | * `https://istories.ru/...`,`1`,`PER`,`Владимир Плахотнюк`,`vladimir-plahotnuk`
 37 | * `https://istories.ru/...`,`5`,`PER`,`Плахотнюк`,`plahotnuk`
 38 | * `https://istories.ru/...`,`17`,`PER`,`Владимир Плахотнюк`,`vladimir-plahotnuk`
 39 | 
 40 | 
 41 | ## Step 4: Build an entity loom
 42 | 
 43 | The core of an interactive graph entity identity building tool could be an interactive loop like this: 
 44 | 
 45 | * Pick a particularly namey-looking tag that occurs a lot.
 46 | * Show it to a user and prompt them to decide:
 47 |     1. This is a new entity's name, make a new ID (shortuuid)
 48 |     2. This is another surface form of an existing entity, show top 5 search results (ask if it's a strong or weak alias)
 49 | * Focus the user process on the (new) entity
 50 |     * Show co-occurring other tags, including place and date tags
 51 |     * Maybe: show any sentence in which both the tag and an alias of the entity occur
 52 |     * For each tag, prompt the user to say if it's a strong/weak alias, context or related entity or unrelated tag
 53 |     * Allow the user to finish working on this entity and start with a new one
 54 | * Start over.
 55 | 
 56 | Resulting table:
 57 | 
 58 | `entity_id`, `tag_type`, `tag_label`, `tag_normalised`, `role`
 59 | 
 60 | where `role` is one of:
 61 | 
 62 | * `alias` (e.g. `Vladimir Plahotniuc`)
 63 | * `weak_alias` (e.g. `Plahotniuc's`, `the Oligarch`)
 64 | * `context` (e.g. `Moldova` for Plahotniuc)
 65 | * `related` (e.g. `Democratic Party` for Plahotniuc)
 66 | * `unrelated` (e.g. `European Union` for Plahotniuc)
 67 | 
 68 | This process can probably later be partially automated, e.g. if one of the related labels already is part of an existing entity, or by doing string similarity on the aliases.
 69 | 
 70 | ### How to disambiguate?
 71 | 
 72 | This doesn't yet allow us to say that there are two separate `Markus Braun` - one maybe an actor mentioned in a gossip piece, the other the CEO of Wirecard. We basically need a way to fork an entity and say: this alias, in this article - make it part of another entity! 
 73 | 
 74 | ### Clustering
 75 | 
 76 | What I'm describing here is really a clustering process. Need to do some research on what scikit-learn-level machine learning for clustering looks like and how well it might apply.
 77 | 
 78 | ## Step 5: Build a relationship loom
 79 | 
 80 | Similar process as above: take two entities from Step 4 that co-occur in multiple articles, show the user any sentences that mention both and then propose to them to classify their relationship (or do it based on a keyword list, and merely double-check directionality). 
 81 | 
 82 | Categories (tbd):
 83 | 
 84 | * Family
 85 | * Personal associate
 86 | * Business associate
 87 | * Nemesis, opponent, adversary, antagonist (word?)
 88 | * Owner
 89 | * In control of (Director, etc.)
 90 | * Participant
 91 | * Member/Employee
 92 | * Payment, debt, business relationship
 93 | 
 94 | 
 95 | ### Can we model events?
 96 | 
 97 | Media reporting is all about events, do we want to reify them? How can we label events, maybe by deriving key words from the headline?
 98 | 
 99 | 
100 | ## Step 6: Reconcile entities
101 | 
102 | This can maybe already happen in `nomenklatura`:
103 | 
104 | * https://github.com/opensanctions/nomenklatura/blob/master/README.md
105 | 
106 | 
107 | ## Step 7: Visualise, profit! 
108 | 
109 | * What can we compute on the output using NetworkX?
110 | * https://sayari-analytics.github.io/trellis/
111 | 
112 | 
113 | ## Credits
114 | 
115 | * Thanks to [Heleen](https://twitter.com/heleenemanuel) and [Johan](https://johanschuijt.nl/) :) 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM node:19 AS frontend
 2 | 
 3 | RUN mkdir -p /fe
 4 | WORKDIR /fe
 5 | COPY frontend /fe
 6 | RUN npm install
 7 | RUN npm run build
 8 | 
 9 | FROM ubuntu:23.04
10 | ENV DEBIAN_FRONTEND noninteractive
11 | 
12 | LABEL org.opencontainers.image.title "StoryWeb"
13 | LABEL org.opencontainers.image.licenses MIT
14 | LABEL org.opencontainers.image.source https://github.com/opensanctions/storyweb
15 | 
16 | RUN apt-get -qq -y update \
17 |     && apt-get -qq -y upgrade \
18 |     && apt-get -qq -y install locales ca-certificates tzdata curl python3-pip \
19 |     python3-icu python3-cryptography libicu-dev pkg-config postgresql-client-common \
20 |     postgresql-client libpq-dev \
21 |     && apt-get -qq -y autoremove \
22 |     && apt-get clean \
23 |     && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
24 | 
25 | RUN localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \
26 |     && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
27 |     && dpkg-reconfigure -f noninteractive tzdata
28 | 
29 | ENV LANG='en_US.UTF-8' \
30 |     TZ="UTC" \
31 |     API_URL="/api/1"
32 | 
33 | RUN pip install -U pip setuptools wheel
34 | RUN pip install spacy
35 | RUN python3 -m spacy download en_core_web_sm
36 | RUN python3 -m spacy download de_core_news_sm
37 | RUN python3 -m spacy download xx_ent_wiki_sm
38 | RUN python3 -m spacy download ru_core_news_sm
39 | 
40 | RUN mkdir -p /storyweb
41 | WORKDIR /storyweb
42 | COPY . /storyweb
43 | RUN pip install --no-cache-dir -e /storyweb
44 | COPY --from=frontend /fe/build /storyweb/frontend/build
45 | 
46 | CMD ["uvicorn", "--host", "0.0.0.0", "storyweb.server:app"]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022-2023, Friedrich Lindenberg
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | data: data/articles.ijson
 3 | 
 4 | clean:
 5 | 	rm -rf data/articles
 6 | 
 7 | data/articles:
 8 | 	mkdir -p data/articles
 9 | 
10 | data/articles/%.ijson: data/articles
11 | 	curl -o data/articles/$*.ijson -s https://data.opensanctions.org/contrib/mediacrawl/$*.ijson
12 | 
13 | fetch: data/articles/occrp.ijson \
14 | 	data/articles/icij.ijson \
15 | 	data/articles/dossier_at.ijson \
16 | 	data/articles/daphne_foundation.ijson \
17 | 	data/articles/istories_media.ijson \
18 | 	data/articles/amabhungane.ijson
19 | 
20 | data/articles.ijson: fetch
21 | 	cat data/articles/* >data/articles.ijson
22 | 
23 | load: data/articles.ijson
24 | 	storyweb import data/articles.ijson
25 | 
26 | serve:
27 | 	uvicorn --reload storyweb.server:app
28 | 
29 | reset:
30 | 	dropdb storyweb
31 | 	createdb -E utf-8 storyweb
32 | 	storyweb init


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 |   <img width="460" height="300" src="https://storyweb.opensanctions.org/scribble.png">
  3 | </p>
  4 | 
  5 | # StoryWeb
  6 | 
  7 | StoryWeb is a project that aims to extract networks of entities from journalistic reporting. The idea is to reverse engineer stories into structured graphs of the persons and companies involved, and to capture the relationships between them.
  8 | 
  9 | https://storyweb.opensanctions.org
 10 | 
 11 | StoryWeb consumes news articles as input data. Individual articles can be imported via the web interface, but there's also a possibility for bulk import using the [`articledata`](https://github.com/pudo/articledata) micro-format. One producer of `articledata` files is [`mediacrawl`](https://github.com/opensanctions/mediacrawl), which can be used to crawl news websites and harvest all of their articles.
 12 | 
 13 | <p align="center">
 14 |   <img src="https://assets.opensanctions.org/images/articles/storyweb/story-graph.png">
 15 | </p>
 16 | 
 17 | ## Installation
 18 | 
 19 | Storyweb can be run as a Python web application from a developer's machine, or via a docker container. We recommend using docker for any production deployment and as a quick means to get the application running if you don't intend to change its code.
 20 | 
 21 | ### Running in Docker mode
 22 | 
 23 | You can start up a docker instance by running the following commands in an empty directory:
 24 | 
 25 | ```bash
 26 | wget https://raw.githubusercontent.com/opensanctions/storyweb/main/docker-compose.yml
 27 | docker-compose up
 28 | ```
 29 | 
 30 | This will make the storyweb user interface available on port 8000 of the host machine.
 31 | 
 32 | ### Running in development mode
 33 | 
 34 | Before installing storyweb on the host machine, we recommend setting up a Python virtual environment of some form (venv, virtualenv, etc.). 
 35 | 
 36 | As a first step, let's install the `spaCy` models that are used to extract person and company names from the given articles: 
 37 | 
 38 | ```bash
 39 | pip install spacy
 40 | python3 -m spacy download en_core_web_sm
 41 | python3 -m spacy download de_core_news_sm
 42 | python3 -m spacy download xx_ent_wiki_sm
 43 | python3 -m spacy download ru_core_news_sm
 44 | ```
 45 | 
 46 | Next, we'll install the application itself, and its dependencies. Run the following command inside of a git checkout of the storyweb repository:
 47 | 
 48 | ```bash
 49 | pip install -e ".[dev]"
 50 | ```
 51 | 
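 52 | You also need to have a PostgreSQL server running somewhere (e.g. on the same machine, perhaps installed via homebrew or apt). Create a fresh database on that server and point storyweb to it like this (adjust the user, password and host in the URL to match your server; the `db` host shown below is the one used by the docker-compose setup):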
 53 | 
 54 | ```bash
 55 | export STORYWEB_DB_URL=postgresql://storyweb:storyweb@db/storyweb
 56 | # Create the database tables:
 57 | storyweb init
 58 | ```
 59 | 
 60 | You now have the application configured and you can explore the commands exposed by the `storyweb` command-line tool:
 61 | 
 62 | ```
 63 | Usage: storyweb [OPTIONS] COMMAND [ARGS]...
 64 | 
 65 |   Storyweb CLI
 66 | 
 67 | Options:
 68 |   --help  Show this message and exit.
 69 | 
 70 | Commands:
 71 |   auto-merge  Automatically merge on fingerprints
 72 |   compute     Run backend computations
 73 |   graph       Export an entity graph
 74 |   import      Import articles into the DB
 75 |   import-url  Load a single news story by URL
 76 |   init        Initialize the database
 77 | ```
 78 | 
 79 | The `import` command listed here will accept any data file in the `articledata` format, which is emitted by the `mediacrawl` tool.
 80 | 
 81 | #### Running the backend API
 82 | 
 83 | Finally, you can run the backend API using `uvicorn`:
 84 | 
 85 | ```bash
 86 | uvicorn --reload --host 0.0.0.0 storyweb.server:app
 87 | ```
 88 | 
 89 | This will boot up the API server on port 8000 of the local host and enable hot reloads whenever the code changes during development.
 90 | 
 91 | #### Installing and running the frontend
 92 | 
 93 | Once you have the API running, you can install and run the development server for the frontend. Storyweb uses React and ReduxToolkit internally and will use a Webpack dev server to dynamically re-build the frontend during development.
 94 | 
 95 | ```bash
 96 | cd frontend/
 97 | npm install 
 98 | npm run dev
 99 | ```
100 | 
101 | Remember that you need to run `npm run dev` whenever you do frontend development.
102 | 
103 | ## License and credits
104 | 
105 | Thanks to [Heleen Emanuel](https://twitter.com/heleenemanuel) and [Tobias Sterbak](https://tobiassterbak.com/) for their advice on the design and implementation of StoryWeb. 
106 | 
107 | This project receives financial support from the German Federal Ministry for Education and Research (Bundesministerium für Bildung und Forschung, BMBF) under the grant identifier `01IS22S42`. The full responsibility for the content of this publication remains with its authors.
108 | 
109 | The software is licensed under the MIT license, see `LICENSE` in this repository.


--------------------------------------------------------------------------------
/RESEARCH.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ### Bad Will Hunting
 4 | 
 5 | * https://blogs.lse.ac.uk/polis/2022/09/15/bad-will-hunting-the-story-so-far/ 
 6 | 
 7 | * SparkNLP: https://nlp.johnsnowlabs.com/
 8 | * Rosette Text Analytics: https://www.rosette.com/capability/entity-extractor/ 
 9 | 
10 | # Selecting articles from a corpus
11 | 
12 | * https://github.com/asreview/asreview 
13 | 
14 | Tips from Tobias:
15 | 
16 | * https://maartengr.github.io/BERTopic/index.html#quick-start 
17 | * Dimensionality reduction: https://umap-learn.readthedocs.io/en/latest/basic_usage.html
18 | 
19 | * One-class classification: https://en.wikipedia.org/wiki/One-class_classification 
20 |     * Get confidence scores for classification?
21 | * Or: fuzzying out names for other names and see if it can disambiguate
22 | 
23 | 
24 | 
25 | * https://github.com/microsoft/spacy-ann-linker


--------------------------------------------------------------------------------
/contrib/occrp-experiment/crawl.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import asyncio
  3 | import trafilatura
  4 | from datetime import datetime
  5 | from typing import Optional
  6 | import aiohttp
  7 | from lxml import html
  8 | from urllib.parse import urlparse, urljoin
  9 | from sqlmodel import Field, Session, SQLModel, create_engine, select
 10 | 
 11 | 
# Shared crawler state, created once when the module is imported.
# URLs waiting to be fetched: filled by crawl_url(), drained by worker().
queue = asyncio.Queue()
# URLs that were already enqueued, so each one is scheduled at most once.
seen = set()
# SQLite database file holding the Page and Article tables.
engine = create_engine("sqlite:///crawl.sqlite3")
 15 | 
 16 | 
class Page(SQLModel, table=True):
    """A URL that has been fetched, with the decoded response body if any."""

    # Cleaned URL of the page; primary key of the table.
    url: Optional[str] = Field(primary_key=True)
    # UTF-8 decoded body; None when get_page() did not keep a body (non-200
    # status, non-HTML content type, or undecodable bytes).
    text: Optional[str]
    # get_page() always stores False here.
    is_article: bool
    # Naive UTC timestamp taken when the row was created.
    crawled_at: datetime
 22 | 
 23 | 
class Article(SQLModel, table=True):
    """Metadata and body text extracted from a page classified as an article."""

    # URL of the page the article was extracted from; primary key.
    url: Optional[str] = Field(primary_key=True)
    # Extracted title with the " - OCCRP" suffix removed and whitespace stripped.
    title: Optional[str]
    # Author, date and text are stored exactly as the extractor returned them.
    author: Optional[str]
    date: Optional[str]
    text: Optional[str]
 30 | 
 31 | 
 32 | async def clean_url(url):
 33 |     parsed = urlparse(url)
 34 |     if parsed.scheme != "https":
 35 |         return
 36 |     if parsed.hostname not in ["occrp.org", "www.occrp.org"]:
 37 |         return None
 38 |     if parsed.path.startswith("/ru/"):
 39 |         return None
 40 |     parsed = parsed._replace(query=None)
 41 |     parsed = parsed._replace(fragment=None)
 42 |     url = parsed.geturl()
 43 |     return url
 44 | 
 45 | 
 46 | async def crawl_url(url):
 47 |     url = await clean_url(url)
 48 |     if url is None:
 49 |         return
 50 |     if url in seen:
 51 |         return
 52 |     seen.add(url)
 53 |     await queue.put(url)
 54 | 
 55 | 
 56 | async def get_page(db: Session, session: aiohttp.ClientSession, url: str):
 57 |     statement = select(Page).where(Page.url == url)
 58 |     page = db.exec(statement).first()
 59 |     if page is not None:
 60 |         return page
 61 | 
 62 |     async with session.get(url) as response:
 63 |         content_type = response.headers.get("Content-Type")
 64 |         text = None
 65 |         if response.status == 200:
 66 |             if content_type is None or "html" in content_type.lower():
 67 |                 # print("CONTENT_TYPE", content_type)
 68 |                 data = await response.read()
 69 |                 # print("FETCHED", url, response.headers.get("Content-Type"))
 70 |                 try:
 71 |                     text = data.decode("utf-8")
 72 |                 except UnicodeDecodeError as exc:
 73 |                     # text = None
 74 |                     pass
 75 |         page = Page(
 76 |             url=url,
 77 |             text=text,
 78 |             is_article=False,
 79 |             crawled_at=datetime.utcnow(),
 80 |         )
 81 |         db.add(page)
 82 |         db.commit()
 83 |         return page
 84 | 
 85 | 
 86 | def is_article(doc):
 87 |     if doc.find('.//article//li[@class="authors"]') is not None:
 88 |         return True
 89 |     if doc.find('.//aside[@class="byline"]') is not None:
 90 |         return True
 91 |     if doc.find('.//section[@class="blog"]') is not None:
 92 |         return True
 93 |     if doc.find('.//div[@class="occrp-story"]') is not None:
 94 |         return True
 95 |     return False
 96 | 
 97 | 
 98 | async def extract_article(db: Session, page: Page, doc):
 99 |     extract = trafilatura.bare_extraction(doc)
100 |     statement = select(Article).where(Article.url == page.url)
101 |     article = db.exec(statement).first()
102 |     if article is None:
103 |         article = Article(url=page.url)
104 |     title = extract.get("title")
105 |     if title is not None:
106 |         title = title.replace(" - OCCRP", "")
107 |         article.title = title.strip()
108 |     article.date = extract.get("date")
109 |     article.text = extract.get("text")
110 |     article.author = extract.get("author")
111 |     # print(list(extract.keys()))
112 |     print("ARTICLE", page.url, extract.get("title"))
113 |     db.add(article)
114 |     db.commit()
115 | 
116 | 
async def worker(session: aiohttp.ClientSession):
    """Process queued URLs forever: fetch, follow links, extract articles.

    For every URL taken from ``queue`` the page is loaded via ``get_page``,
    each ``<a href>`` is resolved against the page URL and handed to
    ``crawl_url``, and pages recognised by ``is_article`` are passed to
    ``extract_article``. Errors for a single URL are printed and do not stop
    the worker. The loop only ends when the task is cancelled.
    """
    while True:
        # Wait for work before opening a DB session and outside the
        # try/finally: a cancellation while idle must not call task_done().
        url = await queue.get()
        try:
            with Session(engine) as db:
                page = await get_page(db, session, url)
                if page is None or page.text is None:
                    continue
                doc = html.fromstring(page.text)
                for link in doc.findall(".//a"):
                    href = link.get("href")
                    if href is None:
                        continue
                    await crawl_url(urljoin(url, href))
                if is_article(doc):
                    await extract_article(db, page, doc)
        except Exception as exc:
            # Best-effort crawl: report the failure and move to the next URL.
            print("EXCEPTION", exc)
        finally:
            # Always balance the get() above so queue.join() can complete.
            queue.task_done()
142 | 
143 | 
async def crawl():
    """Crawl occrp.org with ten concurrent workers until the queue is empty.

    Creates the database tables, seeds the queue with the site root, waits
    for every queued URL to be processed, then cancels the workers.
    """
    SQLModel.metadata.create_all(engine)
    request_headers = {"User-Agent": "pudo from the hood"}
    async with aiohttp.ClientSession(headers=request_headers) as session:
        await crawl_url("https://occrp.org")
        workers = [asyncio.create_task(worker(session)) for _ in range(10)]

        await queue.join()
        # Workers loop forever, so they have to be cancelled explicitly.
        for running in workers:
            running.cancel()
        await asyncio.gather(*workers, return_exceptions=True)
158 | 
159 | 
async def export():
    """Write every stored ``Article`` row to ``articles.json`` as a JSON list."""
    with open("articles.json", "w") as fh, Session(engine) as db:
        rows = db.exec(select(Article)).all()
        payload = [row.dict() for row in rows]
        json.dump(payload, fh)
167 | 
168 | 
def main():
    """Script entry point: export the stored articles to ``articles.json``.

    Only ``export()`` is run here; ``crawl()`` is not invoked by this function.
    """
    asyncio.run(export())


# Run the export only when the file is executed as a script.
if __name__ == "__main__":
    main()
175 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
# Compose setup with two services: a PostgreSQL database (`db`) and the
# StoryWeb application server (`app`).
version: "3.9"

services:
  # PostgreSQL 15 database used by the app service.
  db:
    image: postgres:15
    expose:
      - "5432"
    container_name: db
    environment:
      POSTGRES_USER: storyweb
      POSTGRES_PASSWORD: storyweb
      POSTGRES_DB: storyweb
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      # Named volume (declared at the bottom of this file) holding the data directory.
      - db-data:/var/lib/postgresql/data
      # - "./schema.sql:/docker-entrypoint-initdb.d/storyweb-schema.sql"
    deploy:
      restart_policy:
        condition: on-failure

  # StoryWeb server, built from the Dockerfile in this directory.
  app:
    build: .
    image: ghcr.io/opensanctions/storyweb:main
    # Poll every 2 seconds until db:5432 accepts TCP connections, then run
    # `storyweb init` and start uvicorn with 3 workers.
    command: bash -c 'while !</dev/tcp/db/5432; do sleep 2; done; storyweb init; uvicorn --host 0.0.0.0 --workers 3 storyweb.server:app'
    ports:
      - "0.0.0.0:8000:8000"
    hostname: app
    environment:
      # User, password and database name match the POSTGRES_* values of the db service.
      STORYWEB_DB_URL: postgresql://storyweb:storyweb@db/storyweb

volumes:
  db-data:
36 | 


--------------------------------------------------------------------------------
/docs/CNAME:
--------------------------------------------------------------------------------
1 | storyweb.opensanctions.org


--------------------------------------------------------------------------------
/docs/scribble.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/docs/scribble.png


--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 2 | 
 3 | # dependencies
 4 | /node_modules
 5 | /.pnp
 6 | .pnp.js
 7 | 
 8 | # testing
 9 | /coverage
10 | 
11 | # production
12 | /build
13 | 
14 | # misc
15 | .DS_Store
16 | .env.local
17 | .env.development.local
18 | .env.test.local
19 | .env.production.local
20 | 
21 | npm-debug.log*
22 | yarn-debug.log*
23 | yarn-error.log*
24 | 


--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "frontend",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "dependencies": {
 6 |     "@blueprintjs/core": "^4.15.1",
 7 |     "@blueprintjs/popover2": "^1.13.3",
 8 |     "@blueprintjs/select": "^4.9.3",
 9 |     "@react-sigma/core": "^3.2.0",
10 |     "@react-sigma/layout-circular": "^3.2.0",
11 |     "@react-sigma/layout-core": "^3.1.0",
12 |     "@react-sigma/layout-force": "^3.2.0",
13 |     "@react-sigma/layout-forceatlas2": "^3.2.0",
14 |     "@reduxjs/toolkit": "^1.9.3",
15 |     "@testing-library/react": "^14.0.0",
16 |     "@testing-library/user-event": "^14.4.3",
17 |     "@types/node": "^18.14.2",
18 |     "@types/react": "^18.0.28",
19 |     "@types/react-dom": "^18.0.11",
20 |     "@types/react-redux": "^7.1.25",
21 |     "@types/react-router-dom": "^5.3.3",
22 |     "graphology": "^0.25.1",
23 |     "graphology-gexf": "^0.10.3",
24 |     "graphology-types": "^0.24.7",
25 |     "lodash": "^4.17.21",
26 |     "query-string": "^8.1.0",
27 |     "react": "^18.2.0",
28 |     "react-dom": "^18.2.0",
29 |     "react-helmet": "^6.1.0",
30 |     "react-redux": "^8.0.5",
31 |     "react-router-dom": "^6.8.1",
32 |     "react-scripts": "^5.0.1",
33 |     "redux-thunk": "^2.4.2",
34 |     "sass": "^1.58.3",
35 |     "sigma": "^2.4.0",
36 |     "typescript": "^4.9.5"
37 |   },
38 |   "scripts": {
39 |     "start": "react-scripts start",
40 |     "build": "react-scripts build",
41 |     "eject": "react-scripts eject"
42 |   },
43 |   "proxy": "http://127.0.0.1:8000",
44 |   "eslintConfig": {
45 |     "extends": [
46 |       "react-app"
47 |     ]
48 |   },
49 |   "browserslist": {
50 |     "production": [
51 |       ">0.2%",
52 |       "not dead",
53 |       "not op_mini all"
54 |     ],
55 |     "development": [
56 |       "last 1 chrome version",
57 |       "last 1 firefox version",
58 |       "last 1 safari version"
59 |     ]
60 |   },
61 |   "devDependencies": {
62 |     "@types/react-helmet": "^6.1.6"
63 |   }
64 | }
65 | 


--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/frontend/public/favicon.ico


--------------------------------------------------------------------------------
/frontend/public/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8" />
 5 |     <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
 7 |     <meta name="theme-color" content="#000000" />
 8 |     <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
 9 |     <title>StoryWeb</title>
10 |   </head>
11 |   <body>
12 |     <noscript>You need to enable JavaScript to run this app.</noscript>
13 |     <div id="root"></div>
14 |   </body>
15 | </html>
16 | 


--------------------------------------------------------------------------------
/frontend/src/App.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { RouterProvider } from 'react-router-dom';
 3 | import { Provider } from 'react-redux'
 4 | import { FocusStyleManager } from "@blueprintjs/core"
 5 | 
 6 | import { store } from './store';
 7 | import { router } from './router';
 8 | 
 9 | import './styles/App.scss';
10 | 
// Module-level Blueprint setup, executed once when this module is loaded.
// NOTE(review): per the Blueprint docs this hides focus outlines during mouse
// interaction and shows them for keyboard (Tab) navigation — confirm.
FocusStyleManager.onlyShowFocusOnTabs();
12 | 
/** Root component: provides the Redux store and mounts the application router. */
function App() {
  const routes = <RouterProvider router={router} />;
  return <Provider store={store}>{routes}</Provider>;
}
20 | 
21 | export default App;
22 | 


--------------------------------------------------------------------------------
/frontend/src/components/ArticleClusters.tsx:
--------------------------------------------------------------------------------
 1 | import { HTMLTable } from "@blueprintjs/core";
 2 | import { Link } from "react-router-dom";
 3 | import { useFetchClusterListingQuery } from "../services/clusters";
 4 | import { IArticle } from "../types";
 5 | import { getClusterLink } from "../util";
 6 | import { ClusterLabel, ClusterTypeIcon, ErrorSection, SectionLoading } from "./util";
 7 | 
 8 | type ArticleClustersProps = {
 9 |   article: IArticle
10 | }
11 | 
12 | export default function ArticleClusters({ article }: ArticleClustersProps) {
13 |   const query = { article: article.id, limit: 100 };
14 |   const { data: clusters, isLoading, error: clustersError } = useFetchClusterListingQuery(query);
15 |   if (clustersError) {
16 |     return <ErrorSection title="Could not load the article" />
17 |   }
18 |   if (isLoading || clusters === undefined) {
19 |     return <SectionLoading />;
20 |   }
21 | 
22 |   return (
23 |     <HTMLTable condensed bordered className="wide">
24 |       <thead>
25 |         <tr>
26 |           <th>
27 |             Name
28 |           </th>
29 |         </tr>
30 |       </thead>
31 |       <tbody>
32 |         {clusters.results.map((cluster) =>
33 |           <tr key={cluster.id}>
34 |             <td>
35 |               <ClusterTypeIcon type={cluster.type} size={14} />
36 |               <Link to={getClusterLink(cluster)}>
37 |                 <ClusterLabel label={cluster.label} />
38 |               </Link>
39 |             </td>
40 | 
41 |           </tr>
42 |         )}
43 |       </tbody>
44 |     </HTMLTable>
45 |   )
46 | };


--------------------------------------------------------------------------------
/frontend/src/components/ArticleCorefList.tsx:
--------------------------------------------------------------------------------
 1 | import { useState } from "react";
 2 | import { Menu, MenuItem } from '@blueprintjs/core';
 3 | 
 4 | import { useFetchArticleListingQuery } from "../services/articles"
 5 | import ArticleDrawer from "./ArticleDrawer";
 6 | import { IArticle } from "../types";
 7 | import { ARTICLE_ICON } from "../constants";
 8 | 
 9 | type ArticleCorefListProps = {
10 |   clusters: string[]
11 |   tags: string[][]
12 | }
13 | 
14 | export default function ArticleCorefList({ clusters, tags }: ArticleCorefListProps) {
15 |   const [articleId, setArticleId] = useState<string | undefined>()
16 |   const articleParams = { cluster: clusters };
17 |   const { data, isLoading } = useFetchArticleListingQuery(articleParams);
18 |   if (isLoading || data === undefined) {
19 |     return null;
20 |   }
21 | 
22 |   const onClick = (e: React.MouseEvent<HTMLElement>, article: IArticle) => {
23 |     e.preventDefault();
24 |     setArticleId(article.id);
25 |   }
26 | 
27 |   return (
28 |     <>
29 |       <Menu>
30 |         {data.results.map((a) => (
31 |           <MenuItem
32 |             icon={ARTICLE_ICON}
33 |             key={a.id}
34 |             onClick={(e) => onClick(e, a)}
35 |             text={a.title} active={a.id === articleId} />
36 |         ))
37 |         }
38 |       </Menu>
39 |       <ArticleDrawer
40 |         onClose={(e) => setArticleId(undefined)}
41 |         articleId={articleId}
42 |         tags={tags}
43 |       />
44 |     </>
45 |   )
46 | };


--------------------------------------------------------------------------------
/frontend/src/components/ArticleDrawer.tsx:
--------------------------------------------------------------------------------
  1 | import { Drawer, Tab, Tabs } from "@blueprintjs/core"
  2 | import { SyntheticEvent, useEffect, useState } from "react"
  3 | import { ARTICLE_ICON } from "../constants"
  4 | import { useFetchArticleQuery } from "../services/articles"
  5 | import { useFetchClusterListingQuery } from "../services/clusters"
  6 | import ArticleText from "./ArticleText"
  7 | import { ErrorSection, NumericTag, SectionLoading } from "./util"
  8 | 
  9 | import styles from '../styles/Article.module.scss'
 10 | import ArticleClusters from "./ArticleClusters"
 11 | 
 12 | type ArticleDrawerInnerProps = {
 13 |   articleId: string,
 14 |   tags?: string[][]
 15 |   isOpen: boolean,
 16 |   onClose: (event: SyntheticEvent<HTMLElement>) => void
 17 |   onClosed: (node: HTMLElement) => void
 18 | }
 19 | 
 20 | function ArticleDrawerInner({ articleId, tags, isOpen, onClose, onClosed }: ArticleDrawerInnerProps) {
 21 |   const { data: article, error: articleError } = useFetchArticleQuery(articleId);
 22 |   const clustersQuery = { article: articleId, limit: 0 };
 23 |   const { data: clusters } = useFetchClusterListingQuery(clustersQuery);
 24 |   const realTags = tags ? tags : []
 25 |   const realIsOpen = isOpen && articleId.trim().length > 1;
 26 | 
 27 |   return (
 28 |     <Drawer
 29 |       size={"40%"}
 30 |       isOpen={realIsOpen}
 31 |       onClose={onClose}
 32 |       onClosed={onClosed}
 33 |       icon={ARTICLE_ICON}
 34 |       hasBackdrop={false}
 35 |       autoFocus
 36 |       enforceFocus
 37 |       title={article ? article.title : 'No article'}
 38 |     >
 39 |       <div className={styles.articleDrawer}>
 40 |         {(article === undefined) && (
 41 |           <SectionLoading />
 42 |         )}
 43 |         {(articleError !== undefined) && (
 44 |           <ErrorSection title="Could not load the article" />
 45 |         )}
 46 |         {article && (
 47 |           <Tabs id="articleView" defaultSelectedTabId="text">
 48 |             <Tab
 49 |               id="text"
 50 |               key="text"
 51 |               title="Text"
 52 |               panel={
 53 |                 <ArticleText text={article.text} tags={realTags} />
 54 |               }
 55 |             />
 56 |             <Tab
 57 |               id="entities"
 58 |               key="entities"
 59 |               title={
 60 |                 <>
 61 |                   Extracted entities
 62 |                   <NumericTag value={clusters?.total} className="tab-tag" />
 63 |                 </>
 64 |               }
 65 |               panel={
 66 |                 <ArticleClusters article={article} />
 67 |               }
 68 |             />
 69 |           </Tabs>
 70 |         )}
 71 | 
 72 |       </div>
 73 |     </Drawer>
 74 |   )
 75 | }
 76 | 
 77 | type ArticleDrawerProps = {
 78 |   articleId?: string,
 79 |   tags?: string[][]
 80 |   onClose: (event: SyntheticEvent<HTMLElement>) => void
 81 | }
 82 | 
 83 | export default function ArticleDrawer({ articleId, tags, onClose }: ArticleDrawerProps) {
 84 |   const isOpen = !!articleId;
 85 |   const [activeArticleId, setActiveArticleId] = useState<string | undefined>(articleId);
 86 | 
 87 |   useEffect(() => {
 88 |     if (!!articleId && articleId !== activeArticleId) {
 89 |       setActiveArticleId(articleId);
 90 |     }
 91 |   }, [articleId, activeArticleId])
 92 | 
 93 |   const onClosed = () => {
 94 |     setActiveArticleId(undefined);
 95 |   }
 96 | 
 97 |   if (activeArticleId === undefined) {
 98 |     return null;
 99 |   }
100 | 
101 |   return (
102 |     <ArticleDrawerInner
103 |       articleId={activeArticleId}
104 |       onClose={onClose}
105 |       onClosed={onClosed}
106 |       isOpen={isOpen}
107 |       tags={tags}
108 |     />
109 |   );
110 | }


--------------------------------------------------------------------------------
/frontend/src/components/ArticlePreview.tsx:
--------------------------------------------------------------------------------
 1 | import { AnchorButton } from "@blueprintjs/core";
 2 | import { useFetchArticleQuery } from "../services/articles"
 3 | import ArticleText from "./ArticleText";
 4 | 
 5 | type ArticlePreviewProps = {
 6 |   articleId: string,
 7 |   tags: string[][]
 8 | }
 9 | 
10 | export default function ArticlePreview({ articleId, tags }: ArticlePreviewProps) {
11 |   const { data, isLoading } = useFetchArticleQuery(articleId);
12 |   if (isLoading || data === undefined) {
13 |     return null;
14 |   }
15 | 
16 |   return (
17 |     <div>
18 |       <h3>
19 |         <AnchorButton icon="document" minimal href={`/articles/${articleId}`} />
20 |         {data.title}
21 |       </h3>
22 |       <ArticleText text={data.text} tags={tags} />
23 |     </div>
24 |   )
25 | };


--------------------------------------------------------------------------------
/frontend/src/components/ArticleStoryEditor.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, Intent, Menu, MenuItem } from "@blueprintjs/core";
 2 | import { Popover2, Classes as Popover2Classes } from "@blueprintjs/popover2";
 3 | import { MouseEvent } from "react";
 4 | import { useFetchStoryListingQuery, useToggleStoryArticleMutation } from "../services/stories";
 5 | import { IArticle, IStory } from "../types";
 6 | 
 7 | type ArticleStoryEditorContentProps = {
 8 |   article: IArticle
 9 | }
10 | 
11 | function ArticleStoryEditorContent({ article }: ArticleStoryEditorContentProps) {
12 |   const { data: allListing } = useFetchStoryListingQuery({ limit: 100 });
13 |   const { data: linkedListing } = useFetchStoryListingQuery({ limit: 100, article: article.id });
14 |   const [toggleStoryArticle] = useToggleStoryArticleMutation();
15 | 
16 | 
17 |   const linkedIds = linkedListing?.results.map((s) => s.id) || [];
18 |   const onToggleAssign = async (e: MouseEvent, story: IStory) => {
19 |     await toggleStoryArticle({ story: story.id, article: article.id }).unwrap();
20 |   }
21 |   if (allListing === undefined) {
22 |     return null;
23 |   }
24 | 
25 |   return (
26 |     <Menu>
27 |       {allListing.results.map((story) =>
28 |         <MenuItem
29 |           key={story.id}
30 |           text={story.title}
31 |           onClick={(e) => onToggleAssign(e, story)}
32 |           intent={linkedIds.indexOf(story.id) === -1 ? Intent.NONE : Intent.SUCCESS}
33 |           icon={linkedIds.indexOf(story.id) === -1 ? "small-minus" : "small-tick"}
34 |         />
35 |       )}
36 |     </Menu>
37 |   );
38 | }
39 | 
40 | 
type ArticleStoryManagerProps = {
  article: IArticle
  inList: boolean
}

/**
 * Button that opens, on click, a popover with the story assignment menu
 * for `article`. With `inList` set the button is rendered minimal and small.
 */
export default function ArticleStoryEditor({ article, inList }: ArticleStoryManagerProps) {
  const menu = <ArticleStoryEditorContent article={article} />;
  const trigger = <Button icon="add-to-artifact" minimal={inList} small={inList} />;

  return (
    <Popover2
      content={menu}
      interactionKind="click"
      popoverClassName={Popover2Classes.POPOVER2_CONTENT_SIZING}
      placement="auto"
    >
      {trigger}
    </Popover2>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/ArticleText.tsx:
--------------------------------------------------------------------------------
 1 | import classnames from 'classnames';
 2 | 
 3 | import styles from '../styles/Article.module.scss'
 4 | 
// Highlight class names. ArticleText picks one per tag group by the group's
// index, wrapping around when there are more groups than classes.
const CLASSES = ['markup1', 'markup2', 'markup3', 'markup4', 'markup5']

type ArticleTextProps = {
  // Article body; it is split on "\n" and each non-trivial line becomes a paragraph.
  text: string
  // Groups of name forms; all forms of one group are highlighted with the same class.
  tags: string[][]
}
11 | 
/**
 * Convert a name into regular-expression source that matches it literally.
 *
 * The name is trimmed, regex metacharacters are escaped, and every space is
 * replaced by `\s` so that it matches any single whitespace character.
 *
 * @param text a name form to highlight
 * @returns pattern source suitable for `new RegExp(...)`
 */
function cleanName(text: string): string {
  return text
    .trim()
    // Escape metacharacters so names like "A.B. (Holdings)" match literally
    // instead of being interpreted as (or rejected as invalid) patterns.
    // See https://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex
    .replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // $& means the whole matched string
    // Global flag: a string pattern would only replace the first space.
    .replace(/ /g, '\\s')
}
19 | 
/**
 * Tell whether `text` is usable as a highlight pattern: it must be
 * non-empty and compile as a regular expression with the `muig` flags.
 */
function checkName(text: string): boolean {
  if (!text) {
    return false;
  }
  let compiles = true;
  try {
    new RegExp(text, 'muig')
  } catch {
    compiles = false;
  }
  return compiles;
}
31 | 
/**
 * Render article text as paragraphs with the given name forms highlighted.
 *
 * Each entry of `tags` is a group of alternative forms; every match of a
 * group is wrapped in a `<span class="markup markupN">`, with the class
 * chosen by the group's index. The text is then split on newlines and each
 * line longer than one character (after trimming) becomes a `<p>`.
 */
export default function ArticleText({ text, tags }: ArticleTextProps) {
  let html = text;
  tags.forEach((forms, index) => {
    const alternatives = forms.map(cleanName).filter(checkName).join('|');
    if (alternatives.length === 0) {
      // No usable form in this group. The pattern would be `()`, which
      // matches the empty string at every position and would inject an
      // empty span between all characters of the text.
      return;
    }
    const altRx = new RegExp(`(${alternatives})`, 'muig')
    const clazz = CLASSES[index % CLASSES.length];
    html = html.replaceAll(altRx, (m) => `<span class="markup ${clazz}">${m}</span>`);
  })
  // html = html.replaceAll(/\n/g, '<br />\n');
  const paragraphs = html.split('\n').filter((p) => p.trim().length > 1);
  const paraHtml = paragraphs.join('</p><p>')

  // NOTE(review): `text` is inserted as raw HTML without escaping. If article
  // text can contain markup from untrusted sources this is an XSS vector —
  // confirm that the text is sanitized upstream.
  return (
    <div className={classnames('bp4-running-text', styles.articleText)} dangerouslySetInnerHTML={{ __html: `<p>${paraHtml}</p>` }} />
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/ClusterArticles.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, HTMLTable } from "@blueprintjs/core";
 2 | import { MouseEvent } from "react";
 3 | import { Link, useSearchParams } from "react-router-dom";
 4 | import { useFetchArticleListingQuery } from "../services/articles";
 5 | import { useUntagArticleMutation } from "../services/links";
 6 | import { IArticle, IClusterDetails } from "../types";
 7 | import { useListingPagination } from "../util";
 8 | import ArticleDrawer from "./ArticleDrawer";
 9 | import Pagination from "./Pagination";
10 | import { SectionLoading } from "./util";
11 | 
12 | 
type ClusterArticlesProps = {
  cluster: IClusterDetails,
}

/**
 * Paginated table of the articles listed for a cluster.
 *
 * Clicking a title stores the article ID in the `article` search parameter,
 * which makes the ArticleDrawer show that article with the cluster's labels
 * as highlight tags. The "Split" button untags the article from the cluster.
 * A loading placeholder is shown while the listing loads or an untag runs.
 */
export default function ClusterArticles({ cluster }: ClusterArticlesProps) {
  const page = useListingPagination('articles');
  const { data: listing, isLoading } = useFetchArticleListingQuery({ ...page, cluster: cluster.id });
  const [params, setParams] = useSearchParams();
  const [untagArticleMutation, { isLoading: isUntagging }] = useUntagArticleMutation();
  const articleId = params.get('article') || undefined;

  if (isUntagging || isLoading || listing === undefined) {
    return <SectionLoading />
  }

  // Keep all other search parameters; an empty string stands for "no article".
  const setArticle = (selected?: string) => {
    const current = Object.fromEntries(params.entries());
    setParams({ ...current, article: selected || '' });
  }

  const showArticle = (e: MouseEvent<HTMLAnchorElement>, selected: string) => {
    e.preventDefault();
    setArticle(selected);
  }

  const untagArticle = async (article: IArticle) => {
    await untagArticleMutation({ cluster: cluster.id, article: article.id }).unwrap()
  }

  const rows = listing.results.map((article) => (
    <tr key={article.id}>
      <td>
        <Link
          onClick={(e) => showArticle(e, article.id)}
          to={`/articles?article=${article.id}`}
        >
          {article.title}
        </Link>
      </td>
      <td>{article.site}</td>
      <td>
        <Button
          onClick={() => untagArticle(article)}
          icon="unresolve"
          minimal
          small
        />
      </td>
    </tr>
  ));

  return (
    <>
      <HTMLTable condensed bordered className="wide">
        <thead>
          <tr>
            <th>Title</th>
            <th>Site</th>
            <th>Split</th>
          </tr>
        </thead>
        <tbody>{rows}</tbody>
      </HTMLTable>
      <Pagination prefix='articles' response={listing} />
      <ArticleDrawer
        onClose={() => setArticle(undefined)}
        articleId={articleId}
        tags={[[cluster.label, ...cluster.labels]]}
      />
    </>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/ClusterButtonGroup.tsx:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { AnchorButton, Button, ButtonGroup, Intent } from "@blueprintjs/core";
 3 | import { useExplodeClusterMutation } from "../services/links";
 4 | import { ICluster } from "../types";
 5 | import { getLinkLoomLink } from "../util";
 6 | import { LINKER_ICON } from '../constants';
 7 | 
 8 | type ClusterButtonGroupProps = {
 9 |   cluster: ICluster,
10 | }
11 | 
12 | export default function ClusterButtonGroup({ cluster }: ClusterButtonGroupProps) {
13 |   const [explodeCluster, { isLoading: isExploding }] = useExplodeClusterMutation();
14 | 
15 |   const onExplode = async () => {
16 |     await explodeCluster(cluster.id).unwrap();
17 |   };
18 | 
19 |   const disabled = isExploding;
20 |   const googleUrl = queryString.stringifyUrl({ url: 'https://www.google.com/search', query: { q: cluster.label } });
21 |   const sanctionsUrl = queryString.stringifyUrl({ url: 'https://www.opensanctions.org/search', query: { q: cluster.label } });
22 |   return (
23 |     <ButtonGroup>
24 |       <AnchorButton icon={LINKER_ICON} intent={Intent.PRIMARY} href={getLinkLoomLink(cluster)} disabled={disabled}>
25 |         Build web
26 |       </AnchorButton>
27 |       <AnchorButton href={googleUrl} disabled={disabled} icon="search-text">Google</AnchorButton>
28 |       <AnchorButton href={sanctionsUrl} disabled={disabled} icon="search-text">OpenSanctions</AnchorButton>
29 |       <Button icon="graph-remove" intent={Intent.DANGER} onClick={onExplode} disabled={disabled}>Explode</Button>
30 |     </ButtonGroup>
31 |   )
32 | }


--------------------------------------------------------------------------------
/frontend/src/components/ClusterDrawer.tsx:
--------------------------------------------------------------------------------
  1 | import { Drawer, Tab, Tabs } from "@blueprintjs/core"
  2 | import { SyntheticEvent, useEffect, useState } from "react"
  3 | import { useFetchArticleListingQuery } from "../services/articles"
  4 | import { useFetchClusterQuery, useFetchRelatedClusterListingQuery, useFetchSimilarClusterListingQuery } from "../services/clusters"
  5 | import { ErrorSection, NumericTag, SectionLoading } from "./util"
  6 | 
  7 | import styles from '../styles/Cluster.module.scss'
  8 | import { useFetchOntologyQuery } from "../services/ontology"
  9 | import { useNodeTypes } from "../selectors"
 10 | import RelatedListing from "./RelatedListing"
 11 | import SimilarListing from "./SimilarListing"
 12 | import ClusterArticles from "./ClusterArticles"
 13 | 
 14 | type ClusterDrawerInnerProps = {
 15 |   clusterId: string,
 16 |   isOpen: boolean,
 17 |   onClose: (event: SyntheticEvent<HTMLElement>) => void
 18 |   onClosed: (node: HTMLElement) => void
 19 | }
 20 | 
 21 | function ClusterDrawerInner({ clusterId, isOpen, onClose }: ClusterDrawerInnerProps) {
 22 |   const nodeTypes = useNodeTypes();
 23 |   const { data: cluster, error: clusterError } = useFetchClusterQuery(clusterId);
 24 |   const relatedQuery = { clusterId: clusterId || '', params: { types: nodeTypes } };
 25 |   const { data: related } = useFetchRelatedClusterListingQuery(relatedQuery)
 26 |   const similarQuery = { clusterId: clusterId || '', params: {} };
 27 |   const { data: similar } = useFetchSimilarClusterListingQuery(similarQuery);
 28 |   const articleQuery = { cluster: clusterId };
 29 |   const { data: articles } = useFetchArticleListingQuery(articleQuery);
 30 |   const { data: ontology } = useFetchOntologyQuery();
 31 |   const meta = ontology?.cluster_types.find((t) => t.name === cluster?.type);
 32 |   const icon = meta?.icon || 'hat';
 33 |   const realIsOpen = isOpen && clusterId.trim().length > 1;
 34 | 
 35 |   return (
 36 |     <Drawer
 37 |       size={"40%"}
 38 |       isOpen={realIsOpen}
 39 |       onClose={onClose}
 40 |       icon={icon as any}
 41 |       hasBackdrop={false}
 42 |       autoFocus
 43 |       enforceFocus
 44 |       portalClassName="portal-z-top"
 45 |       title={cluster ? cluster.label : 'No entity'}
 46 |     >
 47 |       <div className={styles.clusterDrawer}>
 48 |         {(cluster === undefined) && (
 49 |           <SectionLoading />
 50 |         )}
 51 |         {(clusterError !== undefined) && (
 52 |           <ErrorSection title="Could not load the entity" />
 53 |         )}
 54 |         {cluster && (
 55 |           <Tabs id="clusterView">
 56 |             <Tab id="related"
 57 |               title={
 58 |                 <>
 59 |                   Co-occurring
 60 |                   <NumericTag value={related?.total} className="tab-tag" />
 61 |                 </>
 62 |               }
 63 |               panel={
 64 |                 <RelatedListing cluster={cluster} />
 65 |               }
 66 |             />
 67 |             <Tab id="similar"
 68 |               title={
 69 |                 <>
 70 |                   Similar
 71 |                   <NumericTag value={similar?.total} className="tab-tag" />
 72 |                 </>
 73 |               }
 74 |               disabled={similar?.total === 0}
 75 |               panel={
 76 |                 <SimilarListing cluster={cluster} />
 77 |               }
 78 |             />
 79 |             <Tab id="articles"
 80 |               title={
 81 |                 <>
 82 |                   Articles
 83 |                   <NumericTag value={articles?.total} className="tab-tag" />
 84 |                 </>
 85 |               }
 86 |               panel={
 87 |                 <ClusterArticles cluster={cluster} />
 88 |               }
 89 |             />
 90 |           </Tabs>
 91 |         )}
 92 |       </div>
 93 |     </Drawer >
 94 |   )
 95 | }
 96 | 
 97 | 
 98 | 
 99 | type ClusterDrawerProps = {
100 |   clusterId?: string,
101 |   onClose: (event: SyntheticEvent<HTMLElement>) => void
102 | }
103 | 
104 | export default function ClusterDrawer({ clusterId, onClose }: ClusterDrawerProps) {
105 |   const isOpen = !!clusterId;
106 |   const [activeClusterId, setActiveClusterId] = useState<string | undefined>(clusterId);
107 | 
108 |   useEffect(() => {
109 |     if (!!clusterId && clusterId !== activeClusterId) {
110 |       setActiveClusterId(clusterId);
111 |     }
112 |   }, [clusterId, activeClusterId])
113 | 
114 |   const onClosed = () => {
115 |     setActiveClusterId(undefined);
116 |   }
117 | 
118 |   if (activeClusterId === undefined) {
119 |     return null;
120 |   }
121 | 
122 |   return (
123 |     <ClusterDrawerInner
124 |       clusterId={activeClusterId}
125 |       onClose={onClose}
126 |       onClosed={onClosed}
127 |       isOpen={isOpen}
128 |     />
129 |   );
130 | }


--------------------------------------------------------------------------------
/frontend/src/components/Footer.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | 
 3 | import styles from '../styles/Footer.module.scss';
 4 | 
 5 | export default class Footer extends React.Component {
 6 |   render() {
 7 |     return (
 8 |       <div className={styles.footer}>
 9 |         <div className='page-container'>
10 |           StoryWeb prototype
11 |         </div>
12 |       </div>
13 |     )
14 |   }
15 | }


--------------------------------------------------------------------------------
/frontend/src/components/Navbar.tsx:
--------------------------------------------------------------------------------
 1 | import React, { useState } from 'react';
 2 | import classnames from 'classnames';
 3 | import { Link } from 'react-router-dom';
 4 | import { Navbar, Alignment, Icon, Button } from '@blueprintjs/core';
 5 | import SettingsDialog from './SettingsDialog';
 6 | import { ARTICLE_ICON, CLUSTER_ICON, SITE, STORY_ICON } from '../constants';
 7 | 
 8 | import styles from "../styles/Navbar.module.scss";
 9 | 
/**
 * Top navigation bar: site name, links to the stories, entities and
 * articles screens, and a button that opens the settings dialog.
 */
export default function NavbarSection() {
  const [showSettings, setShowSettings] = useState(false);
  const openSettings = () => setShowSettings(true);
  const closeSettings = () => setShowSettings(false);
  // 'bp4-dark' could be added to the class list here
  const navClass = classnames(styles.navContainered);

  return (
    <Navbar className={navClass}>
      <div className='page-container'>
        <Navbar.Group align={Alignment.LEFT}>
          <Navbar.Heading>
            <Link to="/">{SITE}</Link>
          </Navbar.Heading>
          <Navbar.Divider />
          <Link to="/" role="button" className="bp4-minimal bp4-button">
            <Icon icon={STORY_ICON} />
            <span className="bp4-button-text">Stories</span>
          </Link>
          <Link to="/clusters" role="button" className="bp4-minimal bp4-button">
            <Icon icon={CLUSTER_ICON} />
            <span className="bp4-button-text">Entities</span>
          </Link>
          <Link to="/articles" role="button" className="bp4-minimal bp4-button">
            <Icon icon={ARTICLE_ICON} />
            <span className="bp4-button-text">Articles</span>
          </Link>
        </Navbar.Group>
        <Navbar.Group align={Alignment.RIGHT}>
          <Button minimal icon="cog" onClick={openSettings}>Settings</Button>
          <SettingsDialog isOpen={showSettings} onClose={closeSettings} />
        </Navbar.Group>
      </div>
    </Navbar>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/Pagination.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, ButtonGroup } from "@blueprintjs/core";
 2 | import { useSearchParams } from "react-router-dom";
 3 | 
 4 | import { IListingResponse } from "../types";
 5 | import styles from '../styles/util.module.scss';
 6 | import { Numeric } from "./util";
 7 | 
 8 | 
 9 | type PaginationProps<T> = {
10 |   prefix: string
11 |   response: IListingResponse<T>
12 | }
13 | 
14 | export default function Pagination<T>({ prefix, response }: PaginationProps<T>) {
15 |   const [params, setParams] = useSearchParams();
16 | 
17 |   const setOffset = (e: React.MouseEvent<HTMLElement>, newOffset: number) => {
18 |     e.preventDefault();
19 |     const oldParams = Object.fromEntries(params.entries());
20 |     const key = `${prefix}.offset`;
21 |     setParams({ ...oldParams, [key]: newOffset + '' });
22 |   }
23 | 
24 |   const nextOffset = response.offset + response.limit;
25 |   const upper = Math.min(response.total, nextOffset);
26 |   const hasPrev = response.offset > 0;
27 |   const hasNext = response.total > nextOffset;
28 |   const prevOffset = Math.max(0, response.offset - response.limit)
29 |   return (
30 |     <ButtonGroup fill className={styles.pagination}>
31 |       <Button icon="caret-left" disabled={!hasPrev} onClick={(e) => setOffset(e, prevOffset)} />
32 |       <Button fill disabled minimal>
33 |         {response.offset + 1} - {upper} of <Numeric value={response.total} />
34 |       </Button>
35 |       <Button icon="caret-right" disabled={!hasNext} onClick={(e) => setOffset(e, nextOffset)} />
36 |     </ButtonGroup>
37 |   )
38 | }


--------------------------------------------------------------------------------
/frontend/src/components/PairLink.tsx:
--------------------------------------------------------------------------------
 1 | import { Icon } from "@blueprintjs/core";
 2 | import { Link } from "react-router-dom";
 3 | import { IClusterBase } from "../types";
 4 | import { getLinkLoomLink } from "../util";
 5 | import { LinkType, SpacedList } from "./util";
 6 | 
 7 | type PairLinkProps = {
 8 |   left: IClusterBase,
 9 |   right: IClusterBase,
10 |   link_types: string[]
11 |   story?: number
12 | }
13 | 
14 | export default function PairLink({ left, right, link_types, story }: PairLinkProps) {
15 |   return (
16 |     <>
17 |       <Link to={getLinkLoomLink(left, right, story)}>
18 |         <SpacedList values={link_types.map(t => <LinkType type={t} />)} />
19 |         {link_types.length === 0 && (
20 |           <Icon icon="new-link" />
21 |         )}
22 |       </Link>
23 |     </>
24 |   )
25 | }


--------------------------------------------------------------------------------
/frontend/src/components/RelatedListing.tsx:
--------------------------------------------------------------------------------
 1 | import { HTMLTable } from "@blueprintjs/core";
 2 | import { Link } from "react-router-dom";
 3 | import { useNodeTypes } from "../selectors";
 4 | import { useFetchRelatedClusterListingQuery } from "../services/clusters";
 5 | import { ICluster } from "../types";
 6 | import { getClusterLink, useListingPagination } from "../util";
 7 | import Pagination from "./Pagination";
 8 | import PairLink from "./PairLink";
 9 | import { SectionLoading, ClusterTypeIcon } from "./util";
10 | 
/** Props for the `RelatedListing` component. */
type RelatedListingProps = {
  /** Cluster whose related clusters are listed. */
  cluster: ICluster,
}

/**
 * Paginated table of clusters related to `cluster`, restricted to the node
 * types returned by `useNodeTypes`.
 */
export default function RelatedListing({ cluster }: RelatedListingProps) {
  const nodeTypes = useNodeTypes();
  const page = useListingPagination('related');
  const query = useFetchRelatedClusterListingQuery({
    clusterId: cluster.id,
    params: { ...page, types: nodeTypes },
  });
  const listing = query.data;

  if (query.isLoading || listing === undefined) {
    return <SectionLoading />
  }

  const rows = listing.results.map((related) => (
    <tr key={related.id}>
      <td>
        <ClusterTypeIcon type={related.type} size={14} />
        <Link to={getClusterLink(related)}>{related.label}</Link>
      </td>
      <td>
        <PairLink left={cluster} right={related} link_types={related.link_types} />
      </td>
      <td className="numeric">{related.articles}</td>
    </tr>
  ));

  return (
    <>
      <HTMLTable condensed bordered className="wide">
        <thead>
          <tr>
            <th>Name</th>
            <th>Link</th>
            <th style={{ width: '1%' }} className="numeric">Articles</th>
          </tr>
        </thead>
        <tbody>
          {rows}
        </tbody>
      </HTMLTable>
      <Pagination prefix='related' response={listing} />
    </>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/ScreenContent.tsx:
--------------------------------------------------------------------------------
 1 | import { Card, Elevation } from '@blueprintjs/core';
 2 | 
 3 | import styles from '../styles/util.module.scss';
 4 | 
 5 | type ScreenContentProps = {
 6 |   children?: React.ReactNode
 7 | }
 8 | 
 9 | export default function ScreenContent({ children }: ScreenContentProps) {
10 |   return (
11 |     <Card elevation={Elevation.ONE} className={styles.screenContent}>
12 |       {children}
13 |     </Card>
14 |   )
15 | }


--------------------------------------------------------------------------------
/frontend/src/components/ScreenHeading.tsx:
--------------------------------------------------------------------------------
 1 | import styles from '../styles/util.module.scss';
 2 | 
 3 | type ScreenHeadingProps = {
 4 |   title: React.ReactNode
 5 |   children?: React.ReactNode
 6 | }
 7 | 
 8 | export default function ScreenHeading({ title, children }: ScreenHeadingProps) {
 9 | 
10 |   return (
11 |     <>
12 |       {!!children && (
13 |         <div className={styles.headingActions}>
14 |           {children}
15 |         </div>
16 |       )}
17 |       <h1>{title}</h1>
18 |     </>
19 |   )
20 | }


--------------------------------------------------------------------------------
/frontend/src/components/SettingsDialog.tsx:
--------------------------------------------------------------------------------
 1 | import { Checkbox, Classes, Dialog, Label } from "@blueprintjs/core";
 2 | import { FormEvent } from "react";
 3 | import { useDispatch } from "react-redux";
 4 | import { useSelector } from "react-redux";
 5 | import { setHiddenNodeTypes } from "../services/config";
 6 | import { useFetchOntologyQuery } from "../services/ontology";
 7 | import { RootState } from "../store";
 8 | import { IClusterType } from "../types";
 9 | import { listToggle } from "../util";
10 | 
/** Props for the `SettingsDialog` component. */
type SettingsDialogProps = {
  /** Whether the dialog is currently shown. */
  isOpen: boolean
  /** Passed to the dialog's `onClose`. */
  onClose: () => void
}

/**
 * Settings dialog with one checkbox per cluster type from the ontology.
 *
 * A box is checked while its type is absent from the `hiddenNodeTypes`
 * list in the store; toggling it dispatches the updated list.
 * Renders nothing until the ontology has loaded.
 */
export default function SettingsDialog({ isOpen, onClose }: SettingsDialogProps) {
  const { data: ontology } = useFetchOntologyQuery();
  const hiddenNodeTypes = useSelector((state: RootState) => state.config.hiddenNodeTypes);
  const dispatch = useDispatch();

  if (ontology === undefined) {
    return null;
  }

  const onChangeNodeType = (e: FormEvent<HTMLInputElement>, type: IClusterType) => {
    dispatch(setHiddenNodeTypes(listToggle(hiddenNodeTypes, type.name)));
  }

  const isShown = (type: IClusterType) => !hiddenNodeTypes.includes(type.name);

  const checkboxes = ontology.cluster_types.map((ct) => (
    <Checkbox
      key={ct.name}
      label={ct.label}
      checked={isShown(ct)}
      onChange={(e) => onChangeNodeType(e, ct)}
    />
  ));

  return (
    <Dialog isOpen={isOpen} onClose={onClose} title="Settings">
      <div className={Classes.DIALOG_BODY}>
        <div>
          <Label>Include the following node types in listings:</Label>
          {checkboxes}
        </div>
      </div>
    </Dialog>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/SimilarListing.tsx:
--------------------------------------------------------------------------------
  1 | import { Button, ButtonGroup, Checkbox, HTMLTable, Intent, NonIdealState } from "@blueprintjs/core"
  2 | import { useState } from "react"
  3 | import { Link, useNavigate } from "react-router-dom"
  4 | import { useFetchSimilarClusterListingQuery, useMergeClustersMutation } from "../services/clusters"
  5 | import { ICluster } from "../types"
  6 | import { getClusterLink, listToggle, useListingPagination } from "../util"
  7 | import Pagination from "./Pagination"
  8 | import { SectionLoading, SpacedList, ClusterLabel, ClusterTypeIcon } from "./util"
  9 | 
 10 | type SimilarListingProps = {
 11 |   cluster: ICluster,
 12 | }
 13 | 
 14 | export default function SimilarListing({ cluster }: SimilarListingProps) {
 15 |   const page = useListingPagination('similar');
 16 |   const similarQuery = { clusterId: cluster.id, params: { ...page } };
 17 |   const { data: listing, isLoading } = useFetchSimilarClusterListingQuery(similarQuery);
 18 |   const navigate = useNavigate();
 19 |   const [postMerge, { isLoading: isUpdating }] = useMergeClustersMutation();
 20 |   const [merges, setMerges] = useState([] as string[]);
 21 | 
 22 |   if (listing === undefined || isLoading) {
 23 |     return <SectionLoading />
 24 |   }
 25 |   const allSelected = merges.length === listing.results.length;
 26 | 
 27 |   const onMerge = async () => {
 28 |     const response = await postMerge({ anchor: cluster.id, other: merges }).unwrap()
 29 |     setMerges([]);
 30 |     if (response.id !== cluster.id) {
 31 |       navigate(`/clusters/${response.id}`);
 32 |     }
 33 |   }
 34 | 
 35 |   const toggleAll = async () => {
 36 |     if (allSelected) {
 37 |       setMerges([]);
 38 |     } else {
 39 |       setMerges(listing.results.map(r => r.id));
 40 |     }
 41 |   }
 42 | 
 43 |   const toggleOne = async (id: string) => {
 44 |     setMerges(listToggle(merges, id));
 45 |   }
 46 | 
 47 |   return (
 48 |     <>
 49 |       <ButtonGroup>
 50 |         <Button
 51 |           disabled={merges.length === 0 || isUpdating}
 52 |           onClick={() => onMerge()}
 53 |           intent={Intent.PRIMARY}
 54 |         >
 55 |           Merge ({merges.length})
 56 |         </Button>
 57 |         <Button
 58 |           onClick={() => toggleAll()}
 59 |           disabled={isUpdating}
 60 |         >
 61 |           {allSelected && <>Select none</>}
 62 |           {!allSelected && <>Select all</>}
 63 |         </Button>
 64 |       </ButtonGroup>
 65 |       {listing.total < 1 && (
 66 |         <NonIdealState
 67 |           icon="heart-broken"
 68 |           title="No similar entities"
 69 |           description="There are no un-linked entities in other articles with the same name."
 70 |         />
 71 |       )}
 72 |       {listing.total > 0 && (
 73 |         <>
 74 |           <HTMLTable condensed bordered className="wide">
 75 |             <thead>
 76 |               <tr>
 77 |                 <th>Name</th>
 78 |                 <th>Common tags</th>
 79 |                 <th>Count</th>
 80 |                 <th>Same</th>
 81 |               </tr>
 82 |             </thead>
 83 |             <tbody>
 84 |               {listing.results.map((similar) => (
 85 |                 <tr key={similar.id}>
 86 |                   <td>
 87 |                     <ClusterTypeIcon type={similar.type} size={14} />
 88 |                     <Link to={getClusterLink(similar)}>{similar.label}</Link>
 89 |                   </td>
 90 |                   <td>
 91 |                     <SpacedList values={similar.common.map((l) => <ClusterLabel key={l} label={l} />)} />
 92 |                   </td>
 93 |                   <td>
 94 |                     {similar.common_count}
 95 |                   </td>
 96 |                   <td>
 97 |                     <Checkbox
 98 |                       checked={merges.indexOf(similar.id) !== -1}
 99 |                       onClick={() => toggleOne(similar.id)}
100 |                       disabled={isUpdating}
101 |                     />
102 |                   </td>
103 |                 </tr>
104 |               ))}
105 |             </tbody>
106 |           </HTMLTable>
107 |         </>
108 |       )}
109 |       <Pagination prefix='similar' response={listing} />
110 |     </>
111 |   )
112 | }


--------------------------------------------------------------------------------
/frontend/src/components/StoryArticleImportDialog.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, Classes, Dialog, FormGroup, InputGroup } from "@blueprintjs/core";
 2 | import { FormEvent, MouseEvent, useState } from "react";
 3 | import { ARTICLE_ICON } from "../constants";
 4 | import { useImportStoryArticleMutation } from "../services/stories";
 5 | import { SectionLoading } from "./util";
 6 | 
 7 | type StoryArticleImportProps = {
 8 |   storyId: number
 9 |   isOpen: boolean
10 |   onClose: () => void
11 | }
12 | 
13 | export default function StoryArticleImportDialog({ storyId, isOpen, onClose }: StoryArticleImportProps) {
14 |   const [url, setUrl] = useState('');
15 |   const [importArticle, { isLoading: isCreating }] = useImportStoryArticleMutation();
16 |   const hasUrl = url.trim().length > 10;
17 | 
18 |   const onImport = async (e: MouseEvent | FormEvent) => {
19 |     e.preventDefault();
20 |     if (hasUrl && !isCreating) {
21 |       await importArticle({ story: storyId, url }).unwrap();
22 |       setUrl('')
23 |       onClose()
24 |     }
25 |   }
26 | 
27 |   return (
28 |     <Dialog isOpen={isOpen} onClose={onClose} title="Import an article" icon={ARTICLE_ICON}>
29 |       {isCreating && (
30 |         <SectionLoading />
31 |       )}
32 |       {!isCreating && (
33 |         <form onSubmit={onImport}>
34 |           <div className={Classes.DIALOG_BODY}>
35 |             <FormGroup
36 |               helperText="Give the link to the article, without paywall restrictions."
37 |               label="URL"
38 |               labelFor="text-input"
39 |             >
40 |               <InputGroup id="text-input" placeholder="https://www.news..." value={url} onChange={(e) => setUrl(e.target.value)} />
41 |             </FormGroup>
42 |           </div>
43 |           <div className={Classes.DIALOG_FOOTER}>
44 |             <div className={Classes.DIALOG_FOOTER_ACTIONS}>
45 |               <Button onClick={onImport} disabled={!isCreating && !hasUrl}>Import</Button>
46 |             </div>
47 |           </div>
48 |         </form>
49 |       )}
50 |     </Dialog>
51 |   )
52 | }


--------------------------------------------------------------------------------
/frontend/src/components/StoryArticles.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, HTMLTable } from "@blueprintjs/core";
 2 | import { MouseEvent } from "react";
 3 | import { Link, useSearchParams } from "react-router-dom";
 4 | import { ARTICLE_THRESHOLD } from "../constants";
 5 | import { useFetchArticleListingQuery } from "../services/articles";
 6 | import { useToggleStoryArticleMutation } from "../services/stories";
 7 | import { IArticle, IStory } from "../types";
 8 | import { useListingPagination } from "../util";
 9 | import ArticleDrawer from "./ArticleDrawer";
10 | import Pagination from "./Pagination";
11 | import StoryNomNom from "./StoryNomNom";
12 | import { ErrorSection, SectionLoading } from "./util";
13 | 
/** Props for the `StoryArticles` component. */
type StoryArticlesProps = {
  /** Story whose articles are listed. */
  story: IStory,
}

/**
 * Paginated table of the articles attached to a story.
 *
 * Each row can open the article in a preview drawer (tracked through the
 * `article` search parameter) or remove the article from the story. While
 * the story has fewer than `ARTICLE_THRESHOLD` articles, the `StoryNomNom`
 * prompt is shown above the table.
 */
export default function StoryArticles({ story }: StoryArticlesProps) {
  const [params, setParams] = useSearchParams();
  const articleId = params.get('article') || undefined;
  // Must use the same prefix as the <Pagination> below; with a different
  // prefix the pager writes an offset that this query never reads.
  const page = useListingPagination('articles');
  const { data: articles, error, isLoading } = useFetchArticleListingQuery({ ...page, story: story.id });
  const [toggleStoryArticle] = useToggleStoryArticleMutation();

  if (error !== undefined) {
    return <ErrorSection title="Could not load story-related articles" />
  }
  if (articles === undefined || isLoading) {
    return <SectionLoading />
  }

  const onRemoveArticle = async (article: IArticle) => {
    await toggleStoryArticle({ story: story.id, article: article.id }).unwrap()
  }

  const setPreviewArticle = (previewId?: string) => {
    const next = new URLSearchParams(params);
    if (previewId) {
      next.set('article', previewId);
    } else {
      // Drop the parameter on close rather than leaving an empty `article=`.
      next.delete('article');
    }
    setParams(next);
  }

  const onPreviewArticle = (event: MouseEvent<HTMLAnchorElement>, article: IArticle) => {
    event.preventDefault();
    setPreviewArticle(article.id)
  }

  return (
    <>
      {(articles.total < ARTICLE_THRESHOLD) && (
        <StoryNomNom story={story} />
      )}
      {articles.results.length > 0 && (
        <>
          <HTMLTable condensed bordered className="wide">
            <thead>
              <tr>
                <th>Title</th>
                <th>Site</th>
                <th style={{ width: '1%' }} className="numeric">Remove</th>
              </tr>
            </thead>
            <tbody>
              {articles.results.map((article) => (
                <tr key={article.id}>
                  <td>
                    <Link
                      to={`/articles?article=${article.id}`}
                      onClick={(e) => onPreviewArticle(e, article)}
                    >
                      {article.title}
                    </Link>
                  </td>
                  <td>{article.site}</td>
                  <td className="numeric">
                    <Button
                      onClick={() => onRemoveArticle(article)}
                      icon="trash"
                      minimal
                      small
                    />
                  </td>
                </tr>
              ))}
            </tbody>
          </HTMLTable>
          <Pagination prefix='articles' response={articles} />
        </>
      )}
      <ArticleDrawer
        onClose={() => setPreviewArticle(undefined)}
        articleId={articleId}
        tags={[]}
      />
    </>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/StoryCreateDialog.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, Classes, Dialog, FormGroup, InputGroup, TextArea } from "@blueprintjs/core";
 2 | import { FormEvent, MouseEvent, useState } from "react";
 3 | import { useNavigate } from "react-router-dom";
 4 | import { STORY_ICON } from "../constants";
 5 | import { useCreateStoryMutation } from "../services/stories";
 6 | 
 7 | type StoryCreateDialogProps = {
 8 |   isOpen: boolean
 9 |   onClose: () => void
10 | }
11 | 
12 | export default function StoryCreateDialog({ isOpen, onClose }: StoryCreateDialogProps) {
13 |   const [title, setTitle] = useState('');
14 |   const [summary, setSummary] = useState('');
15 |   const navigate = useNavigate();
16 |   const [createStory, { isLoading: isCreating }] = useCreateStoryMutation();
17 | 
18 |   const hasTitle = title.trim().length > 3;
19 | 
20 |   const onCreate = async (e: MouseEvent | FormEvent) => {
21 |     e.preventDefault();
22 |     if (hasTitle && !isCreating) {
23 |       const story = await createStory({ title: title, summary: summary }).unwrap();
24 |       navigate(`/stories/${story.id}`);
25 |     }
26 |   }
27 | 
28 |   return (
29 |     <Dialog isOpen={isOpen} onClose={onClose} icon={STORY_ICON} title="Create a new story">
30 |       <form onSubmit={onCreate}>
31 |         <div className={Classes.DIALOG_BODY}>
32 |           <FormGroup
33 |             helperText="Describe your story with a simple sentence."
34 |             label="Title"
35 |             labelFor="text-input"
36 |           >
37 |             <InputGroup id="text-input" large placeholder="Story title" value={title} onChange={(e) => setTitle(e.target.value)} />
38 |           </FormGroup>
39 |           <FormGroup
40 |             label="Summary"
41 |             labelFor="text-input"
42 |           >
43 |             <TextArea id="text-input" fill large
44 |               rows={5}
45 |               placeholder="Short description"
46 |               value={summary}
47 |               onChange={(e) => setSummary(e.target.value)}
48 |             />
49 |           </FormGroup>
50 |         </div>
51 |         <div className={Classes.DIALOG_FOOTER}>
52 |           <div className={Classes.DIALOG_FOOTER_ACTIONS}>
53 |             <Button onClick={onCreate} disabled={!isCreating && !hasTitle}>Create</Button>
54 |           </div>
55 |         </div>
56 |       </form>
57 |     </Dialog>
58 |   )
59 | }


--------------------------------------------------------------------------------
/frontend/src/components/StoryDeleteDialog.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, Callout, Classes, Dialog, Intent } from "@blueprintjs/core";
 2 | import { FormEvent, MouseEvent } from "react";
 3 | import { useNavigate } from "react-router-dom";
 4 | import { STORY_ICON } from "../constants";
 5 | import { useDeleteStoryMutation } from "../services/stories";
 6 | import { IStory } from "../types";
 7 | 
 8 | type StoryCreateDialogProps = {
 9 |   story: IStory
10 |   isOpen: boolean
11 |   onClose: () => void
12 | }
13 | 
14 | export default function StoryDeleteDialog({ isOpen, onClose, story }: StoryCreateDialogProps) {
15 |   const navigate = useNavigate();
16 |   const [deleteStory, { isLoading: isDeleting }] = useDeleteStoryMutation();
17 | 
18 |   const onDelete = async (e: MouseEvent | FormEvent) => {
19 |     e.preventDefault();
20 |     await deleteStory(story.id).unwrap();
21 |     navigate('/');
22 |   }
23 | 
24 |   return (
25 |     <Dialog isOpen={isOpen} onClose={onClose} icon={STORY_ICON} title={`Delete story: ${story.title}`}>
26 |       <form onSubmit={onDelete}>
27 |         <div className={Classes.DIALOG_BODY}>
28 |           <Callout intent={Intent.DANGER}>
29 |             Are you sure you want to delete this story?
30 |           </Callout>
31 |         </div>
32 |         <div className={Classes.DIALOG_FOOTER}>
33 |           <div className={Classes.DIALOG_FOOTER_ACTIONS}>
34 |             <Button intent={Intent.DANGER} onClick={onDelete} disabled={isDeleting}>Delete</Button>
35 |           </div>
36 |         </div>
37 |       </form>
38 |     </Dialog>
39 |   )
40 | }


--------------------------------------------------------------------------------
/frontend/src/components/StoryGraph.tsx:
--------------------------------------------------------------------------------
 1 | import { useEffect, useState } from "react";
 2 | import Graph from "graphology";
 3 | import { parse } from "graphology-gexf/browser";
 4 | import { ControlsContainer, SigmaContainer, useLoadGraph, useRegisterEvents, ZoomControl } from "@react-sigma/core";
 5 | import "@react-sigma/core/lib/react-sigma.min.css";
 6 | import { useFetchStoryGraphQuery } from "../services/stories";
 7 | import { useLayoutForceAtlas2 } from "@react-sigma/layout-forceatlas2";
 8 | import { useFetchOntologyQuery } from "../services/ontology";
 9 | import { IStory } from "../types";
10 | import ClusterDrawer from "./ClusterDrawer";
11 | 
/** Props shared by `LoadGraph` and `StoryGraph`. */
export type StoryGraphProps = {
  /** Story whose graph is fetched and displayed. */
  story: IStory
}

/**
 * Renderless component that fetches a story's GEXF graph, decorates it and
 * loads it into the enclosing sigma instance.
 *
 * Nodes get a random starting position, a size that grows with their degree
 * and the colour of their ontology cluster type (grey when unknown). Edges
 * get a fixed size and the label of their ontology link type. The
 * ForceAtlas2 layout is assigned once the graph is loaded.
 */
export const LoadGraph = ({ story }: StoryGraphProps) => {
  const loadGraph = useLoadGraph();
  const { data: ontology } = useFetchOntologyQuery();
  const { assign } = useLayoutForceAtlas2();
  const { data: graphData } = useFetchStoryGraphQuery({ storyId: story.id });

  useEffect(() => {
    // Both the graph payload and the ontology are needed before anything is drawn.
    if (graphData === undefined || ontology === undefined) {
      return;
    }

    const graph = parse(Graph, graphData);

    graph.forEachNode((node, attributes) => {
      const clusterType = ontology.cluster_types.find((tp) => tp.name === attributes.node_type);
      attributes.x = Math.random() * 20;
      attributes.y = Math.random() * 20;
      attributes.size = 5 + (1.5 * graph.degree(node));
      attributes.color = clusterType?.color || '#dddddd';
    });

    graph.forEachEdge((edge, attributes) => {
      const linkType = ontology.link_types.find((t) => t.name === attributes.edge_type);
      attributes.size = 2;
      attributes.label = linkType?.label;
    });

    loadGraph(graph);
    assign();
  }, [loadGraph, assign, ontology, graphData]);

  return null;
};
46 | 
/** Props for the `GraphEvents` component. */
type GraphEventsProps = {
  /** Called with the id of the node that was clicked or double-clicked. */
  showCluster: (id: string) => void
}

/**
 * Renderless component that registers sigma event handlers: a single or
 * double click on a node passes that node's id to `showCluster`.
 */
function GraphEvents({ showCluster }: GraphEventsProps) {
  const registerEvents = useRegisterEvents();

  useEffect(() => {
    registerEvents({
      clickNode: ({ node }) => showCluster(node),
      doubleClickNode: ({ node }) => showCluster(node),
    });
  }, [registerEvents, showCluster]);

  return null;
}
66 | 
/**
 * Interactive graph view of a story.
 *
 * Renders the sigma canvas with zoom controls, and a cluster drawer that
 * opens for whichever node was last clicked.
 */
export default function StoryGraph({ story }: StoryGraphProps) {
  const [showCluster, setShowCluster] = useState<string | undefined>();
  const closeDrawer = () => setShowCluster(undefined);

  const containerStyle = { height: "500px", width: "100%" };
  const sigmaSettings = { zIndex: true, renderEdgeLabels: true };

  return (
    <>
      <ClusterDrawer clusterId={showCluster} onClose={closeDrawer} />
      <SigmaContainer style={containerStyle} settings={sigmaSettings}>
        <LoadGraph story={story} />
        <GraphEvents showCluster={setShowCluster} />
        <ControlsContainer position={"bottom-right"}>
          <ZoomControl />
        </ControlsContainer>
      </SigmaContainer>
    </>
  );
}


--------------------------------------------------------------------------------
/frontend/src/components/StoryLinkerBanner.tsx:
--------------------------------------------------------------------------------
 1 | import { Breadcrumbs2 } from "@blueprintjs/popover2"
 2 | import { useFetchStoryQuery } from "../services/stories"
 3 | 
 4 | import styles from '../styles/Linker.module.scss';
 5 | 
 6 | type StoryLinkerBannerProps = {
 7 |   storyId: string,
 8 | }
 9 | 
10 | export default function StoryLinkerBanner({ storyId }: StoryLinkerBannerProps) {
11 |   const { data: story, isLoading } = useFetchStoryQuery(storyId)
12 | 
13 |   if (story === undefined || isLoading) {
14 |     return null
15 |   }
16 |   return (
17 |     <Breadcrumbs2 className={styles.banner} items={[{ href: `/stories/${storyId}`, icon: "projects", text: story.title }]} />
18 |   )
19 | }


--------------------------------------------------------------------------------
/frontend/src/components/StoryNomNom.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, ButtonGroup, Intent, NonIdealState } from "@blueprintjs/core";
 2 | import { useState } from "react";
 3 | import { useNavigate } from "react-router-dom";
 4 | import { ARTICLE_ICON, ARTICLE_THRESHOLD } from "../constants";
 5 | import { IStory } from "../types";
 6 | import StoryArticleImportDialog from "./StoryArticleImportDialog";
 7 | 
 8 | import styles from '../styles/Story.module.scss';
 9 | 
/** Props for the `StoryNomNom` component. */
type StoryNomNomProps = {
  /** Story that imported articles are added to. */
  story: IStory,
}

/**
 * Prompt asking the user to add articles to a story, either by URL through
 * the import dialog or by going to the article archive.
 */
export default function StoryNomNom({ story }: StoryNomNomProps) {
  const [showImport, setShowImport] = useState(false);
  const navigate = useNavigate();

  const openImport = () => setShowImport(true);
  const goToArchive = () => navigate("/articles");

  const onImportClose = () => {
    setShowImport(false);
    // navigate(0) reloads the current location once the dialog closes.
    navigate(0);
  }

  const actions = (
    <ButtonGroup>
      <Button intent={Intent.PRIMARY} icon="add" onClick={openImport}>Add by URL</Button>
      <Button icon="box" onClick={goToArchive}>Select from archive...</Button>
    </ButtonGroup>
  );

  return (
    <>
      <NonIdealState
        icon={ARTICLE_ICON}
        className={styles.nomNom}
        title={`Add ${ARTICLE_THRESHOLD} articles to your story`}
        description="In order to build a story web, we need some reporting. Please add several pieces of news reporting to this story to collect entity information."
        action={actions}
      />
      <StoryArticleImportDialog storyId={story.id} isOpen={showImport} onClose={onImportClose} />
    </>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/StoryPairs.tsx:
--------------------------------------------------------------------------------
 1 | import { HTMLTable } from "@blueprintjs/core";
 2 | import { MouseEvent, useState } from "react";
 3 | import { Link } from "react-router-dom";
 4 | import { useNodeTypes } from "../selectors";
 5 | import { useFetchStoryPairsQuery } from "../services/stories";
 6 | import { IClusterBase, IStory } from "../types";
 7 | import { getClusterLink, useListingPagination } from "../util";
 8 | import ClusterDrawer from "./ClusterDrawer";
 9 | import Pagination from "./Pagination";
10 | import PairLink from "./PairLink";
11 | import { ErrorSection, Numeric, SectionLoading, ClusterTypeIcon } from "./util";
12 | 
/** Props for the `StoryPairs` component. */
type StoryPairsProps = {
  /** Story whose entity pairs are listed. */
  story: IStory,
}

/**
 * Paginated table of entity pairs for a story, restricted to the node types
 * returned by `useNodeTypes`.
 *
 * Clicking an entity name opens it in a cluster drawer instead of following
 * the link.
 */
export default function StoryPairs({ story }: StoryPairsProps) {
  const nodeTypes = useNodeTypes();
  const [showCluster, setShowCluster] = useState<string | undefined>();
  const page = useListingPagination('pairs');
  const pairsQuery = useFetchStoryPairsQuery({
    storyId: story.id,
    params: { ...page, types: nodeTypes }
  });
  const clusters = pairsQuery.data;

  if (pairsQuery.error !== undefined) {
    return <ErrorSection title="Could not load story-related entity pairs" />
  }
  if (clusters === undefined) {
    return <SectionLoading />
  }

  const onPreview = (e: MouseEvent<HTMLAnchorElement>, cluster: IClusterBase) => {
    // Suppress the navigation and show the cluster in the drawer instead.
    e.preventDefault();
    setShowCluster(cluster.id);
  }

  const closeDrawer = () => setShowCluster(undefined);

  const rows = clusters.results.map(({ left, right, link_types, articles }) => (
    <tr key={left.id + right.id}>
      <td>
        <ClusterTypeIcon type={left.type} size={14} />
        <Link to={getClusterLink(left)} onClick={(e) => onPreview(e, left)}>{left.label}</Link>
      </td>
      <td>
        <ClusterTypeIcon type={right.type} size={14} />
        <Link to={getClusterLink(right)} onClick={(e) => onPreview(e, right)}>{right.label}</Link>
      </td>
      <td>
        <PairLink left={left} right={right} link_types={link_types} story={story.id} />
      </td>
      <td className="numeric">
        <Numeric value={articles} />
      </td>
    </tr>
  ));

  return (
    <>
      <HTMLTable condensed bordered className="wide">
        <thead>
          <tr>
            <th>From</th>
            <th>To</th>
            <th>Links</th>
            <th className="numeric">Articles</th>
          </tr>
        </thead>
        <tbody>
          {rows}
        </tbody>
      </HTMLTable>
      <Pagination prefix='pairs' response={clusters} />
      <ClusterDrawer clusterId={showCluster} onClose={closeDrawer} />
    </>
  )
}


--------------------------------------------------------------------------------
/frontend/src/components/StoryUpdateDialog.tsx:
--------------------------------------------------------------------------------
 1 | import { Button, Classes, Dialog, FormGroup, InputGroup, TextArea } from "@blueprintjs/core";
 2 | import { FormEvent, MouseEvent, useState } from "react";
 3 | import { STORY_ICON } from "../constants";
 4 | import { useUpdateStoryMutation } from "../services/stories";
 5 | import { IStory } from "../types";
 6 | 
 7 | /** Props for StoryUpdateDialog. */
 8 | type StoryUpdateDialogProps = {
 9 |   /** Story being edited; supplies the initial title and summary. */
10 |   story: IStory
11 |   /** Whether the dialog is shown. */
12 |   isOpen: boolean
13 |   /** Called when the dialog is dismissed and after a successful save. */
14 |   onClose: () => void
15 | }
16 | 
17 | /**
18 |  * Dialog for editing a story's title and summary. Saving runs the update
19 |  * mutation and then closes the dialog; if the mutation rejects, `unwrap()`
20 |  * throws and the dialog stays open.
21 |  */
22 | export default function StoryUpdateDialog({ story, isOpen, onClose }: StoryUpdateDialogProps) {
23 |   const [title, setTitle] = useState(story.title);
24 |   const [summary, setSummary] = useState(story.summary);
25 |   const [updateStory, { isLoading: isUpdating }] = useUpdateStoryMutation();
26 | 
27 |   // A title must have more than three characters once trimmed.
28 |   const hasTitle = title.trim().length > 3;
29 |   // Saving is blocked without a valid title and while an update is in flight.
30 |   const canSave = hasTitle && !isUpdating;
31 | 
32 |   const onSave = async (e: MouseEvent | FormEvent) => {
33 |     e.preventDefault();
34 |     if (!canSave) {
35 |       return;
36 |     }
37 |     await updateStory({ id: story.id, title: title, summary: summary }).unwrap();
38 |     onClose()
39 |   }
40 | 
41 |   // Each field has its own DOM id so that every label targets its own control.
42 |   return (
43 |     <Dialog isOpen={isOpen} onClose={onClose} icon={STORY_ICON} title="Edit story">
44 |       <form onSubmit={onSave}>
45 |         <div className={Classes.DIALOG_BODY}>
46 |           <FormGroup
47 |             helperText="Describe your story with a simple sentence."
48 |             label="Title"
49 |             labelFor="story-title-input"
50 |           >
51 |             <InputGroup id="story-title-input" large placeholder="Story title" value={title} onChange={(e) => setTitle(e.target.value)} />
52 |           </FormGroup>
53 |           <FormGroup
54 |             label="Summary"
55 |             labelFor="story-summary-input"
56 |           >
57 |             <TextArea id="story-summary-input" fill large
58 |               rows={5}
59 |               placeholder="Short description"
60 |               value={summary}
61 |               onChange={(e) => setSummary(e.target.value)}
62 |             />
63 |           </FormGroup>
64 |         </div>
65 |         <div className={Classes.DIALOG_FOOTER}>
66 |           <div className={Classes.DIALOG_FOOTER_ACTIONS}>
67 |             <Button onClick={onSave} disabled={!canSave}>Save</Button>
68 |           </div>
69 |         </div>
70 |       </form>
71 |     </Dialog>
72 |   )
73 | }


--------------------------------------------------------------------------------
/frontend/src/components/util.tsx:
--------------------------------------------------------------------------------
  1 | import { Icon, NonIdealState, NonIdealStateIconSize, Spinner, SpinnerSize, Tag } from '@blueprintjs/core';
  2 | import { useEffect } from "react";
  3 | import { useLocation } from "react-router-dom";
  4 | import { ReactNode } from 'react';
  5 | import classnames from 'classnames';
  6 | 
  7 | import { SPACER } from '..//constants';
  8 | import { useFetchOntologyQuery } from '../services/ontology';
  9 | 
 10 | import styles from '../styles/util.module.scss';
 11 | 
 12 | 
 13 | /** Props for ClusterLabel. */
 14 | type ClusterLabelProps = { label: string }
 15 | 
 16 | /** Wraps a cluster label in a span carrying the `ClusterLabel` module style. */
 17 | export function ClusterLabel(props: ClusterLabelProps) {
 18 |   return <span className={styles.ClusterLabel}>{props.label}</span>;
 19 | }
 20 | 
 21 | /** Props for ClusterType: the name of a cluster type from the ontology. */
 22 | type ClusterTypeProps = { type: string }
 23 | 
 24 | /**
 25 |  * Badge showing a cluster type's ontology label on its ontology colour; the
 26 |  * raw type name is shown when the ontology offers no label for it.
 27 |  */
 28 | export function ClusterType({ type }: ClusterTypeProps) {
 29 |   const ontology = useFetchOntologyQuery().data;
 30 |   const typeMeta = ontology?.cluster_types.find((ct) => ct.name === type);
 31 |   const text = typeMeta?.label || type;
 32 |   return <span className={styles.clusterType} style={{ 'backgroundColor': typeMeta?.color }}>{text}</span>;
 33 | }
 34 | 
 35 | 
 36 | /** Props for ClusterTypeIcon. */
 37 | type ClusterTypeIconProps = {
 38 |   type: string        // cluster type name, looked up in the ontology
 39 |   size?: number       // forwarded to the Blueprint Icon
 40 |   className?: string  // extra class names, added after 'spaced-icon'
 41 | }
 42 | 
 43 | /**
 44 |  * Icon for a cluster type, using the icon, colour and label that the ontology
 45 |  * lists for it. Falls back to the 'hat' icon, and to the raw type name as the
 46 |  * title, when the ontology offers no icon or label.
 47 |  */
 48 | export function ClusterTypeIcon({ type, size, className }: ClusterTypeIconProps) {
 49 |   const ontology = useFetchOntologyQuery().data;
 50 |   const typeMeta = ontology?.cluster_types.find((ct) => ct.name === type);
 51 |   const iconName = typeMeta?.icon || 'hat';
 52 |   const title = typeMeta?.label || type;
 53 |   const classes = classnames('spaced-icon', className);
 54 |   // Cast: the ontology value is presumably a plain string, not a Blueprint icon name type.
 55 |   return <Icon icon={iconName as any} size={size} color={typeMeta?.color} className={classes} title={title} />;
 56 | }
 57 | 
 58 | 
 59 | /** Props for LinkType: the name of a link type from the ontology. */
 60 | type LinkTypeProps = { type: string }
 61 | 
 62 | /** Link type label from the ontology (raw name as fallback), styled per type name. */
 63 | export function LinkType({ type }: LinkTypeProps) {
 64 |   const ontology = useFetchOntologyQuery().data;
 65 |   const label = ontology?.link_types.find((lt) => lt.name === type)?.label;
 66 |   return <span className={classnames(styles[type], styles.linkType)}>{label || type}</span>;
 67 | }
 68 | 
 69 | /** Props for Numeric. */
 70 | type NumericProps = { value?: number | null }
 71 | 
 72 | /**
 73 |  * Formats a number with the en-US locale; renders nothing for null/undefined.
 74 |  * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat
 75 |  */
 76 | export function Numeric({ value }: NumericProps) {
 77 |   if (value == null) return null;
 78 |   const formatted = new Intl.NumberFormat('en-US').format(value);
 79 |   return <>{formatted}</>;
 80 | }
 81 | 
 82 | /** Props for NumericTag. */
 83 | type NumericTagProps = { value?: number | null, className?: string }
 84 | 
 85 | /**
 86 |  * Round Blueprint tag holding an en-US formatted number; an ellipsis stands in
 87 |  * while the value is still null/undefined.
 88 |  */
 89 | export function NumericTag({ value, className }: NumericTagProps) {
 90 |   const hasValue = value !== undefined && value !== null;
 91 |   const text = hasValue ? new Intl.NumberFormat('en-US').format(value) : "…";
 92 |   return <Tag round className={className}>{text}</Tag>;
 93 | }
 94 | 
 95 | 
 96 | /** Props for SpacedList. */
 97 | type SpacedListProps = { values: Array<ReactNode> }
 98 | 
 99 | /**
100 |  * Renders the given nodes one after another with a Spacer between each
101 |  * neighbouring pair; renders nothing for an empty list.
102 |  */
103 | export function SpacedList({ values }: SpacedListProps) {
104 |   if (values.length === 0) {
105 |     return null;
106 |   }
107 |   const wrapped = values.map<React.ReactNode>((node, idx) => <span key={idx}>{node}</span>);
108 |   // No initial value: the first item seeds the fold, so spacers only appear between items.
109 |   const joined = wrapped.reduce((acc, item, idx) => [acc, <Spacer key={`spacer-${idx}`} />, item]);
110 |   return <>{joined}</>;
111 | }
112 | 
113 | /** The shared SPACER separator text in a span with the `spacer` module style. */
114 | export function Spacer() {
115 |   const separator = <span className={styles.spacer}>{SPACER}</span>;
116 |   return separator;
117 | }
118 | 
119 | 
120 | /**
121 |  * Loading placeholder for a page section: a standard-size spinner with no
122 |  * title or description.
123 |  */
124 | export function SectionLoading() {
125 |   const spinner = <Spinner size={SpinnerSize.STANDARD} />;
126 |   return <NonIdealState icon={spinner} iconSize={NonIdealStateIconSize.STANDARD} />;
127 | }
128 | 
129 | /** Props for ErrorSection. */
130 | type ErrorSectionProps = {
131 |   title: string         // headline of the error state
132 |   description?: string  // optional detail passed on to NonIdealState
133 | }
134 | 
135 | /**
136 |  * Error placeholder for a page section: a warning-sign icon with the given
137 |  * title and optional description.
138 |  */
139 | export function ErrorSection(props: ErrorSectionProps) {
140 |   const { title, description } = props;
141 |   const iconSize = NonIdealStateIconSize.STANDARD;
142 |   return <NonIdealState title={title} description={description} icon="warning-sign" iconSize={iconSize} />;
143 | }
144 | 
145 | /**
146 |  * Screen-level loading placeholder: a large spinner with a "Loading..."
147 |  * title.
148 |  */
149 | export function ScreenLoading() {
150 |   const spinner = <Spinner size={SpinnerSize.LARGE} />;
151 |   const iconSize = NonIdealStateIconSize.STANDARD;
152 |   return <NonIdealState title="Loading..." icon={spinner} iconSize={iconSize} />;
153 | }
154 | 
155 | 
156 | /** Renders nothing; scrolls the window to the top on mount and when the pathname changes. */
157 | export function ScrollToTop() {
158 |   const currentPath = useLocation().pathname;
159 |   // Keyed on the pathname alone, so query-string changes do not trigger a scroll.
160 |   useEffect(() => {
161 |     window.scrollTo(0, 0);
162 |   }, [currentPath]);
163 |   return null;
164 | }
165 | 


--------------------------------------------------------------------------------
/frontend/src/constants.ts:
--------------------------------------------------------------------------------
 1 | 
 2 | // Site name.
 3 | export const SITE = 'StoryWeb';
 4 | // Base URL of the backend API. Builds made with react-scripts only expose
 5 | // environment variables prefixed with REACT_APP_, so that name is checked
 6 | // first; the unprefixed API_URL is kept for setups that inject it themselves.
 7 | export const API_URL = process.env.REACT_APP_API_URL || process.env.API_URL || 'http://localhost:8000/api/1';
 8 | // Separator text placed between inline list items.
 9 | export const SPACER = " · ";
10 | // Blueprint icon names for the main object types.
11 | export const STORY_ICON = "projects";
12 | export const ARTICLE_ICON = "document";
13 | export const CLUSTER_ICON = "people";
14 | export const LINKER_ICON = "send-to-graph";
15 | // NOTE(review): presumably minimum article/link counts used elsewhere in the
16 | // frontend — confirm against the places that import them.
17 | export const ARTICLE_THRESHOLD = 5;
18 | export const LINKS_THRESHOLD = 3;


--------------------------------------------------------------------------------
/frontend/src/hooks.ts:
--------------------------------------------------------------------------------
1 | import { TypedUseSelectorHook, useDispatch, useSelector } from 'react-redux'
2 | import type { RootState, AppDispatch } from './store'
3 | 
4 | // Typed aliases of the react-redux hooks, annotated with this app's store
5 | // types (AppDispatch, RootState) so call sites get them without casting.
6 | export const useAppDispatch: () => AppDispatch = useDispatch
7 | export const useAppSelector: TypedUseSelectorHook<RootState> = useSelector


--------------------------------------------------------------------------------
/frontend/src/index.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import ReactDOM from 'react-dom/client';
 3 | 
 4 | import './styles/index.scss';
 5 | import App from './App';
 6 | 
 7 | // Entry point: mount the app, wrapped in StrictMode, into the #root element.
 8 | const container = document.getElementById('root') as HTMLElement;
 9 | const root = ReactDOM.createRoot(container);
10 | root.render(
11 |   <React.StrictMode>
12 |     <App />
13 |   </React.StrictMode>
14 | );
15 | 


--------------------------------------------------------------------------------
/frontend/src/logic.ts:
--------------------------------------------------------------------------------
 1 | import { IClusterBase, IOntology } from "./types";
 2 | 
 3 | 
 4 | /** True when `type_` equals `required` or inherits from it via ontology `parent` links. */
 5 | export function isA(ontology: IOntology, type_: string, required: string): boolean {
 6 |   const seen = new Set<string>(); // visited names, so a cyclic parent chain ends as false
 7 |   // `!= null` ends the walk on an undefined or null parent and on unknown types.
 8 |   for (let name: string | null | undefined = type_; name != null && !seen.has(name); ) {
 9 |     if (name === required) return true;
10 |     seen.add(name);
11 |     const current = name;
12 |     name = ontology.cluster_types.find((ct) => ct.name === current)?.parent;
13 |   }
14 |   return false;
15 | }
14 | 
15 | /**
16 |  * Checks whether `linkType` may connect `source` to `target`: the link type
17 |  * must exist in the ontology, and the source and target cluster types must
18 |  * each satisfy (via isA) the link type's source_type and target_type.
19 |  */
20 | export function canHaveLink(ontology: IOntology, source: IClusterBase, target: IClusterBase, linkType: string): boolean {
21 |   const spec = ontology.link_types.find((candidate) => candidate.name === linkType);
22 |   if (spec === undefined) {
23 |     return false;
24 |   }
25 |   const sourceFits = isA(ontology, source.type, spec.source_type);
26 |   return sourceFits && isA(ontology, target.type, spec.target_type);
27 | }
28 | 
29 | /** True when `linkType` is allowed between the two clusters in at least one direction. */
30 | export function canHaveBidi(ontology: IOntology, source: IClusterBase, target: IClusterBase, linkType: string): boolean {
31 |   const forward = canHaveLink(ontology, source, target, linkType);
32 |   return forward || canHaveLink(ontology, target, source, linkType);
33 | }


--------------------------------------------------------------------------------
/frontend/src/react-app-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="react-scripts" />
2 | 


--------------------------------------------------------------------------------
/frontend/src/router.tsx:
--------------------------------------------------------------------------------
 1 | import { createBrowserRouter, useRouteError } from "react-router-dom";
 2 | import ArticleIndex from "./screens/ArticleIndex";
 3 | import ClusterIndex from "./screens/ClusterIndex";
 4 | import ClusterView from "./screens/ClusterView";
 5 | // import HomePage from "./screens/Home";
 6 | import Layout from "./screens/Layout";
 7 | import Linker from "./screens/Linker";
 8 | import LinkerRelated from "./screens/LinkerRelated";
 9 | import StoryIndex from "./screens/StoryIndex";
10 | import StoryLinker from "./screens/StoryLinker";
11 | import StoryView from "./screens/StoryView";
12 | 
13 | /** Shape this page assumes for the value returned by useRouteError. */
14 | interface IRouteError {
15 |   statusText?: string
16 |   message: string
17 | }
18 | 
19 | /** Fallback page for route errors: shows the error's status text, or else its message. */
20 | function ErrorPage() {
21 |   const routeError = useRouteError() as IRouteError;
22 |   const detail = routeError.statusText || routeError.message;
23 |   return (
24 |     <div id="error-page">
25 |       <h1>Oops!</h1>
26 |       <p>Sorry, an unexpected error has occurred.</p>
27 |       <p><i>{detail}</i></p>
28 |     </div>
29 |   );
30 | }
31 | 
32 | // Application route table. Every screen is a child of the "/" route, whose
33 | // element is Layout; ErrorPage is the error element declared on that route.
34 | export const router = createBrowserRouter([
35 |   {
36 |     path: "/",
37 |     element: <Layout />,
38 |     errorElement: <ErrorPage />,
39 |     children: [
40 |       // The empty child path makes the story index the landing screen.
41 |       {
42 |         path: "",
43 |         element: <StoryIndex />
44 |       },
45 |       {
46 |         path: "stories/:storyId",
47 |         element: <StoryView />
48 |       },
49 |       {
50 |         path: "stories/:storyId/linker",
51 |         element: <StoryLinker />
52 |       },
53 |       {
54 |         path: "articles",
55 |         element: <ArticleIndex />,
56 |       },
57 |       {
58 |         path: "clusters",
59 |         element: <ClusterIndex />,
60 |       },
61 |       {
62 |         path: "clusters/:clusterId",
63 |         element: <ClusterView />,
64 |       },
65 |       {
66 |         path: "linker",
67 |         element: <Linker />,
68 |       },
69 |       // Resolves the next related cluster for an anchor, then redirects.
70 |       {
71 |         path: "linker/related",
72 |         element: <LinkerRelated />,
73 |       },
74 |     ],
75 |   },
76 | ]);
77 | 
78 | 
79 | 


--------------------------------------------------------------------------------
/frontend/src/screens/ArticleIndex.tsx:
--------------------------------------------------------------------------------
  1 | import { ControlGroup, Classes, HTMLSelect, HTMLTable, Button, IconSize, Icon } from '@blueprintjs/core';
  2 | import classnames from "classnames";
  3 | import { FormEvent, MouseEvent, useState } from 'react';
  4 | import { Link } from 'react-router-dom';
  5 | import { useSearchParams } from "react-router-dom";
  6 | import ArticleDrawer from '../components/ArticleDrawer';
  7 | import ArticleStoryEditor from '../components/ArticleStoryEditor';
  8 | import Pagination from '../components/Pagination';
  9 | import { Numeric, SectionLoading } from '../components/util';
 10 | import { ARTICLE_ICON } from '../constants';
 11 | 
 12 | import { useFetchArticleListingQuery } from "../services/articles"
 13 | import { useFetchSitesQuery } from '../services/sites';
 14 | import { IArticle } from '../types';
 15 | import { asString, useListingPagination } from "../util";
 16 | 
 17 | /**
 18 |  * Article listing screen: a search form (free text plus site filter), a
 19 |  * paginated table of matching articles, and a drawer for the article named
 20 |  * by the `article` query parameter.
 21 |  */
 22 | export default function ArticleIndex() {
 23 |   const [params, setParams] = useSearchParams();
 24 |   const page = useListingPagination('articles');
 25 |   const { data: sitesResponse } = useFetchSitesQuery();
 26 |   const [query, setQuery] = useState(asString(params.get('q')) || '')
 27 |   const [site, setSite] = useState(asString(params.get('site')) || '')
 28 |   const { data: listing } = useFetchArticleListingQuery({
 29 |     ...page,
 30 |     q: params.get('q'),
 31 |     site: params.get('site'),
 32 |     sort: 'tags:desc'
 33 |   });
 34 |   // Empty until the sites response arrives, so the filter can render at once.
 35 |   const sites = sitesResponse === undefined ? [] : sitesResponse.results.map(s => s.site);
 36 |   const articleId = params.get('article') || undefined;
 37 | 
 38 |   // Submitting replaces the whole query string with the new search.
 39 |   const onSubmit = function (e: FormEvent<HTMLFormElement>) {
 40 |     e.preventDefault();
 41 |     setParams({ site: site, q: query });
 42 |   }
 43 | 
 44 |   // Open the drawer for `id`, or close it by removing the parameter so that
 45 |   // no empty `article=` is left behind in the URL.
 46 |   const setArticle = (id: string | undefined) => {
 47 |     const next = new URLSearchParams(params);
 48 |     if (id) {
 49 |       next.set('article', id);
 50 |     } else {
 51 |       next.delete('article');
 52 |     }
 53 |     setParams(next);
 54 |   }
 55 | 
 56 |   const onClickArticle = (event: MouseEvent<HTMLAnchorElement>, article: IArticle) => {
 57 |     event.preventDefault();
 58 |     setArticle(article.id)
 59 |   }
 60 | 
 61 |   // The counts are shown only once both the listing and the sites response
 62 |   // are loaded; `sites` itself is never undefined, so testing it would show
 63 |   // "0 sources" while the sites are still loading.
 64 |   return (
 65 |     <div>
 66 |       <h1>
 67 |         <Icon size={IconSize.LARGE} icon={ARTICLE_ICON} />{' '}
 68 |         {listing !== undefined && sitesResponse !== undefined ? (
 69 |           <>
 70 |             <Numeric value={listing.total} /> articles from <Numeric value={sites.length} /> sources in the StoryWeb database
 71 |           </>
 72 |         ) : (
 73 |           <>Articles in the StoryWeb database</>
 74 |         )}
 75 |       </h1>
 76 | 
 77 |       <section className="section">
 78 |         <form onSubmit={onSubmit}>
 79 |           <ControlGroup fill>
 80 |             <input
 81 |               className={classnames(Classes.INPUT, Classes.FILL)}
 82 |               value={query}
 83 |               onChange={(e) => setQuery(e.target.value)}
 84 |               placeholder="Search in articles..."
 85 |             />
 86 |             <HTMLSelect
 87 |               className={classnames(Classes.HTML_SELECT)}
 88 |               value={site}
 89 |               onChange={(e) => setSite(e.target.value)}
 90 |             >
 91 |               <option value="">(all sites)</option>
 92 |               {sites.map((s) =>
 93 |                 <option key={s} value={s}>{s}</option>
 94 |               )}
 95 |             </HTMLSelect>
 96 |             <Button icon="search" type="submit">Search</Button>
 97 |           </ControlGroup>
 98 |         </form>
 99 |       </section>
100 |       {listing === undefined && (
101 |         <SectionLoading />
102 |       )}
103 |       {listing !== undefined && (
104 |         <>
105 |           <HTMLTable condensed bordered className="wide">
106 |             <thead>
107 |               <tr>
108 |                 <th>Title</th>
109 |                 <th>Site</th>
110 |                 <th className="numeric">Entities</th>
111 |                 <th className="numeric">Stories</th>
112 |               </tr>
113 |             </thead>
114 |             <tbody>
115 |               {listing.results.map((article) => (
116 |                 <tr key={article.id}>
117 |                   <td>
118 |                     <Link
119 |                       onClick={(e) => onClickArticle(e, article)}
120 |                       to={`/articles?article=${article.id}`}
121 |                     >
122 |                       {article.title}
123 |                     </Link>
124 |                   </td>
125 |                   <td>
126 |                     {article.site}
127 |                   </td>
128 |                   <td className="numeric">
129 |                     <Numeric value={article.tags} />
130 |                   </td>
131 |                   <td style={{ width: "1%" }} className="numeric">
132 |                     <ArticleStoryEditor article={article} inList />
133 |                   </td>
134 |                 </tr>
135 |               ))}
136 |             </tbody>
137 |           </HTMLTable>
138 |           <Pagination prefix='articles' response={listing} />
139 |           <ArticleDrawer
140 |             onClose={() => setArticle(undefined)}
141 |             articleId={articleId}
142 |             tags={[]}
143 |           />
144 |         </>
145 |       )}
146 |     </div>
147 |   )
148 | }
149 | 


--------------------------------------------------------------------------------
/frontend/src/screens/ClusterIndex.tsx:
--------------------------------------------------------------------------------
  1 | import { ControlGroup, Classes, HTMLTable, Button, Checkbox, IconSize, Icon } from '@blueprintjs/core';
  2 | import classnames from "classnames";
  3 | import { FormEvent, useState } from 'react';
  4 | import { Link } from 'react-router-dom';
  5 | import { useSearchParams } from "react-router-dom";
  6 | import Pagination from '../components/Pagination';
  7 | import { ErrorSection, Numeric, SectionLoading, ClusterTypeIcon } from '../components/util';
  8 | import { CLUSTER_ICON } from '../constants';
  9 | import { useNodeTypes } from '../selectors';
 10 | 
 11 | import { useFetchClusterListingQuery, useMergeClustersMutation } from '../services/clusters';
 12 | import { asString, getClusterLink, listToggle, useListingPagination } from "../util";
 13 | 
 14 | /**
 15 |  * Entity listing screen: a search form and a paginated table of clusters.
 16 |  * Rows can be ticked and merged; the first id in the selection is sent as the
 17 |  * merge `anchor` and the remaining ids as `other`.
 18 |  */
 19 | export default function ClusterIndex() {
 20 |   const [params, setParams] = useSearchParams();
 21 |   const page = useListingPagination('clusters');
 22 |   const nodeTypes = useNodeTypes();
 23 |   const [query, setQuery] = useState(asString(params.get('q')) || '');
 24 |   const [merges, setMerges] = useState([] as string[]);
 25 |   const [postMerge, { isLoading: isUpdating }] = useMergeClustersMutation();
 26 |   const { data: listing, error } = useFetchClusterListingQuery({
 27 |     ...page,
 28 |     q: params.get('q'),
 29 |     types: nodeTypes,
 30 |   });
 31 | 
 32 |   // Merge the selection and clear it afterwards. Ignored while a merge is
 33 |   // already running so that a double click cannot submit it twice.
 34 |   const onMerge = async () => {
 35 |     if (merges.length > 1 && !isUpdating) {
 36 |       const [anchor, ...other] = merges;
 37 |       await postMerge({ anchor: anchor, other: other }).unwrap()
 38 |       setMerges([]);
 39 |     }
 40 |   }
 41 | 
 42 |   // Functional update, so quick successive toggles never work on stale state.
 43 |   const toggleMerge = (id: string) => {
 44 |     setMerges((current) => listToggle(current, id));
 45 |   }
 46 | 
 47 |   const onSubmit = function (e: FormEvent<HTMLFormElement>) {
 48 |     e.preventDefault();
 49 |     setParams({ q: query });
 50 |   }
 51 | 
 52 |   if (error !== undefined) {
 53 |     return <ErrorSection title="Cannot get cluster listing" />
 54 |   }
 55 | 
 56 |   return (
 57 |     <div>
 58 |       {listing === undefined && (
 59 |         <h1>
 60 |           <Icon size={IconSize.LARGE} icon={CLUSTER_ICON} />{' '}
 61 |           Entities in the StoryWeb database
 62 |         </h1>
 63 |       )}
 64 |       {listing !== undefined && (
 65 |         <h1>
 66 |           <Icon size={IconSize.LARGE} icon={CLUSTER_ICON} />{' '}
 67 |           <Numeric value={listing.total} /> entities in the StoryWeb database
 68 |         </h1>
 69 |       )}
 70 | 
 71 |       <section className="section">
 72 |         <form onSubmit={onSubmit}>
 73 |           <ControlGroup fill>
 74 |             <input
 75 |               className={classnames(Classes.INPUT, Classes.FILL)}
 76 |               value={query}
 77 |               onChange={(e) => setQuery(e.target.value)}
 78 |               placeholder="Search entities..."
 79 |             />
 80 |             <Button icon="search" type="submit">Search</Button>
 81 |           </ControlGroup>
 82 |         </form>
 83 |       </section>
 84 |       {listing === undefined && (
 85 |         <SectionLoading />
 86 |       )}
 87 |       {listing !== undefined && (
 88 |         <>
 89 |           <HTMLTable condensed bordered className="wide">
 90 |             <thead>
 91 |               <tr>
 92 |                 <th>Label</th>
 93 |                 <th className="numeric">Articles</th>
 94 |                 <th style={{ width: "1%" }} className="numeric">
 95 |                   <Button small onClick={onMerge} disabled={merges.length < 2 || isUpdating}>
 96 |                     Merge
 97 |                   </Button>
 98 |                 </th>
 99 |               </tr>
100 |             </thead>
101 |             <tbody>
102 |               {listing.results.map((cluster) => (
103 |                 <tr key={cluster.id}>
104 |                   <td>
105 |                     <ClusterTypeIcon type={cluster.type} size={14} />
106 |                     <Link to={getClusterLink(cluster)}>{cluster.label}</Link>
107 |                   </td>
108 |                   <td className="numeric">
109 |                     <Numeric value={cluster.articles} />
110 |                   </td>
111 |                   <td style={{ width: "1%" }} className="numeric">
112 |                     <Checkbox
113 |                       checked={merges.indexOf(cluster.id) !== -1}
114 |                       onChange={() => toggleMerge(cluster.id)}
115 |                       disabled={isUpdating}
116 |                     />
117 |                   </td>
118 |                 </tr>
119 |               ))}
120 |             </tbody>
121 |           </HTMLTable>
122 |           <Pagination prefix='clusters' response={listing} />
123 |         </>
124 |       )}
125 |     </div>
126 |   )
127 | }
128 | 


--------------------------------------------------------------------------------
/frontend/src/screens/ClusterView.tsx:
--------------------------------------------------------------------------------
 1 | import { useParams, useSearchParams } from "react-router-dom";
 2 | import { Tabs, Tab, IconSize } from "@blueprintjs/core";
 3 | 
 4 | import RelatedListing from "../components/RelatedListing";
 5 | import SimilarListing from "../components/SimilarListing";
 6 | import { ErrorSection, SectionLoading, ClusterLabel, ClusterTypeIcon, NumericTag } from "../components/util";
 7 | import { useFetchClusterQuery, useFetchSimilarClusterListingQuery, useFetchRelatedClusterListingQuery } from "../services/clusters";
 8 | import ClusterArticles from "../components/ClusterArticles";
 9 | import ScreenHeading from "../components/ScreenHeading";
10 | import ClusterButtonGroup from "../components/ClusterButtonGroup";
11 | import { useFetchArticleListingQuery } from "../services/articles";
12 | import { useNodeTypes } from "../selectors";
13 | import ScreenContent from "../components/ScreenContent";
14 | 
15 | export default function ClusterView() {
16 |   const { clusterId } = useParams();
17 |   const [params, setParams] = useSearchParams();
18 |   const nodeTypes = useNodeTypes();
19 |   const { data: cluster, isLoading, error } = useFetchClusterQuery(clusterId as string);
20 |   const relatedQuery = { clusterId: clusterId || '', params: { types: nodeTypes } };
21 |   const { data: related } = useFetchRelatedClusterListingQuery(relatedQuery)
22 |   const similarQuery = { clusterId: clusterId || '', params: {} };
23 |   const { data: similar } = useFetchSimilarClusterListingQuery(similarQuery);
24 |   const articleQuery = { cluster: clusterId };
25 |   const { data: articles } = useFetchArticleListingQuery(articleQuery);
26 |   if (error !== undefined) {
27 |     return <ErrorSection title="Could not load the article" />
28 |   }
29 |   if (cluster === undefined || isLoading) {
30 |     return <SectionLoading />
31 |   }
32 | 
33 |   const activeTab = params.get('view') || 'related';
34 | 
35 |   const setView = (view: string) => {
36 |     const paramsObj = Object.fromEntries(params.entries());
37 |     setParams({ ...paramsObj, view });
38 |   }
39 | 
40 |   const title = <>
41 |     <ClusterTypeIcon type={cluster.type} size={IconSize.LARGE} />
42 |     <ClusterLabel label={cluster.label} />
43 |   </>;
44 |   return (
45 |     <div>
46 |       <ScreenHeading title={title}>
47 |         <ClusterButtonGroup cluster={cluster} />
48 |       </ScreenHeading>
49 |       <Tabs id="clusterView" selectedTabId={activeTab} onChange={(tab) => setView(tab.toString())}>
50 |         <Tab id="related"
51 |           title={
52 |             <>
53 |               Co-occurring
54 |               <NumericTag value={related?.total} className="tab-tag" />
55 |             </>
56 |           }
57 |           panel={
58 |             <ScreenContent>
59 |               <RelatedListing cluster={cluster} />
60 |             </ScreenContent>
61 |           }
62 |         />
63 |         <Tab id="similar"
64 |           title={
65 |             <>
66 |               Similar
67 |               <NumericTag value={similar?.total} className="tab-tag" />
68 |             </>
69 |           }
70 |           disabled={similar?.total === 0}
71 |           panel={
72 |             <ScreenContent>
73 |               <SimilarListing cluster={cluster} />
74 |             </ScreenContent>
75 |           }
76 |         />
77 |         <Tab id="articles"
78 |           title={
79 |             <>
80 |               Articles
81 |               <NumericTag value={articles?.total} className="tab-tag" />
82 |             </>
83 |           }
84 |           panel={
85 |             <ScreenContent>
86 |               <ClusterArticles cluster={cluster} />
87 |             </ScreenContent>
88 |           }
89 |         />
90 |       </Tabs>
91 |     </div>
92 |   )
93 | }
94 | 


--------------------------------------------------------------------------------
/frontend/src/screens/Home.tsx:
--------------------------------------------------------------------------------
 1 | import { Link } from "react-router-dom";
 2 | 
 3 | /** Simple welcome page with a link to the article listing. */
 4 | export default function HomePage() {
 5 |   const articlesLink = <Link to="/articles">articles</Link>;
 6 |   return (
 7 |     <div>
 8 |       <h1>Welcome to storyweb</h1>
 9 |       {articlesLink}
10 |     </div>
11 |   )
12 | }
13 | 


--------------------------------------------------------------------------------
/frontend/src/screens/Layout.tsx:
--------------------------------------------------------------------------------
 1 | import { Outlet } from "react-router-dom";
 2 | import classnames from "classnames";
 3 | import { HotkeyConfig, HotkeysTarget2, HotkeysProvider } from '@blueprintjs/core';
 4 | import Helmet from "react-helmet";
 5 | 
 6 | import { ErrorSection, ScreenLoading, ScrollToTop } from "../components/util";
 7 | import { SITE } from "../constants";
 8 | import { useFetchOntologyQuery } from "../services/ontology";
 9 | import Footer from "../components/Footer";
10 | import NavbarSection from "../components/Navbar";
11 | 
12 | import styles from "../styles/Layout.module.scss";
13 | 
14 | /**
15 |  * Application shell rendered around every routed screen. It waits for the
16 |  * ontology query (showing an error or loading state meanwhile), then renders
17 |  * the page metadata, the navbar, the routed screen via Outlet and the footer,
18 |  * with the app-level hotkeys attached to the content area.
19 |  */
20 | export default function Layout() {
21 |   const ontologyQuery = useFetchOntologyQuery();
22 | 
23 |   if (ontologyQuery.error !== undefined) {
24 |     return <ErrorSection title="Could not load ontology metadata" />;
25 |   }
26 |   if (ontologyQuery.data === undefined) {
27 |     return <ScreenLoading />;
28 |   }
29 | 
30 |   // The "/" hotkey is a placeholder for entity search; it only shows an alert.
31 |   const hotkeys: HotkeyConfig[] = [
32 |     {
33 |       combo: "/",
34 |       global: true,
35 |       label: "Search entity",
36 |       onKeyDown: () => alert('tbd :)'),
37 |     },
38 |   ];
39 |   const contentClasses = classnames(styles.content, 'page-container');
40 | 
41 |   return (
42 |     <>
43 |       <Helmet>
44 |         {/* <link rel="apple-touch-icon" sizes="180x180" href="/static/apple-touch-icon.png" />
45 |         <link rel="icon" type="image/png" sizes="32x32" href="/static/favicon-32x32.png" />
46 |         <link rel="icon" type="image/png" sizes="16x16" href="/static/favicon-16x16.png" /> */}
47 |         <meta name="viewport" content="width=device-width, initial-scale=1" />
48 |         <meta name="twitter:card" content="summary" />
49 |         <meta name="twitter:creator" content="@pudo" />
50 |         <meta name="og:site" content={SITE} />
51 |       </Helmet>
52 |       <HotkeysProvider>
53 |         <div className={styles.page}>
54 |           <NavbarSection />
55 |           <ScrollToTop />
56 |           <HotkeysTarget2 hotkeys={hotkeys}>
57 |             <div className={contentClasses}>
58 |               <Outlet />
59 |             </div>
60 |           </HotkeysTarget2>
61 |         </div>
62 |       </HotkeysProvider>
63 |       <Footer />
64 |     </>
65 |   )
66 | }
67 | 


--------------------------------------------------------------------------------
/frontend/src/screens/LinkerRelated.tsx:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { useFetchRelatedClusterListingQuery } from '../services/clusters';
 3 | import { useNavigate, useSearchParams } from 'react-router-dom';
 4 | import { SectionLoading } from '../components/util';
 5 | import { useEffect } from 'react';
 6 | import { useNodeTypes } from '../selectors';
 7 | 
 8 | /**
 9 |  * Redirect-only screen: looks up the next unlinked cluster related to the
10 |  * `anchor` query parameter and forwards to the linker for that pair.
11 |  *
12 |  * - no `anchor` parameter: go to the cluster index;
13 |  * - at least one related result: go to `/linker` with `anchor`/`other` set;
14 |  * - no results: go to the anchor cluster's own page.
15 |  *
16 |  * Always renders a loading section while the lookup/redirect is in flight.
17 |  */
18 | export default function LinkerRelated() {
19 |   const navigate = useNavigate();
20 |   const [params] = useSearchParams();
21 |   const nodeTypes = useNodeTypes();
22 |   const anchorId = params.get('anchor');
23 |   const hasAnchor = anchorId !== null;
24 |   // NOTE(review): `_` forwards the `previous` parameter; presumably it only
25 |   // serves to vary the request between linker steps - confirm against the API.
26 |   const relatedParams = { linked: false, limit: 1, _: params.get('previous'), types: nodeTypes };
27 |   const relatedQuery = { clusterId: anchorId + '', params: relatedParams };
28 |   const { data, isLoading } = useFetchRelatedClusterListingQuery(relatedQuery, {
29 |     refetchOnMountOrArgChange: true,
30 |     // Without an anchor there is nothing to look up; do not request `clusters/null/related`.
31 |     skip: !hasAnchor,
32 |   });
33 | 
34 |   useEffect(() => {
35 |     // Navigation is a side effect, so it is performed here and never during render.
36 |     if (!hasAnchor) {
37 |       // `replace` keeps the back button from returning to this redirect screen.
38 |       navigate('/clusters', { replace: true });
39 |       return;
40 |     }
41 |     if (data === undefined || isLoading) {
42 |       return;
43 |     }
44 |     if (data.results.length > 0) {
45 |       const otherId = data.results[0].id;
46 |       navigate(queryString.stringifyUrl({
47 |         'url': `/linker`,
48 |         'query': { anchor: anchorId, other: otherId, related: true }
49 |       }), { replace: true });
50 |     } else {
51 |       navigate(`/clusters/${anchorId}`, { replace: true });
52 |     }
53 |   }, [anchorId, hasAnchor, data, isLoading, navigate]);
54 | 
55 |   return <SectionLoading />;
56 | }


--------------------------------------------------------------------------------
/frontend/src/screens/StoryIndex.tsx:
--------------------------------------------------------------------------------
  1 | import { ControlGroup, Classes, Intent, Button, NonIdealState, Card, Elevation, ButtonGroup, Menu, MenuItem } from '@blueprintjs/core';
  2 | import classnames from "classnames";
  3 | import { FormEvent, useState } from 'react';
  4 | import { useNavigate } from 'react-router-dom';
  5 | import { useSearchParams } from "react-router-dom";
  6 | import StoryCreateDialog from '../components/StoryCreateDialog';
  7 | import { SectionLoading } from '../components/util';
  8 | 
  9 | import { useFetchStoryListingQuery } from '../services/stories';
 10 | import ScreenHeading from '../components/ScreenHeading';
 11 | import { asString } from "../util";
 12 | 
 13 | import styles from '../styles/Story.module.scss';
 14 | import { API_URL } from '../constants';
 15 | import { Popover2, PopupKind } from '@blueprintjs/popover2';
 16 | 
 17 | /**
 18 |  * Story listing screen: search box, "new story" dialog, global export menu
 19 |  * and one card per story returned for the current `q` query parameter.
 20 |  *
 21 |  * Below the search form exactly one of four states is shown: a load error,
 22 |  * a loading indicator, an empty-state message, or the story cards.
 23 |  */
 24 | export default function StoryIndex() {
 25 |   const [params, setParams] = useSearchParams();
 26 |   const navigate = useNavigate();
 27 |   // The input is seeded from the URL; the URL only changes on submit.
 28 |   const [query, setQuery] = useState(asString(params.get('q')) || '')
 29 |   const [showCreate, setShowCreate] = useState(false)
 30 |   const { data: listing, error } = useFetchStoryListingQuery({
 31 |     q: params.get('q'),
 32 |   });
 33 |   // Only treat the request as failed when there is no listing to fall back on.
 34 |   const hasFailed = listing === undefined && error !== undefined;
 35 | 
 36 |   const onSubmit = function (e: FormEvent<HTMLFormElement>) {
 37 |     e.preventDefault();
 38 |     setParams({ q: query });
 39 |   }
 40 | 
 41 |   const onCreate = () => { setShowCreate(true) };
 42 |   const onCloseCreate = () => { setShowCreate(false) };
 43 | 
 44 |   return (
 45 |     <div>
 46 |       <ScreenHeading title={<>Your StoryWebs</>}>
 47 |         <ButtonGroup>
 48 |           <Button intent={Intent.PRIMARY} onClick={onCreate} icon="add">New story...</Button>
 49 |           <Popover2
 50 |             placement="bottom-start"
 51 |             popupKind={PopupKind.MENU}
 52 |             minimal
 53 |             content={
 54 |               <Menu>
 55 |                 <MenuItem icon="download" text="Aleph" label="(FollowTheMoney)" href={`${API_URL}/ftm`} target="_blank" download />
 56 |                 <MenuItem icon="download" text="Gephi" label="(GEXF)" href={`${API_URL}/gexf`} target="_blank" download />
 57 |               </Menu>
 58 |             }
 59 |           >
 60 |             <Button intent={Intent.NONE} icon="download">
 61 |               Export
 62 |             </Button>
 63 |           </Popover2>
 64 |         </ButtonGroup>
 65 |       </ScreenHeading>
 66 |       <StoryCreateDialog isOpen={showCreate} onClose={onCloseCreate} />
 67 |       <section className="section">
 68 |         <form onSubmit={onSubmit}>
 69 |           <ControlGroup fill>
 70 |             <input
 71 |               className={classnames(Classes.INPUT, Classes.FILL)}
 72 |               value={query}
 73 |               onChange={(e) => setQuery(e.target.value)}
 74 |               placeholder="Search in storywebs..."
 75 |             />
 76 |             <Button type="submit">Search</Button>
 77 |           </ControlGroup>
 78 |         </form>
 79 |       </section>
 80 |       {
 81 |         // A failed request would otherwise leave the spinner up forever.
 82 |         hasFailed && (
 83 |           <NonIdealState
 84 |             icon="error"
 85 |             title="Could not load stories."
 86 |           />
 87 |         )
 88 |       }
 89 |       {
 90 |         (listing === undefined && !hasFailed) && (
 91 |           <SectionLoading />
 92 |         )
 93 |       }
 94 |       {
 95 |         (listing !== undefined && listing.results.length === 0) && (
 96 |           <NonIdealState
 97 |             icon="heart-broken"
 98 |             title="You're not telling a story yet."
 99 |             description="Group articles into stories to begin building story graphs."
100 |           />
101 |         )
102 |       }
103 |       {
104 |         (listing !== undefined && listing.results.length > 0) && (
105 |           <div className={styles.storyCardArea}>
106 |             {listing.results.map((story) => (
107 |               <Card
108 |                 key={story.id}
109 |                 interactive={true}
110 |                 elevation={Elevation.TWO}
111 |                 className={styles.storyCard}
112 |                 onClick={() => navigate(`/stories/${story.id}`)}
113 |               >
114 |                 <h3>{story.title}</h3>
115 |                 <p>{story.summary}</p>
116 |               </Card>
117 |             ))}
118 |           </div>
119 |         )
120 |       }
121 |     </div>
122 |   )
123 | }
124 | 


--------------------------------------------------------------------------------
/frontend/src/screens/StoryLinker.tsx:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { useNavigate, useParams, useSearchParams } from 'react-router-dom';
 3 | import { SectionLoading } from '../components/util';
 4 | import { useEffect } from 'react';
 5 | import { useFetchStoryPairsQuery } from '../services/stories';
 6 | import { useNodeTypes } from '../selectors';
 7 | 
 8 | /**
 9 |  * Redirect-only screen: fetches the next unlinked cluster pair of the story
10 |  * named by the `storyId` route parameter and forwards to the linker for it.
11 |  * When the story has no pair left, it returns to the story page instead.
12 |  *
13 |  * Always renders a loading section while the lookup/redirect is in flight.
14 |  */
15 | export default function StoryLinker() {
16 |   const { storyId } = useParams();
17 |   const navigate = useNavigate();
18 |   const [params] = useSearchParams();
19 |   const nodeTypes = useNodeTypes();
20 |   const pairsParams = { linked: false, limit: 1, _: params.get('previous'), types: nodeTypes };
21 |   const pairsQuery = { storyId, params: pairsParams };
22 |   const { data, isLoading } = useFetchStoryPairsQuery(pairsQuery, {
23 |     refetchOnMountOrArgChange: true,
24 |     // Never request `stories/undefined/pairs` when the route parameter is absent.
25 |     skip: storyId === undefined,
26 |   });
27 | 
28 |   useEffect(() => {
29 |     if (storyId === undefined || data === undefined || isLoading) {
30 |       return;
31 |     }
32 |     if (data.results.length > 0) {
33 |       const pair = data.results[0];
34 |       navigate(queryString.stringifyUrl({
35 |         'url': `/linker`,
36 |         'query': { anchor: pair.left.id, other: pair.right.id, story: storyId }
37 |       }), { replace: true });
38 |     } else {
39 |       navigate(`/stories/${storyId}`, { replace: true });
40 |     }
41 |     // Re-run only when the inputs of the redirect change, not on every render.
42 |   }, [storyId, data, isLoading, navigate]);
43 | 
44 |   return <SectionLoading />;
45 | }


--------------------------------------------------------------------------------
/frontend/src/screens/StoryView.tsx:
--------------------------------------------------------------------------------
  1 | import { AnchorButton, Button, ButtonGroup, Icon, IconSize, Intent, Menu, MenuItem, Tab, Tabs } from "@blueprintjs/core";
  2 | import { Popover2, PopupKind } from "@blueprintjs/popover2";
  3 | import { useState } from "react";
  4 | import { useParams, useSearchParams } from "react-router-dom";
  5 | import ScreenContent from "../components/ScreenContent";
  6 | import ScreenHeading from "../components/ScreenHeading";
  7 | import StoryArticleImportDialog from "../components/StoryArticleImportDialog";
  8 | import StoryArticles from "../components/StoryArticles";
  9 | import StoryDeleteDialog from "../components/StoryDeleteDialog";
 10 | import StoryGraph from "../components/StoryGraph";
 11 | import StoryPairs from "../components/StoryPairs";
 12 | import StoryUpdateDialog from "../components/StoryUpdateDialog";
 13 | import { ErrorSection, NumericTag, SectionLoading } from "../components/util";
 14 | import { API_URL, ARTICLE_ICON, ARTICLE_THRESHOLD, LINKER_ICON, LINKS_THRESHOLD, STORY_ICON } from "../constants";
 15 | import { useNodeTypes } from "../selectors";
 16 | import { useFetchArticleListingQuery } from "../services/articles";
 17 | import { useFetchStoryPairsQuery, useFetchStoryQuery } from "../services/stories";
 18 | 
 19 | 
 20 | /**
 21 |  * Detail screen for one story: a heading with actions (build web, add
 22 |  * article, edit, export, delete) and tabs for graph, links and articles.
 23 |  *
 24 |  * The selected tab comes from the `view` query parameter. If that value is
 25 |  * missing, unknown, or names a tab that is currently disabled, the default
 26 |  * tab is used: `articles` below the article threshold, otherwise `graph`
 27 |  * when enough links exist and `pairs` when they do not.
 28 |  */
 29 | export default function StoryView() {
 30 |   const { storyId } = useParams();
 31 |   const nodeTypes = useNodeTypes();
 32 |   const [showImport, setShowImport] = useState(false);
 33 |   const [showDelete, setShowDelete] = useState(false);
 34 |   const [showEdit, setShowEdit] = useState(false);
 35 |   const [params, setParams] = useSearchParams();
 36 |   const { data: story, isLoading, error } = useFetchStoryQuery(storyId as string);
 37 |   // `limit: 0` - only the `total` counts of these two listings are used here.
 38 |   const { data: articles } = useFetchArticleListingQuery({ story: storyId, limit: 0 });
 39 |   const { data: links } = useFetchStoryPairsQuery({
 40 |     storyId: storyId || '',
 41 |     params: { types: nodeTypes, limit: 0, linked: true }
 42 |   });
 43 | 
 44 |   const hasArticles = (articles?.total || 0) >= ARTICLE_THRESHOLD;
 45 |   const hasLinks = (links?.total || 0) >= LINKS_THRESHOLD;
 46 | 
 47 |   const secondaryTab = hasLinks ? 'graph' : 'pairs';
 48 |   const defaultTab = hasArticles ? secondaryTab : 'articles';
 49 |   // Mirrors the `disabled` props of the tabs below; a tab requested via the
 50 |   // URL is only honoured when it exists and is enabled.
 51 |   const tabEnabled: Record<string, boolean> = { graph: hasLinks, pairs: hasArticles, articles: true };
 52 |   const requestedTab = params.get('view');
 53 |   const activeTab = (requestedTab !== null && tabEnabled[requestedTab] === true) ? requestedTab : defaultTab;
 54 | 
 55 |   if (error !== undefined) {
 56 |     return <ErrorSection title="Could not load the story." />
 57 |   }
 58 |   if (story === undefined || articles === undefined || links === undefined || isLoading) {
 59 |     return <SectionLoading />
 60 |   }
 61 | 
 62 |   // Switch tabs by updating `view` while keeping all other query parameters.
 63 |   const setView = (view: string) => {
 64 |     const paramsObj = Object.fromEntries(params.entries());
 65 |     setParams({ ...paramsObj, view });
 66 |   }
 67 | 
 68 |   return (
 69 |     <div>
 70 |       <ScreenHeading title={<><Icon icon={STORY_ICON} size={IconSize.LARGE} /> {story.title}</>}>
 71 |         <ButtonGroup>
 72 |           {(hasArticles && hasLinks) && (
 73 |             <AnchorButton intent={Intent.PRIMARY} icon={LINKER_ICON} href={`/stories/${story.id}/linker`}>
 74 |               Build web
 75 |             </AnchorButton>
 76 |           )}
 77 |           <Button icon={ARTICLE_ICON} intent={hasArticles ? Intent.NONE : Intent.PRIMARY} onClick={() => setShowImport(true)}>
 78 |             Add article
 79 |           </Button>
 80 |           <Button intent={Intent.NONE} icon="edit" onClick={() => setShowEdit(true)}>
 81 |             Edit
 82 |           </Button>
 83 |           <Popover2
 84 |             placement="bottom-start"
 85 |             popupKind={PopupKind.MENU}
 86 |             minimal
 87 |             content={
 88 |               <Menu>
 89 |                 <MenuItem icon="download" text="Aleph" label="(FollowTheMoney)" href={`${API_URL}/stories/${story.id}/ftm`} target="_blank" download />
 90 |                 <MenuItem icon="download" text="Gephi" label="(GEXF)" href={`${API_URL}/stories/${story.id}/gexf`} target="_blank" download />
 91 |               </Menu>
 92 |             }
 93 |           >
 94 |             <Button intent={Intent.NONE} icon="download">
 95 |               Export
 96 |             </Button>
 97 |           </Popover2>
 98 |           <Button intent={Intent.DANGER} icon="trash" onClick={() => setShowDelete(true)}>
 99 |             Delete
100 |           </Button>
101 |         </ButtonGroup>
102 |         <StoryArticleImportDialog storyId={story.id} isOpen={showImport} onClose={() => setShowImport(false)} />
103 |         <StoryUpdateDialog isOpen={showEdit} onClose={() => setShowEdit(false)} story={story} />
104 |         <StoryDeleteDialog isOpen={showDelete} onClose={() => setShowDelete(false)} story={story} />
105 |       </ScreenHeading>
106 |       <Tabs id="storyView" renderActiveTabPanelOnly selectedTabId={activeTab} onChange={(e) => setView(e.toString())}>
107 |         <Tab id="graph"
108 |           title={
109 |             <>
110 |               Network graph
111 |             </>
112 |           }
113 |           disabled={!hasLinks}
114 |           panel={
115 |             <ScreenContent>
116 |               <StoryGraph story={story} />
117 |             </ScreenContent>
118 |           }
119 |         />
120 |         <Tab id="pairs"
121 |           title={
122 |             <>
123 |               Links
124 |               <NumericTag value={links?.total} className="tab-tag" />
125 |             </>
126 |           }
127 |           disabled={!hasArticles}
128 |           panel={
129 |             <ScreenContent>
130 |               <StoryPairs story={story} />
131 |             </ScreenContent>
132 |           }
133 |         />
134 |         <Tab id="articles"
135 |           title={
136 |             <>
137 |               Articles
138 |               <NumericTag value={articles?.total} className="tab-tag" />
139 |             </>
140 |           }
141 |           panel={
142 |             <ScreenContent>
143 |               <StoryArticles story={story} />
144 |             </ScreenContent>
145 |           }
146 |         />
147 |       </Tabs>
148 |     </div>
149 |   )
150 | }
151 | 


--------------------------------------------------------------------------------
/frontend/src/selectors.ts:
--------------------------------------------------------------------------------
 1 | import { useSelector } from "react-redux";
 2 | import { useFetchOntologyQuery } from "./services/ontology";
 3 | import { RootState } from "./store";
 4 | 
 5 | 
 6 | 
 7 | /**
 8 |  * Hook returning the names of all ontology cluster types that are not listed
 9 |  * in the `hiddenNodeTypes` config setting, in ontology order.
10 |  * Returns an empty array while the ontology has not been loaded.
11 |  */
12 | export function useNodeTypes(): string[] {
13 |   const ontologyQuery = useFetchOntologyQuery();
14 |   const hidden = useSelector((state: RootState) => state.config.hiddenNodeTypes);
15 |   const visible: string[] = [];
16 |   if (ontologyQuery.data !== undefined) {
17 |     for (const clusterType of ontologyQuery.data.cluster_types) {
18 |       const isHidden = hidden.indexOf(clusterType.name) !== -1;
19 |       if (!isHidden) {
20 |         visible.push(clusterType.name);
21 |       }
22 |     }
23 |   }
24 |   return visible;
25 | }


--------------------------------------------------------------------------------
/frontend/src/services/articles.ts:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react';
 3 | 
 4 | import { API_URL } from '../constants';
 5 | import type { IArticle, IArticleDetails, IListingResponse } from '../types';
 6 | 
 7 | // Appends `query` to `url` as a query string.
 8 | const withQuery = (url: string, query: any) => queryString.stringifyUrl({ url, query });
 9 | 
10 | /**
11 |  * RTK Query API slice for articles: a single-article lookup by id and a
12 |  * parameterised listing. Both endpoints provide the `Article` cache tag.
13 |  */
14 | export const articlesApi = createApi({
15 |   reducerPath: 'articlesApi',
16 |   tagTypes: ['Article'],
17 |   baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
18 |   endpoints: (builder) => {
19 |     const fetchArticle = builder.query<IArticleDetails, string>({
20 |       query: (articleId) => `articles/${articleId}`,
21 |       providesTags: ['Article'],
22 |     });
23 |     const fetchArticleListing = builder.query<IListingResponse<IArticle>, any>({
24 |       // The argument object is passed through unchanged as query parameters.
25 |       query: (params) => withQuery('articles', params),
26 |       providesTags: ['Article'],
27 |     });
28 |     return { fetchArticle, fetchArticleListing };
29 |   },
30 | })
31 | 
32 | export const { useFetchArticleQuery, useFetchArticleListingQuery } = articlesApi


--------------------------------------------------------------------------------
/frontend/src/services/clusters.ts:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
 3 | 
 4 | import { API_URL } from '../constants'
 5 | import { ICluster, IClusterDetails, IClusterMerge, IListingResponse, IRelatedCluster, ISimilarCluster } from '../types'
 6 | 
 7 | // Argument of the per-cluster listing endpoints: the cluster to start from
 8 | // plus optional query-string parameters.
 9 | type IClusterQueryParams = {
10 |   clusterId: string,
11 |   params?: any
12 | }
13 | 
14 | // Appends `query` to `url` as a query string.
15 | const withQuery = (url: string, query: any) => queryString.stringifyUrl({ url, query });
16 | 
17 | /**
18 |  * RTK Query API slice for clusters: single lookup, listing, the similar and
19 |  * related listings of one cluster, and the merge mutation. Merging
20 |  * invalidates both the `Cluster` and `Link` tags of this slice.
21 |  */
22 | export const clustersApi = createApi({
23 |   reducerPath: 'clustersApi',
24 |   tagTypes: ['Cluster', 'Link'],
25 |   refetchOnMountOrArgChange: true,
26 |   baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
27 |   endpoints: (builder) => ({
28 |     fetchCluster: builder.query<IClusterDetails, string>({
29 |       query: (clusterId) => `clusters/${clusterId}`,
30 |       providesTags: ['Cluster'],
31 |     }),
32 |     fetchClusterListing: builder.query<IListingResponse<ICluster>, any>({
33 |       query: (params) => withQuery('clusters', params),
34 |       providesTags: ['Cluster'],
35 |     }),
36 |     fetchSimilarClusterListing: builder.query<IListingResponse<ISimilarCluster>, IClusterQueryParams>({
37 |       query: (arg) => withQuery(`clusters/${arg.clusterId}/similar`, arg.params),
38 |       providesTags: ['Cluster', 'Link'],
39 |     }),
40 |     fetchRelatedClusterListing: builder.query<IListingResponse<IRelatedCluster>, IClusterQueryParams>({
41 |       query: (arg) => withQuery(`clusters/${arg.clusterId}/related`, arg.params),
42 |       providesTags: ['Cluster', 'Link'],
43 |     }),
44 |     mergeClusters: builder.mutation<IClusterDetails, IClusterMerge>({
45 |       // The merge endpoint lives under `links/`, not `clusters/`.
46 |       query: (merge) => ({ url: 'links/_merge', method: 'POST', body: merge }),
47 |       invalidatesTags: ['Cluster', 'Link'],
48 |     }),
49 |   }),
50 | })
51 | 
52 | export const {
53 |   useFetchClusterListingQuery,
54 |   useFetchClusterQuery,
55 |   useFetchSimilarClusterListingQuery,
56 |   useFetchRelatedClusterListingQuery,
57 |   useMergeClustersMutation
58 | } = clustersApi


--------------------------------------------------------------------------------
/frontend/src/services/config.ts:
--------------------------------------------------------------------------------
 1 | import { createSlice } from '@reduxjs/toolkit'
 2 | import type { PayloadAction } from '@reduxjs/toolkit'
 3 | 
 4 | // Shape of the user configuration kept in the `config` slice.
 5 | export interface ConfigState {
 6 |   // Names of node types that should be filtered out.
 7 |   hiddenNodeTypes: string[]
 8 | }
 9 | 
10 | // Defaults: no node type is hidden.
11 | const initialState: ConfigState = {
12 |   // hiddenNodeTypes: ['LOC'],
13 |   hiddenNodeTypes: [],
14 | }
15 | 
16 | /**
17 |  * Redux slice holding the user configuration.
18 |  *
19 |  * - `setHiddenNodeTypes` replaces the list of hidden node types.
20 |  * - `hydrate` replaces the whole state with the payload layered over the
21 |  *   defaults, so keys missing from the payload fall back to `initialState`.
22 |  */
23 | export const configSlice = createSlice({
24 |   name: 'config',
25 |   initialState,
26 |   reducers: {
27 |     setHiddenNodeTypes(state, { payload }: PayloadAction<string[]>) {
28 |       state.hiddenNodeTypes = payload;
29 |     },
30 |     // The previous state is deliberately ignored.
31 |     hydrate: (_previous, { payload }: PayloadAction<ConfigState>) => ({
32 |       ...initialState,
33 |       ...payload,
34 |     }),
35 |   },
36 | })
37 | 
38 | export const { setHiddenNodeTypes, hydrate } = configSlice.actions
39 | 
40 | export default configSlice.reducer


--------------------------------------------------------------------------------
/frontend/src/services/links.ts:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
 3 | 
 4 | import { API_URL } from '../constants'
 5 | import { IClusterDetails, ILink, ILinkPredict, ILinkPrediction, IListingResponse, IUntagArticle } from '../types'
 6 | 
 7 | 
 8 | // Appends `query` to `url` as a query string.
 9 | const withQuery = (url: string, query: any) => queryString.stringifyUrl({ url, query });
10 | 
11 | // Request description for a JSON POST to `url`.
12 | const post = (url: string, body: unknown) => ({ url, method: 'POST', body });
13 | 
14 | /**
15 |  * RTK Query API slice for links: listing, link prediction, and the
16 |  * save / explode / untag mutations. Every mutation invalidates both the
17 |  * `Cluster` and `Link` tags of this slice.
18 |  */
19 | export const linksApi = createApi({
20 |   reducerPath: 'linksApi',
21 |   tagTypes: ['Cluster', 'Link'],
22 |   refetchOnMountOrArgChange: true,
23 |   baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
24 |   endpoints: (builder) => ({
25 |     fetchLinks: builder.query<IListingResponse<ILink>, any>({
26 |       query: (params) => withQuery('links', params),
27 |       providesTags: ['Link'],
28 |     }),
29 |     fetchPrediction: builder.query<ILinkPrediction, ILinkPredict>({
30 |       // A shallow copy of the typed argument is used as the query record.
31 |       query: (params) => withQuery('links/_predict', { ...params }),
32 |       providesTags: ['Cluster', 'Link'],
33 |     }),
34 |     saveLink: builder.mutation<ILink, Partial<ILink>>({
35 |       query: (link) => post('links', link),
36 |       invalidatesTags: ['Cluster', 'Link'],
37 |     }),
38 |     explodeCluster: builder.mutation<IClusterDetails, string>({
39 |       query: (clusterId) => post('links/_explode', { cluster: clusterId }),
40 |       invalidatesTags: ['Cluster', 'Link'],
41 |     }),
42 |     untagArticle: builder.mutation<IClusterDetails, IUntagArticle>({
43 |       query: (untag) => post('links/_untag', untag),
44 |       invalidatesTags: ['Cluster', 'Link'],
45 |     }),
46 |   }),
47 | })
48 | 
49 | export const {
50 |   useSaveLinkMutation,
51 |   useFetchPredictionQuery,
52 |   useExplodeClusterMutation,
53 |   useUntagArticleMutation,
54 |   useFetchLinksQuery,
55 | } = linksApi


--------------------------------------------------------------------------------
/frontend/src/services/ontology.ts:
--------------------------------------------------------------------------------
 1 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
 2 | 
 3 | import { API_URL } from '../constants'
 4 | import type { IOntology } from '../types'
 5 | 
 6 | /**
 7 |  * RTK Query API slice with a single argument-less query that fetches the
 8 |  * ontology from the `ontology` endpoint. No cache tags are declared.
 9 |  */
10 | export const ontologyApi = createApi({
11 |   reducerPath: 'ontologyApi',
12 |   baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
13 |   endpoints: (builder) => {
14 |     const fetchOntology = builder.query<IOntology, void>({ query: () => 'ontology' });
15 |     return { fetchOntology };
16 |   },
17 | })
18 | 
19 | export const { useFetchOntologyQuery } = ontologyApi


--------------------------------------------------------------------------------
/frontend/src/services/sites.ts:
--------------------------------------------------------------------------------
 1 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
 2 | 
 3 | import { API_URL } from '../constants'
 4 | import type { IListingResponse, ISite } from '../types'
 5 | 
 6 | /**
 7 |  * RTK Query API slice with a single argument-less query that fetches the
 8 |  * site listing from the `sites` endpoint. No cache tags are declared.
 9 |  */
10 | export const sitesApi = createApi({
11 |   reducerPath: 'sitesApi',
12 |   baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
13 |   endpoints: (builder) => {
14 |     const fetchSites = builder.query<IListingResponse<ISite>, void>({ query: () => 'sites' });
15 |     return { fetchSites };
16 |   },
17 | })
18 | 
19 | export const { useFetchSitesQuery } = sitesApi


--------------------------------------------------------------------------------
/frontend/src/services/stories.ts:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
 3 | 
 4 | import { API_URL } from '../constants'
 5 | import type { IArticle, IClusterPair, IListingResponse, IStory, IStoryArticleImport, IStoryArticleToggle, IStoryMutation } from '../types'
 6 | 
 7 | // Appends `query` to `url` as a query string.
 8 | const withQuery = (url: string, query: any) => queryString.stringifyUrl({ url, query });
 9 | 
10 | // Request description for a JSON POST to `url`.
11 | const post = (url: string, body: unknown) => ({ url, method: 'POST', body });
12 | 
13 | /**
14 |  * RTK Query API slice for stories: story lookup and listing, the cluster
15 |  * pairs and GEXF graph of a story, and the create / update / delete /
16 |  * article-toggle / article-import mutations.
17 |  *
18 |  * Tags are local to this slice: story mutations invalidate `Story`, and the
19 |  * two article mutations additionally invalidate `Article`.
20 |  */
21 | export const storiesApi = createApi({
22 |   reducerPath: 'storiesApi',
23 |   tagTypes: ['Story', 'Article', 'Cluster', 'Link'],
24 |   refetchOnMountOrArgChange: true,
25 |   baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
26 |   endpoints: (builder) => ({
27 |     fetchStory: builder.query<IStory, string>({
28 |       query: (storyId) => `stories/${storyId}`,
29 |       providesTags: ['Story'],
30 |     }),
31 |     fetchStoryListing: builder.query<IListingResponse<IStory>, any>({
32 |       query: (params) => withQuery('stories', params),
33 |       providesTags: ['Story'],
34 |     }),
35 |     fetchStoryPairs: builder.query<IListingResponse<IClusterPair>, any>({
36 |       query: (arg) => withQuery(`stories/${arg.storyId}/pairs`, arg.params),
37 |       providesTags: ['Story', 'Cluster', 'Link'],
38 |     }),
39 |     fetchStoryGraph: builder.query<string, any>({
40 |       query: (arg) => ({
41 |         url: withQuery(`stories/${arg.storyId}/gexf`, arg.params),
42 |         // The body is read as raw text instead of being parsed as JSON.
43 |         responseHandler: (response) => response.text(),
44 |       }),
45 |       providesTags: ['Story', 'Cluster', 'Link'],
46 |     }),
47 |     createStory: builder.mutation<IStory, IStoryMutation>({
48 |       query: (story) => post('stories', story),
49 |       invalidatesTags: ['Story'],
50 |     }),
51 |     updateStory: builder.mutation<IStory, IStory>({
52 |       // Updates are sent as POST to the story's own URL.
53 |       query: (story) => post(`stories/${story.id}`, story),
54 |       invalidatesTags: ['Story'],
55 |     }),
56 |     deleteStory: builder.mutation<void, number>({
57 |       query: (storyId) => ({ url: `stories/${storyId}`, method: 'DELETE' }),
58 |       invalidatesTags: ['Story'],
59 |     }),
60 |     toggleStoryArticle: builder.mutation<IStory, IStoryArticleToggle>({
61 |       query: (data) => post(`stories/${data.story}/articles`, { article: data.article }),
62 |       invalidatesTags: ['Story', 'Article'],
63 |     }),
64 |     importStoryArticle: builder.mutation<IArticle, IStoryArticleImport>({
65 |       query: (data) => post(`stories/${data.story}/articles/import-url`, { url: data.url }),
66 |       invalidatesTags: ['Story', 'Article'],
67 |     }),
68 |   }),
69 | })
70 | 
71 | export const {
72 |   useFetchStoryListingQuery,
73 |   useFetchStoryQuery,
74 |   useFetchStoryGraphQuery,
75 |   useCreateStoryMutation,
76 |   useUpdateStoryMutation,
77 |   useDeleteStoryMutation,
78 |   useToggleStoryArticleMutation,
79 |   useImportStoryArticleMutation,
80 |   useFetchStoryPairsQuery,
81 | } = storiesApi


--------------------------------------------------------------------------------
/frontend/src/store.ts:
--------------------------------------------------------------------------------
 1 | import { configureStore } from '@reduxjs/toolkit'
 2 | import { setupListeners } from '@reduxjs/toolkit/query'
 3 | import { articlesApi } from './services/articles'
 4 | import { clustersApi } from './services/clusters'
 5 | import { linksApi } from './services/links'
 6 | import { ontologyApi } from './services/ontology'
 7 | import { sitesApi } from './services/sites'
 8 | import { storiesApi } from './services/stories'
 9 | import { configSlice, hydrate } from './services/config'
10 | 
11 | 
12 | export const store = configureStore({ // root store: one reducer per API slice + `config`
13 |   reducer: {
14 |     [ontologyApi.reducerPath]: ontologyApi.reducer,
15 |     [articlesApi.reducerPath]: articlesApi.reducer,
16 |     [storiesApi.reducerPath]: storiesApi.reducer,
17 |     [clustersApi.reducerPath]: clustersApi.reducer,
18 |     [linksApi.reducerPath]: linksApi.reducer,
19 |     [sitesApi.reducerPath]: sitesApi.reducer,
20 |     config: configSlice.reducer,
21 |   },
22 |   middleware: (getDefaultMiddleware) => getDefaultMiddleware().concat( // API middleware follows the defaults
23 |     ontologyApi.middleware,
24 |     articlesApi.middleware,
25 |     storiesApi.middleware,
26 |     clustersApi.middleware,
27 |     linksApi.middleware,
28 |     sitesApi.middleware,
29 |   ),
30 | })
31 | 
32 | // Persist the `config` slice under the localStorage key 'config'.
33 | // The subscriber fires on every store update (including all RTK Query
34 | // traffic), so the write is skipped unless the slice object itself changed.
35 | let lastPersistedConfig = store.getState().config
36 | store.subscribe(() => {
37 |   const { config } = store.getState()
38 |   if (config === lastPersistedConfig) {
39 |     return
40 |   }
41 |   lastPersistedConfig = config
42 |   try {
43 |     localStorage.setItem('config', JSON.stringify(config))
44 |   }
45 |   catch (e) {
46 |     // Storage may be full or unavailable; persistence is best-effort.
47 |     console.log(e)
48 |   }
49 | })
50 | 
51 | setupListeners(store.dispatch)
52 | 
53 | /**
54 |  * Read the persisted configuration from localStorage.
55 |  *
56 |  * Returns the parsed object, or `undefined` when nothing is stored, the
57 |  * stored text is not valid JSON, or the value does not have the expected
58 |  * shape (a plain object whose `hiddenNodeTypes`, if present, is an array).
59 |  */
60 | const getConfig = () => {
61 |   try {
62 |     const persistedState = localStorage.getItem('config')
63 |     if (!persistedState) {
64 |       return undefined
65 |     }
66 |     const parsed = JSON.parse(persistedState)
67 |     const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
68 |     if (!isPlainObject) {
69 |       return undefined
70 |     }
71 |     // A corrupted list would break consumers that call array methods on it.
72 |     if (parsed.hiddenNodeTypes !== undefined && !Array.isArray(parsed.hiddenNodeTypes)) {
73 |       return undefined
74 |     }
75 |     return parsed
76 |   }
77 |   catch (e) {
78 |     console.log(e)
79 |   }
80 | }
81 | 
82 | // Restore the persisted configuration once at startup.
83 | const storedConfig = getConfig()
84 | if (storedConfig) {
85 |   store.dispatch(hydrate(storedConfig))
86 | }
87 | 
88 | export type RootState = ReturnType<typeof store.getState>
89 | export type AppDispatch = typeof store.dispatch
90 | 
91 | 


--------------------------------------------------------------------------------
/frontend/src/styles/App.scss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/frontend/src/styles/App.scss


--------------------------------------------------------------------------------
/frontend/src/styles/Article.module.scss:
--------------------------------------------------------------------------------
 1 | @import "./variables.scss";
 2 | 
 3 | .articleDrawer { // $spacer margin on all sides; scrolls vertically when content overflows
 4 |     margin: $spacer;
 5 |     overflow-y: auto;
 6 | }
 7 | 
 8 | .articleText { // sets no properties itself; only scopes the global markup classes below
 9 |     // font-size: 1.3em;
10 |     // font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
11 |     // line-height: 1.4;
12 |     // :global keeps the nested class names un-hashed, so plain `markup*` classes inside .articleText match
13 |     :global {
14 |         .markup { // shared box: the 2px padding is cancelled by the -2px margins
15 |             margin-top: -2px;
16 |             margin-bottom: -2px;
17 |             padding-top: 2px;
18 |             padding-bottom: 2px;
19 |             display: inline-block;
20 |         }
21 | 
22 |         .markup1 { // markup1..markup5 differ only in background colour
23 |             background-color: $blue5;
24 |         }
25 | 
26 |         .markup2 {
27 |             background-color: $green5;
28 |         }
29 | 
30 |         .markup3 {
31 |             background-color: $orange5;
32 |         }
33 | 
34 |         .markup4 {
35 |             background-color: $indigo5;
36 |         }
37 | 
38 |         .markup5 {
39 |             background-color: $lime5;
40 |         }
41 |     }
42 | }


--------------------------------------------------------------------------------
/frontend/src/styles/Cluster.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 | 
3 | .clusterDrawer { // same box as .articleDrawer in Article.module.scss: $spacer margin, vertical scroll on overflow
4 |     margin: $spacer;
5 |     overflow-y: auto;
6 | }


--------------------------------------------------------------------------------
/frontend/src/styles/Footer.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 | 
3 | .footer { // spacing is expressed in multiples of $spacer; text colour is $gray1
4 |     margin-top: $spacer;
5 |     padding-top: $spacer * 2;
6 |     padding-bottom: $spacer * 3;
7 |     // background-color: $dark-gray3;
8 |     color: $gray1;
9 | }


--------------------------------------------------------------------------------
/frontend/src/styles/Layout.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 | 
3 | .page { // at least 90% of the viewport height, even when the content is shorter
4 |     min-height: 90vh;
5 | }
6 | // .content: leaves two spacers of room above the block it is applied to
7 | .content {
8 |     margin-top: $spacer * 2;
9 | }


--------------------------------------------------------------------------------
/frontend/src/styles/Linker.module.scss:
--------------------------------------------------------------------------------
 1 | @import "./variables.scss";
 2 | 
 3 | // .articlePreview {
 4 | //     max-height: 70vh;
 5 | //     overflow-y: scroll;
 6 | // }
 7 | 
// One spacer of separation below the banner.
.banner {
    margin-bottom: $spacer;
}

// Centered, enlarged text; icons inside it are vertically centered.
.phrase {
    text-align: center;
    font-size: 1.4em;
    // background-color: $light-gray2;
    // background-color: white;
    // padding-top: $spacer;
    margin-bottom: $spacer * 2;
    // border-radius: $spacer;

    // :global keeps the .bp4-icon class name un-scoped inside this
    // CSS module so it matches the icon class as written.
    :global {
        .bp4-icon {
            vertical-align: middle;
        }
    }
}

// NOTE(review): this rule currently has no live declarations (both are
// commented out) — confirm whether the class is still referenced.
.phraseSpan {
    // text-decoration: underline;
    // font-weight: bold;
}


--------------------------------------------------------------------------------
/frontend/src/styles/Navbar.module.scss:
--------------------------------------------------------------------------------
// Removes horizontal padding; !important is used to win over competing
// rules (presumably the Blueprint navbar defaults — confirm).
.navContainered {
    padding-left: 0 !important;
    padding-right: 0 !important;
}


--------------------------------------------------------------------------------
/frontend/src/styles/Story.module.scss:
--------------------------------------------------------------------------------
 1 | @import "./variables.scss";
 2 | 
// Wrapping flex row; free space is distributed between the cards.
.storyCardArea {
    display: flex;
    // flex-direction: column;
    flex-wrap: wrap;
    justify-content: space-between;
}

// Each card starts at 30% of the row, may shrink but never grows.
.storyCard {
    flex: 0 1 30%;
    // margin-left: $spacer;
    margin-bottom: $spacer * 2;
}

// Generous vertical spacing above and below the block.
.nomNom {
    margin-top: $spacer * 4;
    margin-bottom: $spacer * 6;
}


--------------------------------------------------------------------------------
/frontend/src/styles/index.scss:
--------------------------------------------------------------------------------
 1 | @import "./variables.scss";
 2 | 
 3 | @import "~normalize.css";
 4 | @import "~@blueprintjs/core/lib/css/blueprint.css";
 5 | @import "~@blueprintjs/icons/lib/css/blueprint-icons.css";
 6 | @import "~@blueprintjs/select/lib/css/blueprint-select.css";
 7 | @import "~@blueprintjs/popover2/lib/css/blueprint-popover2.css";
 8 | @import "@react-sigma/core/lib/react-sigma.min.css";
 9 | 
// Horizontally centered container capped at $max-container-width.
.page-container {
    margin: 0 auto;
    max-width: $max-container-width;
    // margin-left: $spacer * 2;
    // margin-right: $spacer * 2;
    line-height: 1.4;
}

// Bottom padding of one spacer.
.section {
    // padding-left: $spacer;
    // padding-right: $spacer;
    padding-bottom: $spacer * 1;
}

// Document-wide defaults: smooth scrolling, base font size, text colour
// and page background.
html {
    scroll-behavior: smooth;
    font-size: $default-font-size;
    color: $default-text-color;
    background-color: $light-gray4;
}

// Opt-in full-width tables.
table.wide {
    width: 100%;
}

// Drops the bottom margin of .bp4-control elements placed in table cells.
td .bp4-control,
th .bp4-control {
    margin-bottom: 0 !important;
    // padding: 0 !important;
}

// Right-aligns numeric table cells and headers.
td.numeric,
th.numeric {
    text-align: right !important;

    // button.bp4-button {
    //     float: right;
    // }
}

// Full-width flex row that hosts the page columns.
.page-column-area {
    display: flex;
    flex-direction: row;
    width: 100%;
}
55 | 
// Column that takes between 30% and 50% of its container, with a gutter
// of two spacers on the right.
.page-column {
    min-width: 30%;
    max-width: 50%;
    padding-right: 2 * $spacer;
}

// Full-width column with the same right-hand gutter.
.page-column-wide {
    width: 100%;
    padding-right: 2 * $spacer;
}
66 | 
// Links in the navbar heading inherit the heading colour and are never
// underlined, including on hover.
.bp4-navbar-heading a,
.bp4-navbar-heading a:hover {
    color: inherit;
    text-decoration: none;
}

// Small gap to the right of an icon.
.spaced-icon {
    margin-right: 0.3em;
}

// Tag shown next to a tab label.
.tab-tag {
    margin-left: $spacer * 0.3;
    // background-color: $gray3;
    background-color: $blue2;
}

// Tags under an aria-disabled ancestor switch to gray.
[aria-disabled=true] .tab-tag {
    background-color: $gray3;
}

// Very high stacking order for elements carrying this class.
.portal-z-top {
    z-index: 9999;
}

// Headings clear preceding floats; icons inside them are vertically
// centered.
h1 {
    clear: both;
    padding-top: $spacer;

    .bp4-icon {
        vertical-align: middle;
    }
}


--------------------------------------------------------------------------------
/frontend/src/styles/util.module.scss:
--------------------------------------------------------------------------------
 1 | @import "./variables.scss";
 2 | 
// Floats the block to the right.
.headingActions {
    display: block;
    float: right;
}

// Negative top margin pulls the content up by one spacer.
.screenContent {
    margin-top: -1 * $spacer;
}

// One spacer of separation above the pagination controls.
.pagination {
    margin-top: $spacer;
}
15 | 
// Shared pill styling for the type badges below. A mixin (rather than
// @extend) is used so the generated declarations stay in each rule, in
// their original order.
@mixin typeBadge {
    font-size: 0.8em;
    font-weight: bold;
    display: inline-block;
    padding: $spacer * 0.2;
    padding-left: $spacer * 0.5;
    padding-right: $spacer * 0.5;
    border-radius: $spacer * 0.5;
    color: $light-gray4;
}

// Badge without a background colour of its own.
.clusterType {
    @include typeBadge;
}

// Badge with a dark default background.
.linkType {
    @include typeBadge;
    background-color: $dark-gray3;
}
39 | 
// Background colours keyed by upper-case class name. They are declared
// after .linkType, so they win over its default background when both
// classes are on the same element.
.UNRELATED {
    background-color: $gray3;
}

.LOCATED,
.WITHIN {
    background-color: $forest3;
}

.ANTAGONIST {
    background-color: $vermilion3;
}


--------------------------------------------------------------------------------
/frontend/src/styles/variables.scss:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables";
2 | 
// Project-level design tokens layered on top of the Blueprint variables
// imported above.
// $max-text-width: 60em;
$max-container-width: 950px;
// Base spacing unit, taken from the Blueprint grid size.
$spacer: $pt-grid-size;
$default-font-size: 15px;
$default-text-color: $gray1;


--------------------------------------------------------------------------------
/frontend/src/types.ts:
--------------------------------------------------------------------------------
  1 | 
/** Fields shared by every response: a status and an optional debug message. */
export interface IResponse {
  status: string
  debug_msg?: string
}

/** Paged listing: the window (`limit`, `offset`), the `total` and one page of `results`. */
export interface IListingResponse<T> extends IResponse {
  limit: number
  offset: number
  total: number
  results: T[]
}
 13 | 
/** Article summary without the body text. */
export interface IArticle {
  id: string
  title: string
  site: string
  url: string
  language: string
  // Numeric counters (presumably tag and mention counts — confirm against the API).
  tags: number
  mentions: number
}

/** Article summary plus its full text. */
export interface IArticleDetails extends IArticle {
  text: string
}
 27 | 
/** Editable story fields. */
export interface IStoryMutation {
  title: string
  summary: string
}

/** A story: the editable fields plus its numeric ID. */
export interface IStory extends IStoryMutation {
  id: number
}

/** Payload pairing a story ID with an article ID. */
export interface IStoryArticleToggle {
  story: number
  article: string
}

/** Payload pairing a story ID with an article URL. */
export interface IStoryArticleImport {
  story: number
  url: string
}
 46 | 
/** Minimal cluster description: ID, label and type name. */
export interface IClusterBase {
  id: string
  label: string
  type: string
}

/** Cluster with an article counter. */
export interface ICluster extends IClusterBase {
  articles: number
}

/** Cluster with its article counter and a list of labels. */
export interface IClusterDetails extends ICluster {
  labels: string[]
}
 60 | 
 61 | export interface IClusterPair {
 62 |   left: IClusterBase,
 63 |   right: IClusterBase,
 64 |   link_types: string[]
 65 |   articles: number
 66 | }
 67 | 
/** Cluster annotated with an article counter and link type names. */
export interface IRelatedCluster extends IClusterBase {
  articles: number
  link_types: string[]
}

/** Cluster annotated with a list of common values and their count. */
export interface ISimilarCluster extends IClusterBase {
  common: string[]
  common_count: number
}

/** Merge payload: one anchor ID and the list of other IDs. */
export interface IClusterMerge {
  anchor: string
  other: string[]
}

/** Payload pairing a cluster ID with an article ID. */
export interface IUntagArticle {
  cluster: string
  article: string
}
 87 | 
/** Payload naming an anchor and one other ID. */
export interface ILinkPredict {
  anchor: string
  other: string
}

/** Site name with an article counter. */
export interface ISite {
  site: string
  articles: number
}

/** Typed link between a source and a target, each with its cluster ID. */
export interface ILink {
  source: string
  source_cluster: string
  target: string
  target_cluster: string
  type: string
  user?: string
  timestamp?: string
}


/** Link type name together with full details of both clusters. */
export interface ILinkPrediction {
  source: IClusterDetails
  target: IClusterDetails
  type: string
}
114 | 
115 | 
/** Definition of one link type. */
export interface ILinkType {
  name: string
  directed: boolean
  label: string
  phrase: string
  source_type: string
  target_type: string
}

/** Definition of one cluster type, including display colour and icon. */
export interface IClusterType {
  name: string
  label: string
  plural: string
  parent?: string
  color: string
  icon: string
}

/** The full set of link type and cluster type definitions. */
export interface IOntology {
  link_types: ILinkType[]
  cluster_types: IClusterType[]
}
138 | 


--------------------------------------------------------------------------------
/frontend/src/util.ts:
--------------------------------------------------------------------------------
 1 | import queryString from 'query-string';
 2 | import { useSearchParams } from 'react-router-dom';
 3 | // import { useLocation } from 'react-router-dom';
 4 | 
 5 | import { IClusterBase } from "./types";
 6 | 
 7 | 
 8 | export function asString(value: any): string | undefined {
 9 |   if (!Array.isArray(value)) {
10 |     value = [value];
11 |   }
12 |   for (let item of value) {
13 |     if (item === null || item === undefined) {
14 |       return undefined
15 |     }
16 |     item = item + ''
17 |     item = item.trim()
18 |     if (item.length > 0) {
19 |       return item;
20 |     }
21 |   }
22 |   return undefined;
23 | }
24 | 
/**
 * Return a copy of `items` with `value` toggled: appended when absent,
 * otherwise with its first occurrence removed. `items` is not modified.
 */
export function listToggle<T>(items: T[], value: T): T[] {
  const position = items.indexOf(value);
  if (position < 0) {
    return [...items, value];
  }
  return [...items.slice(0, position), ...items.slice(position + 1)];
}
35 | 
/** Build the in-app path of a cluster from its ID. */
export function getClusterLink(cluster: IClusterBase): string {
  const { id } = cluster;
  return `/clusters/${id}`;
}
39 | 
/**
 * Build a linker URL for `anchor`.
 *
 * Without `other` the URL targets `/linker/related` and carries the anchor
 * and story; with `other` it targets `/linker` and also carries the other
 * cluster's ID.
 */
export function getLinkLoomLink(anchor: IClusterBase, other?: IClusterBase, story?: number): string {
  const target = other === undefined
    ? { 'url': `/linker/related`, 'query': { anchor: anchor.id, story: story } }
    : { 'url': `/linker`, 'query': { anchor: anchor.id, other: other.id, story: story } };
  return queryString.stringifyUrl(target);
}
52 | 
/**
 * Read `<prefix>.limit` and `<prefix>.offset` from the current search
 * parameters.
 *
 * A missing, non-numeric or negative parameter falls back to its default
 * (`limit` for the limit, 0 for the offset), so a hand-edited URL cannot
 * produce `NaN` or negative paging values.
 */
export function useListingPagination(prefix: string, limit: number = 15) {
  const [params] = useSearchParams();
  const readCount = (name: string, fallback: number): number => {
    const parsed = parseInt(params.get(`${prefix}.${name}`) || '', 10);
    return isNaN(parsed) || parsed < 0 ? fallback : parsed;
  };
  return {
    limit: readCount('limit', limit),
    offset: readCount('offset', 0)
  }
}


--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "es5",
 4 |     "lib": [
 5 |       "dom",
 6 |       "dom.iterable",
 7 |       "esnext"
 8 |     ],
 9 |     "allowJs": true,
10 |     "skipLibCheck": true,
11 |     "esModuleInterop": true,
12 |     "allowSyntheticDefaultImports": true,
13 |     "strict": true,
14 |     "forceConsistentCasingInFileNames": true,
15 |     "noFallthroughCasesInSwitch": true,
16 |     "module": "esnext",
17 |     "moduleResolution": "node",
18 |     "resolveJsonModule": true,
19 |     "isolatedModules": true,
20 |     "noEmit": true,
21 |     "jsx": "preserve"
22 |   },
23 |   "include": [
24 |     "src"
25 |   ]
26 | }
27 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | with open("README.md") as f:
 4 |     long_description = f.read()
 5 | 
 6 | 
 7 | setup(
 8 |     name="storyweb",
 9 |     version="0.0.1",
10 |     description="Extract actor networks from journalistic reporting.",
11 |     long_description=long_description,
12 |     long_description_content_type="text/markdown",
13 |     keywords="ner spacy journalism text nlp graph entities",
14 |     author="Friedrich Lindenberg",
15 |     author_email="friedrich@pudo.org",
16 |     url="https://github.com/opensanctions/storyweb",
17 |     license="MIT",
18 |     packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
19 |     namespace_packages=[],
20 |     include_package_data=True,
21 |     package_data={"": ["storyweb/py.typed", "storyweb/data/*"]},
22 |     zip_safe=False,
23 |     install_requires=[
24 |         "sqlalchemy",
25 |         "psycopg2",
26 |         "pydantic",
27 |         "pydantic_yaml",
28 |         "articledata",
29 |         "pantomime",
30 |         "orjson",
31 |         "fastapi",
32 |         "networkx",
33 |         "spacy",
34 |         "python-levenshtein",
35 |         "followthemoney",
36 |         "languagecodes",
37 |         "fasttext",
38 |         "uvicorn",
39 |         "trafilatura",
40 |         "charset-normalizer",
41 |         "click >= 8.0.0, < 8.2.0",
42 |     ],
43 |     tests_require=[],
44 |     entry_points={
45 |         "console_scripts": [
46 |             "storyweb = storyweb.cli:cli",
47 |         ],
48 |     },
49 |     extras_require={
50 |         "dev": [
51 |             "wheel>=0.29.0",
52 |             "twine",
53 |             "mypy",
54 |             "flake8>=2.6.0",
55 |             "pytest",
56 |             "pytest-cov",
57 |             "coverage>=4.1",
58 |             "types-setuptools",
59 |             "types-requests",
60 |         ],
61 |     },
62 | )
63 | 


--------------------------------------------------------------------------------
/stories.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Wirecard
 3 | 
 4 | * https://www.ft.com/content/284fb1ad-ddc0-45df-a075-0709b36868db
 5 | * https://www.reuters.com/article/us-germany-wirecard-inquiry-timeline-idUSKBN2B811J
 6 | * https://www.accountancycareers.co.uk/2020/06/the-wirecard-scandal-explained/
 7 | * https://www.fxcm.com/markets/insights/the-wirecard-scandal/
 8 | * https://en.wikipedia.org/wiki/Wirecard_scandal
 9 | * https://web.archive.org/web/20200630075122/https://www.ft.com/content/284fb1ad-ddc0-45df-a075-0709b36868db
10 | * https://www.dw.com/en/fincen-turkey-aktif-bank-wirecard/a-54991398
11 | * https://www.straitstimes.com/business/banking/wirecards-creditors-set-for-battle-over-missing-billions
12 | * https://www.straitstimes.com/business/police-raid-wirecard-entities-in-singapore-after-reports-of-fraud-allegations-at-payments
13 | * https://web.archive.org/web/20210426080227/https://www.reuters.com/article/uk-wirecard-accounts-idUKKBN2424I3
14 | * https://web.archive.org/web/20200703095157/https://www.washingtonpost.com/business/how-german-fintech-darling-wirecard-fell-from-grace/2020/06/23/6278e336-b564-11ea-9a1d-d3db1cbe07ce_story.html
15 | * https://web.archive.org/web/20200630052127/https://ffj-online.org/2018/01/23/wirecard-ag-the-great-indian-shareholder-robbery/
16 | * https://www.bbc.com/news/world-europe-63893933
17 | * https://www.bbc.com/news/business-53176003
18 | * https://www.tagesschau.de/wirtschaft/unternehmen/wirecard-aussage-bellenhaus-101.html
19 | * https://www.tagesschau.de/wirtschaft/wirecard-gerichtsprozess-faq-101.html
20 | * https://www.tagesschau.de/wirtschaft/wirecard-prozess-verteidigung-101.html
21 | 
22 | 
23 | ### Russian Laundromat
24 | 
25 | * https://www.occrp.org/en/russianlaundromat/ 
26 | * https://www.occrp.org/en/laundromat/grand-theft-moldova/
27 | * https://www.occrp.org/en/laundromat/poor-town-controlled-billions/ 
28 | * https://www.occrp.org/en/laundromat/follow-the-money-follow-the-banker/
29 | * https://www.occrp.org/en/laundromat/russian-laundromat/ 
30 | * https://www.occrp.org/en/laundromat/the-russian-banks-and-putins-cousin/
31 | * https://www.occrp.org/en/laundromat/kempinski-negotiating-hotel-deal-with-alexander-grigoriev/
32 | * https://www.occrp.org/en/laundromat/the-laundry-cycle-from-start-to-finish/
33 | * https://www.occrp.org/en/laundromat/the-20-billion-bank-in-the-country-of-the-poor/
34 | * https://www.occrp.org/en/laundromat/latvian-bank-was-laundering-tool/
35 | * https://www.theguardian.com/world/2017/mar/20/the-global-laundromat-how-did-it-work-and-who-benefited
36 | * https://www.occrp.org/en/laundromat/the-russian-laundromat-exposed/
37 | * https://www.occrp.org/en/loosetobacco/china-tobacco-goes-global/romanian-prosecutors-probe-china-tobacco-for-millions-of-disappeared-cigarettes
38 | * https://www.occrp.org/en/investigations/bangladeshi-politician-close-to-prime-minister-hasina-secretly-owns-over-4-million-in-new-york-real-estate 
39 | * https://www.occrp.org/en/laundromat/profiles/frivent-GmbH
40 | * https://www.occrp.org/en/laundromat/profiles/handelsgericht-wien 
41 | * https://www.occrp.org/en/laundromat/profiles/rudolf-and-Anna 
42 | * https://www.occrp.org/en/laundromat/profiles/sergey-magin-and-a-japanese-electronics-manufacturer
43 | * https://www.occrp.org/en/laundromat/profiles/the-american-international-school-vienna
44 | * https://www.occrp.org/en/laundromat/profiles/va-intertrading
45 | * https://www.occrp.org/en/laundromat/profiles/dorville-ltd
46 | * https://www.occrp.org/en/laundromat/profiles/intradecom
47 | * https://www.occrp.org/en/laundromat/profiles/montinvest-AD
48 | * https://www.occrp.org/en/laundromat/profiles/pavel-flider-and-trident-international
49 | * https://www.occrp.org/en/laundromat/senior-moldovan-judges-daughter-lived-in-posh-london-flat
50 | * https://www.occrp.org/en/laundromat/regulator-says-latvia-finally-cleaning-up-bad-banks/
51 | * https://www.occrp.org/en/laundromat/the-banks/
52 | * https://www.occrp.org/en/laundromat/two-huge-scams-intersect-at-one-moldovan-businessman/
53 | * https://www.occrp.org/en/laundromat/how-veaceslav-platons-high-life-in-kyiv-came-crashing-down/
54 | * https://www.occrp.org/en/investigations/5617-platon-s-money
55 | * https://www.occrp.org/en/laundromat/the-russian-laundromat-superusers-revealed/
56 | * https://en.wikipedia.org/wiki/Russian_Laundromat
57 | * https://de.wikipedia.org/wiki/Russischer_Waschsalon


--------------------------------------------------------------------------------
/storyweb/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/__init__.py


--------------------------------------------------------------------------------
/storyweb/app.py:
--------------------------------------------------------------------------------
 1 | from fastapi import FastAPI
 2 | from fastapi.middleware.cors import CORSMiddleware
 3 | from fastapi.staticfiles import StaticFiles
 4 | from starlette.exceptions import HTTPException
 5 | from starlette.responses import Response
 6 | from starlette.types import Scope
 7 | 
 8 | from storyweb.routes import links, stories, articles, clusters, system
 9 | 
10 | 
# The ASGI application; the ReDoc page is served at /api/1/docs.
app = FastAPI(
    title="storyweb",
    description="make networks from text",
    redoc_url="/api/1/docs",
)
# CORS: any origin, method and header is accepted, but credentials are not.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Every API router is exposed under the same /api/1 prefix.
app.include_router(system.router, prefix="/api/1")
app.include_router(links.router, prefix="/api/1")
app.include_router(stories.router, prefix="/api/1")
app.include_router(articles.router, prefix="/api/1")
app.include_router(clusters.router, prefix="/api/1")
28 | 
29 | 
class SPAStaticFiles(StaticFiles):
    """Static file handler that falls back to ``index.html`` on a 404.

    Paths starting with ``api`` are excluded from the fallback, and any
    other HTTP error is re-raised unchanged.
    """

    async def get_response(self, path: str, scope: Scope) -> Response:
        """Serve *path*, or ``index.html`` when a non-API path is missing."""
        try:
            response = await super().get_response(path, scope)
        except HTTPException as exc:
            if exc.status_code != 404 or path.startswith("api"):
                raise
            return await super().get_response("index.html", scope)
        return response
39 | 
40 | 
41 | app.mount("/", SPAStaticFiles(directory="frontend/build", html=True), name="frontend")
42 | 


--------------------------------------------------------------------------------
/storyweb/clean.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import Levenshtein
 3 | from typing import List, Optional, Union
 4 | from normality import collapse_spaces
 5 | 
 6 | PREFIXES_RAW_LIST = [
 7 |     "Mr",
 8 |     "Ms",
 9 |     "Mrs",
10 |     "Mister",
11 |     "Miss",
12 |     "Madam",
13 |     "Madame",
14 |     "Monsieur",
15 |     "Mme",
16 |     "Mmme",
17 |     "Herr",
18 |     "Hr",
19 |     "Frau",
20 |     "Fr",
21 |     "The",
22 |     "Fräulein",
23 |     "Senor",
24 |     "Senorita",
25 |     "Sr",
26 |     "Sir",
27 |     "Lady",
28 |     "The",
29 |     "de",
30 |     "of",
31 | ]
32 | PREFIXES_RAW = "|".join(PREFIXES_RAW_LIST)
33 | NAME_PATTERN_ = r"^\W*((%s)\.?\s+)*(?P<term>.*?)([\'’]s)?\W*$"
34 | NAME_PATTERN_ = NAME_PATTERN_ % PREFIXES_RAW
35 | PREFIXES = re.compile(NAME_PATTERN_, re.I | re.U)
36 | 
37 | 
def clean_entity_name(name: str) -> Optional[str]:
    """Strip known prefixes and a possessive suffix, then collapse spaces.

    When the name does not match ``PREFIXES`` at all it is passed to
    ``collapse_spaces`` unchanged.
    """
    matched = PREFIXES.match(name)
    core = name if matched is None else matched.group("term")
    return collapse_spaces(core)
43 | 
44 | 
def most_common(texts: List[str]) -> str:
    """Return the value that occurs most often in *texts*.

    Counting is done in a single pass. When several values share the
    highest count, the one that appears first in *texts* is returned, so
    the result does not depend on hash ordering.

    Raises:
        ValueError: if *texts* is empty.
    """
    from collections import Counter

    if not texts:
        raise ValueError("most_common() requires a non-empty list")
    return Counter(texts).most_common(1)[0][0]
48 | 
49 | 
def pick_name(names: List[Union[str, bytes]]) -> str:
    """Return the result of ``Levenshtein.setmedian`` for *names*."""
    median = Levenshtein.setmedian(names)
    return median
52 | 


--------------------------------------------------------------------------------
/storyweb/cli.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import logging
 3 | from pathlib import Path
 4 | from typing import Optional
 5 | from networkx.readwrite.gexf import write_gexf
 6 | 
 7 | from storyweb.db import create_db, engine
 8 | from storyweb.logic.links import auto_merge, story_merge
 9 | from storyweb.logic.stories import toggle_story_article
10 | from storyweb.logic.graph import generate_graph
11 | from storyweb.parse import import_article_by_url
12 | from storyweb.parse.pipeline import load_articles
13 | 
14 | 
15 | log = logging.getLogger(__name__)
16 | 
# Input files must be readable; directories are rejected.
InPath = click.Path(dir_okay=False, readable=True, path_type=Path)
# Output files are written, not read, so check writability instead.
OutPath = click.Path(
    dir_okay=False,
    writable=True,
    readable=False,
    path_type=Path,
)
19 | 
20 | 
@click.group(help="Storyweb CLI")
def cli() -> None:
    """Root command group; configures INFO-level logging for every command."""
    logging.basicConfig(level=logging.INFO)
24 | 
25 | 
@cli.command("import", help="Import articles into the DB")
@click.argument("articles", type=InPath)
def import_articles(articles: Path) -> None:
    """Load the articles file at *articles* via ``load_articles``.

    Named ``import_articles`` so it is not silently redefined by the
    ``import-url`` command function further down; the CLI command name is
    set explicitly and remains ``import``.
    """
    load_articles(articles)
30 | 
31 | 
@cli.command("import-url", help="Load a single news story by URL")
@click.argument("url", type=str)
@click.option("-s", "--story", "story", help="Story ID", type=int)
def parse(url: str, story: Optional[int] = None) -> None:
    """Import the article at *url* and optionally attach it to a story.

    Everything runs inside one ``engine.begin()`` block. When *story* is
    given, ``story_merge`` is run for the new article and the article is
    added to the story without deleting an existing association.
    """
    with engine.begin() as conn:
        article_id = import_article_by_url(conn, url)
        if story is None:
            return
        story_merge(conn, story, article_id)
        toggle_story_article(conn, story, article_id, delete_existing=False)
41 | 
42 | 
@cli.command("graph", help="Export an entity graph")
@click.argument("graph_path", type=OutPath)
def export_graph(graph_path: Path) -> None:
    """Generate the graph from the database and write it to *graph_path* as GEXF."""
    with engine.begin() as conn:
        graph = generate_graph(conn)
        # The file is written while the connection block is still open.
        write_gexf(graph, graph_path)
49 | 
50 | 
@cli.command("compute", help="Run backend computations")
def compute() -> None:
    """Pretty-print the first five results of ``list_story_pairs``.

    The listing and the last argument (4, presumably a story ID — confirm)
    are hard-coded, so this reads as an ad-hoc debugging command.
    """
    # Imported locally; only this command needs them.
    from pprint import pprint
    from storyweb.models import Listing
    from storyweb.logic.clusters import list_story_pairs

    with engine.begin() as conn:
        # print(compute_cluster(conn, "ffd364472a999c3d1001f5910398a53997ae0afe"))
        listing = Listing(limit=5, offset=0, sort_direction="desc")
        resp = list_story_pairs(conn, listing, 4)
        pprint(resp.dict())
62 | 
63 | 
@cli.command("auto-merge", help="Automatically merge on fingerprints")
@click.option(
    "-f",
    "--force",
    "force",
    help="Do not check existing links",
    default=False,
    is_flag=True,
)
def auto_merge_(force: bool) -> None:
    """Run ``auto_merge`` inside one ``engine.begin()`` block.

    Args:
        force: when set, existing links are not checked, as the option's
            help text promises.
    """
    with engine.begin() as conn:
        # --force means "do NOT check", so the flag has to be negated.
        auto_merge(conn, check_links=not force)
76 | 
77 | 
@cli.command("init", help="Initialize the database")
def init() -> None:
    """Create the database tables by calling ``create_db``."""
    create_db()
81 | 
82 | 
# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
85 | 


--------------------------------------------------------------------------------
/storyweb/db.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from sqlalchemy import MetaData, create_engine
 3 | from sqlalchemy import Table, Column, Integer, Unicode, DateTime, Float
 4 | from sqlalchemy.engine import Connection
 5 | from sqlalchemy.dialects.postgresql import insert as upsert
 6 | 
 7 | from storyweb import settings
 8 | 
 9 | Conn = Connection
10 | KEY_LEN = 40
11 | 
12 | log = logging.getLogger(__name__)
13 | engine = create_engine(settings.DB_URL)
14 | meta = MetaData(bind=engine)
15 | 
16 | __all__ = ["Conn", "upsert", "create_db"]
17 | 
18 | 
19 | def create_db() -> None:
20 |     meta.create_all(checkfirst=True)
21 | 
22 | 
# One row per article, keyed by a string ID; ``site`` is indexed and
# required, the remaining content columns are nullable.
article_table = Table(
    "article",
    meta,
    Column("id", Unicode(255), primary_key=True),
    Column("site", Unicode(255), index=True, nullable=False),
    Column("url", Unicode, nullable=True),
    Column("title", Unicode, nullable=True),
    Column("language", Unicode(10), nullable=True),
    Column("text", Unicode, nullable=True),
    Column("tags", Integer, default=0),
    Column("mentions", Integer, default=0),
)

# One row per story, keyed by an integer ID.
story_table = Table(
    "story",
    meta,
    Column("id", Integer, primary_key=True),
    Column("title", Unicode, nullable=True),
    Column("summary", Unicode, nullable=True),
)

# Association of articles and stories; the pair is the primary key.
story_article_table = Table(
    "story_article",
    meta,
    Column("article", Unicode(255), primary_key=True),
    Column("story", Integer, primary_key=True),
)

# Sentence text keyed by (article, sequence).
sentence_table = Table(
    "sentence",
    meta,
    Column("article", Unicode(255), primary_key=True),
    Column("sequence", Integer, primary_key=True),
    Column("text", Unicode),
)

# One row per tag; ``cluster``, ``article`` and ``fingerprint`` are indexed.
tag_table = Table(
    "tag",
    meta,
    Column("id", Unicode(KEY_LEN), primary_key=True),
    Column("cluster", Unicode(KEY_LEN), index=True),
    Column("article", Unicode(255), index=True),
    Column("fingerprint", Unicode(1024), index=True),
    Column("type", Unicode(10)),
    Column("cluster_type", Unicode(10)),
    Column("label", Unicode),
    Column("cluster_label", Unicode),
    Column("count", Integer),
    Column("frequency", Float),
)

# Association keyed by (article, sentence, tag).
tag_sentence_table = Table(
    "tag_sentence",
    meta,
    Column("article", Unicode(255), primary_key=True),
    Column("sentence", Integer, primary_key=True),
    Column("tag", Unicode(KEY_LEN), primary_key=True),
)

# Typed link keyed by (source, target), with the cluster key of each side
# stored alongside, plus an optional user and a timestamp.
link_table = Table(
    "link",
    meta,
    Column("source", Unicode(KEY_LEN), primary_key=True),
    Column("source_cluster", Unicode(KEY_LEN)),
    Column("target", Unicode(KEY_LEN), primary_key=True),
    Column("target_cluster", Unicode(KEY_LEN)),
    Column("type", Unicode(255)),
    Column("user", Unicode(255), nullable=True),
    Column("timestamp", DateTime),
)
93 | 


--------------------------------------------------------------------------------
/storyweb/logic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/logic/__init__.py


--------------------------------------------------------------------------------
/storyweb/logic/articles.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import Iterable, List, Optional
  3 | from sqlalchemy.sql import select, delete, insert, func
  4 | 
  5 | from storyweb.db import Conn, upsert
  6 | from storyweb.db import article_table, sentence_table
  7 | from storyweb.db import tag_table, tag_sentence_table
  8 | from storyweb.db import story_article_table
  9 | from storyweb.logic.util import count_stmt
 10 | from storyweb.models import (
 11 |     ArticleDetails,
 12 |     Link,
 13 |     Article,
 14 |     Listing,
 15 |     ListingResponse,
 16 |     Sentence,
 17 |     Site,
 18 |     Tag,
 19 |     TagSentence,
 20 | )
 21 | 
 22 | log = logging.getLogger(__name__)
 23 | 
 24 | 
 25 | def list_sites(conn: Conn, listing: Listing) -> ListingResponse[Site]:
 26 |     stmt = select(
 27 |         article_table.c.site,
 28 |         func.count(article_table.c.id).label("articles"),
 29 |     )
 30 |     total = count_stmt(conn, stmt, func.distinct(article_table.c.site))
 31 |     stmt = stmt.group_by(article_table.c.site)
 32 |     stmt = stmt.order_by(article_table.c.site)
 33 |     stmt = stmt.limit(listing.limit).offset(listing.offset)
 34 |     cursor = conn.execute(stmt)
 35 |     results = [Site.parse_obj(r) for r in cursor.fetchall()]
 36 |     return ListingResponse[Site](
 37 |         total=total,
 38 |         debug_msg=str(stmt),
 39 |         limit=listing.limit,
 40 |         offset=listing.offset,
 41 |         results=results,
 42 |     )
 43 | 
 44 | 
 45 | def list_articles(
 46 |     conn: Conn,
 47 |     listing: Listing,
 48 |     site: Optional[str] = None,
 49 |     story: Optional[int] = None,
 50 |     query: Optional[str] = None,
 51 |     clusters: List[str] = [],
 52 | ) -> ListingResponse[Article]:
 53 |     stmt = select(
 54 |         article_table.c.id,
 55 |         article_table.c.title,
 56 |         article_table.c.url,
 57 |         article_table.c.language,
 58 |         article_table.c.site,
 59 |         article_table.c.tags,
 60 |         article_table.c.mentions,
 61 |     )
 62 |     stmt = stmt.select_from(article_table)
 63 |     if site is not None and len(site.strip()):
 64 |         stmt = stmt.where(article_table.c.site == site)
 65 |     if query is not None and len(query.strip()):
 66 |         stmt = stmt.where(article_table.c.title.ilike(f"%{query}%"))
 67 |     if story is not None:
 68 |         stmt = stmt.join(
 69 |             story_article_table,
 70 |             story_article_table.c.article == article_table.c.id,
 71 |         )
 72 |         stmt = stmt.where(story_article_table.c.story == story)
 73 |     for cluster in clusters:
 74 |         cluster_t = tag_table.alias()
 75 |         stmt = stmt.join(cluster_t, cluster_t.c.article == article_table.c.id)
 76 |         stmt = stmt.where(cluster_t.c.cluster == cluster)
 77 | 
 78 |     total = count_stmt(conn, stmt, func.distinct(article_table.c.id))
 79 |     if listing.sort_field is not None:
 80 |         column = article_table.c[listing.sort_field]
 81 |         if listing.sort_direction == "desc":
 82 |             stmt = stmt.order_by(column.desc())
 83 |         else:
 84 |             stmt = stmt.order_by(column.asc())
 85 |     stmt = stmt.group_by(
 86 |         article_table.c.id,
 87 |         article_table.c.title,
 88 |         article_table.c.url,
 89 |         article_table.c.language,
 90 |         article_table.c.site,
 91 |         article_table.c.tags,
 92 |         article_table.c.mentions,
 93 |     )
 94 |     stmt = stmt.limit(listing.limit).offset(listing.offset)
 95 |     cursor = conn.execute(stmt)
 96 |     results = [Article.parse_obj(r) for r in cursor.fetchall()]
 97 |     return ListingResponse[Article](
 98 |         total=total,
 99 |         debug_msg=str(stmt),
100 |         limit=listing.limit,
101 |         offset=listing.offset,
102 |         results=results,
103 |     )
104 | 
105 | 
def fetch_article(conn: Conn, article_id: str) -> Optional[ArticleDetails]:
    """Load a single article (all columns) by id, or ``None`` if it is absent."""
    lookup = select(article_table).where(article_table.c.id == article_id).limit(1)
    row = conn.execute(lookup).fetchone()
    return None if row is None else ArticleDetails.parse_obj(row)
115 | 
116 | 
def save_article(conn: Conn, article: ArticleDetails) -> None:
    """Insert the article, or overwrite its content columns if the id exists.

    On an ``id`` conflict only ``site``, ``url``, ``title``, ``language`` and
    ``text`` are replaced with the incoming values; other columns are left
    as stored.
    """
    insert_stmt = upsert(article_table).values([article.dict()])
    incoming = insert_stmt.excluded
    overwrite = {
        "site": incoming.site,
        "url": incoming.url,
        "title": incoming.title,
        "language": incoming.language,
        "text": incoming.text,
    }
    conn.execute(
        insert_stmt.on_conflict_do_update(index_elements=["id"], set_=overwrite)
    )
128 | 
129 | 
def save_extracted(
    conn: Conn,
    article: ArticleDetails,
    sentences: Iterable[Sentence],
    tag_sentences: Iterable[TagSentence],
    tags: Iterable[Tag],
) -> None:
    """Persist an article together with its extracted sentences and tags.

    The article itself is upserted. Sentence and tag-sentence rows belonging
    to the article are deleted and re-inserted from the given iterables.
    Tags are upserted by ``id``; on conflict only ``type``, ``label``,
    ``count`` and ``frequency`` are refreshed.
    """
    save_article(conn, article)

    def _replace_article_rows(table, models) -> None:
        # Drop the article's existing rows first, then insert the new batch
        # (an empty batch simply leaves the table cleared for this article).
        conn.execute(delete(table).where(table.c.article == article.id))
        rows = [m.dict() for m in models]
        if rows:
            conn.execute(insert(table).values(rows))

    _replace_article_rows(sentence_table, sentences)
    _replace_article_rows(tag_sentence_table, tag_sentences)

    tag_rows = [tag.dict() for tag in tags]
    if tag_rows:
        tag_insert = upsert(tag_table).values(tag_rows)
        incoming = tag_insert.excluded
        refreshed = {
            "type": incoming.type,
            "label": incoming.label,
            "count": incoming.count,
            "frequency": incoming.frequency,
        }
        conn.execute(
            tag_insert.on_conflict_do_update(index_elements=["id"], set_=refreshed)
        )
165 | 
166 | 
167 | # def compute_idf(conn: Conn):
168 | #     cstmt = select(func.count(article_table.c.id))
169 | #     article_count = float(conn.execute(cstmt).scalar())
170 | #     print("Article count", article_count)
171 | 
172 | #     conn.execute(delete(fingerprint_idf_table))
173 | #     gstmt = select(
174 | #         tag_table.c.fingerprint,
175 | #         func.count(tag_table.c.article),
176 | #         func.log(article_count / func.count(tag_table.c.article)),
177 | #     )
178 | #     gstmt = gstmt.group_by(tag_table.c.fingerprint)
179 | #     stmt = fingerprint_idf_table.insert()
180 | #     stmt = stmt.from_select(["fingerprint", "count", "frequency"], gstmt)
181 | #     print("Update tf/idf", stmt)
182 | #     conn.execute(stmt)
183 | 


--------------------------------------------------------------------------------
/storyweb/logic/graph.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import countrynames
  3 | from typing import List, Optional, Generator, Dict
  4 | from networkx import DiGraph
  5 | from followthemoney import model
  6 | from followthemoney.proxy import EntityProxy
  7 | from sqlalchemy.future import select
  8 | from sqlalchemy.engine import Row
  9 | from networkx.readwrite.gexf import generate_gexf
 10 | 
 11 | from storyweb.db import Conn, link_table, tag_table, story_article_table
 12 | from storyweb.ontology import ontology, LinkType
 13 | 
 14 | 
 15 | def query_links(
 16 |     conn: Conn,
 17 |     story_id: Optional[int] = None,
 18 |     link_types: List[str] = list(ontology.link_types.keys()),
 19 | ) -> Generator[Row, None, None]:
 20 |     link_t = link_table.alias("l")
 21 |     source_t = tag_table.alias("s")
 22 |     target_t = tag_table.alias("t")
 23 | 
 24 |     lstmt = select(
 25 |         link_t.c.type.label("link_type"),
 26 |         source_t.c.cluster.label("source_id"),
 27 |         source_t.c.label.label("source_alias"),
 28 |         source_t.c.cluster_label.label("source_label"),
 29 |         source_t.c.cluster_type.label("source_type"),
 30 |         target_t.c.cluster.label("target_id"),
 31 |         target_t.c.label.label("target_alias"),
 32 |         target_t.c.cluster_label.label("target_label"),
 33 |         target_t.c.cluster_type.label("target_type"),
 34 |     )
 35 | 
 36 |     if story_id is not None:
 37 |         sa_source_t = story_article_table.alias("src_sa")
 38 |         sa_target_t = story_article_table.alias("tgt_sa")
 39 |         lstmt = lstmt.join(source_t, link_t.c.source_cluster == source_t.c.cluster)
 40 |         lstmt = lstmt.join(sa_source_t, sa_source_t.c.article == source_t.c.article)
 41 |         lstmt = lstmt.filter(sa_source_t.c.story == story_id)
 42 |         lstmt = lstmt.join(target_t, link_t.c.target_cluster == target_t.c.cluster)
 43 |         lstmt = lstmt.join(sa_target_t, sa_target_t.c.article == target_t.c.article)
 44 |         lstmt = lstmt.filter(sa_target_t.c.story == story_id)
 45 |         # lstmt = lstmt.filter(
 46 |         #     or_(
 47 |         #         sa_target_t.c.story == story,
 48 |         #         sa_source_t.c.story == story,
 49 |         #     )
 50 |         # )
 51 |     else:
 52 |         lstmt = lstmt.join(source_t, link_t.c.source_cluster == source_t.c.id)
 53 |         lstmt = lstmt.join(target_t, link_t.c.target_cluster == target_t.c.id)
 54 | 
 55 |     lstmt = lstmt.where(link_t.c.type.in_(link_types))
 56 |     lstmt = lstmt.distinct()
 57 |     for row in conn.execute(lstmt):
 58 |         yield row
 59 | 
 60 | 
 61 | def generate_graph(
 62 |     conn: Conn,
 63 |     story_id: Optional[int] = None,
 64 |     link_types: List[str] = list(ontology.link_types.keys()),
 65 | ) -> DiGraph:
 66 |     for skip in (LinkType.SAME, LinkType.UNRELATED):
 67 |         if skip in link_types:
 68 |             link_types.remove(skip)
 69 | 
 70 |     graph = DiGraph()
 71 |     for row in query_links(conn, story_id=story_id, link_types=link_types):
 72 |         source_id = row["source_id"]
 73 |         target_id = row["target_id"]
 74 |         if not graph.has_node(source_id):
 75 |             graph.add_node(
 76 |                 source_id,
 77 |                 label=row["source_label"],
 78 |                 node_type=row["source_type"],
 79 |             )
 80 |         if not graph.has_node(target_id):
 81 |             graph.add_node(
 82 |                 target_id,
 83 |                 label=row["target_label"],
 84 |                 node_type=row["target_type"],
 85 |             )
 86 |         graph.add_edge(
 87 |             source_id,
 88 |             target_id,
 89 |             edge_type=row["link_type"],
 90 |         )
 91 |     return graph
 92 | 
 93 | 
 94 | def generate_graph_gexf(
 95 |     conn: Conn,
 96 |     story_id: Optional[int] = None,
 97 |     link_types: List[str] = list(ontology.link_types.keys()),
 98 | ) -> str:
 99 |     graph = generate_graph(conn, story_id=story_id, link_types=link_types)
100 |     return "\n".join(generate_gexf(graph))
101 | 
102 | 
def _make_ent(row: Row, prefix: str) -> EntityProxy:
    """Build an FtM entity from the ``<prefix>_*`` columns of a link row.

    The entity schema comes from the ontology's cluster type, its id is the
    cluster id prefixed with ``sw-``, the cluster label becomes the name, and
    the tag label is added as an alias when it differs from the name.
    """
    cluster_type = ontology.get_cluster_type(row[f"{prefix}_type"])
    entity = model.make_entity(cluster_type.ftm)
    cluster_id = row[f"{prefix}_id"]
    entity.id = f"sw-{cluster_id}"

    name = row[f"{prefix}_label"]
    alias = row[f"{prefix}_alias"]
    entity.add("name", name)
    if alias != name:
        entity.add("alias", alias)
    return entity
115 | 
116 | 
def generate_graph_ftm(conn: Conn, story_id: Optional[int] = None) -> str:
    """Export the link graph as newline-delimited FollowTheMoney JSON.

    ``SAME`` and ``UNRELATED`` links are excluded. ``LOCATED`` links pointing
    at a location are folded into a ``country`` property on the source entity
    when the location name resolves to a country code; every other link that
    touches a location is dropped. Remaining links whose type has an FtM
    schema become edge entities between the source and target entities.

    Args:
        conn: Database connection to query links from.
        story_id: Restrict the export to links within this story, if given.

    Returns:
        One JSON-serialised entity per line.

    Raises:
        ValueError: If a link type maps to an FtM schema that does not exist
            or is not an edge schema.
    """
    skipped = (LinkType.SAME, LinkType.UNRELATED)
    link_types = [name for name in ontology.link_types.keys() if name not in skipped]

    entities: Dict[str, EntityProxy] = {}

    def _merge(e: EntityProxy) -> None:
        # Entities seen several times are merged so properties accumulate.
        if e.id in entities:
            entities[e.id].merge(e)
        else:
            entities[e.id] = e

    for row in query_links(conn, story_id=story_id, link_types=link_types):
        if row["link_type"] == "LOCATED" and row["target_type"] == "LOC":
            # Try both the cluster label and the tag label as country names.
            for label in (row["target_label"], row["target_alias"]):
                code = countrynames.to_code(label)
                if code is None:
                    continue
                ent = _make_ent(row, "source")
                ent.add("country", code)
                _merge(ent)
            continue

        if row["source_type"] == "LOC" or row["target_type"] == "LOC":
            continue

        source = _make_ent(row, "source")
        target = _make_ent(row, "target")

        link_type = ontology.get_link_type(row["link_type"])
        if not link_type.ftm:
            # No FtM equivalent configured for this link type.
            continue

        schema = model.get(link_type.ftm)
        if schema is None or not schema.edge:
            raise ValueError(
                "Link type %r maps to %r, which is not a FollowTheMoney edge schema"
                % (row["link_type"], link_type.ftm)
            )

        link = model.make_entity(schema)
        link.make_id(source.id, target.id, link_type.model.name)
        link.add("summary", link_type.model.label)
        link.add(schema.edge_source, source)
        link.add(schema.edge_target, target)

        _merge(source)
        _merge(target)
        _merge(link)

    return "\n".join(json.dumps(ent.to_dict()) for ent in entities.values())
172 | 


--------------------------------------------------------------------------------
/storyweb/logic/predict.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from sqlalchemy.sql import select, delete, update, and_, or_, func, distinct
 3 | 
 4 | from storyweb.db import Conn, link_table, tag_table
 5 | from storyweb.logic.clusters import fetch_cluster
 6 | from storyweb.logic.links import get_links
 7 | from storyweb.ontology import ontology, LinkType
 8 | from storyweb.models import LinkPrediction, ClusterBase, Link
 9 | 
10 | 
def is_observer(conn: Conn, id: str) -> bool:
    """Tell whether a cluster acts as an observer on most of its links.

    Counts the distinct target clusters per link type for links whose source
    is ``id`` (ignoring ``SAME`` and ``UNRELATED``). Returns ``True`` when at
    least half of those targets are reached through ``OBSERVER`` links, and
    ``False`` when the cluster has no such outgoing links at all.
    """
    per_type = (
        select(
            link_table.c.type.label("type"),
            func.count(func.distinct(link_table.c.target_cluster)).label("targets"),
        )
        .where(link_table.c.source_cluster == id)
        .where(~link_table.c.type.in_((LinkType.SAME, LinkType.UNRELATED)))
        .group_by(link_table.c.type)
    )
    observed = 0.0
    overall = 0.0
    for row in conn.execute(per_type):
        targets = row["targets"]
        if row["type"] == LinkType.OBSERVER:
            observed = targets
        overall += targets
    return overall != 0.0 and (observed / overall) >= 0.5
29 | 
30 | 
def pick_cluster(id: str, *clusters: ClusterBase) -> ClusterBase:
    """Return the first of ``clusters`` whose ``id`` equals ``id``.

    Raises:
        ValueError: If none of the given clusters has that id.
    """
    try:
        return next(candidate for candidate in clusters if id == candidate.id)
    except StopIteration:
        raise ValueError("Cluster not found!") from None
36 | 
37 | 
def can_have_link(source: ClusterBase, target: ClusterBase, link_type: str) -> bool:
    """Check that the ontology allows ``link_type`` from ``source`` to ``target``.

    Both cluster types must be (or descend from) the source and target types
    the link type declares.
    """
    link = ontology.get_link_type(link_type)
    source_kind = ontology.get_cluster_type(source.type)
    target_kind = ontology.get_cluster_type(target.type)
    if source_kind.is_a(link.source_type.name) and target_kind.is_a(
        link.target_type.name
    ):
        return True
    return False
47 | 
48 | 
49 | # def can_have_bidi(source: ClusterBase, target: ClusterBase, link_type: str) -> bool:
50 | #     pass
51 | 
52 | 
def link_predict(conn: Conn, anchor_id: str, other_id: str) -> LinkPrediction:
    """Suggest the most plausible link between two clusters.

    The steps are tried in order:

    1. An existing, ontology-compatible link between the two clusters (other
       than ``UNRELATED``); the one with the highest ontology weight wins.
    2. If exactly one cluster is mostly an observer, it observes the other.
    3. Location rules: two locations are ``WITHIN``; anything paired with a
       location is ``LOCATED`` there.
    4. Otherwise ``UNRELATED`` from anchor to other.

    Raises:
        ValueError: If either cluster cannot be fetched.
    """
    anchor = fetch_cluster(conn, anchor_id)
    other = fetch_cluster(conn, other_id)
    if anchor is None or other is None:
        raise ValueError("Invalid clusters for link prediction!")

    # Step 1: reuse an existing link where the ontology permits it.
    candidates: List[Link] = []
    for link in get_links(conn, anchor_id, other_id):
        if link.type == LinkType.UNRELATED:
            continue
        link_source = pick_cluster(link.source_cluster, anchor, other)
        link_target = pick_cluster(link.target_cluster, anchor, other)
        if can_have_link(link_source, link_target, link.type):
            candidates.append(link)
    if candidates:
        # Stable ascending sort; the last element is the heaviest link (and
        # the latest one among equal weights).
        by_weight = sorted(
            candidates, key=lambda l: ontology.get_link_type(l.type).weight
        )
        strongest = by_weight[-1]
        return LinkPrediction(
            source=pick_cluster(strongest.source_cluster, anchor, other),
            target=pick_cluster(strongest.target_cluster, anchor, other),
            type=strongest.type,
        )

    # Step 2: a cluster that is an observer on most of its links (e.g. a media
    # organisation, or a journalist) is assumed to observe the other one.
    anchor_observer = is_observer(conn, anchor.id)
    other_observer = is_observer(conn, other.id)
    if anchor_observer != other_observer:
        if anchor_observer:
            return LinkPrediction(source=anchor, target=other, type=LinkType.OBSERVER)
        return LinkPrediction(source=other, target=anchor, type=LinkType.OBSERVER)

    # Step 3: locations can only enter into a few kinds of connections.
    anchor_is_loc = anchor.type == "LOC"
    other_is_loc = other.type == "LOC"
    if anchor_is_loc and other_is_loc:
        return LinkPrediction(source=anchor, target=other, type="WITHIN")
    if anchor_is_loc:
        return LinkPrediction(source=other, target=anchor, type="LOCATED")
    if other_is_loc:
        return LinkPrediction(source=anchor, target=other, type="LOCATED")

    # Step 4: nothing suggests a relationship.
    return LinkPrediction(source=anchor, target=other, type=LinkType.UNRELATED)
98 | 


--------------------------------------------------------------------------------
/storyweb/logic/stories.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Optional
 3 | from sqlalchemy.sql import select, delete, update, insert, func
 4 | 
 5 | from storyweb.db import Conn
 6 | from storyweb.db import story_table
 7 | from storyweb.db import story_article_table
 8 | from storyweb.logic.util import count_stmt
 9 | from storyweb.models import Story, StoryMutation, Listing, ListingResponse
10 | 
11 | log = logging.getLogger(__name__)
12 | 
13 | 
def list_stories(
    conn: Conn, listing: Listing, query: Optional[str], article: Optional[str]
) -> ListingResponse[Story]:
    """Return one page of stories.

    Args:
        conn: Database connection to run the queries on.
        listing: Paging options (``limit`` and ``offset``).
        query: Case-insensitive substring match on the story title; blank
            strings are ignored.
        article: Keep only stories that contain this article id; blank
            strings are ignored.

    Returns:
        The page of stories plus the total match count and the SQL text.
    """
    stmt = select(story_table)
    if query is not None and query.strip():
        stmt = stmt.where(story_table.c.title.ilike(f"%{query}%"))
    if article is not None and article.strip():
        stmt = stmt.join(
            story_article_table,
            story_article_table.c.story == story_table.c.id,
        ).where(story_article_table.c.article == article)

    # The total is computed on the filtered statement, before paging.
    total = count_stmt(conn, stmt, story_table.c.id)
    stmt = stmt.limit(listing.limit).offset(listing.offset)
    rows = conn.execute(stmt).fetchall()
    return ListingResponse[Story](
        total=total,
        debug_msg=str(stmt),
        limit=listing.limit,
        offset=listing.offset,
        results=[Story.parse_obj(row) for row in rows],
    )
37 | 
38 | 
def fetch_story(conn: Conn, story_id: int) -> Optional[Story]:
    """Load one story by id, or ``None`` when no such story exists."""
    lookup = select(story_table).where(story_table.c.id == story_id).limit(1)
    row = conn.execute(lookup).fetchone()
    return None if row is None else Story.parse_obj(row)
48 | 
49 | 
def create_story(conn: Conn, data: StoryMutation) -> Story:
    """Insert a new story and return it as read back from the database.

    Raises:
        Exception: If the freshly inserted story cannot be fetched.
    """
    creation = insert(story_table).values(title=data.title, summary=data.summary)
    new_id = conn.execute(creation).inserted_primary_key[0]
    created = fetch_story(conn, new_id)
    if created is None:
        raise Exception("Story was not saved.")
    return created
58 | 
59 | 
def update_story(conn: Conn, data: StoryMutation, story_id: int) -> Story:
    """Overwrite a story's title and summary and return the stored result.

    Raises:
        Exception: If the story cannot be fetched after the update.
    """
    change = (
        update(story_table)
        .where(story_table.c.id == story_id)
        .values(title=data.title, summary=data.summary)
    )
    conn.execute(change)
    updated = fetch_story(conn, story_id)
    if updated is None:
        raise Exception("Story was not saved.")
    return updated
69 | 
70 | 
def delete_story(conn: Conn, story_id: int) -> None:
    """Remove a story along with its story-to-article associations."""
    # Associations go first, then the story row itself.
    conn.execute(
        delete(story_article_table).where(story_article_table.c.story == story_id)
    )
    conn.execute(delete(story_table).where(story_table.c.id == story_id))
78 | 
79 | 
def toggle_story_article(
    conn: Conn, story: int, article: str, delete_existing: bool = True
) -> None:
    """Attach an article to a story, or detach it if already attached.

    Args:
        conn: Database connection to run the statements on.
        story: Story id.
        article: Article id.
        delete_existing: When ``False`` an existing association is left in
            place, so the call only ever adds.
    """
    link = story_article_table.alias("t")
    lookup = select(func.count(link.c.story))
    lookup = lookup.filter(link.c.story == story, link.c.article == article)
    already_linked = conn.execute(lookup).scalar_one() > 0

    if already_linked:
        if delete_existing:
            removal = delete(link)
            removal = removal.filter(link.c.story == story, link.c.article == article)
            conn.execute(removal)
        return

    addition = insert(story_article_table).values(story=story, article=article)
    conn.execute(addition)
96 | 


--------------------------------------------------------------------------------
/storyweb/logic/util.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from sqlalchemy.sql import Select, Selectable, ColumnElement
 3 | from sqlalchemy.sql import func
 4 | 
 5 | from storyweb.db import Conn
 6 | 
 7 | log = logging.getLogger(__name__)
 8 | 
 9 | 
def count_stmt(conn: Conn, stmt: Select, col: Selectable | ColumnElement) -> int:
    """Run ``stmt`` with its columns replaced by ``COUNT(col)`` and return the count."""
    counting = stmt.with_only_columns(func.count(col))
    return conn.execute(counting).scalar_one()
14 | 


--------------------------------------------------------------------------------
/storyweb/models.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | from typing import Generic, List, Optional, TypeVar
  3 | from pydantic import BaseModel, Field
  4 | from pydantic.generics import GenericModel
  5 | 
# Type variable for the result item type of generic responses; restricted to
# pydantic models.
R = TypeVar("R", bound=BaseModel)
  7 | 
  8 | 
# Common response envelope: a status string (defaults to "ok") and an optional
# free-form debug message.
class Response(GenericModel):
    status: str = Field("ok")
    debug_msg: Optional[str] = Field(None)
 12 | 
 13 | 
# Paged response envelope, generic over the result model R.
class ListingResponse(Response, Generic[R]):
    # Number of matching records overall (not just in this page).
    total: int = Field(0)
    # Page size that was requested.
    limit: int = Field()
    # Number of records skipped before this page.
    offset: int = Field(0)
    # The records in this page.
    results: List[R]
 19 | 
 20 | 
# Paging and sorting options passed to the list_* query functions.
class Listing(BaseModel):
    limit: int
    offset: int
    # Compared against "desc" by list_articles; any other value sorts ascending.
    sort_direction: str
    # Name of the column to sort by; no ordering is applied when unset.
    sort_field: Optional[str]
 26 | 
 27 | 
# Summary view of an article, without its body text.
class Article(BaseModel):
    id: str
    site: str
    url: str
    title: Optional[str]
    language: Optional[str]
    # presumably the number of tags / mentions extracted from the article --
    # TODO confirm against the code that populates these columns.
    tags: Optional[int]
    mentions: Optional[int]
 36 | 
 37 | 
# Full article record: the summary fields plus the body text.
class ArticleDetails(Article):
    text: str
 40 | 
 41 | 
# Writable story fields, used when creating or updating a story.
class StoryMutation(BaseModel):
    # Titles shorter than four characters are rejected by validation.
    title: str = Field(min_length=4)
    summary: Optional[str]
 45 | 
 46 | 
# Payload naming a single article; presumably the request body for toggling an
# article's membership in a story -- verify against the API routes.
class StoryArticleToggle(BaseModel):
    article: str
 49 | 
 50 | 
# Payload carrying a single URL; presumably the request body for importing an
# article into a story by URL -- verify against the API routes.
class StoryArticleImportUrl(BaseModel):
    url: str
 53 | 
 54 | 
# A stored story: its numeric id plus the editable title and summary.
class Story(BaseModel):
    id: int
    title: str
    summary: Optional[str]
 59 | 
 60 | 
# One sentence of an article's text.
class Sentence(BaseModel):
    # Id of the article the sentence belongs to.
    article: str
    # presumably the sentence's position within the article -- TODO confirm.
    sequence: int
    text: str
 65 | 
 66 | 
# Minimal identity shared by tags and clusters: id, type name and label.
class ClusterBase(BaseModel):
    id: str
    # Name of a cluster type from the ontology (e.g. "PER", "ORG", "LOC").
    type: str
    label: str
 71 | 
 72 | 
# A tag found in one article, together with the cluster it is assigned to.
class Tag(ClusterBase):
    # Id of the cluster this tag belongs to.
    cluster: str
    # Id of the article the tag was found in.
    article: str
    # presumably a normalised form of the label used for matching -- TODO confirm.
    fingerprint: str
    count: int
    frequency: float
    # Type and label of the owning cluster, when known.
    cluster_type: Optional[str]
    cluster_label: Optional[str]
 81 | 
 82 | 
# Association between a tag and a sentence of an article.
class TagSentence(BaseModel):
    tag: str
    article: str
    # presumably the sequence number of the sentence -- TODO confirm.
    sentence: int
 87 | 
 88 | 
# Cluster identity plus an article figure.
class Cluster(ClusterBase):
    # presumably the number of articles mentioning the cluster -- TODO confirm.
    articles: int
 91 | 
 92 | 
# Cluster with a list of labels attached.
class ClusterDetails(Cluster):
    # presumably every label variant seen for the cluster -- TODO confirm.
    labels: List[str]
 95 | 
 96 | 
# Two clusters considered together, with the link types recorded for the pair.
class ClusterPair(BaseModel):
    left: ClusterBase
    right: ClusterBase
    # presumably the number of articles the pair co-occurs in -- TODO confirm.
    articles: int
    link_types: List[str] = []
102 | 
103 | 
# A cluster listed in relation to another one, with the link types recorded
# between them.
class RelatedCluster(ClusterBase):
    # presumably the number of shared articles -- TODO confirm.
    articles: int
    link_types: List[str] = []
107 | 
108 | 
# A cluster listed as similar to another one.
class SimilarCluster(ClusterBase):
    # presumably the items the two clusters have in common, and how many --
    # TODO confirm what is being compared.
    common: List[str]
    common_count: int
112 | 
113 | 
# Core of a link: source, target and the link type name.
class LinkBase(BaseModel):
    # presumably ids of the linked items (distinct from the *_cluster fields
    # on Link) -- TODO confirm.
    source: str
    target: str
    # Name of a link type from the ontology.
    type: str
118 | 
119 | 
# A stored link, including the clusters on both ends and audit fields.
class Link(LinkBase):
    source_cluster: str
    target_cluster: str
    # presumably who recorded the link and when -- TODO confirm.
    user: Optional[str]
    timestamp: datetime
125 | 
126 | 
# A suggested link between two clusters, as produced by link_predict.
class LinkPrediction(BaseModel):
    source: ClusterDetails
    target: ClusterDetails
    # Name of the suggested link type.
    type: str
131 | 
132 | 
# Payload naming one anchor cluster and a list of other clusters; presumably a
# request to merge the others into the anchor -- verify against the handler.
class MergeRequest(BaseModel):
    anchor: str
    other: List[str]
136 | 
137 | 
# Payload naming a single cluster; presumably a request to split the cluster
# apart again -- verify against the handler.
class ExplodeRequest(BaseModel):
    cluster: str
140 | 
141 | 
# Payload naming a cluster and an article; presumably a request to detach the
# article from the cluster -- verify against the handler.
class UntagRequest(BaseModel):
    cluster: str
    article: str
145 | 
146 | 
# A site name with an article figure (defaults to 0).
class Site(BaseModel):
    site: str
    # presumably the number of articles from that site -- TODO confirm.
    articles: int = 0
150 | 


--------------------------------------------------------------------------------
/storyweb/ontology.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | from pydantic import BaseModel
  3 | from pydantic_yaml import YamlModel
  4 | from typing import List, Optional
  5 | 
  6 | from storyweb.clean import most_common
  7 | 
  8 | 
# Declarative description of one cluster type, as listed under
# `cluster_types` in the ontology definition.
class ClusterTypeModel(BaseModel):
    name: str
    label: str
    plural: str
    # Name of the parent cluster type; unset for a root type.
    parent: Optional[str]
    color: str
    icon: str
    # Name of the FollowTheMoney schema used when exporting this type.
    ftm: str
 17 | 
 18 | 
# Declarative description of one link type, as listed under `link_types` in
# the ontology definition.
class LinkTypeModel(BaseModel):
    name: str
    directed: bool = False
    label: str
    phrase: str
    # Names of the cluster types allowed on each end of the link.
    source_type: str
    target_type: str
    # Name of the FollowTheMoney edge schema, if the link can be exported.
    ftm: Optional[str]
    # Used to rank competing links; the highest weight wins in link_predict.
    weight: int
 28 | 
 29 | 
# Top-level shape of the ontology YAML file: the cluster types and link types.
class OntologyModel(YamlModel):
    cluster_types: List[ClusterTypeModel]
    link_types: List[LinkTypeModel]
 33 | 
 34 | 
 35 | class ClusterType(object):
 36 |     PERSON = "PER"
 37 |     ORGANIZATION = "ORG"
 38 |     LOCATION = "LOC"
 39 | 
 40 |     def __init__(self, ontology: "Ontology", model: ClusterTypeModel):
 41 |         self.ontology = ontology
 42 |         self.model = model
 43 |         self.name = model.name
 44 |         self.label = model.label
 45 |         self.plural = model.plural
 46 |         self.color = model.color
 47 |         self.icon = model.icon
 48 |         self.ftm = model.ftm
 49 | 
 50 |     @property
 51 |     def parent(self) -> Optional["ClusterType"]:
 52 |         if self.model.parent is None:
 53 |             return None
 54 |         return self.ontology.get_cluster_type(self.model.parent)
 55 | 
 56 |     def is_a(self, name: str) -> bool:
 57 |         if name == self.name:
 58 |             return True
 59 |         parent = self.parent
 60 |         if parent is None:
 61 |             return False
 62 |         return parent.is_a(name)
 63 | 
 64 |     def pick(self, names: List[str]) -> str:
 65 |         """Given a set of categories, pick the most descriptive one."""
 66 |         # TODO: does this want to be a proper class-based type system (ftm?) at
 67 |         # some point?
 68 |         # if not len(categories):
 69 |         #     raise TypeError("No categories for this entity!")
 70 |         # unique = set(categories)
 71 |         # if len(unique) == 1:
 72 |         #     return categories[0]
 73 |         # if LOCATION in unique:
 74 |         #     # works in practice, not in theory:
 75 |         #     return LOCATION
 76 |         # if PERSON in unique and ORGANIZATION in unique:
 77 |         #     return ENTITY
 78 |         # if PERSON in unique:
 79 |         #     return PERSON
 80 |         # return ORGANIZATION
 81 |         return most_common(names)
 82 | 
 83 | 
 84 | class LinkType(object):
 85 |     SAME = "SAME"
 86 |     UNRELATED = "UNRELATED"
 87 |     OBSERVER = "OBSERVER"
 88 | 
 89 |     def __init__(self, ontology: "Ontology", model: LinkTypeModel):
 90 |         self.ontology = ontology
 91 |         self.model = model
 92 |         self.ftm = model.ftm
 93 |         self.weight = model.weight
 94 | 
 95 |     @property
 96 |     def source_type(self) -> "ClusterType":
 97 |         return self.ontology.get_cluster_type(self.model.source_type)
 98 | 
 99 |     @property
100 |     def target_type(self) -> "ClusterType":
101 |         return self.ontology.get_cluster_type(self.model.target_type)
102 | 
103 | 
class Ontology(object):
    """Registry of cluster types and link types, keyed by name."""

    def __init__(self, model: OntologyModel):
        self.model = model
        self.node_types = {n.name: ClusterType(self, n) for n in model.cluster_types}
        self.link_types = {l.name: LinkType(self, l) for l in model.link_types}

        # Sanity-check that the types referenced by name in the code base are
        # actually defined in the loaded ontology.
        assert LinkType.SAME in self.link_types, LinkType.SAME
        assert ClusterType.LOCATION in self.node_types, ClusterType.LOCATION
        assert ClusterType.PERSON in self.node_types, ClusterType.PERSON
        assert ClusterType.ORGANIZATION in self.node_types, ClusterType.ORGANIZATION

    def get_cluster_type(self, name: str) -> ClusterType:
        """Return the cluster type called ``name``; raises ``KeyError`` if unknown."""
        return self.node_types[name]

    def get_link_type(self, name: str) -> LinkType:
        """Return the link type called ``name``; raises ``KeyError`` if unknown."""
        return self.link_types[name]

    @classmethod
    def load(cls) -> "Ontology":
        """Build an ontology from the ``ontology.yml`` file next to this module."""
        path = Path(__file__).parent / "ontology.yml"
        model = OntologyModel.parse_file(path)
        # Instantiate through ``cls`` so that subclasses calling ``load()``
        # get an instance of themselves rather than of the base class.
        return cls(model)
126 | 
127 | 
# Module-level ontology instance, loaded once at import time from the
# ontology.yml file that sits next to this module.
ontology = Ontology.load()
129 | 


--------------------------------------------------------------------------------
/storyweb/ontology.yml:
--------------------------------------------------------------------------------
  1 | cluster_types:
  2 |   - name: ANY
  3 |     label: "Thing"
  4 |     plural: "Things"
  5 |     color: "#9D3F9D"
  6 |     icon: "hat"
  7 |     ftm: Thing
  8 |   - name: ENT
  9 |     label: "Entity"
 10 |     plural: "Entities"
 11 |     parent: ANY
 12 |     color: "#9D3F9D"
 13 |     icon: "people"
 14 |     ftm: LegalEntity
 15 |   - name: LOC
 16 |     label: "Location"
 17 |     plural: "Locations"
 18 |     parent: ANY
 19 |     color: "#29a634"
 20 |     icon: "mountain"
 21 |     ftm: Address
 22 |   - name: PER
 23 |     label: "Person"
 24 |     plural: "People"
 25 |     parent: ENT
 26 |     color: "#9d3f9d"
 27 |     icon: "person"
 28 |     ftm: Person
 29 |   - name: ORG
 30 |     label: "Organization"
 31 |     plural: "Organizations"
 32 |     parent: ENT
 33 |     color: "#2965cc"
 34 |     icon: "office"
 35 |     ftm: Organization
 36 | link_types:
 37 |   - name: SAME
 38 |     label: "Same as"
 39 |     directed: false
 40 |     phrase: "is the same as"
 41 |     source_type: ANY
 42 |     target_type: ANY
 43 |     weight: 100
 44 |   - name: OBSERVER
 45 |     directed: true
 46 |     label: "Observer"
 47 |     phrase: "writes about"
 48 |     source_type: ENT
 49 |     target_type: ANY
 50 |     weight: 20
 51 |   - name: UNRELATED
 52 |     directed: false
 53 |     label: "Unrelated"
 54 |     phrase: "has nothing to do with"
 55 |     source_type: ANY
 56 |     target_type: ANY
 57 |     weight: 0
 58 |   - name: ASSOCIATE
 59 |     directed: false
 60 |     label: "Associate"
 61 |     phrase: "is an associate of"
 62 |     source_type: PER
 63 |     target_type: PER
 64 |     ftm: Associate
 65 |     weight: 30
 66 |   - name: FAMILY
 67 |     directed: false
 68 |     label: "Family"
 69 |     phrase: "is related to"
 70 |     source_type: PER
 71 |     target_type: PER
 72 |     ftm: Family
 73 |     weight: 40
 74 |   - name: ANTAGONIST
 75 |     directed: false
 76 |     label: "Antagonist"
 77 |     phrase: "is in conflict with"
 78 |     source_type: ENT
 79 |     target_type: ENT
 80 |     weight: 50
 81 |   - name: OWNER
 82 |     directed: true
 83 |     label: "Owner"
 84 |     phrase: "owns"
 85 |     source_type: ENT
 86 |     target_type: ORG
 87 |     ftm: Ownership
 88 |     weight: 40
 89 |   - name: MANAGER
 90 |     directed: true
 91 |     label: "Manager"
 92 |     phrase: "manages or directs"
 93 |     source_type: ENT
 94 |     target_type: ORG
 95 |     ftm: Directorship
 96 |     weight: 40
 97 |   - name: EMPLOYEE
 98 |     directed: true
 99 |     label: "Employer"
100 |     phrase: "works for"
101 |     source_type: PER
102 |     target_type: ORG
103 |     ftm: Employment
104 |     weight: 40
105 |   - name: MEMBER
106 |     directed: true
107 |     label: "Member"
108 |     phrase: "is part of"
109 |     source_type: PER
110 |     target_type: ORG
111 |     ftm: Membership
112 |     weight: 40
113 |   - name: BUSINESS
114 |     directed: false
115 |     label: "Business activity"
116 |     phrase: "does business with"
117 |     source_type: ENT
118 |     target_type: ENT
119 |     weight: 30
120 |   - name: LOCATED
121 |     directed: true
122 |     label: "Located"
123 |     phrase: "is located in"
124 |     source_type: ENT
125 |     target_type: LOC
126 |     weight: 20
127 |   - name: WITHIN
128 |     directed: true
129 |     label: "Within"
130 |     phrase: "is located in"
131 |     source_type: LOC
132 |     target_type: LOC
133 |     weight: 20
134 |   - name: INDIRECT
135 |     directed: false
136 |     label: "Indirect link"
137 |     phrase: "is indirectly linked to"
138 |     source_type: ANY
139 |     target_type: ANY
140 |     weight: 5
141 |   - name: OTHER
142 |     directed: false
143 |     label: "Other link"
144 |     phrase: "is linked to"
145 |     source_type: ANY
146 |     target_type: ANY
147 |     ftm: UnknownLink
148 |     weight: 7
149 | 


--------------------------------------------------------------------------------
/storyweb/parse/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import requests
 3 | from typing import Optional
 4 | from articledata import URL
 5 | 
 6 | from storyweb.db import Conn
 7 | from storyweb.parse.extract import extract
 8 | from storyweb.parse.pipeline import load_one_article
 9 | 
10 | log = logging.getLogger(__name__)
11 | 
12 | 
def import_article_by_url(
    conn: Conn, url: str, timeout: float = 30.0
) -> Optional[str]:
    """Download the page at `url`, extract an article from it and store it.

    Args:
        conn: Database connection handed on to `load_one_article`.
        url: Address of the page to fetch.
        timeout: Seconds to wait for the remote server before giving up.
            Without a timeout `requests` may block indefinitely on a
            stalled connection.

    Returns:
        The value returned by `load_one_article` for the extracted article,
        or None when the page cannot be fetched or `extract` returns None.
    """
    # NOTE(review): `url` is fetched as given; if it originates from an API
    # client, consider restricting the hosts that may be reached.
    try:
        res = requests.get(url, timeout=timeout)
        res.raise_for_status()
    except Exception as exc:
        # Fetching is best effort: log the failure and signal it via None.
        log.exception("Cannot fetch article text: %r", exc)
        return None

    article = extract(URL(url), res.content)
    if article is None:
        return None

    return load_one_article(conn, article)
27 | 


--------------------------------------------------------------------------------
/storyweb/parse/extract.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from datetime import datetime
 3 | from typing import Optional, Dict, Any
 4 | from articledata import URL, Article
 5 | from trafilatura import bare_extraction
 6 | 
 7 | from storyweb.parse.language import detect_language
 8 | 
 9 | log = logging.getLogger(__name__)
10 | 
11 | 
def extract(url: URL, html: Any) -> Optional[Article]:
    """Build an `Article` from downloaded HTML using trafilatura.

    The article starts out with placeholder values derived from `url`
    (the URL doubles as title, the text is empty, the language is "xxx")
    and is then enriched with whatever `bare_extraction` could find.

    Args:
        url: Parsed URL object providing `id`, `url` and `domain`.
        html: Raw page content passed on to `bare_extraction`.

    Returns:
        The populated article. The placeholders are kept for every field
        the extraction did not provide.
    """
    log.info("Parsing: %r", url)
    article = Article(
        id=url.id,
        url=url.url,
        title=url.url,
        site=url.domain,
        bylines=[],
        language="xxx",
        locale="xx",
        text="",
        extracted_at=datetime.utcnow().isoformat(),
    )

    # The result is named `extracted` so it does not shadow this function.
    extracted: Optional[Dict[str, Any]] = bare_extraction(
        html, url=url.url, include_comments=False
    )
    if extracted is not None:
        # Keys may be present with a None/empty value; `or` keeps the
        # placeholder in that case instead of overwriting it with None.
        article.title = extracted.get("title") or article.title
        article.date = extracted.get("date")
        article.text = extracted.get("text") or article.text
        author = extracted.get("author")
        if author:
            article.bylines.append(author)

    lang = detect_language(article.text)
    if lang is not None:
        article.language = lang
    return article
39 | 


--------------------------------------------------------------------------------
/storyweb/parse/language.py:
--------------------------------------------------------------------------------
 1 | from functools import cache
 2 | from typing import Optional
 3 | import fasttext
 4 | import languagecodes
 5 | from normality import collapse_spaces
 6 | from pathlib import Path
 7 | 
# Location of the fastText model file "lid.176.ftz", expected to sit in the
# same directory as this module; loaded by get_model().
model_path = Path(__file__).parent / "lid.176.ftz"
 9 | 
10 | 
@cache
def get_model():
    """Load the fastText model stored at `model_path`.

    The result is memoized via `functools.cache`, so the model file is only
    read on the first call.
    """
    try:
        # see https://github.com/facebookresearch/fastText/issues/1056
        fasttext.FastText.eprint = lambda *args, **kwargs: None
    except Exception:
        # Silencing the warning is optional; carry on if the attribute
        # cannot be patched. (A bare `except:` would also have swallowed
        # KeyboardInterrupt and SystemExit.)
        pass
    return fasttext.load_model(model_path.as_posix())
19 | 
20 | 
def detect_language(text: Optional[str]) -> Optional[str]:
    """Guess the language of `text` and return it as an ISO 639 alpha-3 code.

    Args:
        text: Text to classify; only the first 10000 characters (after
            whitespace collapsing) are given to the model.

    Returns:
        The three-letter code produced by `languagecodes.iso_639_alpha3`,
        or None when there is no text, the model yields no label, or the
        label cannot be converted.
    """
    normalized = collapse_spaces(text)
    # Check before loading the model: missing or empty text cannot be
    # classified, so there is no reason to pay for the model load.
    if not normalized:
        return None
    labels, _ = get_model().predict(normalized[:10000])
    # `predict` returns a (labels, probabilities) pair; the emptiness check
    # has to look at the labels, not at the pair itself.
    if not labels:
        return None
    code = labels[0].replace("__label__", "")
    return languagecodes.iso_639_alpha3(code)
35 | 


--------------------------------------------------------------------------------
/storyweb/parse/lid.176.ftz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/parse/lid.176.ftz


--------------------------------------------------------------------------------
/storyweb/parse/pipeline.py:
--------------------------------------------------------------------------------
  1 | import spacy
  2 | import logging
  3 | import hashlib
  4 | from spacy.tokens import Span, Doc
  5 | from pathlib import Path
  6 | from typing import Dict, Generator, List, Optional, Set, Tuple
  7 | from functools import cache
  8 | from normality import slugify
  9 | from articledata import Article
 10 | from pydantic import ValidationError
 11 | 
 12 | from storyweb.db import engine, Conn
 13 | from storyweb.clean import clean_entity_name, most_common, pick_name
 14 | from storyweb.models import ArticleDetails, Sentence, Tag, TagSentence
 15 | from storyweb.logic.articles import save_extracted
 16 | from storyweb.ontology import ClusterType
 17 | 
 18 | log = logging.getLogger(__name__)
 19 | 
# Maps entity labels produced by the NLP models to the ClusterType used for
# tags. extract_tag() ignores entities whose label is not listed here.
NLP_TYPES = {
    "PERSON": ClusterType.PERSON,
    "PER": ClusterType.PERSON,
    "ORG": ClusterType.ORGANIZATION,
    "GPE": ClusterType.LOCATION,
}
# spaCy model name per language code. load_nlp() falls back to the "xxx"
# entry for any language that has no entry of its own.
NLP_MODELS = {
    "eng": "en_core_web_sm",
    # alternative English model, currently disabled:
    # "en_core_web_trf",
    "deu": "de_core_news_sm",
    "rus": "ru_core_news_sm",
    "xxx": "xx_ent_wiki_sm",
}
 33 | 
 34 | 
 35 | @cache
 36 | def load_nlp(language: str):
 37 |     if language not in NLP_MODELS:
 38 |         return load_nlp("xxx")
 39 |     spacy.prefer_gpu()
 40 |     # disable everything but NER:
 41 |     nlp = spacy.load(
 42 |         NLP_MODELS[language],
 43 |         disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"],
 44 |     )
 45 |     nlp.add_pipe("sentencizer")
 46 |     return nlp
 47 | 
 48 | 
 49 | def read_raw_articles(path: Path) -> Generator[Tuple[str, Article], None, None]:
 50 |     with open(path, "rb") as fh:
 51 |         while line := fh.readline():
 52 |             try:
 53 |                 article = Article.parse_raw(line)
 54 |                 if article.id is None:
 55 |                     continue
 56 |                 if article.language != "eng":
 57 |                     continue
 58 |                 yield (article.text, article)
 59 |             except ValidationError as ve:
 60 |                 log.warn("Article validation [%s]: %s", article.id, ve)
 61 | 
 62 | 
 63 | def extract_tag(ent: Span) -> Optional[Tuple[str, str, str]]:
 64 |     tag_type = NLP_TYPES.get(ent.label_)
 65 |     if tag_type is None:
 66 |         return None
 67 |     label = clean_entity_name(ent.text)
 68 |     fp = slugify(label, sep="-")
 69 |     if fp is None or label is None:
 70 |         return None
 71 |     fp = "-".join(sorted(fp.split("-")))
 72 |     if tag_type == ClusterType.PERSON and " " not in label:
 73 |         return None
 74 |     return (label, tag_type, fp)
 75 | 
 76 | 
 77 | def _load_article(conn: Conn, doc: Doc, raw: Article) -> str:
 78 |     log.info("Article [%s, %s]: %r", raw.id, raw.language, raw.title)
 79 |     article = ArticleDetails(
 80 |         id=raw.id,
 81 |         site=raw.site,
 82 |         url=raw.url,
 83 |         title=raw.title,
 84 |         language=raw.language,
 85 |         text=raw.text,
 86 |     )
 87 |     sentences: List[Sentence] = []
 88 |     tag_sentences: Dict[str, Set[int]] = {}
 89 |     tag_types: Dict[str, List[str]] = {}
 90 |     tag_labels: Dict[str, List[str]] = {}
 91 |     for seq, sent in enumerate(doc.sents):
 92 |         sent_tags = 0
 93 |         for ent in sent.ents:
 94 |             extracted = extract_tag(ent)
 95 |             if extracted is None:
 96 |                 continue
 97 |             (label, type_, fp) = extracted
 98 |             tag_labels.setdefault(fp, [])
 99 |             tag_labels[fp].append(label)
100 |             tag_types.setdefault(fp, [])
101 |             tag_types[fp].append(type_)
102 |             tag_sentences.setdefault(fp, set())
103 |             tag_sentences[fp].add(seq)
104 |             sent_tags += 1
105 | 
106 |         if sent_tags > 0:
107 |             sentence = Sentence(article=article.id, sequence=seq, text=sent.text)
108 |             sentences.append(sentence)
109 | 
110 |     article.tags = len(tag_labels)
111 |     article.mentions = sum([len(v) for v in tag_labels.values()])
112 |     tags: List[Tag] = []
113 |     tag_sentence_objs: List[TagSentence] = []
114 |     for fp, labels in tag_labels.items():
115 |         key = f"{article.id}>{fp}".encode("utf-8")
116 |         tag_id = hashlib.sha1(key).hexdigest()
117 |         type_ = most_common(tag_types[fp])
118 |         label = pick_name(labels)
119 |         tag = Tag(
120 |             id=tag_id,
121 |             cluster=tag_id,
122 |             article=article.id,
123 |             fingerprint=fp,
124 |             type=type_,
125 |             label=label,
126 |             count=len(labels),
127 |             frequency=float(len(labels)) / article.mentions,
128 |             cluster_type=type_,
129 |             cluster_label=label,
130 |         )
131 |         tags.append(tag)
132 | 
133 |         for seq in tag_sentences.get(fp, []):
134 |             obj = TagSentence(tag=tag_id, article=article.id, sentence=seq)
135 |             tag_sentence_objs.append(obj)
136 | 
137 |     save_extracted(conn, article, sentences, tag_sentence_objs, tags)
138 |     return article.id
139 | 
140 | 
def load_articles(path: Path) -> None:
    """Import every English article found in the file at `path`.

    The articles are streamed through the "eng" pipeline in batches of 20
    and each one is stored inside its own `engine.begin()` block.
    """
    english = load_nlp("eng")
    stream = english.pipe(read_raw_articles(path), batch_size=20, as_tuples=True)
    for doc, raw_article in stream:
        with engine.begin() as conn:
            _load_article(conn, doc, raw_article)
147 | 
148 | 
def load_one_article(conn: Conn, article: Article) -> str:
    """Run the pipeline for the article's language and store the result.

    Returns the article id reported by `_load_article`.
    """
    pipeline = load_nlp(article.language)
    return _load_article(conn, pipeline(article.text), article)
153 | 


--------------------------------------------------------------------------------
/storyweb/routes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/routes/__init__.py


--------------------------------------------------------------------------------
/storyweb/routes/articles.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | from fastapi import APIRouter, Depends, Path, Query
 3 | from fastapi.exceptions import HTTPException
 4 | 
 5 | from storyweb.db import Conn
 6 | from storyweb.logic.articles import fetch_article, list_articles, list_sites
 7 | from storyweb.routes.util import get_conn, get_listing
 8 | from storyweb.models import (
 9 |     Article,
10 |     ArticleDetails,
11 |     Listing,
12 |     ListingResponse,
13 |     Site,
14 | )
15 | 
16 | router = APIRouter()
17 | 
18 | 
@router.get("/sites", response_model=ListingResponse[Site])
def sites_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
):
    """List all the source sites from which articles (refs) have been imported."""
    # Paging and sorting are taken from the shared `get_listing` dependency.
    sites = list_sites(conn, listing)
    return sites
26 | 
27 | 
@router.get("/articles", response_model=ListingResponse[Article])
def articles_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    site: Optional[str] = Query(None),
    story: Optional[int] = Query(None),
    q: Optional[str] = Query(None),
    cluster: List[str] = Query([]),
):
    """List articles, forwarding the given filters to `list_articles`.

    `cluster` may be repeated; values that are blank after stripping are
    ignored.
    """
    cluster_ids = [value for value in cluster if value is not None and value.strip()]
    filters = dict(site=site, story=story, query=q, clusters=cluster_ids)
    return list_articles(conn, listing, **filters)
46 | 
47 | 
@router.get("/articles/{article_id}", response_model=ArticleDetails)
def article_view(
    conn: Conn = Depends(get_conn),
    article_id: str = Path(),
):
    """Return a single article, or respond with 404 if it does not exist."""
    found = fetch_article(conn, article_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404)
57 | 


--------------------------------------------------------------------------------
/storyweb/routes/clusters.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | from fastapi import APIRouter, Depends, Path, Query
 3 | from fastapi.exceptions import HTTPException
 4 | 
 5 | from storyweb.db import Conn
 6 | from storyweb.logic.clusters import (
 7 |     fetch_cluster,
 8 |     list_clusters,
 9 |     list_related,
10 |     list_similar,
11 | )
12 | from storyweb.routes.util import get_conn, get_listing
13 | from storyweb.models import (
14 |     Cluster,
15 |     ClusterDetails,
16 |     Listing,
17 |     ListingResponse,
18 |     RelatedCluster,
19 |     SimilarCluster,
20 | )
21 | 
22 | router = APIRouter()
23 | 
24 | 
@router.get("/clusters", response_model=ListingResponse[Cluster])
def route_cluster_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    q: Optional[str] = Query(None),
    article: Optional[str] = Query(None),
    story: Optional[str] = Query(None),
    types: List[str] = Query([]),
):
    """List clusters, forwarding the given filters to `list_clusters`."""
    # NOTE(review): `story` is a string here while the story routes use
    # integer ids; confirm which type `list_clusters` expects.
    filters = dict(query=q, article=article, story=story, types=types)
    return list_clusters(conn, listing, **filters)
42 | 
43 | 
@router.get("/clusters/{cluster}", response_model=ClusterDetails)
def route_cluster_view(
    conn: Conn = Depends(get_conn),
    cluster: str = Path(),
):
    """Return a single cluster, or respond with 404 if it does not exist."""
    details = fetch_cluster(conn, cluster)
    if details is not None:
        return details
    raise HTTPException(status_code=404)
50 | 
51 | 
@router.get(
    "/clusters/{cluster}/similar",
    response_model=ListingResponse[SimilarCluster],
)
def route_cluster_similar(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    cluster: str = Path(),
):
    """Return the `list_similar` listing for the given cluster."""
    similar = list_similar(conn, listing, cluster)
    return similar
61 | 
62 | 
@router.get(
    "/clusters/{cluster}/related",
    response_model=ListingResponse[RelatedCluster],
)
def route_cluster_related(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    cluster: str = Path(),
    linked: Optional[bool] = Query(None),
    types: List[str] = Query([]),
):
    """Return the `list_related` listing for the given cluster.

    `linked` and `types` are forwarded to `list_related` unchanged.
    """
    filters = dict(linked=linked, types=types)
    return list_related(conn, listing, cluster, **filters)
80 | 


--------------------------------------------------------------------------------
/storyweb/routes/links.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from fastapi import APIRouter, Depends, Query
 3 | 
 4 | from storyweb.db import Conn
 5 | from storyweb.logic.clusters import (
 6 |     fetch_cluster,
 7 |     merge_cluster,
 8 |     explode_cluster,
 9 |     untag_article,
10 | )
11 | from storyweb.logic.links import (
12 |     create_link,
13 |     list_links,
14 |     untag_article,
15 | )
16 | from storyweb.logic.predict import link_predict
17 | from storyweb.routes.util import get_conn, get_listing
18 | from storyweb.models import (
19 |     ClusterDetails,
20 |     Link,
21 |     LinkBase,
22 |     LinkPrediction,
23 |     Listing,
24 |     ListingResponse,
25 |     MergeRequest,
26 |     ExplodeRequest,
27 |     UntagRequest,
28 | )
29 | 
30 | router = APIRouter()
31 | 
32 | 
@router.get("/links", response_model=ListingResponse[Link])
def links_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    cluster: List[str] = Query([]),
):
    """List links for the given clusters via `list_links`.

    `cluster` may be repeated; values that are blank after stripping are
    ignored.
    """
    cluster_ids = [value for value in cluster if value is not None and value.strip()]
    return list_links(conn, listing, cluster_ids)
41 | 
42 | 
@router.post("/links", response_model=Link)
def links_save(
    link: LinkBase,
    conn: Conn = Depends(get_conn),
):
    """Create a link of the submitted type between source and target."""
    # Intended workflow around this endpoint:
    #   * view all sentences that mention both tags/identities
    #   * pick a relationship type
    #   * store the link (any type) here
    return create_link(conn, link.source, link.target, link.type)
53 | 
54 | 
@router.get("/links/_predict", response_model=LinkPrediction)
def link_predict_(
    conn: Conn = Depends(get_conn),
    anchor: str = Query(),
    other: str = Query(),
):
    """Return the `link_predict` result for the `anchor` / `other` pair."""
    prediction = link_predict(conn, anchor, other)
    return prediction
62 | 
63 | 
@router.post("/links/_merge", response_model=ClusterDetails)
def merge_cluster_save(
    data: MergeRequest,
    conn: Conn = Depends(get_conn),
):
    """Merge `other` into `anchor` and return the resulting cluster."""
    # `merge_cluster` reports the id of the cluster to load afterwards.
    merged_id = merge_cluster(conn, data.anchor, data.other)
    return fetch_cluster(conn, merged_id)
71 | 
72 | 
@router.post("/links/_explode", response_model=ClusterDetails)
def explode_cluster_save(
    data: ExplodeRequest,
    conn: Conn = Depends(get_conn),
):
    """Run `explode_cluster` on a cluster and return the cluster it reports."""
    remaining_id = explode_cluster(conn, data.cluster)
    return fetch_cluster(conn, remaining_id)
80 | 
81 | 
@router.post("/links/_untag", response_model=ClusterDetails)
def untag_article_save(
    data: UntagRequest,
    conn: Conn = Depends(get_conn),
):
    """Run `untag_article` for a cluster/article pair and return the cluster."""
    resulting_id = untag_article(conn, data.cluster, data.article)
    return fetch_cluster(conn, resulting_id)
89 | 


--------------------------------------------------------------------------------
/storyweb/routes/stories.py:
--------------------------------------------------------------------------------
  1 | from normality import slugify
  2 | from typing import List, Optional
  3 | from fastapi import APIRouter, Depends, Path, Query
  4 | from fastapi.responses import PlainTextResponse
  5 | from fastapi.exceptions import HTTPException
  6 | 
  7 | from storyweb.db import Conn
  8 | from storyweb.logic.stories import (
  9 |     list_stories,
 10 |     fetch_story,
 11 |     create_story,
 12 |     update_story,
 13 |     delete_story,
 14 |     toggle_story_article,
 15 | )
 16 | from storyweb.logic.clusters import list_story_pairs
 17 | from storyweb.logic.graph import generate_graph_gexf, generate_graph_ftm
 18 | from storyweb.logic.links import story_merge
 19 | from storyweb.parse import import_article_by_url
 20 | from storyweb.routes.util import get_conn, get_listing
 21 | from storyweb.models import (
 22 |     StoryMutation,
 23 |     StoryArticleToggle,
 24 |     StoryArticleImportUrl,
 25 |     Story,
 26 |     ClusterPair,
 27 |     Listing,
 28 |     ListingResponse,
 29 | )
 30 | 
 31 | router = APIRouter()
 32 | 
 33 | 
 34 | @router.get("/stories", response_model=ListingResponse[Story])
 35 | def story_index(
 36 |     conn: Conn = Depends(get_conn),
 37 |     listing: Listing = Depends(get_listing),
 38 |     q: Optional[str] = Query(None),
 39 |     article: Optional[str] = Query(None),
 40 | ):
 41 |     return list_stories(conn, listing, query=q, article=article)
 42 | 
 43 | 
 44 | @router.post("/stories", response_model=Story)
 45 | def story_create(story: StoryMutation, conn: Conn = Depends(get_conn)):
 46 |     return create_story(conn, story)
 47 | 
 48 | 
 49 | @router.get("/stories/{story_id}", response_model=Story)
 50 | def story_view(
 51 |     conn: Conn = Depends(get_conn),
 52 |     story_id: int = Path(),
 53 | ):
 54 |     story = fetch_story(conn, story_id)
 55 |     if story is None:
 56 |         raise HTTPException(404)
 57 |     return story
 58 | 
 59 | 
 60 | @router.post("/stories/{story_id}/articles", response_model=Story)
 61 | def story_article_toggle(
 62 |     data: StoryArticleToggle,
 63 |     conn: Conn = Depends(get_conn),
 64 |     story_id: int = Path(),
 65 | ):
 66 |     story = fetch_story(conn, story_id)
 67 |     if story is None:
 68 |         raise HTTPException(404)
 69 |     toggle_story_article(conn, story_id, data.article)
 70 |     return story
 71 | 
 72 | 
 73 | @router.post("/stories/{story_id}/articles/import-url", response_model=Story)
 74 | def story_article_import_url(
 75 |     data: StoryArticleImportUrl,
 76 |     conn: Conn = Depends(get_conn),
 77 |     story_id: int = Path(),
 78 | ):
 79 |     story = fetch_story(conn, story_id)
 80 |     if story is None:
 81 |         raise HTTPException(404)
 82 |     article_id = import_article_by_url(conn, data.url)
 83 |     if article_id is None:
 84 |         raise HTTPException(400)
 85 |     story_merge(conn, story_id, article_id)
 86 |     toggle_story_article(conn, story_id, article_id, delete_existing=False)
 87 |     return story
 88 | 
 89 | 
 90 | @router.get("/stories/{story_id}/pairs", response_model=ListingResponse[ClusterPair])
 91 | def story_pairs(
 92 |     conn: Conn = Depends(get_conn),
 93 |     listing: Listing = Depends(get_listing),
 94 |     story_id: int = Path(),
 95 |     linked: Optional[bool] = Query(None),
 96 |     types: List[str] = Query([]),
 97 | ):
 98 |     story = fetch_story(conn, story_id)
 99 |     if story is None:
100 |         raise HTTPException(404)
101 |     return list_story_pairs(conn, listing, story_id, linked=linked, types=types)
102 | 
103 | 
@router.get("/stories/{story_id}/gexf", response_class=PlainTextResponse)
def story_gexf(
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Download the graph of a story as a GEXF attachment.

    Responds with 404 if the story does not exist.
    """
    story = fetch_story(conn, story_id)
    if story is None:
        raise HTTPException(404)
    # slugify() returns None when nothing usable is left of the title; fall
    # back to the story id so the download is never named "None.gexf".
    filename = slugify(story.title, sep="_") or f"story_{story_id}"
    text = generate_graph_gexf(conn, story_id=story_id)
    return PlainTextResponse(
        content=text,
        media_type="text/xml",
        headers={"Content-Disposition": f"attachment; filename={filename}.gexf"},
    )
119 | 
120 | 
@router.get("/stories/{story_id}/ftm", response_class=PlainTextResponse)
def story_ftm(
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Download the graph of a story as an FtM JSON attachment.

    Responds with 404 if the story does not exist.
    """
    story = fetch_story(conn, story_id)
    if story is None:
        raise HTTPException(404)
    # slugify() returns None when nothing usable is left of the title; fall
    # back to the story id so the download is never named "None.ftm.json".
    filename = slugify(story.title, sep="_") or f"story_{story_id}"
    text = generate_graph_ftm(conn, story_id=story_id)
    return PlainTextResponse(
        content=text,
        media_type="application/json+ftm",
        headers={"Content-Disposition": f"attachment; filename={filename}.ftm.json"},
    )
136 | 
137 | 
@router.post("/stories/{story_id}", response_model=Story)
def story_update(
    data: StoryMutation,
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Update a story with the submitted data; 404 if it does not exist."""
    if fetch_story(conn, story_id) is None:
        raise HTTPException(status_code=404)
    updated = update_story(conn, data, story_id)
    return updated
146 | 
147 | 
@router.delete("/stories/{story_id}")
def story_delete(
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Delete a story; 404 if it does not exist. The response body is null."""
    existing = fetch_story(conn, story_id)
    if existing is None:
        raise HTTPException(status_code=404)
    delete_story(conn, story_id)
    return None
158 | 


--------------------------------------------------------------------------------
/storyweb/routes/system.py:
--------------------------------------------------------------------------------
 1 | from fastapi import APIRouter, Depends
 2 | from fastapi.responses import PlainTextResponse
 3 | 
 4 | from storyweb.db import Conn
 5 | from storyweb.ontology import OntologyModel, ontology
 6 | from storyweb.logic.graph import generate_graph_gexf, generate_graph_ftm
 7 | from storyweb.routes.util import get_conn
 8 | 
 9 | router = APIRouter()
10 | 
11 | 
@router.get("/ontology", response_model=OntologyModel)
def ontology_model() -> OntologyModel:
    """Return the model held by the module-level `ontology` object."""
    model = ontology.model
    return model
15 | 
16 | 
@router.get("/gexf", response_class=PlainTextResponse)
def all_gexf(
    conn: Conn = Depends(get_conn),
):
    """Download the graph built by `generate_graph_gexf` as `storyweb.gexf`."""
    disposition = {"Content-Disposition": "attachment; filename=storyweb.gexf"}
    return PlainTextResponse(
        content=generate_graph_gexf(conn),
        media_type="text/xml",
        headers=disposition,
    )
27 | 
28 | 
@router.get("/ftm", response_class=PlainTextResponse)
def all_ftm(conn: Conn = Depends(get_conn)):
    """Download the graph built by `generate_graph_ftm` as `storyweb.ftm.json`."""
    disposition = {"Content-Disposition": "attachment; filename=storyweb.ftm.json"}
    return PlainTextResponse(
        content=generate_graph_ftm(conn),
        media_type="application/json+ftm",
        headers=disposition,
    )
37 | 


--------------------------------------------------------------------------------
/storyweb/routes/util.py:
--------------------------------------------------------------------------------
 1 | from typing import Generator, Optional
 2 | from fastapi import Query
 3 | 
 4 | from storyweb.db import engine, Conn
 5 | from storyweb.models import Listing
 6 | 
 7 | 
 8 | def get_conn() -> Generator[Conn, None, None]:
 9 |     """Create a database transaction for the request."""
10 |     with engine.begin() as conn:
11 |         yield conn
12 | 
13 | 
def get_listing(
    limit: int = Query(50, description="Number of objects to return", le=5000),
    offset: int = Query(0, description="Skip the first N objects in response"),
    sort: Optional[str] = Query(
        None, description="Sort criterion, format: field:direction"
    ),
) -> Listing:
    """Build a `Listing` from the paging and sorting query parameters.

    `sort` is split at its last colon into field and direction. Any
    direction other than "asc" (case-insensitive, surrounding whitespace
    ignored) becomes "desc". Without a colon the whole value is used as the
    field and the direction is "desc".
    """
    field, order = sort, "desc"
    if sort is not None and ":" in sort:
        field, _, requested = sort.rpartition(":")
        order = "asc" if requested.lower().strip() == "asc" else "desc"
    return Listing(
        limit=limit,
        offset=offset,
        sort_direction=order,
        sort_field=field,
    )
32 | 


--------------------------------------------------------------------------------
/storyweb/server.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from storyweb.app import app
3 | 
4 | logging.basicConfig(level=logging.INFO)
5 | 


--------------------------------------------------------------------------------
/storyweb/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
# Database URL, read from the environment when this module is imported.
DB_URL = os.getenv("STORYWEB_DB_URL")
if DB_URL is None:
    # Fail at import time rather than on first database use.
    raise RuntimeError("No $STORYWEB_DB_URL is configured!")
6 | 


--------------------------------------------------------------------------------