├── .dockerignore
├── .editorconfig
├── .github
├── dependabot.yml
└── workflows
│ └── docker.yml
├── .gitignore
├── DESIGN.md
├── Dockerfile
├── LICENSE
├── Makefile
├── QUERIES.md
├── README.md
├── RESEARCH.md
├── contrib
├── link_classification_experiments.ipynb
├── link_classification_experiments_2.ipynb
├── occrp-experiment
│ ├── DH_2022_NLP_Workshop.ipynb
│ └── crawl.py
└── tagged_sentences_20230203.csv
├── docker-compose.yml
├── docs
├── CNAME
├── index.html
└── scribble.png
├── frontend
├── .gitignore
├── package-lock.json
├── package.json
├── public
│ ├── favicon.ico
│ └── index.html
├── src
│ ├── App.tsx
│ ├── components
│ │ ├── ArticleClusters.tsx
│ │ ├── ArticleCorefList.tsx
│ │ ├── ArticleDrawer.tsx
│ │ ├── ArticlePreview.tsx
│ │ ├── ArticleStoryEditor.tsx
│ │ ├── ArticleText.tsx
│ │ ├── ClusterArticles.tsx
│ │ ├── ClusterButtonGroup.tsx
│ │ ├── ClusterDrawer.tsx
│ │ ├── Footer.tsx
│ │ ├── Navbar.tsx
│ │ ├── Pagination.tsx
│ │ ├── PairLink.tsx
│ │ ├── RelatedListing.tsx
│ │ ├── ScreenContent.tsx
│ │ ├── ScreenHeading.tsx
│ │ ├── SettingsDialog.tsx
│ │ ├── SimilarListing.tsx
│ │ ├── StoryArticleImportDialog.tsx
│ │ ├── StoryArticles.tsx
│ │ ├── StoryCreateDialog.tsx
│ │ ├── StoryDeleteDialog.tsx
│ │ ├── StoryGraph.tsx
│ │ ├── StoryLinkerBanner.tsx
│ │ ├── StoryNomNom.tsx
│ │ ├── StoryPairs.tsx
│ │ ├── StoryUpdateDialog.tsx
│ │ └── util.tsx
│ ├── constants.ts
│ ├── hooks.ts
│ ├── index.tsx
│ ├── logic.ts
│ ├── react-app-env.d.ts
│ ├── router.tsx
│ ├── screens
│ │ ├── ArticleIndex.tsx
│ │ ├── ClusterIndex.tsx
│ │ ├── ClusterView.tsx
│ │ ├── Home.tsx
│ │ ├── Layout.tsx
│ │ ├── Linker.tsx
│ │ ├── LinkerRelated.tsx
│ │ ├── StoryIndex.tsx
│ │ ├── StoryLinker.tsx
│ │ └── StoryView.tsx
│ ├── selectors.ts
│ ├── services
│ │ ├── articles.ts
│ │ ├── clusters.ts
│ │ ├── config.ts
│ │ ├── links.ts
│ │ ├── ontology.ts
│ │ ├── sites.ts
│ │ └── stories.ts
│ ├── store.ts
│ ├── styles
│ │ ├── App.scss
│ │ ├── Article.module.scss
│ │ ├── Cluster.module.scss
│ │ ├── Footer.module.scss
│ │ ├── Layout.module.scss
│ │ ├── Linker.module.scss
│ │ ├── Navbar.module.scss
│ │ ├── Story.module.scss
│ │ ├── index.scss
│ │ ├── util.module.scss
│ │ └── variables.scss
│ ├── types.ts
│ └── util.ts
└── tsconfig.json
├── setup.py
├── sources.json
├── stories.md
└── storyweb
├── __init__.py
├── app.py
├── clean.py
├── cli.py
├── db.py
├── logic
├── __init__.py
├── articles.py
├── clusters.py
├── graph.py
├── links.py
├── predict.py
├── stories.py
└── util.py
├── models.py
├── ontology.py
├── ontology.yml
├── parse
├── __init__.py
├── extract.py
├── language.py
├── lid.176.ftz
└── pipeline.py
├── routes
├── __init__.py
├── articles.py
├── clusters.py
├── links.py
├── stories.py
├── system.py
└── util.py
├── server.py
└── settings.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | frontend/node_modules
2 | .mypy_cache
3 | __pycache__
4 | storyweb.egg-info
5 | .envrc
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 |
2 | [*.{ts,tsx,js,jsx}]
3 | indent_style = space
4 | indent_size = 2
5 | charset = utf-8
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates: []
3 | # - package-ecosystem: pip
4 | # open-pull-requests-limit: 99
5 | # directory: "/"
6 | # schedule:
7 | # interval: weekly
8 | # - package-ecosystem: npm
9 | # open-pull-requests-limit: 99
10 | # directory: "/frontend"
11 | # schedule:
12 | # interval: weekly
13 | # - package-ecosystem: docker
14 | # open-pull-requests-limit: 99
15 | # directory: "/"
16 | # schedule:
17 | # interval: weekly
18 | # - package-ecosystem: "github-actions"
19 | # open-pull-requests-limit: 99
20 | # directory: "/"
21 | # schedule:
22 | # interval: weekly
23 |
--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
# Builds the StoryWeb container image on every push and publishes it to the
# GitHub Container Registry (ghcr.io).
name: docker

on: [push]

# Listing any permission sets all unlisted ones to "none", so the read access
# that actions/checkout relies on has to be granted explicitly.
permissions:
  contents: read
  packages: write

jobs:
  docker-build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # - name: Set up QEMU
      #   uses: docker/setup-qemu-action@v2
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ghcr.io/opensanctions/storyweb
          tags: |
            type=ref,event=branch
            type=semver,pattern={{version}}
            type=sha
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
        with:
          install: true
      - name: Debug information
        # Uses the Compose v2 plugin; the standalone `docker-compose` v1
        # binary is no longer shipped on current hosted runner images.
        run: |
          docker --version
          docker compose version
          echo "${GITHUB_REF}"
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and push release
        uses: docker/build-push-action@v4
        with:
          context: .
          # platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.code-workspace
6 | *.gexf
7 | .DS_Store
8 | .envrc
9 | data/
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
--------------------------------------------------------------------------------
/DESIGN.md:
--------------------------------------------------------------------------------
1 |
2 | # Workflow ideas for StoryWeb
3 |
4 | We want to go from a corpus of media reports to a knowledge graph for a specific set of journalistic stories (called a `Scandal` for now; I guess `StoryLine` also works).
5 |
6 |
7 | ## Step 1: Crawling the reporting
8 |
9 | * Contact a bunch of GIJN member orgs to see if I may. Maybe offer a formalised quid pro quo deal ("I can parse your stories, you get story graph data")
10 | * Build a news crawler in async Python, store everything to a SQL database that allows for incremental crawls.
11 | * Output articles with metadata (https://schema.org/Article) as a JSONL file.
12 | * Requires identifying which pages contain articles
13 | * Requires extracting article metadata and body (e.g. via `newspaper`, `trafilatura`)
14 |
15 |
16 | ## Step 2: Extract named entities
17 |
18 | * Run a competition between spaCy and SparkNLP, decide if we always want to run both or if it's a per-language decision.
19 | * Find a disk format for annotated articles, probably going to need:
20 | * Every extracted entity and their tag type, span
21 | * Every sentence and their spans
22 |
23 |
24 | ## Step 3: Build a co-occurrence matrix
25 |
26 | Get everything into a massive SQL table a la:
27 |
28 | * `article_url`, `sentence_no`, `tag_type`, `tag_label`, `tag_normalised`
29 |
30 | e.g.:
31 |
32 | * `https://rise.md/...`,`6`,`PER`,`Vladimir Plahotniuc`,`vladimir-plahotniuc`
33 | * `https://rise.md/...`,`16`,`PER`,`Vlad Plahotniuc`,`vlad-plahotniuc`
34 | * `https://rise.md/...`,`4`,`LOC`,`Moldova`,`md`
35 | * `https://rise.md/...`,`4`,`ORG`,`Democratic Party`,`democratic-party`
36 | * `https://istories.ru/...`,`1`,`PER`,`Владимир Плахотнюк`,`vladimir-plahotnuk`
37 | * `https://istories.ru/...`,`5`,`PER`,`Плахотнюк`,`plahotnuk`
38 | * `https://istories.ru/...`,`17`,`PER`,`Владимир Плахотнюк`,`vladimir-plahotnuk`
39 |
40 |
41 | ## Step 4: Build an entity loom
42 |
43 | The core of an interactive graph entity identity building tool could be an interactive loop like this:
44 |
45 | * Pick a particularly namey-looking tag that occurs a lot.
46 | * Show it to a user and prompt them to decide:
47 | 1. This is a new entity's name, make a new ID (shortuuid)
48 | 2. This is another surface form of an existing entity, show top 5 search results (ask if it's a strong or weak alias)
49 | * Focus the user process on the (new) entity
50 | * Show co-occurring other tags, including place and date tags
51 | * Maybe: show any sentence in which both the tag and an alias of the entity occur
52 | * For each tag, prompt the user to say if it's a strong/weak alias, context or related entity or unrelated tag
53 | * Allow the user to finish working on this entity and start with a new one
54 | * Start over.
55 |
56 | Resulting table:
57 |
58 | `entity_id`, `tag_type`, `tag_label`, `tag_normalised`, `role`
59 |
60 | where `role` is one of:
61 |
62 | * `alias` (e.g. `Vladimir Plahotniuc`)
63 | * `weak_alias` (e.g. `Plahotniuc's`, `the Oligarch`)
64 | * `context` (e.g. `Moldova` for Plahotniuc)
65 | * `related` (e.g. `Democratic Party` for Plahotniuc)
66 | * `unrelated` (e.g. `European Union` for Plahotniuc)
67 |
68 | This process can probably later be partially automated, eg. if one of the related labels already is part of an existing entity, or by doing string similarity on the aliases.
69 |
70 | ### How to disambiguate?
71 |
72 | This doesn't yet allow us to say that there are two separate `Markus Braun` - one maybe an actor mentioned in a gossip piece, the other the CEO of Wirecard. We basically need a way to fork an entity and say: this alias, in this article - make it part of another entity!
73 |
74 | ### Clustering
75 |
76 | What I'm describing here is really a clustering process. Need to do some research on what scikit-learn-level machine learning for clustering looks like and how well it might apply.
77 |
78 | ## Step 5: Build a relationship loom
79 |
80 | Similar process as above: take two entities from Step 4 that co-occur in multiple articles, show the user any sentences that mention both and then propose to them to classify their relationship (or do it based on a keyword list, and merely double-check directionality).
81 |
82 | Categories (tbd):
83 |
84 | * Family
85 | * Personal associate
86 | * Business associate
87 | * Nemesis, opponent, adversary, antagonist (word?)
88 | * Owner
89 | * In control of (Director, etc.)
90 | * Participant
91 | * Member/Employee
92 | * Payment, debt, business relationship
93 |
94 |
95 | ### Can we model events?
96 |
97 | Media reporting is all about events, do we want to reify them? How can we label events, maybe by deriving key words from the headline?
98 |
99 |
100 | ## Step 6: Reconcile entities
101 |
102 | This can maybe already happen in `nomenklatura`:
103 |
104 | * https://github.com/opensanctions/nomenklatura/blob/master/README.md
105 |
106 |
107 | ## Step 7: Visualise, profit!
108 |
109 | * What can we compute on the output using NetworkX?
110 | * https://sayari-analytics.github.io/trellis/
111 |
112 |
113 | ## Credits
114 |
115 | * Thanks to [Heleen](https://twitter.com/heleenemanuel) and [Johan](https://johanschuijt.nl/) :)
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Stage 1: build the frontend bundle with Node.
FROM node:19 AS frontend

RUN mkdir -p /fe
WORKDIR /fe
COPY frontend /fe
RUN npm install
RUN npm run build

# Stage 2: Python runtime image for the storyweb application.
FROM ubuntu:23.04
# key=value form; the whitespace-separated ENV/LABEL syntax is legacy.
ENV DEBIAN_FRONTEND=noninteractive

LABEL org.opencontainers.image.title="StoryWeb"
LABEL org.opencontainers.image.licenses="MIT"
LABEL org.opencontainers.image.source="https://github.com/opensanctions/storyweb"

# System packages: Python tooling, ICU, and the PostgreSQL client libraries.
# The apt lists and temp dirs are removed in the same layer to keep it small.
RUN apt-get -qq -y update \
    && apt-get -qq -y upgrade \
    && apt-get -qq -y install locales ca-certificates tzdata curl python3-pip \
    python3-icu python3-cryptography libicu-dev pkg-config postgresql-client-common \
    postgresql-client libpq-dev \
    && apt-get -qq -y autoremove \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Generate the en_US.UTF-8 locale and pin the container clock to UTC.
RUN localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \
    && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
    && dpkg-reconfigure -f noninteractive tzdata

ENV LANG='en_US.UTF-8' \
    TZ="UTC" \
    API_URL="/api/1"

# spaCy and its language models are installed before the application source
# is copied, so these layers stay cached when only the source changes.
RUN pip install -U pip setuptools wheel
RUN pip install spacy
RUN python3 -m spacy download en_core_web_sm
RUN python3 -m spacy download de_core_news_sm
RUN python3 -m spacy download xx_ent_wiki_sm
RUN python3 -m spacy download ru_core_news_sm

RUN mkdir -p /storyweb
WORKDIR /storyweb
COPY . /storyweb
RUN pip install --no-cache-dir -e /storyweb
# Bring in the compiled frontend from stage 1.
COPY --from=frontend /fe/build /storyweb/frontend/build

CMD ["uvicorn", "--host", "0.0.0.0", "storyweb.server:app"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022-2023, Friedrich Lindenberg
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
# None of these targets produce a file of the same name. Declaring them phony
# keeps e.g. `make data` working even though a `data/` directory exists.
.PHONY: data clean fetch load serve reset

data: data/articles.ijson

clean:
	rm -rf data/articles

data/articles:
	mkdir -p data/articles

# The directory is an order-only prerequisite: it must exist, but its mtime
# (which changes whenever a file is added to it) must not trigger re-downloads.
data/articles/%.ijson: | data/articles
	curl -o data/articles/$*.ijson -s https://data.opensanctions.org/contrib/mediacrawl/$*.ijson

fetch: data/articles/occrp.ijson \
	data/articles/icij.ijson \
	data/articles/dossier_at.ijson \
	data/articles/daphne_foundation.ijson \
	data/articles/istories_media.ijson \
	data/articles/amabhungane.ijson

# Concatenate all downloaded per-site files into one import file.
data/articles.ijson: fetch
	cat data/articles/* >data/articles.ijson

load: data/articles.ijson
	storyweb import data/articles.ijson

serve:
	uvicorn --reload storyweb.server:app

# Drops and recreates the local `storyweb` database, then creates its tables.
reset:
	dropdb storyweb
	createdb -E utf-8 storyweb
	storyweb init
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # StoryWeb
6 |
7 | StoryWeb is a project aimed at extracting networks of entities from journalistic reporting. The idea is to reverse engineer stories into structured graphs of the persons and companies involved, and to capture the relationships between them.
8 |
9 | https://storyweb.opensanctions.org
10 |
11 | StoryWeb consumes news articles as input data. Individual articles can be imported via the web interface, but there's also a possibility for bulk import using the [`articledata`](https://github.com/pudo/articledata) micro-format. One producer of `articledata` files is [`mediacrawl`](https://github.com/opensanctions/mediacrawl), which can be used to crawl news websites and harvest all of their articles.
12 |
13 |
14 |
15 |
16 |
17 | ## Installation
18 |
19 | Storyweb can be run as a Python web application from a developer's machine, or via a docker container. We recommend using docker for any production deployment and as a quick means to get the application running if you don't intend to change its code.
20 |
21 | ### Running in Docker mode
22 |
23 | You can start up a docker instance by running the following commands in an empty directory:
24 |
25 | ```bash
26 | wget https://raw.githubusercontent.com/opensanctions/storyweb/main/docker-compose.yml
27 | docker-compose up
28 | ```
29 |
30 | This will make the storyweb user interface available on port 8000 of the host machine.
31 |
32 | ### Running in development mode
33 |
34 | Before installing storyweb on the host machine, we recommend setting up a Python virtual environment of some form (venv, virtualenv, etc.).
35 |
36 | As a first step, let's install the `spaCy` models that are used to extract person and company names from the given articles:
37 |
38 | ```bash
39 | pip install spacy
40 | python3 -m spacy download en_core_web_sm
41 | python3 -m spacy download de_core_news_sm
42 | python3 -m spacy download xx_ent_wiki_sm
43 | python3 -m spacy download ru_core_news_sm
44 | ```
45 |
46 | Next, we'll install the application itself, and its dependencies. Run the following command inside of a git checkout of the storyweb repository:
47 |
48 | ```bash
49 | pip install -e ".[dev]"
50 | ```
51 |
52 | You also need to have a PostgreSQL server running somewhere (e.g. on the same machine, perhaps installed via homebrew or apt). Create a fresh database on that server and point storyweb to it like this:
53 |
54 | ```bash
55 | export STORYWEB_DB_URL=postgresql://storyweb:storyweb@db/storyweb
56 | # Create the database tables:
57 | storyweb init
58 | ```
59 |
60 | You now have the application configured and you can explore the commands exposed by the `storyweb` command-line tool:
61 |
62 | ```
63 | Usage: storyweb [OPTIONS] COMMAND [ARGS]...
64 |
65 | Storyweb CLI
66 |
67 | Options:
68 | --help Show this message and exit.
69 |
70 | Commands:
71 | auto-merge Automatically merge on fingerprints
72 | compute Run backend computations
73 | graph Export an entity graph
74 | import Import articles into the DB
75 | import-url Load a single news story by URL
76 | init Initialize the database
77 | ```
78 |
79 | The `import` command listed here will accept any data file in the `articledata` format, which is emitted by the `mediacrawl` tool.
80 |
81 | #### Running the backend API
82 |
83 | Finally, you can run the backend API using `uvicorn`:
84 |
85 | ```bash
86 | uvicorn --reload --host 0.0.0.0 storyweb.server:app
87 | ```
88 |
89 | This will boot up the API server on port 8000 of the local host and enable hot reloads whenever the code changes during development.
90 |
91 | #### Installing and running the frontend
92 |
93 | Once you have the API running, you can install and run the development server for the frontend. Storyweb uses React and ReduxToolkit internally and will use a Webpack dev server to dynamically re-build the frontend during development.
94 |
95 | ```bash
96 | cd frontend/
97 | npm install
98 | npm run dev
99 | ```
100 |
101 | Remember that you need to run `npm run dev` whenever you do frontend development.
102 |
103 | ## License and credits
104 |
105 | Thanks to [Heleen Emanuel](https://twitter.com/heleenemanuel) and [Tobias Sterbak](https://tobiassterbak.com/) for their advice on the design and implementation of StoryWeb.
106 |
107 | This project receives financial support from the German Federal Ministry for Education and Research (Bundesministerium für Bildung und Forschung, BMBF) under the grant identifier `01IS22S42`. The full responsibility for the content of this publication remains with its authors.
108 |
109 | The software is licensed under the MIT license, see `LICENSE` in this repository.
--------------------------------------------------------------------------------
/RESEARCH.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ### Bad Will Hunting
4 |
5 | * https://blogs.lse.ac.uk/polis/2022/09/15/bad-will-hunting-the-story-so-far/
6 |
7 | * SparkNLP: https://nlp.johnsnowlabs.com/
8 | * Rosette Text Analytics: https://www.rosette.com/capability/entity-extractor/
9 |
10 | # Selecting articles from a corpus
11 |
12 | * https://github.com/asreview/asreview
13 |
14 | Tips from Tobias:
15 |
16 | * https://maartengr.github.io/BERTopic/index.html#quick-start
17 | * Dimensionality reduction: https://umap-learn.readthedocs.io/en/latest/basic_usage.html
18 |
19 | * One-class classification: https://en.wikipedia.org/wiki/One-class_classification
20 | * Get confidence scores for classification?
21 | * Or: fuzzying out names for other names and see if it can disambiguate
22 |
23 |
24 |
25 | * https://github.com/microsoft/spacy-ann-linker
--------------------------------------------------------------------------------
/contrib/occrp-experiment/crawl.py:
--------------------------------------------------------------------------------
1 | import json
2 | import asyncio
3 | import trafilatura
4 | from datetime import datetime
5 | from typing import Optional
6 | import aiohttp
7 | from lxml import html
8 | from urllib.parse import urlparse, urljoin
9 | from sqlmodel import Field, Session, SQLModel, create_engine, select
10 |
11 |
# Work queue of cleaned URLs waiting to be fetched; filled by crawl_url() and
# drained by worker().
queue = asyncio.Queue()
# URLs already enqueued during this run; crawl_url() uses it to skip duplicates.
seen = set()
# Engine for the local SQLite file that holds the Page and Article tables.
engine = create_engine("sqlite:///crawl.sqlite3")
15 |
16 |
class Page(SQLModel, table=True):
    """Stored result of fetching one URL; get_page() returns it on later lookups."""

    # Cleaned URL of the page; primary key of the table.
    url: Optional[str] = Field(primary_key=True)
    # Response body decoded as UTF-8. None when the response was not a 200
    # HTML response or could not be decoded.
    text: Optional[str]
    # get_page() always stores False; nothing in this script sets it to True.
    is_article: bool
    # Naive UTC timestamp taken when the page was fetched.
    crawled_at: datetime
22 |
23 |
class Article(SQLModel, table=True):
    """Article metadata and body text extracted from a crawled page."""

    # URL of the page the article was extracted from; primary key of the table.
    url: Optional[str] = Field(primary_key=True)
    # Title with the " - OCCRP" suffix removed (see extract_article).
    title: Optional[str]
    # The fields below are copied unchanged from the extraction result.
    author: Optional[str]
    date: Optional[str]
    text: Optional[str]
30 |
31 |
async def clean_url(url):
    """Normalise *url* for crawling, or return None if it should be skipped.

    A URL is accepted only when it uses the ``https`` scheme, points at
    ``occrp.org`` or ``www.occrp.org`` and its path does not start with
    ``/ru/``. Accepted URLs are returned with query string and fragment
    removed.

    The function is declared ``async`` only because callers ``await`` it;
    it performs no I/O.
    """
    parsed = urlparse(url)
    if parsed.scheme != "https":
        return None
    if parsed.hostname not in ("occrp.org", "www.occrp.org"):
        return None
    if parsed.path.startswith("/ru/"):
        return None
    # Empty strings (not None) are the "absent" value for the components of
    # a str-based parse result, so the tuple stays homogeneously typed.
    return parsed._replace(query="", fragment="").geturl()
44 |
45 |
async def crawl_url(url):
    """Clean *url* and put it on the crawl queue unless rejected or already seen."""
    cleaned = await clean_url(url)
    if cleaned is None or cleaned in seen:
        return
    seen.add(cleaned)
    await queue.put(cleaned)
54 |
55 |
async def get_page(db: Session, session: aiohttp.ClientSession, url: str):
    """Return the stored Page for *url*, fetching and storing it if missing.

    If a Page row with this URL already exists it is returned without any
    network access. Otherwise the URL is requested; the body is kept only for
    a 200 response whose Content-Type is absent or mentions "html", and only
    if it decodes as UTF-8. In every other case the page is stored with
    ``text=None``. The new row is committed before it is returned.
    """
    existing = db.exec(select(Page).where(Page.url == url)).first()
    if existing is not None:
        return existing

    async with session.get(url) as response:
        content_type = response.headers.get("Content-Type")
        body = None
        maybe_html = content_type is None or "html" in content_type.lower()
        if response.status == 200 and maybe_html:
            raw = await response.read()
            try:
                body = raw.decode("utf-8")
            except UnicodeDecodeError:
                # Best effort: bodies that are not valid UTF-8 are stored
                # without text.
                pass
        page = Page(
            url=url,
            text=body,
            is_article=False,
            crawled_at=datetime.utcnow(),
        )
        db.add(page)
        db.commit()
        return page
84 |
85 |
def is_article(doc):
    """Return True if the parsed page contains any of the known article markers.

    *doc* is an element tree supporting ``find()`` with XPath-style paths.
    """
    markers = (
        './/article//li[@class="authors"]',
        './/aside[@class="byline"]',
        './/section[@class="blog"]',
        './/div[@class="occrp-story"]',
    )
    return any(doc.find(path) is not None for path in markers)
96 |
97 |
async def extract_article(db: Session, page: Page, doc):
    """Extract article fields from *doc* and store them as the Article for *page*.

    An existing Article row with the page's URL is updated in place;
    otherwise a new one is created. The change is committed before returning.
    If the extractor produces no result, nothing is stored.
    """
    extract = trafilatura.bare_extraction(doc)
    if extract is None:
        # The extractor yields no result when it finds no usable content;
        # without this guard the .get() calls below raise AttributeError.
        return

    article = db.exec(select(Article).where(Article.url == page.url)).first()
    if article is None:
        article = Article(url=page.url)

    title = extract.get("title")
    if title is not None:
        # Drop the site suffix that is appended to page titles.
        article.title = title.replace(" - OCCRP", "").strip()
    article.date = extract.get("date")
    article.text = extract.get("text")
    article.author = extract.get("author")
    print("ARTICLE", page.url, extract.get("title"))
    db.add(article)
    db.commit()
115 |
116 |
async def worker(session: aiohttp.ClientSession):
    """Consume URLs from the queue forever: fetch, follow links, store articles.

    For each URL the page is loaded via get_page(), every ``<a href>`` on it is
    resolved against the page URL and offered to crawl_url(), and the page is
    passed to extract_article() when is_article() accepts it. Errors while
    handling one URL are printed and do not stop the worker. The loop only
    ends when the task is cancelled.
    """
    while True:
        # Wait for work first so an idle worker does not hold a DB session.
        url = await queue.get()
        try:
            with Session(engine) as db:
                page = await get_page(db, session, url)
                if page is None or page.text is None:
                    continue
                doc = html.fromstring(page.text)
                for link in doc.findall(".//a"):
                    href = link.get("href")
                    if href is None:
                        continue
                    await crawl_url(urljoin(url, href))
                if is_article(doc):
                    await extract_article(db, page, doc)
        except Exception as exc:
            print("EXCEPTION", exc)
        finally:
            # Exactly one task_done() per get(), whatever happened above,
            # so that queue.join() in crawl() can complete.
            queue.task_done()
142 |
143 |
async def crawl():
    """Create the tables, seed the queue with the site root and crawl until it drains.

    Ten worker tasks share one HTTP session. Once every queued URL has been
    processed the workers are cancelled and awaited.
    """
    SQLModel.metadata.create_all(engine)
    async with aiohttp.ClientSession(
        headers={"User-Agent": "pudo from the hood"}
    ) as session:
        await crawl_url("https://occrp.org")
        workers = [asyncio.create_task(worker(session)) for _ in range(10)]

        await queue.join()
        for running in workers:
            running.cancel()
        # return_exceptions=True collects the CancelledError of each worker
        # instead of raising it here.
        await asyncio.gather(*workers, return_exceptions=True)
158 |
159 |
async def export():
    """Write every stored Article to ``articles.json`` as one JSON array of objects."""
    with open("articles.json", "w") as fh, Session(engine) as db:
        rows = db.exec(select(Article)).all()
        json.dump([row.dict() for row in rows], fh)
167 |
168 |
def main():
    """Script entry point: run export() to completion on a new event loop.

    NOTE(review): only export() is invoked here; crawl() is not called from
    this entry point. Confirm whether that is intentional.
    """
    asyncio.run(export())


if __name__ == "__main__":
    main()
175 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 |
3 | services:
4 | db:
5 | image: postgres:15
6 | expose:
7 | - "5432"
8 | container_name: db
9 | environment:
10 | POSTGRES_USER: storyweb
11 | POSTGRES_PASSWORD: storyweb
12 | POSTGRES_DB: storyweb
13 | ulimits:
14 | memlock:
15 | soft: -1
16 | hard: -1
17 | volumes:
18 | - db-data:/var/lib/postgresql/data
19 | # - "./schema.sql:/docker-entrypoint-initdb.d/storyweb-schema.sql"
20 | deploy:
21 | restart_policy:
22 | condition: on-failure
23 |
24 | app:
25 | build: .
26 | image: ghcr.io/opensanctions/storyweb:main
27 | command: bash -c 'while !0.2%",
52 | "not dead",
53 | "not op_mini all"
54 | ],
55 | "development": [
56 | "last 1 chrome version",
57 | "last 1 firefox version",
58 | "last 1 safari version"
59 | ]
60 | },
61 | "devDependencies": {
62 | "@types/react-helmet": "^6.1.6"
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/frontend/public/favicon.ico
--------------------------------------------------------------------------------
/frontend/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | StoryWeb
10 |
11 |
12 | You need to enable JavaScript to run this app.
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/frontend/src/App.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { RouterProvider } from 'react-router-dom';
3 | import { Provider } from 'react-redux'
4 | import { FocusStyleManager } from "@blueprintjs/core"
5 |
6 | import { store } from './store';
7 | import { router } from './router';
8 |
9 | import './styles/App.scss';
10 |
11 | FocusStyleManager.onlyShowFocusOnTabs();
12 |
13 | function App() {
14 | return (
15 |
16 |
17 |
18 | );
19 | }
20 |
21 | export default App;
22 |
--------------------------------------------------------------------------------
/frontend/src/components/ArticleClusters.tsx:
--------------------------------------------------------------------------------
1 | import { HTMLTable } from "@blueprintjs/core";
2 | import { Link } from "react-router-dom";
3 | import { useFetchClusterListingQuery } from "../services/clusters";
4 | import { IArticle } from "../types";
5 | import { getClusterLink } from "../util";
6 | import { ClusterLabel, ClusterTypeIcon, ErrorSection, SectionLoading } from "./util";
7 |
8 | type ArticleClustersProps = {
9 | article: IArticle
10 | }
11 |
12 | export default function ArticleClusters({ article }: ArticleClustersProps) {
13 | const query = { article: article.id, limit: 100 };
14 | const { data: clusters, isLoading, error: clustersError } = useFetchClusterListingQuery(query);
15 | if (clustersError) {
16 | return
17 | }
18 | if (isLoading || clusters === undefined) {
19 | return ;
20 | }
21 |
22 | return (
23 |
24 |
25 |
26 |
27 | Name
28 |
29 |
30 |
31 |
32 | {clusters.results.map((cluster) =>
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 | )}
43 |
44 |
45 | )
46 | };
--------------------------------------------------------------------------------
/frontend/src/components/ArticleCorefList.tsx:
--------------------------------------------------------------------------------
1 | import { useState } from "react";
2 | import { Menu, MenuItem } from '@blueprintjs/core';
3 |
4 | import { useFetchArticleListingQuery } from "../services/articles"
5 | import ArticleDrawer from "./ArticleDrawer";
6 | import { IArticle } from "../types";
7 | import { ARTICLE_ICON } from "../constants";
8 |
// Props: cluster ids used to filter the article listing, and tag groups that are
// forwarded unchanged as the tags prop further down.
9 | type ArticleCorefListProps = {
10 | clusters: string[]
11 | tags: string[][]
12 | }
13 |
// Lists the articles returned by the article listing query for the given clusters and
// keeps the id of the clicked article in local state (articleId).
// Renders nothing while the query is loading or has no data.
14 | export default function ArticleCorefList({ clusters, tags }: ArticleCorefListProps) {
15 | const [articleId, setArticleId] = useState()
16 | const articleParams = { cluster: clusters };
17 | const { data, isLoading } = useFetchArticleListingQuery(articleParams);
18 | if (isLoading || data === undefined) {
19 | return null;
20 | }
21 |
// Suppress the default click action and remember the clicked article as selected.
22 | const onClick = (e: React.MouseEvent, article: IArticle) => {
23 | e.preventDefault();
24 | setArticleId(article.id);
25 | }
26 |
27 | return (
28 | <>
29 |
30 | {data.results.map((a) => (
31 | onClick(e, a)}
35 | text={a.title} active={a.id === articleId} />
36 | ))
37 | }
38 |
39 | setArticleId(undefined)}
41 | articleId={articleId}
42 | tags={tags}
43 | />
44 | >
45 | )
46 | };
--------------------------------------------------------------------------------
/frontend/src/components/ArticleDrawer.tsx:
--------------------------------------------------------------------------------
1 | import { Drawer, Tab, Tabs } from "@blueprintjs/core"
2 | import { SyntheticEvent, useEffect, useState } from "react"
3 | import { ARTICLE_ICON } from "../constants"
4 | import { useFetchArticleQuery } from "../services/articles"
5 | import { useFetchClusterListingQuery } from "../services/clusters"
6 | import ArticleText from "./ArticleText"
7 | import { ErrorSection, NumericTag, SectionLoading } from "./util"
8 |
9 | import styles from '../styles/Article.module.scss'
10 | import ArticleClusters from "./ArticleClusters"
11 |
// Props for the inner drawer: a required article id, optional tag groups, the open
// flag, and the two close callbacks (onClose, onClosed).
12 | type ArticleDrawerInnerProps = {
13 | articleId: string,
14 | tags?: string[][]
15 | isOpen: boolean,
16 | onClose: (event: SyntheticEvent) => void
17 | onClosed: (node: HTMLElement) => void
18 | }
19 |
// Fetches the article by id plus a cluster listing for that article (limit 0).
// NOTE(review): limit 0 presumably asks only for the total count; confirm against the API.
// Conditional sections below depend on whether the article is undefined, whether the
// article query reported an error, and whether the article is loaded.
20 | function ArticleDrawerInner({ articleId, tags, isOpen, onClose, onClosed }: ArticleDrawerInnerProps) {
21 | const { data: article, error: articleError } = useFetchArticleQuery(articleId);
22 | const clustersQuery = { article: articleId, limit: 0 };
23 | const { data: clusters } = useFetchClusterListingQuery(clustersQuery);
// Fall back to an empty tag list when no tags were passed.
24 | const realTags = tags ? tags : []
// Only treat the drawer as open when the trimmed id is longer than one character.
25 | const realIsOpen = isOpen && articleId.trim().length > 1;
26 |
27 | return (
28 |
39 |
40 | {(article === undefined) && (
41 |
42 | )}
43 | {(articleError !== undefined) && (
44 |
45 | )}
46 | {article && (
47 |
48 |
54 | }
55 | />
56 |
61 | Extracted entities
62 |
63 | >
64 | }
65 | panel={
66 |
67 | }
68 | />
69 |
70 | )}
71 |
72 |
73 |
74 | )
75 | }
76 |
// Public drawer props: optional article id (the drawer counts as open when it is
// truthy), optional tag groups, and the close callback.
77 | type ArticleDrawerProps = {
78 | articleId?: string,
79 | tags?: string[][]
80 | onClose: (event: SyntheticEvent) => void
81 | }
82 |
// Wrapper that remembers the last non-empty article id in local state
// (activeArticleId). The state is only cleared by onClosed, so the id survives the
// articleId prop becoming empty. Renders nothing while no active id is stored.
83 | export default function ArticleDrawer({ articleId, tags, onClose }: ArticleDrawerProps) {
84 | const isOpen = !!articleId;
85 | const [activeArticleId, setActiveArticleId] = useState(articleId);
86 |
// Copy a new truthy articleId into state; an empty or undefined prop never overwrites it.
87 | useEffect(() => {
88 | if (!!articleId && articleId !== activeArticleId) {
89 | setActiveArticleId(articleId);
90 | }
91 | }, [articleId, activeArticleId])
92 |
// Forget the active article.
93 | const onClosed = () => {
94 | setActiveArticleId(undefined);
95 | }
96 |
97 | if (activeArticleId === undefined) {
98 | return null;
99 | }
100 |
101 | return (
102 |
109 | );
110 | }
--------------------------------------------------------------------------------
/frontend/src/components/ArticlePreview.tsx:
--------------------------------------------------------------------------------
1 | import { AnchorButton } from "@blueprintjs/core";
2 | import { useFetchArticleQuery } from "../services/articles"
3 | import ArticleText from "./ArticleText";
4 |
// Props: id of the article to fetch and tag groups accepted alongside it.
5 | type ArticlePreviewProps = {
6 | articleId: string,
7 | tags: string[][]
8 | }
9 |
// Fetches a single article by id and shows its title (data.title).
// Renders nothing while the query is loading or has no data.
10 | export default function ArticlePreview({ articleId, tags }: ArticlePreviewProps) {
11 | const { data, isLoading } = useFetchArticleQuery(articleId);
12 | if (isLoading || data === undefined) {
13 | return null;
14 | }
15 |
16 | return (
17 |
18 |
19 |
20 | {data.title}
21 |
22 |
23 |
24 | )
25 | };
--------------------------------------------------------------------------------
/frontend/src/components/ArticleStoryEditor.tsx:
--------------------------------------------------------------------------------
1 | import { Button, Intent, Menu, MenuItem } from "@blueprintjs/core";
2 | import { Popover2, Classes as Popover2Classes } from "@blueprintjs/popover2";
3 | import { MouseEvent } from "react";
4 | import { useFetchStoryListingQuery, useToggleStoryArticleMutation } from "../services/stories";
5 | import { IArticle, IStory } from "../types";
6 |
// Props: the article whose story membership is edited.
7 | type ArticleStoryEditorContentProps = {
8 | article: IArticle
9 | }
10 |
// Runs two story listing queries (both with limit 100): one unfiltered and one filtered
// by this article. Each story from the unfiltered listing gets an entry whose intent and
// icon depend on whether its id appears in the filtered listing.
// Renders nothing until the unfiltered listing is available.
11 | function ArticleStoryEditorContent({ article }: ArticleStoryEditorContentProps) {
12 | const { data: allListing } = useFetchStoryListingQuery({ limit: 100 });
13 | const { data: linkedListing } = useFetchStoryListingQuery({ limit: 100, article: article.id });
14 | const [toggleStoryArticle] = useToggleStoryArticleMutation();
15 |
16 |
// Ids of the stories returned for this article; empty while that query has no data.
17 | const linkedIds = linkedListing?.results.map((s) => s.id) || [];
// Fire the toggle mutation for this story/article pair; unwrap() rethrows a failure.
18 | const onToggleAssign = async (e: MouseEvent, story: IStory) => {
19 | await toggleStoryArticle({ story: story.id, article: article.id }).unwrap();
20 | }
21 | if (allListing === undefined) {
22 | return null;
23 | }
24 |
25 | return (
26 |
27 | {allListing.results.map((story) =>
28 | onToggleAssign(e, story)}
32 | intent={linkedIds.indexOf(story.id) === -1 ? Intent.NONE : Intent.SUCCESS}
33 | icon={linkedIds.indexOf(story.id) === -1 ? "small-minus" : "small-tick"}
34 | />
35 | )}
36 |
37 | );
38 | }
39 |
40 |
// Props: the article to edit and an inList flag.
// NOTE(review): how inList is used is not visible in this listing.
41 | type ArticleStoryManagerProps = {
42 | article: IArticle
43 | inList: boolean
44 | }
45 |
// Popover wrapper configured with click interaction, automatic placement and the
// Popover2 content-sizing class.
// NOTE(review): presumably hosts ArticleStoryEditorContent as its content; confirm.
46 | export default function ArticleStoryEditor({ article, inList }: ArticleStoryManagerProps) {
47 | return (
48 | }
50 | interactionKind="click"
51 | popoverClassName={Popover2Classes.POPOVER2_CONTENT_SIZING}
52 | placement="auto"
53 | >
54 |
59 |
60 | )
61 | }
--------------------------------------------------------------------------------
/frontend/src/components/ArticleText.tsx:
--------------------------------------------------------------------------------
1 | import classnames from 'classnames';
2 |
3 | import styles from '../styles/Article.module.scss'
4 |
// Markup class names; ArticleText picks one per tag group by group index modulo the list length.
5 | const CLASSES = ['markup1', 'markup2', 'markup3', 'markup4', 'markup5']
6 |
// Props: the raw article text and the tag groups to highlight (each group is a list of
// surface forms that are joined into one alternation).
7 | type ArticleTextProps = {
8 | text: string
9 | tags: string[][]
10 | }
11 |
/**
 * Turn a surface form into a regular-expression fragment that matches it literally.
 *
 * The name is trimmed, every regex metacharacter is backslash-escaped, and each run
 * of whitespace becomes `\s+` so the name still matches across line breaks or repeated
 * spaces in the article text.
 *
 * Fixes two defects of the previous version: `replace(' ', ...)` with a string pattern
 * only rewrote the first space, and metacharacters were never escaped, so a name such
 * as "A.B." matched unrelated text and one such as "C++" produced an invalid pattern.
 *
 * @param text raw surface form of an entity
 * @returns a pattern fragment safe to embed in `new RegExp(..., 'muig')`
 */
function cleanName(text: string): string {
  // Escape first: the whitespace rewrite below inserts a backslash that must survive.
  // Escape set from https://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex
  // ($& is the whole matched character); every member is a valid escape under the `u` flag.
  const escaped = text.trim().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return escaped.replace(/\s+/g, '\\s+');
}
19 |
/**
 * Report whether a pattern fragment is usable on its own.
 *
 * @param text candidate regular-expression source
 * @returns false for undefined, null or empty input and for sources that fail to
 *          compile with the `muig` flags; true otherwise
 */
function checkName(text: string): boolean {
  // Loose equality against null covers both undefined and null.
  const isMissing = text == null || text.length === 0;
  if (isMissing) {
    return false;
  }
  let compiles = true;
  try {
    // Compile with the same flags the caller uses, so the answer matches real usage.
    new RegExp(text, 'muig');
  } catch {
    compiles = false;
  }
  return compiles;
}
31 |
// Highlights tag groups inside the article text. For each group, the forms are passed
// through cleanName, filtered by checkName and joined with | into one capturing,
// case-insensitive pattern; every match is then rewritten by the replace callback.
// The result is split on newlines, near-empty lines (trimmed length of 1 or less) are
// dropped, and the remaining paragraphs are concatenated.
// NOTE(review): when a group has no usable form, alternatives is the empty string and the
// pattern () matches the empty string at every position; consider skipping such groups.
// NOTE(review): each group rewrites the output of the previous one, so a later pattern can
// also match text inserted by an earlier replacement.
32 | export default function ArticleText({ text, tags }: ArticleTextProps) {
33 | let html = text;
34 | tags.forEach((forms, index) => {
35 | const alternatives = forms.map(cleanName).filter(checkName).join('|');
36 | const altRx = new RegExp(`(${alternatives})`, 'muig')
// Cycle through the class list so any number of groups gets a class.
37 | const clazz = CLASSES[index % CLASSES.length];
38 | html = html.replaceAll(altRx, (m) => `${m} `);
39 | })
40 | // html = html.replaceAll(/\n/g, ' \n');
41 | const paragraphs = html.split('\n').filter((p) => p.trim().length > 1);
42 | const paraHtml = paragraphs.join('')
43 |
44 | return (
45 |
${paraHtml}` }} />
46 | )
47 | }
--------------------------------------------------------------------------------
/frontend/src/components/ClusterArticles.tsx:
--------------------------------------------------------------------------------
1 | import { Button, HTMLTable } from "@blueprintjs/core";
2 | import { MouseEvent } from "react";
3 | import { Link, useSearchParams } from "react-router-dom";
4 | import { useFetchArticleListingQuery } from "../services/articles";
5 | import { useUntagArticleMutation } from "../services/links";
6 | import { IArticle, IClusterDetails } from "../types";
7 | import { useListingPagination } from "../util";
8 | import ArticleDrawer from "./ArticleDrawer";
9 | import Pagination from "./Pagination";
10 | import { SectionLoading } from "./util";
11 |
12 |
// Props: the cluster whose articles are listed.
13 | type ClusterArticlesProps = {
14 | cluster: IClusterDetails,
15 | }
16 |
// Paginated listing of the articles of a cluster (Title / Site / Split columns).
// The id of the previewed article lives in the "article" URL search parameter.
// Returns early while the listing is missing or loading, or while an untag
// mutation is in flight.
17 | export default function ClusterArticles({ cluster }: ClusterArticlesProps) {
18 | const page = useListingPagination('articles');
19 | const query = { ...page, cluster: cluster.id };
20 | const { data: listing, isLoading } = useFetchArticleListingQuery(query);
21 | const [params, setParams] = useSearchParams();
22 | const [untagArticleMutation, { isLoading: isUntagging }] = useUntagArticleMutation();
// An empty or missing "article" parameter is normalised to undefined.
23 | const articleId = params.get('article') || undefined;
24 |
25 | if (listing === undefined || isLoading || isUntagging) {
26 | return
27 | }
28 |
// Suppress the default link action and select the article via the URL parameter.
29 | const showArticle = (e: MouseEvent
, articleId: string) => {
30 | e.preventDefault();
31 | setArticle(articleId);
32 | }
33 |
// Rewrite the search parameters, keeping the existing ones; clearing the selection
// stores an empty string under "article".
34 | const setArticle = (articleId?: string) => {
35 | const paramsObj = Object.fromEntries(params.entries());
36 | setParams({ ...paramsObj, article: articleId || '' });
37 | }
38 |
// Fire the untag mutation for this cluster/article pair; unwrap() rethrows a failure.
39 | const untagArticle = async (article: IArticle) => {
40 | await untagArticleMutation({ cluster: cluster.id, article: article.id }).unwrap()
41 | }
42 |
43 | return (
44 | <>
45 |
46 |
47 |
48 | Title
49 | Site
50 | Split
51 |
52 |
53 |
54 | {listing.results.map((article) => (
55 |
56 |
57 | showArticle(e, article.id)}
59 | to={`/articles?article=${article.id}`}
60 | >
61 | {article.title}
62 |
63 |
64 | {article.site}
65 |
66 | untagArticle(article)}
68 | icon="unresolve"
69 | minimal
70 | small
71 | />
72 |
73 |
74 | ))}
75 |
76 |
77 |
78 | setArticle(undefined)}
80 | articleId={articleId}
81 | tags={[[cluster.label, ...cluster.labels]]}
82 | />
83 | >
84 | )
85 | }
--------------------------------------------------------------------------------
/frontend/src/components/ClusterButtonGroup.tsx:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { AnchorButton, Button, ButtonGroup, Intent } from "@blueprintjs/core";
3 | import { useExplodeClusterMutation } from "../services/links";
4 | import { ICluster } from "../types";
5 | import { getLinkLoomLink } from "../util";
6 | import { LINKER_ICON } from '../constants';
7 |
// Props: the cluster the buttons act on.
8 | type ClusterButtonGroupProps = {
9 | cluster: ICluster,
10 | }
11 |
// Action buttons for a cluster, labelled Build web, Google, OpenSanctions and Explode.
// The two search URLs carry the cluster label as the q query parameter.
12 | export default function ClusterButtonGroup({ cluster }: ClusterButtonGroupProps) {
13 | const [explodeCluster, { isLoading: isExploding }] = useExplodeClusterMutation();
14 |
// Fire the explode mutation for this cluster; unwrap() rethrows a failure.
15 | const onExplode = async () => {
16 | await explodeCluster(cluster.id).unwrap();
17 | };
18 |
// True while the explode mutation is in flight.
19 | const disabled = isExploding;
20 | const googleUrl = queryString.stringifyUrl({ url: 'https://www.google.com/search', query: { q: cluster.label } });
21 | const sanctionsUrl = queryString.stringifyUrl({ url: 'https://www.opensanctions.org/search', query: { q: cluster.label } });
22 | return (
23 |
24 |
25 | Build web
26 |
27 | Google
28 | OpenSanctions
29 | Explode
30 |
31 | )
32 | }
--------------------------------------------------------------------------------
/frontend/src/components/ClusterDrawer.tsx:
--------------------------------------------------------------------------------
1 | import { Drawer, Tab, Tabs } from "@blueprintjs/core"
2 | import { SyntheticEvent, useEffect, useState } from "react"
3 | import { useFetchArticleListingQuery } from "../services/articles"
4 | import { useFetchClusterQuery, useFetchRelatedClusterListingQuery, useFetchSimilarClusterListingQuery } from "../services/clusters"
5 | import { ErrorSection, NumericTag, SectionLoading } from "./util"
6 |
7 | import styles from '../styles/Cluster.module.scss'
8 | import { useFetchOntologyQuery } from "../services/ontology"
9 | import { useNodeTypes } from "../selectors"
10 | import RelatedListing from "./RelatedListing"
11 | import SimilarListing from "./SimilarListing"
12 | import ClusterArticles from "./ClusterArticles"
13 |
// Props for the inner drawer: a required cluster id, the open flag and the two close
// callbacks (onClose, onClosed).
14 | type ClusterDrawerInnerProps = {
15 | clusterId: string,
16 | isOpen: boolean,
17 | onClose: (event: SyntheticEvent) => void
18 | onClosed: (node: HTMLElement) => void
19 | }
20 |
// Loads the cluster, its related and similar cluster listings, its article listing and
// the ontology. The tab sections below are titled Co-occurring, Similar and Articles;
// the Similar tab is disabled when similar.total is 0.
// NOTE(review): onClosed is declared in the props type but not destructured here, so this
// component cannot forward it as written; confirm whether that is intended.
21 | function ClusterDrawerInner({ clusterId, isOpen, onClose }: ClusterDrawerInnerProps) {
22 | const nodeTypes = useNodeTypes();
23 | const { data: cluster, error: clusterError } = useFetchClusterQuery(clusterId);
24 | const relatedQuery = { clusterId: clusterId || '', params: { types: nodeTypes } };
25 | const { data: related } = useFetchRelatedClusterListingQuery(relatedQuery)
26 | const similarQuery = { clusterId: clusterId || '', params: {} };
27 | const { data: similar } = useFetchSimilarClusterListingQuery(similarQuery);
28 | const articleQuery = { cluster: clusterId };
29 | const { data: articles } = useFetchArticleListingQuery(articleQuery);
30 | const { data: ontology } = useFetchOntologyQuery();
// Look up the ontology entry for the cluster type; the icon falls back to "hat".
31 | const meta = ontology?.cluster_types.find((t) => t.name === cluster?.type);
32 | const icon = meta?.icon || 'hat';
// Only treat the drawer as open when the trimmed id is longer than one character.
33 | const realIsOpen = isOpen && clusterId.trim().length > 1;
34 |
35 | return (
36 |
47 |
48 | {(cluster === undefined) && (
49 |
50 | )}
51 | {(clusterError !== undefined) && (
52 |
53 | )}
54 | {cluster && (
55 |
56 |
59 | Co-occurring
60 |
61 | >
62 | }
63 | panel={
64 |
65 | }
66 | />
67 |
70 | Similar
71 |
72 | >
73 | }
74 | disabled={similar?.total === 0}
75 | panel={
76 |
77 | }
78 | />
79 |
82 | Articles
83 |
84 | >
85 | }
86 | panel={
87 |
88 | }
89 | />
90 |
91 | )}
92 |
93 |
94 | )
95 | }
96 |
97 |
98 |
// Public drawer props: optional cluster id (the drawer counts as open when it is
// truthy) and the close callback.
99 | type ClusterDrawerProps = {
100 | clusterId?: string,
101 | onClose: (event: SyntheticEvent) => void
102 | }
103 |
// Wrapper that remembers the last non-empty cluster id in local state
// (activeClusterId). The state is only cleared by onClosed, so the id survives the
// clusterId prop becoming empty. Renders nothing while no active id is stored.
104 | export default function ClusterDrawer({ clusterId, onClose }: ClusterDrawerProps) {
105 | const isOpen = !!clusterId;
106 | const [activeClusterId, setActiveClusterId] = useState(clusterId);
107 |
// Copy a new truthy clusterId into state; an empty or undefined prop never overwrites it.
108 | useEffect(() => {
109 | if (!!clusterId && clusterId !== activeClusterId) {
110 | setActiveClusterId(clusterId);
111 | }
112 | }, [clusterId, activeClusterId])
113 |
// Forget the active cluster.
114 | const onClosed = () => {
115 | setActiveClusterId(undefined);
116 | }
117 |
118 | if (activeClusterId === undefined) {
119 | return null;
120 | }
121 |
122 | return (
123 |
129 | );
130 | }
--------------------------------------------------------------------------------
/frontend/src/components/Footer.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 |
3 | import styles from '../styles/Footer.module.scss';
4 |
// Static page footer showing the text "StoryWeb prototype"; takes no props and keeps no state.
5 | export default class Footer extends React.Component {
6 | render() {
7 | return (
8 |
9 |
10 | StoryWeb prototype
11 |
12 |
13 | )
14 | }
15 | }
--------------------------------------------------------------------------------
/frontend/src/components/Navbar.tsx:
--------------------------------------------------------------------------------
1 | import React, { useState } from 'react';
2 | import classnames from 'classnames';
3 | import { Link } from 'react-router-dom';
4 | import { Navbar, Alignment, Icon, Button } from '@blueprintjs/core';
5 | import SettingsDialog from './SettingsDialog';
6 | import { ARTICLE_ICON, CLUSTER_ICON, SITE, STORY_ICON } from '../constants';
7 |
8 | import styles from "../styles/Navbar.module.scss";
9 |
// Top navigation bar showing the site name plus Stories, Entities, Articles and Settings
// entries. The showSettings flag is set to true by the Settings entry and back to false by
// the other handler below.
10 | export default function NavbarSection() {
11 | const [showSettings, setShowSettings] = useState(false);
12 | return (
13 | // 'bp4-dark',
14 |
15 |
16 |
17 |
18 | {SITE}
19 |
20 |
21 |
22 |
23 | Stories
24 |
25 |
26 |
27 | Entities
28 |
29 |
30 |
31 | Articles
32 |
33 |
34 |
35 | setShowSettings(true)}>Settings
36 | setShowSettings(false)} />
37 |
38 |
39 |
40 | )
41 | }
--------------------------------------------------------------------------------
/frontend/src/components/Pagination.tsx:
--------------------------------------------------------------------------------
1 | import { Button, ButtonGroup } from "@blueprintjs/core";
2 | import { useSearchParams } from "react-router-dom";
3 |
4 | import { IListingResponse } from "../types";
5 | import styles from '../styles/util.module.scss';
6 | import { Numeric } from "./util";
7 |
8 |
// Props: prefix namespaces the offset search parameter (written as prefix.offset);
// response supplies the offset, limit and total of the current listing page.
9 | type PaginationProps = {
10 | prefix: string
11 | response: IListingResponse
12 | }
13 |
// Previous/next pager that stores the new offset in the URL search parameters and shows
// the current range as "offset + 1 - upper of ...".
// NOTE(review): with total 0 the range reads "1 - 0"; confirm that callers only render
// this for non-empty listings.
14 | export default function Pagination({ prefix, response }: PaginationProps) {
15 | const [params, setParams] = useSearchParams();
16 |
// Write the new offset under prefix.offset, keeping all other search parameters.
17 | const setOffset = (e: React.MouseEvent, newOffset: number) => {
18 | e.preventDefault();
19 | const oldParams = Object.fromEntries(params.entries());
20 | const key = `${prefix}.offset`;
21 | setParams({ ...oldParams, [key]: newOffset + '' });
22 | }
23 |
24 | const nextOffset = response.offset + response.limit;
// Last item shown on this page, capped at the total.
25 | const upper = Math.min(response.total, nextOffset);
26 | const hasPrev = response.offset > 0;
27 | const hasNext = response.total > nextOffset;
// Never step back past the first item.
28 | const prevOffset = Math.max(0, response.offset - response.limit)
29 | return (
30 |
31 | setOffset(e, prevOffset)} />
32 |
33 | {response.offset + 1} - {upper} of
34 |
35 | setOffset(e, nextOffset)} />
36 |
37 | )
38 | }
--------------------------------------------------------------------------------
/frontend/src/components/PairLink.tsx:
--------------------------------------------------------------------------------
1 | import { Icon } from "@blueprintjs/core";
2 | import { Link } from "react-router-dom";
3 | import { IClusterBase } from "../types";
4 | import { getLinkLoomLink } from "../util";
5 | import { LinkType, SpacedList } from "./util";
6 |
// Props: the two clusters of the pair, the names of the link types between them, and an
// optional story id.
7 | type PairLinkProps = {
8 | left: IClusterBase,
9 | right: IClusterBase,
10 | link_types: string[]
11 | story?: number
12 | }
13 |
// Renders the link between two clusters; an extra element is rendered when link_types is empty.
// NOTE(review): the markup is not visible in this listing, so the exact output is unconfirmed.
14 | export default function PairLink({ left, right, link_types, story }: PairLinkProps) {
15 | return (
16 | <>
17 |
18 | )} />
19 | {link_types.length === 0 && (
20 |
21 | )}
22 |
23 | >
24 | )
25 | }
--------------------------------------------------------------------------------
/frontend/src/components/RelatedListing.tsx:
--------------------------------------------------------------------------------
1 | import { HTMLTable } from "@blueprintjs/core";
2 | import { Link } from "react-router-dom";
3 | import { useNodeTypes } from "../selectors";
4 | import { useFetchRelatedClusterListingQuery } from "../services/clusters";
5 | import { ICluster } from "../types";
6 | import { getClusterLink, useListingPagination } from "../util";
7 | import Pagination from "./Pagination";
8 | import PairLink from "./PairLink";
9 | import { SectionLoading, ClusterTypeIcon } from "./util";
10 |
// Props: the cluster whose related clusters are listed.
11 | type RelatedListingProps = {
12 | cluster: ICluster,
13 | }
14 |
// Paginated listing (Name / Link / Articles columns) of clusters related to the given one.
// The query combines the pagination values for the "related" prefix with the node types
// returned by useNodeTypes. Returns early while the listing is missing or loading.
15 | export default function RelatedListing({ cluster }: RelatedListingProps) {
16 | const nodeTypes = useNodeTypes();
17 | const page = useListingPagination('related');
18 | const relatedParams = { clusterId: cluster.id, params: { ...page, types: nodeTypes } };
19 | const { data: listing, isLoading } = useFetchRelatedClusterListingQuery(relatedParams)
20 | if (listing === undefined || isLoading) {
21 | return
22 | }
23 |
24 | return (
25 | <>
26 |
27 |
28 |
29 | Name
30 | Link
31 | Articles
32 |
33 |
34 |
35 | {listing.results.map((related) => (
36 |
37 |
38 |
39 | {related.label}
40 |
41 |
42 |
43 |
44 | {related.articles}
45 |
46 | ))}
47 |
48 |
49 |
50 | >
51 | )
52 | }
--------------------------------------------------------------------------------
/frontend/src/components/ScreenContent.tsx:
--------------------------------------------------------------------------------
1 | import { Card, Elevation } from '@blueprintjs/core';
2 |
3 | import styles from '../styles/util.module.scss';
4 |
// Props: optional children to place inside the container.
5 | type ScreenContentProps = {
6 | children?: React.ReactNode
7 | }
8 |
// Layout wrapper that renders its children unchanged inside a container element.
9 | export default function ScreenContent({ children }: ScreenContentProps) {
10 | return (
11 |
12 | {children}
13 |
14 | )
15 | }
--------------------------------------------------------------------------------
/frontend/src/components/ScreenHeading.tsx:
--------------------------------------------------------------------------------
1 | import styles from '../styles/util.module.scss';
2 |
// Props: the heading title and optional extra content.
3 | type ScreenHeadingProps = {
4 | title: React.ReactNode
5 | children?: React.ReactNode
6 | }
7 |
// Screen heading: the children block is emitted only when children is truthy and it
// comes before the title in the returned fragment.
8 | export default function ScreenHeading({ title, children }: ScreenHeadingProps) {
9 |
10 | return (
11 | <>
12 | {!!children && (
13 |
14 | {children}
15 |
16 | )}
17 | {title}
18 | >
19 | )
20 | }
--------------------------------------------------------------------------------
/frontend/src/components/SettingsDialog.tsx:
--------------------------------------------------------------------------------
1 | import { Checkbox, Classes, Dialog, Label } from "@blueprintjs/core";
2 | import { FormEvent } from "react";
3 | import { useDispatch } from "react-redux";
4 | import { useSelector } from "react-redux";
5 | import { setHiddenNodeTypes } from "../services/config";
6 | import { useFetchOntologyQuery } from "../services/ontology";
7 | import { RootState } from "../store";
8 | import { IClusterType } from "../types";
9 | import { listToggle } from "../util";
10 |
// Props: dialog visibility flag and the close callback.
11 | type SettingsDialogProps = {
12 | isOpen: boolean
13 | onClose: () => void
14 | }
15 |
// Settings dialog with one entry per ontology cluster type. Changing an entry passes
// the type name through listToggle against state.config.hiddenNodeTypes and dispatches
// the result. Renders nothing until the ontology has loaded.
16 | export default function SettingsDialog({ isOpen, onClose }: SettingsDialogProps) {
17 | const { data: ontology } = useFetchOntologyQuery();
18 | const hiddenNodeTypes = useSelector((state: RootState) => state.config.hiddenNodeTypes);
19 | const dispatch = useDispatch();
20 |
// Store the list returned by listToggle as the new hidden node types.
21 | const onChangeNodeType = (e: FormEvent, type: IClusterType) => {
22 | const updated = listToggle(hiddenNodeTypes, type.name);
23 | dispatch(setHiddenNodeTypes(updated));
24 | }
25 |
26 | if (ontology === undefined) {
27 | return null;
28 | }
29 |
30 | return (
31 |
32 |
33 |
34 | Include the following node types in listings:
35 | {ontology.cluster_types.map((ct) =>
36 | onChangeNodeType(e, ct)}
41 | />
42 | )}
43 |
44 |
45 |
46 | )
47 | }
--------------------------------------------------------------------------------
/frontend/src/components/SimilarListing.tsx:
--------------------------------------------------------------------------------
1 | import { Button, ButtonGroup, Checkbox, HTMLTable, Intent, NonIdealState } from "@blueprintjs/core"
2 | import { useState } from "react"
3 | import { Link, useNavigate } from "react-router-dom"
4 | import { useFetchSimilarClusterListingQuery, useMergeClustersMutation } from "../services/clusters"
5 | import { ICluster } from "../types"
6 | import { getClusterLink, listToggle, useListingPagination } from "../util"
7 | import Pagination from "./Pagination"
8 | import { SectionLoading, SpacedList, ClusterLabel, ClusterTypeIcon } from "./util"
9 |
// Props: the anchor cluster whose similar clusters are listed.
10 | type SimilarListingProps = {
11 | cluster: ICluster,
12 | }
13 |
// Paginated listing of clusters similar to the given one, with per-row selection and a
// Merge action. Selected ids are kept in local state (merges). Returns early while the
// listing is missing or loading.
14 | export default function SimilarListing({ cluster }: SimilarListingProps) {
15 | const page = useListingPagination('similar');
16 | const similarQuery = { clusterId: cluster.id, params: { ...page } };
17 | const { data: listing, isLoading } = useFetchSimilarClusterListingQuery(similarQuery);
18 | const navigate = useNavigate();
19 | const [postMerge, { isLoading: isUpdating }] = useMergeClustersMutation();
20 | const [merges, setMerges] = useState([] as string[]);
21 |
22 | if (listing === undefined || isLoading) {
23 | return
24 | }
// NOTE(review): this compares counts only, so it is also true for an empty page with
// nothing selected.
25 | const allSelected = merges.length === listing.results.length;
26 |
// Merge the selected clusters into the anchor, clear the selection, and navigate when
// the response id differs from the current cluster id.
27 | const onMerge = async () => {
28 | const response = await postMerge({ anchor: cluster.id, other: merges }).unwrap()
29 | setMerges([]);
30 | if (response.id !== cluster.id) {
31 | navigate(`/clusters/${response.id}`);
32 | }
33 | }
34 |
// Select every row on the current page, or clear the selection when all are selected.
35 | const toggleAll = async () => {
36 | if (allSelected) {
37 | setMerges([]);
38 | } else {
39 | setMerges(listing.results.map(r => r.id));
40 | }
41 | }
42 |
// Store the list returned by listToggle for this id as the new selection.
43 | const toggleOne = async (id: string) => {
44 | setMerges(listToggle(merges, id));
45 | }
46 |
47 | return (
48 | <>
49 |
50 | onMerge()}
53 | intent={Intent.PRIMARY}
54 | >
55 | Merge ({merges.length})
56 |
57 | toggleAll()}
59 | disabled={isUpdating}
60 | >
61 | {allSelected && <>Select none>}
62 | {!allSelected && <>Select all>}
63 |
64 |
65 | {listing.total < 1 && (
66 |
71 | )}
72 | {listing.total > 0 && (
73 | <>
74 |
75 |
76 |
77 | Name
78 | Common tags
79 | Count
80 | Same
81 |
82 |
83 |
84 | {listing.results.map((similar) => (
85 |
86 |
87 |
88 | {similar.label}
89 |
90 |
91 | )} />
92 |
93 |
94 | {similar.common_count}
95 |
96 |
97 | toggleOne(similar.id)}
100 | disabled={isUpdating}
101 | />
102 |
103 |
104 | ))}
105 |
106 |
107 | >
108 | )}
109 |
110 | >
111 | )
112 | }
--------------------------------------------------------------------------------
/frontend/src/components/StoryArticleImportDialog.tsx:
--------------------------------------------------------------------------------
1 | import { Button, Classes, Dialog, FormGroup, InputGroup } from "@blueprintjs/core";
2 | import { FormEvent, MouseEvent, useState } from "react";
3 | import { ARTICLE_ICON } from "../constants";
4 | import { useImportStoryArticleMutation } from "../services/stories";
5 | import { SectionLoading } from "./util";
6 |
// Props: id of the story to import into, dialog visibility flag and the close callback.
7 | type StoryArticleImportProps = {
8 | storyId: number
9 | isOpen: boolean
10 | onClose: () => void
11 | }
12 |
// Dialog that imports an article into a story by URL. The URL is kept in local state;
// the conditional sections below switch on whether an import is in flight (isCreating).
13 | export default function StoryArticleImportDialog({ storyId, isOpen, onClose }: StoryArticleImportProps) {
14 | const [url, setUrl] = useState('');
15 | const [importArticle, { isLoading: isCreating }] = useImportStoryArticleMutation();
// The URL counts as present only when its trimmed length exceeds 10 characters.
16 | const hasUrl = url.trim().length > 10;
17 |
// Run the import when a URL is present and no import is in flight, then reset the field
// and close the dialog. unwrap() rethrows a failure, which skips the reset and close.
18 | const onImport = async (e: MouseEvent | FormEvent) => {
19 | e.preventDefault();
20 | if (hasUrl && !isCreating) {
21 | await importArticle({ story: storyId, url }).unwrap();
22 | setUrl('')
23 | onClose()
24 | }
25 | }
26 |
27 | return (
28 |
29 | {isCreating && (
30 |
31 | )}
32 | {!isCreating && (
33 |
49 | )}
50 |
51 | )
52 | }
--------------------------------------------------------------------------------
/frontend/src/components/StoryArticles.tsx:
--------------------------------------------------------------------------------
1 | import { Button, HTMLTable } from "@blueprintjs/core";
2 | import { MouseEvent } from "react";
3 | import { Link, useSearchParams } from "react-router-dom";
4 | import { ARTICLE_THRESHOLD } from "../constants";
5 | import { useFetchArticleListingQuery } from "../services/articles";
6 | import { useToggleStoryArticleMutation } from "../services/stories";
7 | import { IArticle, IStory } from "../types";
8 | import { useListingPagination } from "../util";
9 | import ArticleDrawer from "./ArticleDrawer";
10 | import Pagination from "./Pagination";
11 | import StoryNomNom from "./StoryNomNom";
12 | import { ErrorSection, SectionLoading } from "./util";
13 |
// Props: the story whose articles are listed.
14 | type StoryArticlesProps = {
15 | story: IStory,
16 | }
17 |
// Paginated listing of the articles of a story (Title / Site / Remove columns). The
// id of the previewed article lives in the "article" URL search parameter. An extra
// section is rendered while the story has fewer than ARTICLE_THRESHOLD articles.
// Returns early on a query error and while the listing is missing or loading.
18 | export default function StoryArticles({ story }: StoryArticlesProps) {
19 | const [params, setParams] = useSearchParams();
// An empty or missing "article" parameter is normalised to undefined.
20 | const articleId = params.get('article') || undefined;
// NOTE(review): the pagination prefix is "pairs" although this lists articles; confirm
// it is intended and does not collide with the pairs listing.
21 | const page = useListingPagination('pairs');
22 | const { data: articles, error, isLoading } = useFetchArticleListingQuery({ ...page, story: story.id });
23 | const [toggleStoryArticle] = useToggleStoryArticleMutation();
24 |
25 | if (error !== undefined) {
26 | return
27 | }
28 | if (articles === undefined || isLoading) {
29 | return
30 | }
31 |
// Fire the toggle mutation for this story/article pair; unwrap() rethrows a failure.
32 | const onRemoveArticle = async (article: IArticle) => {
33 | if (story !== undefined) {
34 | await toggleStoryArticle({ story: story.id, article: article.id }).unwrap()
35 | }
36 | }
37 |
// Rewrite the search parameters, keeping the existing ones; clearing the preview stores
// an empty string under "article".
38 | const setPreviewArticle = (articleId?: string) => {
39 | const paramsObj = Object.fromEntries(params.entries());
40 | setParams({ ...paramsObj, article: articleId || '' });
41 | }
42 |
// Suppress the default link action and preview the article.
43 | const onPreviewArticle = (event: MouseEvent, article: IArticle) => {
44 | event.preventDefault();
45 | setPreviewArticle(article.id)
46 | }
47 |
48 | return (
49 | <>
50 | {(articles.total < ARTICLE_THRESHOLD) && (
51 |
52 | )}
53 | {articles.results.length > 0 && (
54 | <>
55 |
56 |
57 |
58 | Title
59 | Site
60 | Remove
61 |
62 |
63 |
64 | {articles.results.map((article) => (
65 |
66 |
67 | onPreviewArticle(e, article)}
70 | >
71 | {article.title}
72 |
73 |
74 | {article.site}
75 |
76 | onRemoveArticle(article)}
78 | icon="trash"
79 | minimal
80 | small
81 | />
82 |
83 |
84 | ))}
85 |
86 |
87 |
88 | >
89 | )}
90 | setPreviewArticle(undefined)}
92 | articleId={articleId}
93 | tags={[]}
94 | />
95 | >
96 | )
97 | };
--------------------------------------------------------------------------------
/frontend/src/components/StoryCreateDialog.tsx:
--------------------------------------------------------------------------------
1 | import { Button, Classes, Dialog, FormGroup, InputGroup, TextArea } from "@blueprintjs/core";
2 | import { FormEvent, MouseEvent, useState } from "react";
3 | import { useNavigate } from "react-router-dom";
4 | import { STORY_ICON } from "../constants";
5 | import { useCreateStoryMutation } from "../services/stories";
6 |
// Props: dialog visibility flag and the close callback.
7 | type StoryCreateDialogProps = {
8 | isOpen: boolean
9 | onClose: () => void
10 | }
11 |
// Dialog that creates a story from a title and summary held in local state, then
// navigates to the page of the new story.
12 | export default function StoryCreateDialog({ isOpen, onClose }: StoryCreateDialogProps) {
13 | const [title, setTitle] = useState('');
14 | const [summary, setSummary] = useState('');
15 | const navigate = useNavigate();
16 | const [createStory, { isLoading: isCreating }] = useCreateStoryMutation();
17 |
// The title counts as present only when its trimmed length exceeds 3 characters.
18 | const hasTitle = title.trim().length > 3;
19 |
// Create the story when a title is present and no creation is in flight; unwrap()
// rethrows a failure, which skips the navigation.
20 | const onCreate = async (e: MouseEvent | FormEvent) => {
21 | e.preventDefault();
22 | if (hasTitle && !isCreating) {
23 | const story = await createStory({ title: title, summary: summary }).unwrap();
24 | navigate(`/stories/${story.id}`);
25 | }
26 | }
27 |
28 | return (
29 |
30 |
57 |
58 | )
59 | }
--------------------------------------------------------------------------------
/frontend/src/components/StoryDeleteDialog.tsx:
--------------------------------------------------------------------------------
1 | import { Button, Callout, Classes, Dialog, Intent } from "@blueprintjs/core";
2 | import { FormEvent, MouseEvent } from "react";
3 | import { useNavigate } from "react-router-dom";
4 | import { STORY_ICON } from "../constants";
5 | import { useDeleteStoryMutation } from "../services/stories";
6 | import { IStory } from "../types";
7 |
// Props: the story to delete, dialog visibility flag and the close callback.
// NOTE(review): the type is named StoryCreateDialogProps although it belongs to the delete
// dialog; it looks like a copy-paste leftover, consider renaming.
8 | type StoryCreateDialogProps = {
9 | story: IStory
10 | isOpen: boolean
11 | onClose: () => void
12 | }
13 |
// Confirmation dialog ("Are you sure you want to delete this story?") that deletes the
// story and then navigates to the root route.
14 | export default function StoryDeleteDialog({ isOpen, onClose, story }: StoryCreateDialogProps) {
15 | const navigate = useNavigate();
16 | const [deleteStory, { isLoading: isDeleting }] = useDeleteStoryMutation();
17 |
// Delete the story; unwrap() rethrows a failure, which skips the navigation.
18 | const onDelete = async (e: MouseEvent | FormEvent) => {
19 | e.preventDefault();
20 | await deleteStory(story.id).unwrap();
21 | navigate('/');
22 | }
23 |
24 | return (
25 |
26 |
27 |
28 |
29 | Are you sure you want to delete this story?
30 |
31 |
32 |
33 |
34 | Delete
35 |
36 |
37 |
38 |
39 | )
40 | }
--------------------------------------------------------------------------------
/frontend/src/components/StoryGraph.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from "react";
2 | import Graph from "graphology";
3 | import { parse } from "graphology-gexf/browser";
4 | import { ControlsContainer, SigmaContainer, useLoadGraph, useRegisterEvents, ZoomControl } from "@react-sigma/core";
5 | import "@react-sigma/core/lib/react-sigma.min.css";
6 | import { useFetchStoryGraphQuery } from "../services/stories";
7 | import { useLayoutForceAtlas2 } from "@react-sigma/layout-forceatlas2";
8 | import { useFetchOntologyQuery } from "../services/ontology";
9 | import { IStory } from "../types";
10 | import ClusterDrawer from "./ClusterDrawer";
11 |
// Props shared by the graph components in this file: the story whose graph is displayed.
12 | export type StoryGraphProps = {
13 | story: IStory
14 | }
15 |
// Render-less helper: fetches the story's GEXF graph and the ontology, decorates every node
// and edge with display attributes, and loads the result into the surrounding sigma container.
export const LoadGraph = ({ story }: StoryGraphProps) => {
  const loadGraph = useLoadGraph();
  const { data: ontology } = useFetchOntologyQuery();
  const { assign } = useLayoutForceAtlas2();
  const { data: graphData } = useFetchStoryGraphQuery({ storyId: story.id });

  useEffect(() => {
    // Both the graph payload and the ontology are needed before anything can be drawn.
    if (graphData === undefined || ontology === undefined) {
      return;
    }

    const storyGraph = parse(Graph, graphData);

    storyGraph.forEachNode((nodeKey, nodeAttrs) => {
      const clusterType = ontology.cluster_types.find(
        (candidate) => candidate.name === nodeAttrs.node_type
      );
      // Random starting coordinates; the layout `assign()` below repositions the nodes.
      nodeAttrs.x = Math.random() * 20;
      nodeAttrs.y = Math.random() * 20;
      // Node size grows with the number of attached edges.
      nodeAttrs.size = 5 + (1.5 * storyGraph.degree(nodeKey));
      // Grey fallback for node types the ontology does not describe.
      nodeAttrs.color = clusterType?.color || '#dddddd';
    });

    storyGraph.forEachEdge((edgeKey, edgeAttrs) => {
      const linkType = ontology.link_types.find(
        (candidate) => candidate.name === edgeAttrs.edge_type
      );
      edgeAttrs.size = 2;
      edgeAttrs.label = linkType?.label;
    });

    loadGraph(storyGraph);
    assign();
  }, [loadGraph, assign, ontology, graphData]);

  return null;
};
46 |
// Props for GraphEvents: callback invoked with the id of the node the user selected.
type GraphEventsProps = {
  showCluster: (id: string) => void
}

// Render-less helper that wires sigma node clicks (single and double) to `showCluster`.
function GraphEvents({ showCluster }: GraphEventsProps) {
  const registerEvents = useRegisterEvents();

  useEffect(() => {
    // One handler serves both gestures: each forwards the clicked node's key.
    const openCluster = (payload: { node: string }) => showCluster(payload.node);
    registerEvents({
      clickNode: openCluster,
      doubleClickNode: openCluster,
    });
  }, [registerEvents, showCluster]);

  return null;
}
66 |
// Graph view for a story.
// NOTE(review): the JSX below lost its tags in this copy of the file; going by the imports it
// presumably combines a ClusterDrawer with a sigma container — confirm against the repository.
67 | export default function StoryGraph({ story }: StoryGraphProps) {
// Holds the currently selected cluster; it is reset by calling setShowCluster(undefined) below.
68 | const [showCluster, setShowCluster] = useState();
69 |
70 | return (
71 | <>
72 | setShowCluster(undefined)}
75 | />
76 |
80 |
81 |
82 |
83 |
84 |
85 |
86 | >
87 | );
88 | }
--------------------------------------------------------------------------------
/frontend/src/components/StoryLinkerBanner.tsx:
--------------------------------------------------------------------------------
1 | import { Breadcrumbs2 } from "@blueprintjs/popover2"
2 | import { useFetchStoryQuery } from "../services/stories"
3 |
4 | import styles from '../styles/Linker.module.scss';
5 |
// Props for StoryLinkerBanner: id of the story to fetch.
6 | type StoryLinkerBannerProps = {
7 | storyId: string,
8 | }
9 |
// Banner for the linker screens; fetches the story by id.
10 | export default function StoryLinkerBanner({ storyId }: StoryLinkerBannerProps) {
11 | const { data: story, isLoading } = useFetchStoryQuery(storyId)
12 |
// Render nothing until the story has been loaded.
13 | if (story === undefined || isLoading) {
14 | return null
15 | }
16 | return (
17 |
18 | )
19 | }
--------------------------------------------------------------------------------
/frontend/src/components/StoryNomNom.tsx:
--------------------------------------------------------------------------------
1 | import { Button, ButtonGroup, Intent, NonIdealState } from "@blueprintjs/core";
2 | import { useState } from "react";
3 | import { useNavigate } from "react-router-dom";
4 | import { ARTICLE_ICON, ARTICLE_THRESHOLD } from "../constants";
5 | import { IStory } from "../types";
6 | import StoryArticleImportDialog from "./StoryArticleImportDialog";
7 |
8 | import styles from '../styles/Story.module.scss';
9 |
// Props for StoryNomNom: the story that articles should be added to.
10 | type StoryNomNomProps = {
11 | story: IStory,
12 | }
13 |
// Prompt offering two ways to add articles to a story: "Add by URL" opens the import dialog,
// "Select from archive..." navigates to /articles.
14 | export default function StoryNomNom({ story }: StoryNomNomProps) {
// Whether the import dialog is currently requested.
15 | const [showImport, setShowImport] = useState(false);
16 | const navigate = useNavigate();
17 |
// Close handler for the import dialog: hides it and calls navigate(0).
// NOTE(review): navigate(0) presumably reloads the current page so the story data is refetched
// — confirm against the router documentation.
18 | const onImportClose = () => {
19 | setShowImport(false);
20 | navigate(0);
21 | }
22 |
23 | return (
24 | <>
25 |
32 | setShowImport(true)}>Add by URL
33 | navigate("/articles")}>Select from archive...
34 |
35 | }
36 | />
37 |
38 | >
39 | )
40 | };
--------------------------------------------------------------------------------
/frontend/src/components/StoryPairs.tsx:
--------------------------------------------------------------------------------
1 | import { HTMLTable } from "@blueprintjs/core";
2 | import { MouseEvent, useState } from "react";
3 | import { Link } from "react-router-dom";
4 | import { useNodeTypes } from "../selectors";
5 | import { useFetchStoryPairsQuery } from "../services/stories";
6 | import { IClusterBase, IStory } from "../types";
7 | import { getClusterLink, useListingPagination } from "../util";
8 | import ClusterDrawer from "./ClusterDrawer";
9 | import Pagination from "./Pagination";
10 | import PairLink from "./PairLink";
11 | import { ErrorSection, Numeric, SectionLoading, ClusterTypeIcon } from "./util";
12 |
// Props for StoryPairs: the story whose entity pairs are listed.
13 | type StoryPairsProps = {
14 | story: IStory,
15 | }
16 |
// Paginated table of entity pairs for a story (columns: From, To, Links, Articles), with a
// drawer preview for a clicked cluster.
17 | export default function StoryPairs({ story }: StoryPairsProps) {
18 | const nodeTypes = useNodeTypes();
// Id of the cluster currently previewed; undefined when no preview is open.
19 | const [showCluster, setShowCluster] = useState();
20 | const page = useListingPagination('pairs');
// The query combines the pagination values with the selected node types.
21 | const { data: clusters, error: clustersError } = useFetchStoryPairsQuery({
22 | storyId: story.id,
23 | params: { ...page, types: nodeTypes }
24 | });
25 |
// Error state takes precedence over the loading state.
26 | if (clustersError !== undefined) {
27 | return
28 | }
29 | if (clusters === undefined) {
30 | return
31 | }
32 |
// Click handler for a cluster link: opens the preview for that cluster and suppresses the
// link's default action.
33 | const onPreview = (e: MouseEvent, cluster: IClusterBase) => {
34 | setShowCluster(cluster.id);
35 | e.preventDefault();
36 | }
37 |
38 | return (
39 | <>
40 |
41 |
42 |
43 | From
44 | To
45 | Links
46 | Articles
47 |
48 |
49 |
50 | {clusters.results.map((pair) => (
51 |
52 |
53 |
54 | onPreview(e, pair.left)}>{pair.left.label}
55 |
56 |
57 |
58 | onPreview(e, pair.right)}>{pair.right.label}
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 | ))}
68 |
69 |
70 |
71 | setShowCluster(undefined)}
74 | />
75 | >
76 | )
77 | };
--------------------------------------------------------------------------------
/frontend/src/components/StoryUpdateDialog.tsx:
--------------------------------------------------------------------------------
1 | import { Button, Classes, Dialog, FormGroup, InputGroup, TextArea } from "@blueprintjs/core";
2 | import { FormEvent, MouseEvent, useState } from "react";
3 | import { STORY_ICON } from "../constants";
4 | import { useUpdateStoryMutation } from "../services/stories";
5 | import { IStory } from "../types";
6 |
// Props for StoryUpdateDialog: the story being edited plus the dialog's open state and close callback.
7 | type StoryUpdateDialogProps = {
8 | story: IStory
9 | isOpen: boolean
10 | onClose: () => void
11 | }
12 |
// Dialog for editing a story's title and summary.
13 | export default function StoryUpdateDialog({ story, isOpen, onClose }: StoryUpdateDialogProps) {
// Form state is seeded from the story passed in.
14 | const [title, setTitle] = useState(story.title);
15 | const [summary, setSummary] = useState(story.summary);
// NOTE(review): `isCreating` actually tracks the update mutation; the name looks carried over
// from the create dialog.
16 | const [updateStory, { isLoading: isCreating }] = useUpdateStoryMutation();
17 |
// A title counts as valid when it has more than three characters after trimming.
18 | const hasTitle = title.trim().length > 3;
19 |
// Save handler: suppresses the default event action, then — only with a valid title and no
// save already in flight — awaits the update mutation and closes the dialog.
20 | const onSave = async (e: MouseEvent | FormEvent) => {
21 | e.preventDefault();
22 | if (hasTitle && !isCreating) {
23 | await updateStory({ id: story.id, title: title, summary: summary }).unwrap();
24 | onClose()
25 | }
26 | }
27 |
28 | return (
29 |
30 |
31 |
32 |
37 | setTitle(e.target.value)} />
38 |
39 |
43 | setSummary(e.target.value)}
48 | />
49 |
50 |
51 |
56 |
57 |
58 | )
59 | }
--------------------------------------------------------------------------------
/frontend/src/components/util.tsx:
--------------------------------------------------------------------------------
1 | import { Icon, NonIdealState, NonIdealStateIconSize, Spinner, SpinnerSize, Tag } from '@blueprintjs/core';
2 | import { useEffect } from "react";
3 | import { useLocation } from "react-router-dom";
4 | import { ReactNode } from 'react';
5 | import classnames from 'classnames';
6 |
7 | import { SPACER } from '..//constants';
8 | import { useFetchOntologyQuery } from '../services/ontology';
9 |
10 | import styles from '../styles/util.module.scss';
11 |
12 |
// Props for ClusterLabel: the text to display.
13 | type ClusterLabelProps = {
14 | label: string
15 | }
16 |
// Renders a cluster's label.
17 | export function ClusterLabel({ label }: ClusterLabelProps) {
18 | return {label} ;
19 | }
20 |
// Props for ClusterType: the cluster type name to display.
21 | type ClusterTypeProps = {
22 | type: string
23 | }
24 |
// Renders the human-readable label of a cluster type, looked up by name in the ontology.
25 | export function ClusterType({ type }: ClusterTypeProps) {
26 | const { data: ontology } = useFetchOntologyQuery();
// `meta` is undefined while the ontology is not loaded or when the type name is unknown.
27 | const meta = ontology?.cluster_types.find((t) => t.name === type)
28 | return (
29 |
// Falls back to the raw type name when no label is available.
30 | {meta?.label || type}
31 |
32 | );
33 | }
34 |
35 |
// Props for ClusterTypeIcon: the cluster type name plus optional icon size and extra class name.
36 | type ClusterTypeIconProps = {
37 | type: string
38 | size?: number
39 | className?: string
40 | }
41 |
// Renders the icon configured for a cluster type in the ontology.
42 | export function ClusterTypeIcon({ type, size, className }: ClusterTypeIconProps) {
43 | const { data: ontology } = useFetchOntologyQuery();
// The caller's class name is combined with the shared 'spaced-icon' class.
44 | const allClassName = classnames('spaced-icon', className)
45 | const meta = ontology?.cluster_types.find((t) => t.name === type);
// 'hat' is the fallback icon when the type has no icon (or is unknown / not loaded yet).
46 | const icon = meta?.icon || 'hat';
47 | return (
48 |
55 | );
56 | }
57 |
58 |
// Props for LinkType: the link type name to display.
59 | type LinkTypeProps = {
60 | type: string
61 | }
62 |
// Renders the ontology label of a link type, falling back to the raw type name.
63 | export function LinkType({ type }: LinkTypeProps) {
64 | const { data: ontology } = useFetchOntologyQuery();
65 | const meta = ontology?.link_types.find((t) => t.name === type)
66 | return {meta?.label || type} ;
67 | }
68 |
// Props for Numeric: the number to display; undefined/null renders nothing.
type NumericProps = {
  value?: number | null
}

// Shared en-US number formatter. Built once at module load instead of on every render,
// because Numeric is rendered once per table cell in the listings.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat
const NUMERIC_FORMAT = new Intl.NumberFormat('en-US');

// Renders `value` with en-US digit grouping (e.g. 12,345); renders nothing when the value
// is undefined or null.
export function Numeric({ value }: NumericProps) {
  if (value === undefined || value === null) {
    return null;
  }
  return <>{NUMERIC_FORMAT.format(value)}</>;
}
81 |
// Props for NumericTag: the number to display (undefined/null means "not available yet") and
// an optional class name.
82 | type NumericTagProps = {
83 | value?: number | null
84 | className?: string
85 | }
86 |
// Renders a formatted number, or an ellipsis placeholder while no value is available.
87 | export function NumericTag({ value, className }: NumericTagProps) {
88 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat
// A missing value is treated as "still loading".
89 | const isLoading = value === undefined || value === null;
90 | const fmt = new Intl.NumberFormat('en-US');
91 | const displayValue = isLoading ? "…" : fmt.format(value);
92 | return {displayValue} ;
93 | }
94 |
95 |
// Props for SpacedList: the items to render.
// NOTE(review): the element type of `Array` is missing in this copy of the file.
96 | type SpacedListProps = {
97 | values: Array
98 | }
99 |
// Renders the given values as a list joined by a separator element; renders nothing for an
// empty list.
100 | export function SpacedList({ values }: SpacedListProps) {
// The early return also protects the reduce() below, which has no initial value.
101 | if (values.length === 0) {
102 | return null;
103 | }
104 | return (
105 | <>
106 | {values
107 | .map((t, idx) => {t} )
108 | .reduce((prev, curr, idx) => [prev, , curr])}
109 | >
110 | )
111 | }
112 |
// Renders the shared SPACER separator string from the constants module.
113 | export function Spacer() {
114 | return (
115 | {SPACER}
116 | )
117 | }
118 |
119 |
// Loading placeholder for a page section; takes no props.
// NOTE(review): most of the markup is missing in this copy — presumably a NonIdealState with a
// Spinner icon, going by the imports; confirm against the repository.
120 | export function SectionLoading() {
121 | return (
122 | }
124 | iconSize={NonIdealStateIconSize.STANDARD}
125 | />
126 | )
127 | }
128 |
// Props for ErrorSection: a required title and an optional longer description.
129 | type ErrorSectionProps = {
130 | title: string
131 | description?: string
132 | }
133 |
// Error placeholder for a page section.
134 | export function ErrorSection({ title, description }: ErrorSectionProps) {
135 | return (
136 |
142 | )
143 | }
144 |
// Loading placeholder for a whole screen; takes no props.
// NOTE(review): most of the markup is missing in this copy of the file.
145 | export function ScreenLoading() {
146 | return (
147 | }
150 | iconSize={NonIdealStateIconSize.STANDARD}
151 | />
152 | )
153 | }
154 |
155 |
// Render-less helper: scrolls the window back to the top-left corner whenever the route's
// pathname changes (and once on mount).
export function ScrollToTop() {
  const location = useLocation();
  const currentPath = location.pathname;

  useEffect(() => {
    window.scrollTo(0, 0);
  }, [currentPath]);

  return null;
}
165 |
--------------------------------------------------------------------------------
/frontend/src/constants.ts:
--------------------------------------------------------------------------------
1 |
// Application-wide constants.

// Display name of the site.
export const SITE = 'StoryWeb';

// Base URL of the backend API.
// Create React App only exposes environment variables prefixed with REACT_APP_ to browser
// code, so REACT_APP_API_URL is consulted first. The un-prefixed API_URL is kept as a
// fallback for builds that inject it by other means; the default targets a local backend.
export const API_URL =
  process.env.REACT_APP_API_URL ||
  process.env.API_URL ||
  'http://localhost:8000/api/1';

// Separator placed between inline list items.
export const SPACER = " · ";

// Icon names used for the main object types.
export const STORY_ICON = "projects";
export const ARTICLE_ICON = "document";
export const CLUSTER_ICON = "people";
export const LINKER_ICON = "send-to-graph";

// Minimum number of articles / links a story needs before the story view stops defaulting
// to the articles tab / switches its default to the graph tab.
export const ARTICLE_THRESHOLD = 5;
export const LINKS_THRESHOLD = 3;
--------------------------------------------------------------------------------
/frontend/src/hooks.ts:
--------------------------------------------------------------------------------
1 | import { TypedUseSelectorHook, useDispatch, useSelector } from 'react-redux'
2 | import type { RootState, AppDispatch } from './store'
3 |
// Typed aliases of the react-redux hooks, so components get the store's dispatch and state
// types without repeating them.
4 | export const useAppDispatch: () => AppDispatch = useDispatch
// NOTE(review): the type argument of TypedUseSelectorHook is missing in this copy of the file
// (presumably RootState, which is imported above).
5 | export const useAppSelector: TypedUseSelectorHook = useSelector
--------------------------------------------------------------------------------
/frontend/src/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom/client';
3 |
4 | import './styles/index.scss';
5 | import App from './App';
6 |
// Application entry point: creates a React root on the element with id "root" and renders into it.
// The `as HTMLElement` cast assumes that element exists in the host page.
7 | const root = ReactDOM.createRoot(
8 | document.getElementById('root') as HTMLElement
9 | );
// NOTE(review): the rendered element tree is missing in this copy of the file.
10 | root.render(
11 |
12 |
13 |
14 | );
14 | );
15 |
--------------------------------------------------------------------------------
/frontend/src/logic.ts:
--------------------------------------------------------------------------------
1 | import { IClusterBase, IOntology } from "./types";
2 |
3 |
// Returns true when cluster type `type_` equals `required` or inherits from it through the
// `parent` links declared in the ontology's cluster types.
//
// The parent chain is walked iteratively with a visited set, so a mis-configured ontology
// that contains a parent cycle yields `false` instead of recursing without bound. A missing
// (undefined or null) parent ends the chain. A type that equals `required` matches even if
// the ontology does not know it.
export function isA(ontology: IOntology, type_: string, required: string): boolean {
  const visited = new Set<string>();
  let current: string | null | undefined = type_;
  while (current !== undefined && current !== null && !visited.has(current)) {
    if (current === required) {
      return true;
    }
    visited.add(current);
    const name: string = current;
    // Unknown type names resolve to undefined here, which ends the walk.
    current = ontology.cluster_types.find((ct) => ct.name === name)?.parent;
  }
  return false;
}
14 |
// Returns true when a link of type `linkType` may point from `source` to `target`: the link
// type must exist in the ontology, and the source and target cluster types must each satisfy
// (via isA) the source/target type the link type declares.
export function canHaveLink(ontology: IOntology, source: IClusterBase, target: IClusterBase, linkType: string): boolean {
  const definition = ontology.link_types.find((candidate) => candidate.name === linkType);
  if (definition === undefined) {
    return false;
  }
  // The target is only checked when the source already fits.
  const sourceFits = isA(ontology, source.type, definition.source_type);
  return sourceFits && isA(ontology, target.type, definition.target_type);
}
28 |
// Returns true when a link of type `linkType` is allowed between the two clusters in at
// least one direction (source -> target, or target -> source).
export function canHaveBidi(ontology: IOntology, source: IClusterBase, target: IClusterBase, linkType: string): boolean {
  if (canHaveLink(ontology, source, target, linkType)) {
    return true;
  }
  return canHaveLink(ontology, target, source, linkType);
}
--------------------------------------------------------------------------------
/frontend/src/react-app-env.d.ts:
--------------------------------------------------------------------------------
1 | ///
2 |
--------------------------------------------------------------------------------
/frontend/src/router.tsx:
--------------------------------------------------------------------------------
1 | import { createBrowserRouter, useRouteError } from "react-router-dom";
2 | import ArticleIndex from "./screens/ArticleIndex";
3 | import ClusterIndex from "./screens/ClusterIndex";
4 | import ClusterView from "./screens/ClusterView";
5 | // import HomePage from "./screens/Home";
6 | import Layout from "./screens/Layout";
7 | import Linker from "./screens/Linker";
8 | import LinkerRelated from "./screens/LinkerRelated";
9 | import StoryIndex from "./screens/StoryIndex";
10 | import StoryLinker from "./screens/StoryLinker";
11 | import StoryView from "./screens/StoryView";
12 |
// Shape this file assumes for the value returned by useRouteError().
13 | interface IRouteError {
14 | statusText?: string
15 | message: string
16 | }
17 |
// Fallback page for routing errors: shows a generic apology followed by the error's status
// text or, if that is empty, its message.
18 | function ErrorPage() {
// NOTE(review): the cast is unchecked — the route error value may have a different shape.
19 | const error = useRouteError() as IRouteError;
20 |
21 | return (
22 |
23 |
Oops!
24 |
Sorry, an unexpected error has occurred.
25 |
26 | {error.statusText || error.message}
27 |
28 |
29 | );
30 | }
31 |
// Application route table: a single root route at "/" with an error element and one child
// route per screen (index, story view, story linker, articles, clusters, cluster view,
// linker, linker/related).
// NOTE(review): the `element` values are missing in this copy of the file; the screens
// imported at the top of the file are presumably mounted here.
32 | export const router = createBrowserRouter([
33 | {
34 | path: "/",
35 | element: ,
36 | errorElement: ,
37 | children: [
38 | {
// Empty path: the child rendered at "/" itself.
39 | path: "",
40 | element:
41 | },
42 | {
43 | path: "stories/:storyId",
44 | element:
45 | },
46 | {
47 | path: "stories/:storyId/linker",
48 | element:
49 | },
50 | {
51 | path: "articles",
52 | element: ,
53 | },
54 | {
55 | path: "clusters",
56 | element: ,
57 | },
58 | {
59 | path: "clusters/:clusterId",
60 | element: ,
61 | },
62 | {
63 | path: "linker",
64 | element: ,
65 | },
66 | {
67 | path: "linker/related",
68 | element: ,
69 | },
70 | ],
71 | },
72 | ]);
73 |
74 |
75 |
--------------------------------------------------------------------------------
/frontend/src/screens/ArticleIndex.tsx:
--------------------------------------------------------------------------------
1 | import { ControlGroup, Classes, HTMLSelect, HTMLTable, Button, IconSize, Icon } from '@blueprintjs/core';
2 | import classnames from "classnames";
3 | import { FormEvent, MouseEvent, useState } from 'react';
4 | import { Link } from 'react-router-dom';
5 | import { useSearchParams } from "react-router-dom";
6 | import ArticleDrawer from '../components/ArticleDrawer';
7 | import ArticleStoryEditor from '../components/ArticleStoryEditor';
8 | import Pagination from '../components/Pagination';
9 | import { Numeric, SectionLoading } from '../components/util';
10 | import { ARTICLE_ICON } from '../constants';
11 |
12 | import { useFetchArticleListingQuery } from "../services/articles"
13 | import { useFetchSitesQuery } from '../services/sites';
14 | import { IArticle } from '../types';
15 | import { asString, useListingPagination } from "../util";
16 |
// Article listing screen: a search form (query text + site filter), a paginated table of
// articles and a drawer for the article selected through the `article` URL parameter.
17 | export default function ArticleIndex() {
18 | const [params, setParams] = useSearchParams();
19 | const page = useListingPagination('articles');
20 | const { data: sitesResponse } = useFetchSitesQuery();
// Form state is seeded from the URL; the listing query below reads the URL parameters, not
// this state, so the listing only changes once the form is submitted.
21 | const [query, setQuery] = useState(asString(params.get('q')) || '')
22 | const [site, setSite] = useState(asString(params.get('site')) || '')
23 | const { data: listing } = useFetchArticleListingQuery({
24 | ...page,
25 | q: params.get('q'),
26 | site: params.get('site'),
27 | sort: 'tags:desc'
28 | });
// `sites` is always an array (empty while the sites response is loading).
// NOTE(review): because of that, the `sites === undefined` / `sites !== undefined` checks in
// the JSX below can never be true / false respectively.
29 | const sites = sitesResponse === undefined ? [] : sitesResponse.results.map(s => s.site);
// An empty `article` parameter is treated the same as a missing one.
30 | const articleId = params.get('article') || undefined;
31 |
// Form submit: writes the site and query into the URL, replacing the current parameters.
32 | const onSubmit = function (e: FormEvent) {
33 | e.preventDefault();
34 | setParams({ site: site, q: query });
35 | }
36 |
// Selects (or, with undefined, clears) the article shown in the drawer while keeping the other
// URL parameters. Clearing stores an empty string rather than removing the parameter.
37 | const setArticle = (articleId: string | undefined) => {
38 | const paramsObj = Object.fromEntries(params.entries());
39 | setParams({ ...paramsObj, article: articleId || '' });
40 | }
41 |
// Click handler for an article title: opens the drawer instead of following the link.
42 | const onClickArticle = (event: MouseEvent, article: IArticle) => {
43 | event.preventDefault();
44 | setArticle(article.id)
45 | }
46 |
47 | return (
48 |
49 | {(listing === undefined || sites === undefined) && (
50 |
51 | {' '}
52 | Articles in the StoryWeb database
53 |
54 | )}
55 | {(listing !== undefined && sites !== undefined) && (
56 |
57 | {' '}
58 | articles from sources in the StoryWeb database
59 |
60 | )}
61 |
62 |
85 | {listing === undefined && (
86 |
87 | )}
88 | {listing !== undefined && (
89 | <>
90 |
91 |
92 |
93 | Title
94 | Site
95 | Entities
96 | Stories
97 |
98 |
99 |
100 | {listing.results.map((article) => (
101 |
102 |
103 | onClickArticle(e, article)}
105 | to={`/articles?article=${article.id}`}
106 | >
107 | {article.title}
108 |
109 |
110 |
111 | {article.site}
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 | ))}
121 |
122 |
123 |
124 |
setArticle(undefined)}
126 | articleId={articleId}
127 | tags={[]}
128 | />
129 | >
130 | )}
131 |
132 | )
133 | }
134 |
--------------------------------------------------------------------------------
/frontend/src/screens/ClusterIndex.tsx:
--------------------------------------------------------------------------------
1 | import { ControlGroup, Classes, HTMLTable, Button, Checkbox, IconSize, Icon } from '@blueprintjs/core';
2 | import classnames from "classnames";
3 | import { FormEvent, useState } from 'react';
4 | import { Link } from 'react-router-dom';
5 | import { useSearchParams } from "react-router-dom";
6 | import Pagination from '../components/Pagination';
7 | import { ErrorSection, Numeric, SectionLoading, ClusterTypeIcon } from '../components/util';
8 | import { CLUSTER_ICON } from '../constants';
9 | import { useNodeTypes } from '../selectors';
10 |
11 | import { useFetchClusterListingQuery, useMergeClustersMutation } from '../services/clusters';
12 | import { asString, getClusterLink, listToggle, useListingPagination } from "../util";
13 |
// Entity (cluster) listing screen: a search form, a paginated table of clusters and a
// checkbox-driven merge of several clusters into one.
14 | export default function ClusterIndex() {
15 | const [params, setParams] = useSearchParams();
16 | const page = useListingPagination('clusters');
// Search box state, seeded from the `q` URL parameter.
17 | const [query, setQuery] = useState(asString(params.get('q')) || '');
// Ids of the clusters currently ticked for merging, in the order they were selected.
18 | const [merges, setMerges] = useState([] as string[]);
19 | const [postMerge, { isLoading: isUpdating }] = useMergeClustersMutation();
20 | const { data: listing, error } = useFetchClusterListingQuery({
21 | ...page,
22 | q: params.get('q'),
23 | types: useNodeTypes(),
24 | });
25 |
// Merges the selected clusters: the first selected id is the anchor, the rest are passed as
// `other`. Does nothing unless at least two clusters are selected; the selection is cleared
// once the mutation has resolved.
26 | const onMerge = async () => {
27 | if (merges.length > 1) {
28 | const [anchor, ...other] = merges;
29 | await postMerge({ anchor: anchor, other: other }).unwrap()
30 | setMerges([]);
31 | }
32 | }
33 |
// Adds or removes a cluster id from the merge selection.
// NOTE(review): declared async although nothing is awaited.
34 | const toggleMerge = async (id: string) => {
35 | setMerges(listToggle(merges, id));
36 | }
37 |
// Form submit: writes the query into the URL.
38 | const onSubmit = function (e: FormEvent) {
39 | e.preventDefault();
40 | setParams({ q: query });
41 | }
42 |
43 | if (error !== undefined) {
44 | return
45 | }
46 |
47 | return (
48 |
49 | {listing === undefined && (
50 |
51 | {' '}
52 | Entities in the StoryWeb database
53 |
54 | )}
55 | {listing !== undefined && (
56 |
57 | {' '}
58 | entities in the StoryWeb database
59 |
60 | )}
61 |
62 |
75 | {listing === undefined && (
76 |
77 | )}
78 | {listing !== undefined && (
79 | <>
80 |
81 |
82 |
83 | Label
84 | Articles
85 |
86 |
87 | Merge
88 |
89 |
90 |
91 |
92 |
93 | {listing.results.map((cluster) => (
94 |
95 |
96 |
97 | {cluster.label}
98 |
99 |
100 |
101 |
102 |
103 | toggleMerge(cluster.id)}
106 | disabled={isUpdating}
107 | />
108 |
109 |
110 | ))}
111 |
112 |
113 |
114 | >
115 | )}
116 |
117 | )
118 | }
119 |
--------------------------------------------------------------------------------
/frontend/src/screens/ClusterView.tsx:
--------------------------------------------------------------------------------
1 | import { useParams, useSearchParams } from "react-router-dom";
2 | import { Tabs, Tab, IconSize } from "@blueprintjs/core";
3 |
4 | import RelatedListing from "../components/RelatedListing";
5 | import SimilarListing from "../components/SimilarListing";
6 | import { ErrorSection, SectionLoading, ClusterLabel, ClusterTypeIcon, NumericTag } from "../components/util";
7 | import { useFetchClusterQuery, useFetchSimilarClusterListingQuery, useFetchRelatedClusterListingQuery } from "../services/clusters";
8 | import ClusterArticles from "../components/ClusterArticles";
9 | import ScreenHeading from "../components/ScreenHeading";
10 | import ClusterButtonGroup from "../components/ClusterButtonGroup";
11 | import { useFetchArticleListingQuery } from "../services/articles";
12 | import { useNodeTypes } from "../selectors";
13 | import ScreenContent from "../components/ScreenContent";
14 |
// Detail screen for a single cluster with three tabs: co-occurring clusters, similar clusters
// and articles. The active tab is stored in the `view` URL parameter.
15 | export default function ClusterView() {
16 | const { clusterId } = useParams();
17 | const [params, setParams] = useSearchParams();
18 | const nodeTypes = useNodeTypes();
19 | const { data: cluster, isLoading, error } = useFetchClusterQuery(clusterId as string);
// The related / similar / article listings are fetched here as well; the JSX below reads
// `similar?.total` to disable the Similar tab when it is zero.
20 | const relatedQuery = { clusterId: clusterId || '', params: { types: nodeTypes } };
21 | const { data: related } = useFetchRelatedClusterListingQuery(relatedQuery)
22 | const similarQuery = { clusterId: clusterId || '', params: {} };
23 | const { data: similar } = useFetchSimilarClusterListingQuery(similarQuery);
24 | const articleQuery = { cluster: clusterId };
25 | const { data: articles } = useFetchArticleListingQuery(articleQuery);
// Error state takes precedence over the loading state.
26 | if (error !== undefined) {
27 | return
28 | }
29 | if (cluster === undefined || isLoading) {
30 | return
31 | }
32 |
// Defaults to the 'related' tab when the URL names none.
33 | const activeTab = params.get('view') || 'related';
34 |
// Switches tab by updating `view` while keeping the other URL parameters.
35 | const setView = (view: string) => {
36 | const paramsObj = Object.fromEntries(params.entries());
37 | setParams({ ...paramsObj, view });
38 | }
39 |
40 | const title = <>
41 |
42 |
43 | >;
44 | return (
45 |
46 |
47 |
48 |
49 | setView(tab.toString())}>
50 |
53 | Co-occurring
54 |
55 | >
56 | }
57 | panel={
58 |
59 |
60 |
61 | }
62 | />
63 |
66 | Similar
67 |
68 | >
69 | }
70 | disabled={similar?.total === 0}
71 | panel={
72 |
73 |
74 |
75 | }
76 | />
77 |
80 | Articles
81 |
82 | >
83 | }
84 | panel={
85 |
86 |
87 |
88 | }
89 | />
90 |
91 |
92 | )
93 | }
94 |
--------------------------------------------------------------------------------
/frontend/src/screens/Home.tsx:
--------------------------------------------------------------------------------
1 | import { Link } from "react-router-dom";
2 |
// Minimal welcome page with a link labelled "articles".
// NOTE(review): the router module has its import of this screen commented out, so the page
// appears to be unused at the moment.
3 | export default function HomePage() {
4 | return (
5 |
6 |
Welcome to storyweb
7 | articles
8 |
9 | )
10 | }
11 |
--------------------------------------------------------------------------------
/frontend/src/screens/Layout.tsx:
--------------------------------------------------------------------------------
1 | import { Outlet } from "react-router-dom";
2 | import classnames from "classnames";
3 | import { HotkeyConfig, HotkeysTarget2, HotkeysProvider } from '@blueprintjs/core';
4 | import Helmet from "react-helmet";
5 |
6 | import { ErrorSection, ScreenLoading, ScrollToTop } from "../components/util";
7 | import { SITE } from "../constants";
8 | import { useFetchOntologyQuery } from "../services/ontology";
9 | import Footer from "../components/Footer";
10 | import NavbarSection from "../components/Navbar";
11 |
12 | import styles from "../styles/Layout.module.scss";
13 |
// Application shell rendered around every screen. It waits for the ontology before rendering
// anything else, so child screens can rely on it being loaded.
14 | export default function Layout() {
15 | const { data: ontology, error: ontologyError } = useFetchOntologyQuery();
16 |
// A failed ontology request replaces the whole layout with an error section.
17 | if (ontologyError !== undefined) {
18 | return ;
19 | }
20 |
// Until the ontology arrives, only a loading placeholder is shown.
21 | if (ontology === undefined) {
22 | return ;
23 | }
24 |
// Global hotkeys. The "/" shortcut is a placeholder: it only shows an alert for now.
25 | const appHotkeys: HotkeyConfig[] = [
26 | {
27 | combo: "/",
28 | global: true,
29 | label: "Search entity",
30 | onKeyDown: () => alert('tbd :)'),
31 | },
32 | ];
33 |
34 | return (
35 | <>
36 |
37 | {/*
38 |
39 | */}
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | >
58 | )
59 | }
60 |
--------------------------------------------------------------------------------
/frontend/src/screens/LinkerRelated.tsx:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { useFetchRelatedClusterListingQuery } from '../services/clusters';
3 | import { useNavigate, useSearchParams } from 'react-router-dom';
4 | import { SectionLoading } from '../components/util';
5 | import { useEffect } from 'react';
6 | import { useNodeTypes } from '../selectors';
7 |
// Redirect-only screen for the "link related entities" flow.
//
// Reads the `anchor` cluster id from the URL, asks the API for a single not-yet-linked
// related cluster and then redirects:
//   - to /linker with anchor + other when such a cluster exists,
//   - back to the anchor's cluster page when nothing is left to link,
//   - to the cluster index when no anchor was given at all.
// While the request is pending only a loading placeholder is shown.
export default function LinkerRelated() {
  const navigate = useNavigate();
  const [params] = useSearchParams();
  const nodeTypes = useNodeTypes();
  const anchorId = params.get('anchor');
  const hasAnchor = anchorId !== null;

  // `_` carries the previously handled id; it only serves to make the query arguments
  // differ between rounds so that a fresh request is issued.
  const relatedParams = { linked: false, limit: 1, _: params.get('previous'), types: nodeTypes };
  const relatedQuery = { clusterId: anchorId + '', params: relatedParams };
  const { data, isLoading } = useFetchRelatedClusterListingQuery(
    relatedQuery,
    // Without an anchor there is nothing to look up: skip the request instead of querying
    // the literal cluster id "null".
    { refetchOnMountOrArgChange: true, skip: !hasAnchor }
  );

  useEffect(() => {
    // Navigation is a side effect and must not run during render (the router ignores
    // navigate() calls made while the component is first rendered).
    if (!hasAnchor) {
      navigate('/clusters', { replace: true });
      return;
    }
    if (data === undefined || isLoading) {
      return;
    }
    if (data.results.length > 0) {
      const otherId = data.results[0].id;
      navigate(queryString.stringifyUrl({
        'url': `/linker`,
        'query': { anchor: anchorId, other: otherId, related: true }
      }), { replace: true });
    } else {
      navigate(`/clusters/${anchorId}`, { replace: true });
    }
  }, [anchorId, hasAnchor, data, isLoading, navigate]);

  return <SectionLoading />;
}
--------------------------------------------------------------------------------
/frontend/src/screens/StoryIndex.tsx:
--------------------------------------------------------------------------------
1 | import { ControlGroup, Classes, Intent, Button, NonIdealState, Card, Elevation, ButtonGroup, Menu, MenuItem } from '@blueprintjs/core';
2 | import classnames from "classnames";
3 | import { FormEvent, useState } from 'react';
4 | import { useNavigate } from 'react-router-dom';
5 | import { useSearchParams } from "react-router-dom";
6 | import StoryCreateDialog from '../components/StoryCreateDialog';
7 | import { SectionLoading } from '../components/util';
8 |
9 | import { useFetchStoryListingQuery } from '../services/stories';
10 | import ScreenHeading from '../components/ScreenHeading';
11 | import { asString } from "../util";
12 |
13 | import styles from '../styles/Story.module.scss';
14 | import { API_URL } from '../constants';
15 | import { Popover2, PopupKind } from '@blueprintjs/popover2';
16 |
// Story listing screen (the index route): a search form, a "New story..." action that opens
// the create dialog, an export menu and one clickable card per story.
17 | export default function StoryIndex() {
18 | const [params, setParams] = useSearchParams();
19 | const navigate = useNavigate();
// Search box state, seeded from the `q` URL parameter; the listing query reads the URL value.
20 | const [query, setQuery] = useState(asString(params.get('q')) || '')
// Whether the create-story dialog is open.
21 | const [showCreate, setShowCreate] = useState(false)
22 | const { data: listing } = useFetchStoryListingQuery({
23 | q: params.get('q'),
24 | });
25 |
// Form submit: writes the query into the URL.
26 | const onSubmit = function (e: FormEvent) {
27 | e.preventDefault();
28 | setParams({ q: query });
29 | }
30 |
31 | const onCreate = () => { setShowCreate(true) };
32 | const onCloseCreate = () => { setShowCreate(false) };
33 |
// Three mutually exclusive states are rendered below: loading (listing undefined), empty
// listing, and a non-empty listing whose cards navigate to /stories/<id> on click.
34 | return (
35 |
36 |
Your StoryWebs>}>
37 |
38 | New story...
39 |
45 |
46 |
47 |
48 | }
49 | >
50 |
51 | Export
52 |
53 |
54 |
55 |
56 |
57 |
70 | {
71 | listing === undefined && (
72 |
73 | )
74 | }
75 | {
76 | (listing !== undefined && listing.results.length === 0) && (
77 |
82 | )
83 | }
84 | {
85 | (listing !== undefined && listing.results.length > 0) && (
86 |
87 | {listing.results.map((story) => (
88 |
navigate(`/stories/${story.id}`)}
94 | >
95 | {story.title}
96 | {story.summary}
97 |
98 | ))}
99 |
100 | )
101 | }
102 |
103 | )
104 | }
105 |
--------------------------------------------------------------------------------
/frontend/src/screens/StoryLinker.tsx:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { useNavigate, useParams, useSearchParams } from 'react-router-dom';
3 | import { SectionLoading } from '../components/util';
4 | import { useEffect } from 'react';
5 | import { useFetchStoryPairsQuery } from '../services/stories';
6 | import { useNodeTypes } from '../selectors';
7 |
// Redirect-only screen for the per-story linking flow.
//
// Asks the API for a single not-yet-linked entity pair of the story and then redirects:
//   - to /linker with that pair (and the story id) when one exists,
//   - back to the story page when nothing is left to link.
// While the request is pending only a loading placeholder is shown.
export default function StoryLinker() {
  const { storyId } = useParams();
  const navigate = useNavigate();
  const [params] = useSearchParams();
  const nodeTypes = useNodeTypes();

  // `_` carries the previously handled id; it only serves to make the query arguments
  // differ between rounds so that a fresh request is issued.
  const pairsParams = { linked: false, limit: 1, _: params.get('previous'), types: nodeTypes };
  const pairsQuery = { storyId, params: pairsParams };
  const { data, isLoading } = useFetchStoryPairsQuery(pairsQuery, { refetchOnMountOrArgChange: true });

  useEffect(() => {
    if (data === undefined || isLoading) {
      return;
    }
    if (data.results.length > 0) {
      const pair = data.results[0];
      navigate(queryString.stringifyUrl({
        'url': `/linker`,
        'query': { anchor: pair.left.id, other: pair.right.id, story: storyId }
      }), { replace: true });
    } else {
      navigate(`/stories/${storyId}`, { replace: true });
    }
    // The dependency list keeps the redirect from being re-issued on unrelated re-renders.
  }, [data, isLoading, navigate, storyId]);

  return <SectionLoading />;
}
--------------------------------------------------------------------------------
/frontend/src/screens/StoryView.tsx:
--------------------------------------------------------------------------------
1 | import { AnchorButton, Button, ButtonGroup, Icon, IconSize, Intent, Menu, MenuItem, Tab, Tabs } from "@blueprintjs/core";
2 | import { Popover2, PopupKind } from "@blueprintjs/popover2";
3 | import { useState } from "react";
4 | import { useParams, useSearchParams } from "react-router-dom";
5 | import ScreenContent from "../components/ScreenContent";
6 | import ScreenHeading from "../components/ScreenHeading";
7 | import StoryArticleImportDialog from "../components/StoryArticleImportDialog";
8 | import StoryArticles from "../components/StoryArticles";
9 | import StoryDeleteDialog from "../components/StoryDeleteDialog";
10 | import StoryGraph from "../components/StoryGraph";
11 | import StoryPairs from "../components/StoryPairs";
12 | import StoryUpdateDialog from "../components/StoryUpdateDialog";
13 | import { ErrorSection, NumericTag, SectionLoading } from "../components/util";
14 | import { API_URL, ARTICLE_ICON, ARTICLE_THRESHOLD, LINKER_ICON, LINKS_THRESHOLD, STORY_ICON } from "../constants";
15 | import { useNodeTypes } from "../selectors";
16 | import { useFetchArticleListingQuery } from "../services/articles";
17 | import { useFetchStoryPairsQuery, useFetchStoryQuery } from "../services/stories";
18 |
19 |
20 | export default function StoryView() {
21 | const { storyId } = useParams();
22 | const nodeTypes = useNodeTypes();
23 | const [showImport, setShowImport] = useState(false);
24 | const [showDelete, setShowDelete] = useState(false);
25 | const [showEdit, setShowEdit] = useState(false);
26 | const [params, setParams] = useSearchParams();
27 | const { data: story, isLoading, error } = useFetchStoryQuery(storyId as string);
28 | const { data: articles } = useFetchArticleListingQuery({ story: storyId, limit: 0 });
29 | const { data: links } = useFetchStoryPairsQuery({
30 | storyId: storyId || '',
31 | params: { types: nodeTypes, limit: 0, linked: true }
32 | });
33 |
34 | const hasArticles = (articles?.total || 0) >= ARTICLE_THRESHOLD;
35 | const hasLinks = (links?.total || 0) >= LINKS_THRESHOLD;
36 |
37 | const secondaryTab = hasLinks ? 'graph' : 'pairs';
38 | const defaultTab = hasArticles ? secondaryTab : 'articles';
39 | const activeTab = params.get('view') || defaultTab;
40 |
41 | if (error !== undefined) {
42 | return
43 | }
44 | if (story === undefined || articles === undefined || links === undefined || isLoading) {
45 | return
46 | }
47 |
48 | const setView = (view: string) => {
49 | const paramsObj = Object.fromEntries(params.entries());
50 | setParams({ ...paramsObj, view });
51 | }
52 |
53 | return (
54 |
55 |
{story.title}>}>
56 |
57 | {(hasArticles && hasLinks) && (
58 |
59 | Build web
60 |
61 | )}
62 | setShowImport(true)}>
63 | Add article
64 |
65 | setShowEdit(true)}>
66 | Edit
67 |
68 |
74 |
75 |
76 |
77 | }
78 | >
79 |
80 | Export
81 |
82 |
83 | setShowDelete(true)}>
84 | Delete
85 |
86 |
87 | setShowImport(false)} />
88 | setShowEdit(false)} story={story} />
89 | setShowDelete(false)} story={story} />
90 |
91 |
setView(e.toString())}>
92 |
95 | Network graph
96 | >
97 | }
98 | disabled={!hasLinks}
99 | panel={
100 |
101 |
102 |
103 | }
104 | />
105 |
108 | Links
109 |
110 | >
111 | }
112 | disabled={!hasArticles}
113 | panel={
114 |
115 |
116 |
117 | }
118 | />
119 |
122 | Articles
123 |
124 | >
125 | }
126 | panel={
127 |
128 |
129 |
130 | }
131 | />
132 |
133 |
134 | )
135 | }
136 |
--------------------------------------------------------------------------------
/frontend/src/selectors.ts:
--------------------------------------------------------------------------------
1 | import { useSelector } from "react-redux";
2 | import { useFetchOntologyQuery } from "./services/ontology";
3 | import { RootState } from "./store";
4 |
5 |
6 |
/**
 * Names of the ontology's cluster types that are not listed in the
 * `hiddenNodeTypes` config setting.
 *
 * Returns an empty array while the ontology query has no data.
 */
export function useNodeTypes(): string[] {
  const ontologyQuery = useFetchOntologyQuery();
  const hidden = useSelector((state: RootState) => state.config.hiddenNodeTypes);

  const ontology = ontologyQuery.data;
  if (ontology === undefined) {
    return [];
  }

  const visible: string[] = [];
  for (const clusterType of ontology.cluster_types) {
    if (!hidden.includes(clusterType.name)) {
      visible.push(clusterType.name);
    }
  }
  return visible;
}
--------------------------------------------------------------------------------
/frontend/src/services/articles.ts:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react';
3 |
4 | import { API_URL } from '../constants';
5 | import type { IArticle, IArticleDetails, IListingResponse } from '../types';
6 |
/**
 * RTK Query API slice for article endpoints, served relative to API_URL.
 * Both queries provide the "Article" cache tag.
 */
export const articlesApi = createApi({
  reducerPath: 'articlesApi',
  tagTypes: ['Article'],
  baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
  endpoints: (builder) => ({
    // GET articles/<articleId>
    // NOTE(review): type arguments reconstructed from this file's type
    // imports (IArticleDetails is otherwise unused) -- confirm.
    fetchArticle: builder.query<IArticleDetails, string>({
      query: (articleId) => `articles/${articleId}`,
      providesTags: ["Article"]
    }),
    // GET articles?<params>; the params object is serialized as the query string.
    fetchArticleListing: builder.query<IListingResponse<IArticle>, any>({
      query: (params) => queryString.stringifyUrl({
        'url': `articles`,
        'query': params
      }),
      providesTags: ["Article"],
    }),
  }),
})

export const { useFetchArticleQuery, useFetchArticleListingQuery } = articlesApi
--------------------------------------------------------------------------------
/frontend/src/services/clusters.ts:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
3 |
4 | import { API_URL } from '../constants'
5 | import { ICluster, IClusterDetails, IClusterMerge, IListingResponse, IRelatedCluster, ISimilarCluster } from '../types'
6 |
7 | type IClusterQueryParams = {
8 | clusterId: string,
9 | params?: any
10 | }
11 |
/**
 * RTK Query API slice for cluster endpoints, served relative to API_URL.
 * Queries are re-fetched on mount or argument change.
 *
 * NOTE(review): the generic type arguments below were reconstructed from
 * this file's type imports and the leftover `, any>` / `, IClusterQueryParams>`
 * fragments -- confirm the result types against the backend responses.
 */
export const clustersApi = createApi({
  reducerPath: 'clustersApi',
  tagTypes: ['Cluster', 'Link'],
  refetchOnMountOrArgChange: true,
  baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
  endpoints: (builder) => ({
    // GET clusters/<clusterId>
    fetchCluster: builder.query<IClusterDetails, string>({
      query: (clusterId) => `clusters/${clusterId}`,
      providesTags: ["Cluster"]
    }),
    // GET clusters?<params>
    fetchClusterListing: builder.query<IListingResponse<ICluster>, any>({
      query: (params) => queryString.stringifyUrl({
        'url': `clusters`,
        'query': params
      }),
      providesTags: ["Cluster"]
    }),
    // GET clusters/<clusterId>/similar?<params>
    fetchSimilarClusterListing: builder.query<IListingResponse<ISimilarCluster>, IClusterQueryParams>({
      query: ({ clusterId, params }) => queryString.stringifyUrl({
        'url': `clusters/${clusterId}/similar`,
        'query': params
      }),
      providesTags: ["Cluster", "Link"]
    }),
    // GET clusters/<clusterId>/related?<params>
    fetchRelatedClusterListing: builder.query<IListingResponse<IRelatedCluster>, IClusterQueryParams>({
      query: ({ clusterId, params }) => queryString.stringifyUrl({
        'url': `clusters/${clusterId}/related`,
        'query': params
      }),
      providesTags: ["Cluster", "Link"]
    }),
    // POST links/_merge with the merge request as body; invalidates both tags.
    // NOTE(review): result type IClusterDetails is an assumption -- confirm.
    mergeClusters: builder.mutation<IClusterDetails, IClusterMerge>({
      query(merge) {
        return {
          url: `links/_merge`,
          method: 'POST',
          body: merge,
        }
      },
      invalidatesTags: ['Cluster', 'Link'],
    })
  }),
})

export const {
  useFetchClusterListingQuery,
  useFetchClusterQuery,
  useFetchSimilarClusterListingQuery,
  useFetchRelatedClusterListingQuery,
  useMergeClustersMutation
} = clustersApi
--------------------------------------------------------------------------------
/frontend/src/services/config.ts:
--------------------------------------------------------------------------------
1 | import { createSlice } from '@reduxjs/toolkit'
2 | import type { PayloadAction } from '@reduxjs/toolkit'
3 |
4 | export interface ConfigState {
5 | hiddenNodeTypes: string[]
6 | }
7 |
8 | const initialState: ConfigState = {
9 | // hiddenNodeTypes: ['LOC'],
10 | hiddenNodeTypes: [],
11 | }
12 |
/**
 * Redux slice holding user configuration (currently the list of hidden
 * node types).
 */
export const configSlice = createSlice({
  name: 'config',
  initialState,
  reducers: {
    // Replace the list of hidden node types with the payload.
    setHiddenNodeTypes: (state, action: PayloadAction<string[]>) => {
      state.hiddenNodeTypes = action.payload;
    },
    // Replace the whole state with the defaults overlaid by the payload, so
    // keys absent from the payload fall back to their initial values.
    // NOTE(review): the payload type was missing from the source text;
    // Partial<ConfigState> also accepts a full ConfigState -- confirm.
    hydrate: (state, action: PayloadAction<Partial<ConfigState>>) => {
      return { ...initialState, ...action.payload };
    },
  },
})
25 |
26 | export const { setHiddenNodeTypes, hydrate } = configSlice.actions
27 |
28 | export default configSlice.reducer
--------------------------------------------------------------------------------
/frontend/src/services/links.ts:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
3 |
4 | import { API_URL } from '../constants'
5 | import { IClusterDetails, ILink, ILinkPredict, ILinkPrediction, IListingResponse, IUntagArticle } from '../types'
6 |
7 |
/**
 * RTK Query API slice for link endpoints, served relative to API_URL.
 * Queries are re-fetched on mount or argument change; every mutation
 * invalidates both the "Cluster" and "Link" tags.
 *
 * NOTE(review): the generic type arguments below were reconstructed from
 * this file's type imports and the leftover `, any>` / `>` fragments --
 * confirm the result types against the backend responses.
 */
export const linksApi = createApi({
  reducerPath: 'linksApi',
  tagTypes: ['Cluster', 'Link'],
  refetchOnMountOrArgChange: true,
  baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
  endpoints: (builder) => ({
    // GET links?<params>
    fetchLinks: builder.query<IListingResponse<ILink>, any>({
      query: (params) => queryString.stringifyUrl({
        'url': `links`,
        'query': params
      }),
      providesTags: ['Link'],
    }),
    // GET links/_predict?<params>
    fetchPrediction: builder.query<ILinkPrediction, ILinkPredict>({
      query: (params) => queryString.stringifyUrl({
        'url': `links/_predict`,
        'query': { ...params }
      }),
      providesTags: ['Cluster', 'Link'],
    }),
    // POST links with the link as body.
    saveLink: builder.mutation<ILink, Partial<ILink>>({
      query(link) {
        return {
          url: `links`,
          method: 'POST',
          body: link,
        }
      },
      invalidatesTags: ['Cluster', 'Link'],
    }),
    // POST links/_explode with { cluster: <clusterId> } as body.
    explodeCluster: builder.mutation<IClusterDetails, string>({
      query(clusterId) {
        return {
          url: `links/_explode`,
          method: 'POST',
          body: { cluster: clusterId },
        }
      },
      invalidatesTags: ['Cluster', 'Link'],
    }),
    // POST links/_untag with the untag request as body.
    untagArticle: builder.mutation<IClusterDetails, IUntagArticle>({
      query(untag) {
        return {
          url: `links/_untag`,
          method: 'POST',
          body: untag,
        }
      },
      invalidatesTags: ['Cluster', 'Link'],
    }),
  }),
})

export const {
  useSaveLinkMutation,
  useFetchPredictionQuery,
  useExplodeClusterMutation,
  useUntagArticleMutation,
  useFetchLinksQuery,
} = linksApi
--------------------------------------------------------------------------------
/frontend/src/services/ontology.ts:
--------------------------------------------------------------------------------
1 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
2 |
3 | import { API_URL } from '../constants'
4 | import type { IOntology } from '../types'
5 |
/**
 * RTK Query API slice exposing the `ontology` endpoint, served relative
 * to API_URL.
 */
export const ontologyApi = createApi({
  reducerPath: 'ontologyApi',
  baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
  endpoints: (builder) => ({
    // GET ontology; takes no argument (the hook is called without one).
    fetchOntology: builder.query<IOntology, void>({
      query: () => `ontology`,
    }),
  }),
})

export const { useFetchOntologyQuery } = ontologyApi
--------------------------------------------------------------------------------
/frontend/src/services/sites.ts:
--------------------------------------------------------------------------------
1 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
2 |
3 | import { API_URL } from '../constants'
4 | import type { IListingResponse, ISite } from '../types'
5 |
/**
 * RTK Query API slice exposing the `sites` listing endpoint, served
 * relative to API_URL.
 */
export const sitesApi = createApi({
  reducerPath: 'sitesApi',
  baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
  endpoints: (builder) => ({
    // GET sites; takes no argument.
    fetchSites: builder.query<IListingResponse<ISite>, void>({
      query: () => `sites`,
    }),
  }),
})

export const { useFetchSitesQuery } = sitesApi
--------------------------------------------------------------------------------
/frontend/src/services/stories.ts:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { createApi, fetchBaseQuery } from '@reduxjs/toolkit/query/react'
3 |
4 | import { API_URL } from '../constants'
5 | import type { IArticle, IClusterPair, IListingResponse, IStory, IStoryArticleImport, IStoryArticleToggle, IStoryMutation } from '../types'
6 |
/**
 * RTK Query API slice for story endpoints, served relative to API_URL.
 * Queries are re-fetched on mount or argument change.
 *
 * NOTE(review): the generic type arguments were missing from the source
 * text. Argument types were reconstructed from this file's type imports and
 * from how each `query` uses its argument. Result types that could not be
 * established are typed `any` so existing callers keep compiling -- tighten
 * them once the backend responses are confirmed.
 */
export const storiesApi = createApi({
  reducerPath: 'storiesApi',
  tagTypes: ['Story', 'Article', "Cluster", "Link"],
  refetchOnMountOrArgChange: true,
  baseQuery: fetchBaseQuery({ baseUrl: API_URL }),
  endpoints: (builder) => ({
    // GET stories/<storyId>
    fetchStory: builder.query<IStory, string>({
      query: (storyId) => `stories/${storyId}`,
      providesTags: ["Story"]
    }),
    // GET stories?<params>
    fetchStoryListing: builder.query<IListingResponse<IStory>, any>({
      query: (params) => queryString.stringifyUrl({
        'url': `stories`,
        'query': params
      }),
      providesTags: ["Story"]
    }),
    // GET stories/<storyId>/pairs?<params>
    fetchStoryPairs: builder.query<IListingResponse<IClusterPair>, any>({
      query: ({ storyId, params }) => queryString.stringifyUrl({
        'url': `stories/${storyId}/pairs`,
        'query': params
      }),
      providesTags: ["Story", "Cluster", "Link"]
    }),
    // GET stories/<storyId>/gexf?<params>; the response body is read as
    // text rather than parsed as JSON.
    fetchStoryGraph: builder.query<string, any>({
      query: ({ storyId, params }) => ({
        url: queryString.stringifyUrl({
          'url': `stories/${storyId}/gexf`,
          'query': params
        }),
        responseHandler: (response) => response.text(),
      }),
      providesTags: ["Story", "Cluster", "Link"],

    }),
    // POST stories with the story fields as body.
    createStory: builder.mutation<IStory, IStoryMutation>({
      query(story) {
        return {
          url: `stories`,
          method: 'POST',
          body: story,
        }
      },
      invalidatesTags: ['Story'],
    }),
    // POST stories/<story.id> with the story as body.
    updateStory: builder.mutation<IStory, IStory>({
      query(story) {
        return {
          url: `stories/${story.id}`,
          method: 'POST',
          body: story,
        }
      },
      invalidatesTags: ['Story'],
    }),
    // DELETE stories/<storyId>
    // NOTE(review): argument typed after IStory.id (number) -- confirm.
    deleteStory: builder.mutation<any, number>({
      query(storyId) {
        return {
          url: `stories/${storyId}`,
          method: 'DELETE',
        }
      },
      invalidatesTags: ['Story'],
    }),
    // POST stories/<story>/articles with { article } as body.
    toggleStoryArticle: builder.mutation<any, IStoryArticleToggle>({
      query(data) {
        return {
          url: `stories/${data.story}/articles`,
          method: 'POST',
          body: { article: data.article },
        }
      },
      invalidatesTags: ['Story', 'Article'],
    }),
    // POST stories/<story>/articles/import-url with { url } as body.
    // NOTE(review): result type IArticle is an assumption based on the
    // otherwise unused IArticle import -- confirm.
    importStoryArticle: builder.mutation<IArticle, IStoryArticleImport>({
      query(data) {
        return {
          url: `stories/${data.story}/articles/import-url`,
          method: 'POST',
          body: { url: data.url },
        }
      },
      invalidatesTags: ['Story', 'Article'],
    }),
  }),
})

export const { useFetchStoryListingQuery, useFetchStoryQuery, useFetchStoryGraphQuery, useCreateStoryMutation, useUpdateStoryMutation, useDeleteStoryMutation, useToggleStoryArticleMutation, useImportStoryArticleMutation, useFetchStoryPairsQuery } = storiesApi
--------------------------------------------------------------------------------
/frontend/src/store.ts:
--------------------------------------------------------------------------------
1 | import { configureStore } from '@reduxjs/toolkit'
2 | import { setupListeners } from '@reduxjs/toolkit/query'
3 | import { articlesApi } from './services/articles'
4 | import { clustersApi } from './services/clusters'
5 | import { linksApi } from './services/links'
6 | import { ontologyApi } from './services/ontology'
7 | import { sitesApi } from './services/sites'
8 | import { storiesApi } from './services/stories'
9 | import { configSlice, hydrate } from './services/config'
10 |
11 |
// Application store: one reducer and one middleware per RTK Query API slice,
// plus the plain `config` slice.
export const store = configureStore({
  reducer: {
    [ontologyApi.reducerPath]: ontologyApi.reducer,
    [articlesApi.reducerPath]: articlesApi.reducer,
    [storiesApi.reducerPath]: storiesApi.reducer,
    [clustersApi.reducerPath]: clustersApi.reducer,
    [linksApi.reducerPath]: linksApi.reducer,
    [sitesApi.reducerPath]: sitesApi.reducer,
    config: configSlice.reducer,
  },
  middleware: (getDefaultMiddleware) =>
    getDefaultMiddleware()
      .concat(ontologyApi.middleware)
      .concat(articlesApi.middleware)
      .concat(storiesApi.middleware)
      .concat(clustersApi.middleware)
      .concat(linksApi.middleware)
      .concat(sitesApi.middleware),
})

// Most recent config object handed to localStorage. Subscribers are notified
// for every store update, not only config changes, so comparing references
// avoids re-serializing an unchanged config each time.
let persistedConfig: unknown

// Mirror the config slice into localStorage under the 'config' key.
store.subscribe(() => {
  const config = store.getState().config
  if (config === persistedConfig) {
    return
  }
  persistedConfig = config
  try {
    localStorage.setItem('config', JSON.stringify(config))
  } catch (e) {
    // Storage can be unavailable or full; log (as getConfig does) instead
    // of letting the error escape from the subscriber.
    console.log(e)
  }
})

setupListeners(store.dispatch)

// Read the persisted config from localStorage. Returns undefined when
// nothing is stored or when reading/parsing fails (the error is logged).
const getConfig = () => {
  try {
    const persistedState = localStorage.getItem('config')
    if (persistedState) {
      return JSON.parse(persistedState)
    }
  }
  catch (e) {
    console.log(e)
  }
}

// Apply a previously persisted config, if any, on top of the defaults.
const storedConfig = getConfig()
if (storedConfig) {
  store.dispatch(hydrate(storedConfig))
}

export type RootState = ReturnType<typeof store.getState>
export type AppDispatch = typeof store.dispatch
57 |
58 |
--------------------------------------------------------------------------------
/frontend/src/styles/App.scss:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/frontend/src/styles/App.scss
--------------------------------------------------------------------------------
/frontend/src/styles/Article.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | .articleDrawer {
4 | margin: $spacer;
5 | overflow-y: auto;
6 | }
7 |
8 | .articleText {
9 | // font-size: 1.3em;
10 | // font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
11 | // line-height: 1.4;
12 |
13 | :global {
14 | .markup {
15 | margin-top: -2px;
16 | margin-bottom: -2px;
17 | padding-top: 2px;
18 | padding-bottom: 2px;
19 | display: inline-block;
20 | }
21 |
22 | .markup1 {
23 | background-color: $blue5;
24 | }
25 |
26 | .markup2 {
27 | background-color: $green5;
28 | }
29 |
30 | .markup3 {
31 | background-color: $orange5;
32 | }
33 |
34 | .markup4 {
35 | background-color: $indigo5;
36 | }
37 |
38 | .markup5 {
39 | background-color: $lime5;
40 | }
41 | }
42 | }
--------------------------------------------------------------------------------
/frontend/src/styles/Cluster.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | .clusterDrawer {
4 | margin: $spacer;
5 | overflow-y: auto;
6 | }
--------------------------------------------------------------------------------
/frontend/src/styles/Footer.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | .footer {
4 | margin-top: $spacer;
5 | padding-top: $spacer * 2;
6 | padding-bottom: $spacer * 3;
7 | // background-color: $dark-gray3;
8 | color: $gray1;
9 | }
--------------------------------------------------------------------------------
/frontend/src/styles/Layout.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | .page {
4 | min-height: 90vh;
5 | }
6 |
7 | .content {
8 | margin-top: $spacer * 2;
9 | }
--------------------------------------------------------------------------------
/frontend/src/styles/Linker.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | // .articlePreview {
4 | // max-height: 70vh;
5 | // overflow-y: scroll;
6 | // }
7 |
8 | .banner {
9 | margin-bottom: $spacer;
10 | }
11 |
12 | .phrase {
13 | text-align: center;
14 | font-size: 1.4em;
15 | // background-color: $light-gray2;
16 | // background-color: white;
17 | // padding-top: $spacer;
18 | margin-bottom: $spacer * 2;
19 | // border-radius: $spacer;
20 |
21 | :global {
22 | .bp4-icon {
23 | vertical-align: middle;
24 | }
25 | }
26 | }
27 |
28 | .phraseSpan {
29 | // text-decoration: underline;
30 | // font-weight: bold;
31 | }
--------------------------------------------------------------------------------
/frontend/src/styles/Navbar.module.scss:
--------------------------------------------------------------------------------
1 | .navContainered {
2 | padding-left: 0 !important;
3 | padding-right: 0 !important;
4 | }
--------------------------------------------------------------------------------
/frontend/src/styles/Story.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | .storyCardArea {
4 | display: flex;
5 | // flex-direction: column;
6 | flex-wrap: wrap;
7 | justify-content: space-between;
8 | }
9 |
10 | .storyCard {
11 | flex: 0 1 30%;
12 | // margin-left: $spacer;
13 | margin-bottom: $spacer * 2;
14 | }
15 |
16 | .nomNom {
17 | margin-top: $spacer * 4;
18 | margin-bottom: $spacer * 6;
19 | }
--------------------------------------------------------------------------------
/frontend/src/styles/index.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | @import "~normalize.css";
4 | @import "~@blueprintjs/core/lib/css/blueprint.css";
5 | @import "~@blueprintjs/icons/lib/css/blueprint-icons.css";
6 | @import "~@blueprintjs/select/lib/css/blueprint-select.css";
7 | @import "~@blueprintjs/popover2/lib/css/blueprint-popover2.css";
8 | @import "@react-sigma/core/lib/react-sigma.min.css";
9 |
10 | .page-container {
11 | margin: 0 auto;
12 | max-width: $max-container-width;
13 | // margin-left: $spacer * 2;
14 | // margin-right: $spacer * 2;
15 | line-height: 1.4;
16 | }
17 |
18 | .section {
19 | // padding-left: $spacer;
20 | // padding-right: $spacer;
21 | padding-bottom: $spacer * 1;
22 | }
23 |
24 | html {
25 | scroll-behavior: smooth;
26 | font-size: $default-font-size;
27 | color: $default-text-color;
28 | background-color: $light-gray4;
29 | }
30 |
31 | table.wide {
32 | width: 100%;
33 | }
34 |
35 | td .bp4-control,
36 | th .bp4-control {
37 | margin-bottom: 0 !important;
38 | // padding: 0 !important;
39 | }
40 |
41 | td.numeric,
42 | th.numeric {
43 | text-align: right !important;
44 |
45 | // button.bp4-button {
46 | // float: right;
47 | // }
48 | }
49 |
50 | .page-column-area {
51 | display: flex;
52 | flex-direction: row;
53 | width: 100%;
54 | }
55 |
56 | .page-column {
57 | min-width: 30%;
58 | max-width: 50%;
59 | padding-right: 2 * $spacer
60 | }
61 |
62 | .page-column-wide {
63 | width: 100%;
64 | padding-right: 2 * $spacer
65 | }
66 |
67 | .bp4-navbar-heading a,
68 | .bp4-navbar-heading a:hover {
69 | color: inherit;
70 | text-decoration: none;
71 | }
72 |
73 | .spaced-icon {
74 | margin-right: 0.3em;
75 | }
76 |
77 | .tab-tag {
78 | margin-left: $spacer * 0.3;
79 | // background-color: $gray3;
80 | background-color: $blue2;
81 | }
82 |
83 | [aria-disabled=true] .tab-tag {
84 | background-color: $gray3;
85 | }
86 |
87 | .portal-z-top {
88 | z-index: 9999;
89 | }
90 |
91 | h1 {
92 | clear: both;
93 | padding-top: $spacer;
94 |
95 | .bp4-icon {
96 | vertical-align: middle;
97 | }
98 | }
--------------------------------------------------------------------------------
/frontend/src/styles/util.module.scss:
--------------------------------------------------------------------------------
1 | @import "./variables.scss";
2 |
3 | .headingActions {
4 | display: block;
5 | float: right;
6 | }
7 |
8 | .screenContent {
9 | margin-top: -1 * $spacer;
10 | }
11 |
12 | .pagination {
13 | margin-top: $spacer;
14 | }
15 |
16 | .clusterType {
17 | font-size: 0.8em;
18 | font-weight: bold;
19 | display: inline-block;
20 | padding: $spacer * 0.2;
21 | padding-left: $spacer * 0.5;
22 | padding-right: $spacer * 0.5;
23 | border-radius: $spacer * 0.5;
24 | color: $light-gray4;
25 |
26 | }
27 |
28 | .linkType {
29 | font-size: 0.8em;
30 | font-weight: bold;
31 | display: inline-block;
32 | padding: $spacer * 0.2;
33 | padding-left: $spacer * 0.5;
34 | padding-right: $spacer * 0.5;
35 | border-radius: $spacer * 0.5;
36 | color: $light-gray4;
37 | background-color: $dark-gray3;
38 | }
39 |
40 | .UNRELATED {
41 | background-color: $gray3;
42 | }
43 |
44 | .LOCATED,
45 | .WITHIN {
46 | background-color: $forest3;
47 | }
48 |
49 | .ANTAGONIST {
50 | background-color: $vermilion3;
51 | }
--------------------------------------------------------------------------------
/frontend/src/styles/variables.scss:
--------------------------------------------------------------------------------
1 | @import "~@blueprintjs/core/lib/scss/variables";
2 |
3 | // $max-text-width: 60em;
4 | $max-container-width: 950px;
5 | $spacer: $pt-grid-size;
6 | $default-font-size: 15px;
7 | $default-text-color: $gray1;
--------------------------------------------------------------------------------
/frontend/src/types.ts:
--------------------------------------------------------------------------------
1 |
2 | export interface IResponse {
3 | status: string
4 | debug_msg?: string
5 | }
6 |
// Paged listing response; T is the type of each entry in `results`.
export interface IListingResponse<T> extends IResponse {
  limit: number
  offset: number
  total: number
  results: T[]
}
13 |
14 | export interface IArticle {
15 | id: string
16 | title: string
17 | site: string
18 | url: string
19 | language: string
20 | tags: number
21 | mentions: number
22 | }
23 |
24 | export interface IArticleDetails extends IArticle {
25 | text: string
26 | }
27 |
28 | export interface IStoryMutation {
29 | title: string
30 | summary: string
31 | }
32 |
33 | export interface IStory extends IStoryMutation {
34 | id: number
35 | }
36 |
37 | export interface IStoryArticleToggle {
38 | story: number
39 | article: string
40 | }
41 |
42 | export interface IStoryArticleImport {
43 | story: number
44 | url: string
45 | }
46 |
47 | export interface IClusterBase {
48 | id: string
49 | label: string
50 | type: string
51 | }
52 |
53 | export interface ICluster extends IClusterBase {
54 | articles: number
55 | }
56 |
57 | export interface IClusterDetails extends ICluster {
58 | labels: string[]
59 | }
60 |
61 | export interface IClusterPair {
62 | left: IClusterBase,
63 | right: IClusterBase,
64 | link_types: string[]
65 | articles: number
66 | }
67 |
68 | export interface IRelatedCluster extends IClusterBase {
69 | articles: number
70 | link_types: string[]
71 | }
72 |
73 | export interface ISimilarCluster extends IClusterBase {
74 | common: string[]
75 | common_count: number
76 | }
77 |
78 | export interface IClusterMerge {
79 | anchor: string
80 | other: string[]
81 | }
82 |
83 | export interface IUntagArticle {
84 | cluster: string
85 | article: string
86 | }
87 |
88 | export interface ILinkPredict {
89 | anchor: string
90 | other: string
91 | }
92 |
93 | export interface ISite {
94 | site: string
95 | articles: number
96 | }
97 |
98 | export interface ILink {
99 | source: string
100 | source_cluster: string
101 | target: string
102 | target_cluster: string
103 | type: string
104 | user?: string
105 | timestamp?: string
106 | }
107 |
108 |
109 | export interface ILinkPrediction {
110 | source: IClusterDetails
111 | target: IClusterDetails
112 | type: string
113 | }
114 |
115 |
116 | export interface ILinkType {
117 | name: string
118 | directed: boolean
119 | label: string
120 | phrase: string
121 | source_type: string
122 | target_type: string
123 | }
124 |
125 | export interface IClusterType {
126 | name: string
127 | label: string
128 | plural: string
129 | parent?: string
130 | color: string
131 | icon: string
132 | }
133 |
134 | export interface IOntology {
135 | link_types: ILinkType[]
136 | cluster_types: IClusterType[]
137 | }
138 |
--------------------------------------------------------------------------------
/frontend/src/util.ts:
--------------------------------------------------------------------------------
1 | import queryString from 'query-string';
2 | import { useSearchParams } from 'react-router-dom';
3 | // import { useLocation } from 'react-router-dom';
4 |
5 | import { IClusterBase } from "./types";
6 |
7 |
/**
 * Pick the first non-blank string out of a value or an array of values.
 *
 * A non-array value is treated as a one-element array. Each entry is
 * converted to a string and trimmed; the first one that is non-empty after
 * trimming is returned. Scanning stops with `undefined` as soon as a
 * `null`/`undefined` entry is met, and `undefined` is also returned when
 * no entry qualifies.
 */
export function asString(value: any): string | undefined {
  const candidates: any[] = Array.isArray(value) ? value : [value];
  for (const candidate of candidates) {
    if (candidate === null || candidate === undefined) {
      return undefined;
    }
    const text: string = (candidate + '').trim();
    if (text.length > 0) {
      return text;
    }
  }
  return undefined;
}
24 |
/**
 * Return a copy of `items` with `value` toggled: appended when absent,
 * otherwise with its first occurrence removed. `items` is not modified.
 */
export function listToggle<T>(items: T[], value: T): T[] {
  const index = items.indexOf(value);
  if (index === -1) {
    return [...items, value];
  }
  return [...items.slice(0, index), ...items.slice(index + 1)];
}
35 |
/** Build the in-app path of a cluster's page from its id. */
export function getClusterLink(cluster: IClusterBase): string {
  const { id } = cluster;
  return '/clusters/' + id;
}
39 |
/**
 * Build the in-app linker URL for an anchor cluster.
 *
 * Without `other` the URL points at `/linker/related` with the `anchor`
 * and `story` query parameters; with `other` it points at `/linker` and
 * additionally carries `other`.
 */
export function getLinkLoomLink(anchor: IClusterBase, other?: IClusterBase, story?: number): string {
  const query: Record<string, string | number | undefined> = { anchor: anchor.id };
  let url = `/linker/related`;
  if (other !== undefined) {
    url = `/linker`;
    query.other = other.id;
  }
  query.story = story;
  return queryString.stringifyUrl({ 'url': url, 'query': query });
}
52 |
/**
 * Read `<prefix>.limit` and `<prefix>.offset` from the current URL search
 * parameters.
 *
 * @param prefix namespace prepended to the parameter names
 * @param limit  value used for `limit` when the parameter is absent, empty
 *               or not a number (default 15)
 * @returns `{ limit, offset }`; `offset` falls back to 0
 */
export function useListingPagination(prefix: string, limit: number = 15) {
  const [params] = useSearchParams();
  // A non-numeric value (e.g. `?x.limit=abc`) would otherwise come back as
  // NaN, so it is replaced by the fallback.
  const readInt = (name: string, fallback: number): number => {
    const parsed = parseInt(params.get(`${prefix}.${name}`) || `${fallback}`, 10);
    return isNaN(parsed) ? fallback : parsed;
  };
  return {
    limit: readInt('limit', limit),
    offset: readInt('offset', 0)
  }
}
--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es5",
4 | "lib": [
5 | "dom",
6 | "dom.iterable",
7 | "esnext"
8 | ],
9 | "allowJs": true,
10 | "skipLibCheck": true,
11 | "esModuleInterop": true,
12 | "allowSyntheticDefaultImports": true,
13 | "strict": true,
14 | "forceConsistentCasingInFileNames": true,
15 | "noFallthroughCasesInSwitch": true,
16 | "module": "esnext",
17 | "moduleResolution": "node",
18 | "resolveJsonModule": true,
19 | "isolatedModules": true,
20 | "noEmit": true,
21 | "jsx": "preserve"
22 | },
23 | "include": [
24 | "src"
25 | ]
26 | }
27 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
# Read the long description with an explicit encoding so the build does not
# depend on the platform's default locale encoding.
with open("README.md", encoding="utf-8") as f:
    long_description = f.read()


setup(
    name="storyweb",
    version="0.0.1",
    description="Extract actor networks from journalistic reporting.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    keywords="ner spacy journalism text nlp graph entities",
    author="Friedrich Lindenberg",
    author_email="friedrich@pudo.org",
    url="https://github.com/opensanctions/storyweb",
    license="MIT",
    packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
    namespace_packages=[],
    include_package_data=True,
    # NOTE(review): package_data patterns are resolved relative to each
    # package directory, so the "storyweb/" prefix under the "" key looks
    # suspicious -- confirm these files actually end up in the built wheel.
    package_data={"": ["storyweb/py.typed", "storyweb/data/*"]},
    zip_safe=False,
    install_requires=[
        "sqlalchemy",
        "psycopg2",
        "pydantic",
        "pydantic_yaml",
        "articledata",
        "pantomime",
        "orjson",
        "fastapi",
        "networkx",
        "spacy",
        "python-levenshtein",
        "followthemoney",
        "languagecodes",
        "fasttext",
        "uvicorn",
        "trafilatura",
        "charset-normalizer",
        "click >= 8.0.0, < 8.2.0",
    ],
    tests_require=[],
    entry_points={
        "console_scripts": [
            "storyweb = storyweb.cli:cli",
        ],
    },
    extras_require={
        "dev": [
            "wheel>=0.29.0",
            "twine",
            "mypy",
            "flake8>=2.6.0",
            "pytest",
            "pytest-cov",
            "coverage>=4.1",
            "types-setuptools",
            "types-requests",
        ],
    },
)
63 |
--------------------------------------------------------------------------------
/stories.md:
--------------------------------------------------------------------------------
1 |
2 | ### Wirecard
3 |
4 | * https://www.ft.com/content/284fb1ad-ddc0-45df-a075-0709b36868db
5 | * https://www.reuters.com/article/us-germany-wirecard-inquiry-timeline-idUSKBN2B811J
6 | * https://www.accountancycareers.co.uk/2020/06/the-wirecard-scandal-explained/
7 | * https://www.fxcm.com/markets/insights/the-wirecard-scandal/
8 | * https://en.wikipedia.org/wiki/Wirecard_scandal
9 | * https://web.archive.org/web/20200630075122/https://www.ft.com/content/284fb1ad-ddc0-45df-a075-0709b36868db
10 | * https://www.dw.com/en/fincen-turkey-aktif-bank-wirecard/a-54991398
11 | * https://www.straitstimes.com/business/banking/wirecards-creditors-set-for-battle-over-missing-billions
12 | * https://www.straitstimes.com/business/police-raid-wirecard-entities-in-singapore-after-reports-of-fraud-allegations-at-payments
13 | * https://web.archive.org/web/20210426080227/https://www.reuters.com/article/uk-wirecard-accounts-idUKKBN2424I3
14 | * https://web.archive.org/web/20200703095157/https://www.washingtonpost.com/business/how-german-fintech-darling-wirecard-fell-from-grace/2020/06/23/6278e336-b564-11ea-9a1d-d3db1cbe07ce_story.html
15 | * https://web.archive.org/web/20200630052127/https://ffj-online.org/2018/01/23/wirecard-ag-the-great-indian-shareholder-robbery/
16 | * https://www.bbc.com/news/world-europe-63893933
17 | * https://www.bbc.com/news/business-53176003
18 | * https://www.tagesschau.de/wirtschaft/unternehmen/wirecard-aussage-bellenhaus-101.html
19 | * https://www.tagesschau.de/wirtschaft/wirecard-gerichtsprozess-faq-101.html
20 | * https://www.tagesschau.de/wirtschaft/wirecard-prozess-verteidigung-101.html
21 |
22 |
23 | ### Russian Laundromat
24 |
25 | * https://www.occrp.org/en/russianlaundromat/
26 | * https://www.occrp.org/en/laundromat/grand-theft-moldova/
27 | * https://www.occrp.org/en/laundromat/poor-town-controlled-billions/
28 | * https://www.occrp.org/en/laundromat/follow-the-money-follow-the-banker/
29 | * https://www.occrp.org/en/laundromat/russian-laundromat/
30 | * https://www.occrp.org/en/laundromat/the-russian-banks-and-putins-cousin/
31 | * https://www.occrp.org/en/laundromat/kempinski-negotiating-hotel-deal-with-alexander-grigoriev/
32 | * https://www.occrp.org/en/laundromat/the-laundry-cycle-from-start-to-finish/
33 | * https://www.occrp.org/en/laundromat/the-20-billion-bank-in-the-country-of-the-poor/
34 | * https://www.occrp.org/en/laundromat/latvian-bank-was-laundering-tool/
35 | * https://www.theguardian.com/world/2017/mar/20/the-global-laundromat-how-did-it-work-and-who-benefited
36 | * https://www.occrp.org/en/laundromat/the-russian-laundromat-exposed/
37 | * https://www.occrp.org/en/loosetobacco/china-tobacco-goes-global/romanian-prosecutors-probe-china-tobacco-for-millions-of-disappeared-cigarettes
38 | * https://www.occrp.org/en/investigations/bangladeshi-politician-close-to-prime-minister-hasina-secretly-owns-over-4-million-in-new-york-real-estate
39 | * https://www.occrp.org/en/laundromat/profiles/frivent-GmbH
40 | * https://www.occrp.org/en/laundromat/profiles/handelsgericht-wien
41 | * https://www.occrp.org/en/laundromat/profiles/rudolf-and-Anna
42 | * https://www.occrp.org/en/laundromat/profiles/sergey-magin-and-a-japanese-electronics-manufacturer
43 | * https://www.occrp.org/en/laundromat/profiles/the-american-international-school-vienna
44 | * https://www.occrp.org/en/laundromat/profiles/va-intertrading
45 | * https://www.occrp.org/en/laundromat/profiles/dorville-ltd
46 | * https://www.occrp.org/en/laundromat/profiles/intradecom
47 | * https://www.occrp.org/en/laundromat/profiles/montinvest-AD
48 | * https://www.occrp.org/en/laundromat/profiles/pavel-flider-and-trident-international
49 | * https://www.occrp.org/en/laundromat/senior-moldovan-judges-daughter-lived-in-posh-london-flat
50 | * https://www.occrp.org/en/laundromat/regulator-says-latvia-finally-cleaning-up-bad-banks/
51 | * https://www.occrp.org/en/laundromat/the-banks/
52 | * https://www.occrp.org/en/laundromat/two-huge-scams-intersect-at-one-moldovan-businessman/
53 | * https://www.occrp.org/en/laundromat/how-veaceslav-platons-high-life-in-kyiv-came-crashing-down/
54 | * https://www.occrp.org/en/investigations/5617-platon-s-money
55 | * https://www.occrp.org/en/laundromat/the-russian-laundromat-superusers-revealed/
56 | * https://en.wikipedia.org/wiki/Russian_Laundromat
57 | * https://de.wikipedia.org/wiki/Russischer_Waschsalon
--------------------------------------------------------------------------------
/storyweb/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/__init__.py
--------------------------------------------------------------------------------
/storyweb/app.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI
2 | from fastapi.middleware.cors import CORSMiddleware
3 | from fastapi.staticfiles import StaticFiles
4 | from starlette.exceptions import HTTPException
5 | from starlette.responses import Response
6 | from starlette.types import Scope
7 |
8 | from storyweb.routes import links, stories, articles, clusters, system
9 |
10 |
# The ASGI application object. The ReDoc documentation page is served from
# the same versioned prefix as the API routes below.
app = FastAPI(
    title="storyweb",
    description="make networks from text",
    redoc_url="/api/1/docs",
)
# CORS: every origin, method and header is accepted, but credentialed
# requests are switched off.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Each route module contributes one router; all of them share the /api/1 prefix.
app.include_router(system.router, prefix="/api/1")
app.include_router(links.router, prefix="/api/1")
app.include_router(stories.router, prefix="/api/1")
app.include_router(articles.router, prefix="/api/1")
app.include_router(clusters.router, prefix="/api/1")
28 |
29 |
class SPAStaticFiles(StaticFiles):
    """Static file handler with a single-page-application fallback.

    A lookup that ends in a 404 is answered with ``index.html`` instead,
    unless the requested path starts with ``api``. Every other HTTP error
    is passed through unchanged.
    """

    async def get_response(self, path: str, scope: Scope) -> Response:
        try:
            response = await super().get_response(path, scope)
        except HTTPException as exc:
            not_found = exc.status_code == 404
            if not not_found or path.startswith("api"):
                # Not a candidate for the fallback: re-raise as-is.
                raise
            response = await super().get_response("index.html", scope)
        return response
39 |
40 |
41 | app.mount("/", SPAStaticFiles(directory="frontend/build", html=True), name="frontend")
42 |
--------------------------------------------------------------------------------
/storyweb/clean.py:
--------------------------------------------------------------------------------
1 | import re
2 | import Levenshtein
3 | from typing import List, Optional, Union
4 | from normality import collapse_spaces
5 |
# Honorifics, articles and particles that are stripped from the start of a
# name. Matching is case-insensitive (see the flags on PREFIXES below).
PREFIXES_RAW_LIST = [
    "Mr",
    "Ms",
    "Mrs",
    "Mister",
    "Miss",
    "Madam",
    "Madame",
    "Monsieur",
    "Mme",
    "Mmme",
    "Herr",
    "Hr",
    "Frau",
    "Fr",
    "The",
    "Fräulein",
    "Senor",
    "Senorita",
    "Sr",
    "Sir",
    "Lady",
    "de",
    "of",
]
PREFIXES_RAW = "|".join(PREFIXES_RAW_LIST)
# Pattern layout:
#   ^\W*                  leading non-word characters
#   ((prefix)\.?\s+)*     any number of prefixes, each optionally followed by "."
#   (?P<term>.*?)         the name itself, captured as the named group "term"
#   (['’]s)?\W*$          an optional possessive suffix and trailing non-word characters
NAME_PATTERN_ = r"^\W*((%s)\.?\s+)*(?P<term>.*?)([\'’]s)?\W*$"
NAME_PATTERN_ = NAME_PATTERN_ % PREFIXES_RAW
PREFIXES = re.compile(NAME_PATTERN_, re.I | re.U)
36 |
37 |
def clean_entity_name(name: str) -> Optional[str]:
    """Strip prefixes and suffixes from a name and collapse its whitespace.

    When the ``PREFIXES`` pattern matches, only its ``term`` group is kept;
    otherwise the name is used unchanged. The result is whatever
    ``collapse_spaces`` returns for that text.
    """
    found = PREFIXES.match(name)
    term = name if found is None else found.group("term")
    return collapse_spaces(term)
43 |
44 |
def most_common(texts: List[str]) -> str:
    """Return the value that occurs most often in ``texts``.

    Counting is done in a single pass. When several values share the highest
    count, the one that appears first in ``texts`` wins, so the result does
    not depend on set ordering.

    Raises:
        ValueError: if ``texts`` is empty.
    """
    from collections import Counter

    counts = Counter(texts)
    # Counter keeps first-seen order and max() returns the first maximal item.
    return max(counts, key=counts.__getitem__)
48 |
49 |
def pick_name(names: List[Union[str, bytes]]) -> str:
    """Pick one representative name: the result of ``Levenshtein.setmedian``."""
    median = Levenshtein.setmedian(names)
    return median
52 |
--------------------------------------------------------------------------------
/storyweb/cli.py:
--------------------------------------------------------------------------------
1 | import click
2 | import logging
3 | from pathlib import Path
4 | from typing import Optional
5 | from networkx.readwrite.gexf import write_gexf
6 |
7 | from storyweb.db import create_db, engine
8 | from storyweb.logic.links import auto_merge, story_merge
9 | from storyweb.logic.stories import toggle_story_article
10 | from storyweb.logic.graph import generate_graph
11 | from storyweb.parse import import_article_by_url
12 | from storyweb.parse.pipeline import load_articles
13 |
14 |
log = logging.getLogger(__name__)

# Parameter type for files the CLI reads from.
InPath = click.Path(dir_okay=False, readable=True, path_type=Path)
# Parameter type for files the CLI writes to: an existing target only has to
# be writable, it does not need to be readable.
OutPath = click.Path(dir_okay=False, writable=True, readable=False, path_type=Path)
19 |
20 |
# Root command group; the commands below attach to it via @cli.command.
@click.group(help="Storyweb CLI")
def cli() -> None:
    # Configure logging at INFO level.
    logging.basicConfig(level=logging.INFO)
24 |
25 |
# `import` command: hands the given file path to load_articles.
@cli.command("import", help="Import articles into the DB")
@click.argument("articles", type=InPath)
def parse(articles: Path) -> None:
    load_articles(articles)
30 |
31 |
# `import-url` command: imports one article by URL and, when a story ID is
# given, attaches the article to that story.
@cli.command("import-url", help="Load a single news story by URL")
@click.argument("url", type=str)
@click.option("-s", "--story", "story", help="Story ID", type=int)
def import_url(url: str, story: Optional[int] = None) -> None:
    # The callback has its own name so that it no longer redefines the
    # `parse` callback of the `import` command; the CLI command name
    # ("import-url") is unchanged.
    with engine.begin() as conn:
        article_id = import_article_by_url(conn, url)
        if story is not None:
            story_merge(conn, story, article_id)
            # delete_existing=False: an article already in the story stays there.
            toggle_story_article(conn, story, article_id, delete_existing=False)
41 |
42 |
# `graph` command: builds the graph with generate_graph (no story filter is
# passed) and writes it to the given path as GEXF.
@cli.command("graph", help="Export an entity graph")
@click.argument("graph_path", type=OutPath)
def export_graph(graph_path: Path) -> None:
    with engine.begin() as conn:
        graph = generate_graph(conn)
        write_gexf(graph, graph_path)
49 |
50 |
# `compute` command: pretty-prints the result of one hard-coded
# list_story_pairs call.
@cli.command("compute", help="Run backend computations")
def compute() -> None:
    # Imported here rather than at module level; only this command uses them.
    from pprint import pprint
    from storyweb.models import Listing
    from storyweb.logic.clusters import list_story_pairs

    with engine.begin() as conn:
        # print(compute_cluster(conn, "ffd364472a999c3d1001f5910398a53997ae0afe"))
        listing = Listing(limit=5, offset=0, sort_direction="desc")
        # NOTE(review): the literal 4 is presumably a story ID — confirm
        # against list_story_pairs.
        resp = list_story_pairs(conn, listing, 4)
        pprint(resp.dict())
62 |
63 |
# `auto-merge` command: runs auto_merge inside one transaction.
@cli.command("auto-merge", help="Automatically merge on fingerprints")
@click.option(
    "-f",
    "--force",
    "force",
    help="Do not check existing links",
    default=False,
    is_flag=True,
)
def auto_merge_(force: bool) -> None:
    with engine.begin() as conn:
        # --force is documented as "Do not check existing links", so the flag
        # has to switch check_links off rather than on.
        auto_merge(conn, check_links=not force)
76 |
77 |
# `init` command: delegates to create_db.
@cli.command("init", help="Initialize the database")
def init() -> None:
    create_db()
81 |
82 |
# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
85 |
--------------------------------------------------------------------------------
/storyweb/db.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from sqlalchemy import MetaData, create_engine
3 | from sqlalchemy import Table, Column, Integer, Unicode, DateTime, Float
4 | from sqlalchemy.engine import Connection
5 | from sqlalchemy.dialects.postgresql import insert as upsert
6 |
7 | from storyweb import settings
8 |
# Alias for the SQLAlchemy connection type.
Conn = Connection
# Column width used for tag IDs, cluster IDs and link endpoints.
KEY_LEN = 40

log = logging.getLogger(__name__)
# Engine and metadata are created at import time from settings.DB_URL.
engine = create_engine(settings.DB_URL)
meta = MetaData(bind=engine)

# Names exported by `from storyweb.db import *`.
__all__ = ["Conn", "upsert", "create_db"]
17 |
18 |
def create_db() -> None:
    """Create every table registered on ``meta`` that does not exist yet.

    The engine is passed explicitly so the call does not depend on the
    metadata being bound to an engine.
    """
    meta.create_all(bind=engine, checkfirst=True)
21 |
22 |
# One row per article, keyed by a string ID. `tags` and `mentions` are
# integer columns that default to 0.
article_table = Table(
    "article",
    meta,
    Column("id", Unicode(255), primary_key=True),
    Column("site", Unicode(255), index=True, nullable=False),
    Column("url", Unicode, nullable=True),
    Column("title", Unicode, nullable=True),
    Column("language", Unicode(10), nullable=True),
    Column("text", Unicode, nullable=True),
    Column("tags", Integer, default=0),
    Column("mentions", Integer, default=0),
)

# One row per story, keyed by an integer ID.
story_table = Table(
    "story",
    meta,
    Column("id", Integer, primary_key=True),
    Column("title", Unicode, nullable=True),
    Column("summary", Unicode, nullable=True),
)

# Story/article association; the primary key is the (article, story) pair.
story_article_table = Table(
    "story_article",
    meta,
    Column("article", Unicode(255), primary_key=True),
    Column("story", Integer, primary_key=True),
)

# Sentences of an article; the primary key is (article, sequence).
sentence_table = Table(
    "sentence",
    meta,
    Column("article", Unicode(255), primary_key=True),
    Column("sequence", Integer, primary_key=True),
    Column("text", Unicode),
)

# Tags, keyed by a KEY_LEN-wide ID. `cluster`, `article` and `fingerprint`
# are indexed. Each row carries both its own type/label and a
# cluster_type/cluster_label.
tag_table = Table(
    "tag",
    meta,
    Column("id", Unicode(KEY_LEN), primary_key=True),
    Column("cluster", Unicode(KEY_LEN), index=True),
    Column("article", Unicode(255), index=True),
    Column("fingerprint", Unicode(1024), index=True),
    Column("type", Unicode(10)),
    Column("cluster_type", Unicode(10)),
    Column("label", Unicode),
    Column("cluster_label", Unicode),
    Column("count", Integer),
    Column("frequency", Float),
)

# Tag/sentence association; the primary key is (article, sentence, tag).
tag_sentence_table = Table(
    "tag_sentence",
    meta,
    Column("article", Unicode(255), primary_key=True),
    Column("sentence", Integer, primary_key=True),
    Column("tag", Unicode(KEY_LEN), primary_key=True),
)

# Links; the primary key is the (source, target) pair, so there is at most
# one row per ordered pair. The *_cluster columns are stored alongside.
link_table = Table(
    "link",
    meta,
    Column("source", Unicode(KEY_LEN), primary_key=True),
    Column("source_cluster", Unicode(KEY_LEN)),
    Column("target", Unicode(KEY_LEN), primary_key=True),
    Column("target_cluster", Unicode(KEY_LEN)),
    Column("type", Unicode(255)),
    Column("user", Unicode(255), nullable=True),
    Column("timestamp", DateTime),
)
93 |
--------------------------------------------------------------------------------
/storyweb/logic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/logic/__init__.py
--------------------------------------------------------------------------------
/storyweb/logic/articles.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Iterable, List, Optional
3 | from sqlalchemy.sql import select, delete, insert, func
4 |
5 | from storyweb.db import Conn, upsert
6 | from storyweb.db import article_table, sentence_table
7 | from storyweb.db import tag_table, tag_sentence_table
8 | from storyweb.db import story_article_table
9 | from storyweb.logic.util import count_stmt
10 | from storyweb.models import (
11 | ArticleDetails,
12 | Link,
13 | Article,
14 | Listing,
15 | ListingResponse,
16 | Sentence,
17 | Site,
18 | Tag,
19 | TagSentence,
20 | )
21 |
22 | log = logging.getLogger(__name__)
23 |
24 |
def list_sites(conn: Conn, listing: Listing) -> ListingResponse[Site]:
    """List the distinct article sites with their article counts.

    The total is the number of distinct sites; the page itself is grouped and
    ordered by site and cut with the listing's limit and offset.
    """
    site_col = article_table.c.site
    base = select(
        site_col,
        func.count(article_table.c.id).label("articles"),
    )
    # The total is computed before grouping and paging are applied.
    total = count_stmt(conn, base, func.distinct(site_col))
    page = (
        base.group_by(site_col)
        .order_by(site_col)
        .limit(listing.limit)
        .offset(listing.offset)
    )
    rows = conn.execute(page).fetchall()
    sites = [Site.parse_obj(row) for row in rows]
    return ListingResponse[Site](
        total=total,
        debug_msg=str(page),
        limit=listing.limit,
        offset=listing.offset,
        results=sites,
    )
43 |
44 |
def list_articles(
    conn: Conn,
    listing: Listing,
    site: Optional[str] = None,
    story: Optional[int] = None,
    query: Optional[str] = None,
    clusters: Optional[List[str]] = None,
) -> ListingResponse[Article]:
    """List articles, optionally filtered, one page at a time.

    Args:
        conn: database connection to run the queries on.
        listing: paging (limit/offset) and sorting (sort_field/sort_direction).
        site: keep only articles of this site; blank values are ignored.
        story: keep only articles attached to this story.
        query: case-insensitive substring filter on the title; blank values
            are ignored.
        clusters: keep only articles that have a tag in every listed cluster.

    Returns:
        The requested page together with the total number of matching
        articles.
    """
    # Selected and grouped columns are the same set, so define them once.
    columns = (
        article_table.c.id,
        article_table.c.title,
        article_table.c.url,
        article_table.c.language,
        article_table.c.site,
        article_table.c.tags,
        article_table.c.mentions,
    )
    stmt = select(*columns).select_from(article_table)
    if site is not None and site.strip():
        stmt = stmt.where(article_table.c.site == site)
    if query is not None and query.strip():
        stmt = stmt.where(article_table.c.title.ilike(f"%{query}%"))
    if story is not None:
        stmt = stmt.join(
            story_article_table,
            story_article_table.c.article == article_table.c.id,
        )
        stmt = stmt.where(story_article_table.c.story == story)
    # One aliased join per cluster: an article must match all of them.
    for cluster in clusters or ():
        cluster_t = tag_table.alias()
        stmt = stmt.join(cluster_t, cluster_t.c.article == article_table.c.id)
        stmt = stmt.where(cluster_t.c.cluster == cluster)

    # The joins can repeat an article, hence the distinct count.
    total = count_stmt(conn, stmt, func.distinct(article_table.c.id))
    if listing.sort_field is not None:
        column = article_table.c[listing.sort_field]
        if listing.sort_direction == "desc":
            stmt = stmt.order_by(column.desc())
        else:
            stmt = stmt.order_by(column.asc())
    # Tie-breaker: without a total order, LIMIT/OFFSET pages are not stable.
    stmt = stmt.order_by(article_table.c.id.asc())
    stmt = stmt.group_by(*columns)
    stmt = stmt.limit(listing.limit).offset(listing.offset)
    cursor = conn.execute(stmt)
    results = [Article.parse_obj(r) for r in cursor.fetchall()]
    return ListingResponse[Article](
        total=total,
        debug_msg=str(stmt),
        limit=listing.limit,
        offset=listing.offset,
        results=results,
    )
104 |
105 |
def fetch_article(conn: Conn, article_id: str) -> Optional[ArticleDetails]:
    """Load one article by ID; returns ``None`` when no row matches."""
    lookup = select(article_table).where(article_table.c.id == article_id).limit(1)
    row = conn.execute(lookup).fetchone()
    return None if row is None else ArticleDetails.parse_obj(row)
115 |
116 |
def save_article(conn: Conn, article: ArticleDetails) -> None:
    """Insert the article, or update it when its ID already exists.

    On conflict only site, url, title, language and text are overwritten.
    """
    insert_stmt = upsert(article_table).values([article.dict()])
    incoming = insert_stmt.excluded
    refreshed = {
        "site": incoming.site,
        "url": incoming.url,
        "title": incoming.title,
        "language": incoming.language,
        "text": incoming.text,
    }
    conn.execute(
        insert_stmt.on_conflict_do_update(index_elements=["id"], set_=refreshed)
    )
128 |
129 |
def save_extracted(
    conn: Conn,
    article: ArticleDetails,
    sentences: Iterable[Sentence],
    tag_sentences: Iterable[TagSentence],
    tags: Iterable[Tag],
) -> None:
    """Store an article together with its extraction results.

    Sentences and tag/sentence rows of the article are deleted and written
    again; tags are upserted by ID, refreshing type, label, count and
    frequency on conflict.
    """
    save_article(conn, article)

    # Sentences: wipe the article's rows, then insert the new ones (if any).
    conn.execute(
        delete(sentence_table).where(sentence_table.c.article == article.id)
    )
    sentence_rows = [sentence.dict() for sentence in sentences]
    if len(sentence_rows) > 0:
        conn.execute(insert(sentence_table).values(sentence_rows))

    # Tag/sentence associations: same wipe-and-insert approach.
    conn.execute(
        delete(tag_sentence_table).where(tag_sentence_table.c.article == article.id)
    )
    association_rows = [assoc.dict() for assoc in tag_sentences]
    if len(association_rows) > 0:
        conn.execute(insert(tag_sentence_table).values(association_rows))

    # Tags are not deleted first; existing rows are updated in place.
    tag_rows = [tag.dict() for tag in tags]
    if len(tag_rows) > 0:
        tag_insert = upsert(tag_table).values(tag_rows)
        incoming = tag_insert.excluded
        refreshed = {
            "type": incoming.type,
            "label": incoming.label,
            "count": incoming.count,
            "frequency": incoming.frequency,
        }
        conn.execute(
            tag_insert.on_conflict_do_update(index_elements=["id"], set_=refreshed)
        )
165 |
166 |
167 | # def compute_idf(conn: Conn):
168 | # cstmt = select(func.count(article_table.c.id))
169 | # article_count = float(conn.execute(cstmt).scalar())
170 | # print("Article count", article_count)
171 |
172 | # conn.execute(delete(fingerprint_idf_table))
173 | # gstmt = select(
174 | # tag_table.c.fingerprint,
175 | # func.count(tag_table.c.article),
176 | # func.log(article_count / func.count(tag_table.c.article)),
177 | # )
178 | # gstmt = gstmt.group_by(tag_table.c.fingerprint)
179 | # stmt = fingerprint_idf_table.insert()
180 | # stmt = stmt.from_select(["fingerprint", "count", "frequency"], gstmt)
181 | # print("Update tf/idf", stmt)
182 | # conn.execute(stmt)
183 |
--------------------------------------------------------------------------------
/storyweb/logic/graph.py:
--------------------------------------------------------------------------------
1 | import json
2 | import countrynames
3 | from typing import List, Optional, Generator, Dict
4 | from networkx import DiGraph
5 | from followthemoney import model
6 | from followthemoney.proxy import EntityProxy
7 | from sqlalchemy.future import select
8 | from sqlalchemy.engine import Row
9 | from networkx.readwrite.gexf import generate_gexf
10 |
11 | from storyweb.db import Conn, link_table, tag_table, story_article_table
12 | from storyweb.ontology import ontology, LinkType
13 |
14 |
def query_links(
    conn: Conn,
    story_id: Optional[int] = None,
    link_types: List[str] = list(ontology.link_types.keys()),
) -> Generator[Row, None, None]:
    """Yield distinct link rows joined with their source and target tags.

    Each row carries ``link_type`` plus, for both ends (``source_*`` and
    ``target_*``), the cluster ID, the tag label (``*_alias``), the cluster
    label and the cluster type. Only links whose type is in ``link_types``
    are returned. With a ``story_id``, both ends must have a tag in an
    article of that story.
    """
    link_t = link_table.alias("l")
    source_t = tag_table.alias("s")
    target_t = tag_table.alias("t")

    columns = (
        link_t.c.type.label("link_type"),
        source_t.c.cluster.label("source_id"),
        source_t.c.label.label("source_alias"),
        source_t.c.cluster_label.label("source_label"),
        source_t.c.cluster_type.label("source_type"),
        target_t.c.cluster.label("target_id"),
        target_t.c.label.label("target_alias"),
        target_t.c.cluster_label.label("target_label"),
        target_t.c.cluster_type.label("target_type"),
    )
    lstmt = select(*columns)

    if story_id is None:
        # No story filter: tags are joined on their ID.
        lstmt = lstmt.join(source_t, link_t.c.source_cluster == source_t.c.id)
        lstmt = lstmt.join(target_t, link_t.c.target_cluster == target_t.c.id)
    else:
        # Story filter: tags are joined on their cluster, and the source and
        # the target must each appear in an article of the story (both ends,
        # not just one of them).
        sa_source_t = story_article_table.alias("src_sa")
        sa_target_t = story_article_table.alias("tgt_sa")
        lstmt = lstmt.join(source_t, link_t.c.source_cluster == source_t.c.cluster)
        lstmt = lstmt.join(sa_source_t, sa_source_t.c.article == source_t.c.article)
        lstmt = lstmt.filter(sa_source_t.c.story == story_id)
        lstmt = lstmt.join(target_t, link_t.c.target_cluster == target_t.c.cluster)
        lstmt = lstmt.join(sa_target_t, sa_target_t.c.article == target_t.c.article)
        lstmt = lstmt.filter(sa_target_t.c.story == story_id)

    lstmt = lstmt.where(link_t.c.type.in_(link_types)).distinct()
    yield from conn.execute(lstmt)
59 |
60 |
def generate_graph(
    conn: Conn,
    story_id: Optional[int] = None,
    link_types: List[str] = list(ontology.link_types.keys()),
) -> DiGraph:
    """Build a directed graph of clusters connected by links.

    Args:
        conn: database connection to query.
        story_id: restrict the graph to one story, if given.
        link_types: link types to include. ``SAME`` and ``UNRELATED`` are
            always left out. The list is not modified.

    Returns:
        A graph with one node per cluster (attributes ``label`` and
        ``node_type``) and one edge per link (attribute ``edge_type``).
    """
    # Filter into a new list: removing items in place would alter the
    # caller's list and the shared default value.
    skipped = (LinkType.SAME, LinkType.UNRELATED)
    wanted = [link_type for link_type in link_types if link_type not in skipped]

    graph = DiGraph()
    for row in query_links(conn, story_id=story_id, link_types=wanted):
        source_id = row["source_id"]
        target_id = row["target_id"]
        # A node keeps the attributes of the first row that mentions it.
        if not graph.has_node(source_id):
            graph.add_node(
                source_id,
                label=row["source_label"],
                node_type=row["source_type"],
            )
        if not graph.has_node(target_id):
            graph.add_node(
                target_id,
                label=row["target_label"],
                node_type=row["target_type"],
            )
        graph.add_edge(
            source_id,
            target_id,
            edge_type=row["link_type"],
        )
    return graph
92 |
93 |
def generate_graph_gexf(
    conn: Conn,
    story_id: Optional[int] = None,
    link_types: List[str] = list(ontology.link_types.keys()),
) -> str:
    """Return the graph built by ``generate_graph`` as GEXF text.

    The lines produced by ``generate_gexf`` are joined with newlines.
    """
    gexf_lines = generate_gexf(
        generate_graph(conn, story_id=story_id, link_types=link_types)
    )
    return "\n".join(gexf_lines)
101 |
102 |
def _make_ent(row: Row, prefix: str) -> EntityProxy:
    """Build an entity from the ``<prefix>_*`` columns of a link row.

    The schema comes from the ontology's cluster type, the ID is the cluster
    ID prefixed with ``sw-``, the cluster label becomes the name, and the tag
    label is added as an alias when it differs from the name.
    """
    cluster_type = ontology.get_cluster_type(row[f"{prefix}_type"])
    entity = model.make_entity(cluster_type.ftm)
    cluster_id = row[f"{prefix}_id"]
    entity.id = f"sw-{cluster_id}"
    name = row[f"{prefix}_label"]
    alias = row[f"{prefix}_alias"]
    entity.add("name", name)
    if alias != name:
        entity.add("alias", alias)
    return entity
115 |
116 |
def generate_graph_ftm(conn: Conn, story_id: Optional[int] = None) -> str:
    """Export links as entities, one JSON object per line.

    All link types except ``SAME`` and ``UNRELATED`` are considered.
    ``LOCATED`` links to a ``LOC`` cluster become a ``country`` value on the
    source entity when the location resolves to a country code; any other
    link touching a ``LOC`` cluster is dropped. Remaining links are emitted
    as edge entities when the ontology maps their type to a schema.

    Raises:
        ValueError: if a link type maps to a schema that does not exist or
            is not an edge schema.
    """
    skipped = (LinkType.SAME, LinkType.UNRELATED)
    link_types = [lt for lt in ontology.link_types.keys() if lt not in skipped]

    entities: Dict[str, EntityProxy] = {}

    def _merge(e: EntityProxy) -> None:
        # Entities sharing an ID are folded into one.
        if e.id in entities:
            entities[e.id].merge(e)
        else:
            entities[e.id] = e

    for row in query_links(conn, story_id=story_id, link_types=link_types):
        if row["link_type"] == "LOCATED" and row["target_type"] == "LOC":
            # Try both the cluster label and the tag label as country names.
            for label in (row["target_label"], row["target_alias"]):
                code = countrynames.to_code(label)
                if code is None:
                    continue
                ent = _make_ent(row, "source")
                ent.add("country", code)
                _merge(ent)
            continue

        if row["source_type"] == "LOC" or row["target_type"] == "LOC":
            continue

        source = _make_ent(row, "source")
        target = _make_ent(row, "target")

        link_type = ontology.get_link_type(row["link_type"])
        if not link_type.ftm:
            # No schema mapped for this link type: nothing to export.
            continue

        schema = model.get(link_type.ftm)
        if schema is None or not schema.edge:
            raise ValueError(
                f"Link type {row['link_type']!r} maps to schema "
                f"{link_type.ftm!r}, which is not a known edge schema"
            )

        link = model.make_entity(schema)
        link.make_id(source.id, target.id, link_type.model.name)
        link.add("summary", link_type.model.label)
        link.add(schema.edge_source, source)
        link.add(schema.edge_target, target)

        _merge(source)
        _merge(target)
        _merge(link)

    return "\n".join(json.dumps(ent.to_dict()) for ent in entities.values())
172 |
--------------------------------------------------------------------------------
/storyweb/logic/predict.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from sqlalchemy.sql import select, delete, update, and_, or_, func, distinct
3 |
4 | from storyweb.db import Conn, link_table, tag_table
5 | from storyweb.logic.clusters import fetch_cluster
6 | from storyweb.logic.links import get_links
7 | from storyweb.ontology import ontology, LinkType
8 | from storyweb.models import LinkPrediction, ClusterBase, Link
9 |
10 |
def is_observer(conn: Conn, id: str) -> bool:
    """Tell whether a cluster mostly acts as an observer on its links.

    For the links that have the cluster as source (``SAME`` and ``UNRELATED``
    excluded), the distinct target clusters are counted per link type. The
    result is true when the ``OBSERVER`` count is at least half of the sum
    over all types, and false when there are no such links.
    """
    ignored = (LinkType.SAME, LinkType.UNRELATED)
    target_count = func.count(func.distinct(link_table.c.target_cluster))
    stmt = (
        select(
            link_table.c.type.label("type"),
            target_count.label("targets"),
        )
        .where(link_table.c.source_cluster == id)
        .where(~link_table.c.type.in_(ignored))
        .group_by(link_table.c.type)
    )
    observer = 0.0
    total = 0.0
    for row in conn.execute(stmt):
        targets = row["targets"]
        if row["type"] == LinkType.OBSERVER:
            observer = targets
        total += targets
    return total != 0.0 and (observer / total) >= 0.5
29 |
30 |
def pick_cluster(id: str, *clusters: ClusterBase) -> ClusterBase:
    """Return the first of ``clusters`` whose ``id`` equals ``id``.

    Raises:
        ValueError: if none of the clusters has that ID.
    """
    found = next((cluster for cluster in clusters if id == cluster.id), None)
    if found is None:
        raise ValueError("Cluster not found!")
    return found
36 |
37 |
def can_have_link(source: ClusterBase, target: ClusterBase, link_type: str) -> bool:
    """Check the ontology: may ``link_type`` connect ``source`` to ``target``?

    True only when the source's cluster type ``is_a`` the link's source type
    and the target's cluster type ``is_a`` the link's target type.
    """
    link = ontology.get_link_type(link_type)
    source_kind = ontology.get_cluster_type(source.type)
    target_kind = ontology.get_cluster_type(target.type)
    source_ok = source_kind.is_a(link.source_type.name)
    return bool(source_ok and target_kind.is_a(link.target_type.name))
47 |
48 |
49 | # def can_have_bidi(source: ClusterBase, target: ClusterBase, link_type: str) -> bool:
50 | # pass
51 |
52 |
def link_predict(conn: Conn, anchor_id: str, other_id: str) -> LinkPrediction:
    """Suggest a link between two clusters.

    Order of precedence:

    1. An existing link between the two that is not ``UNRELATED`` and that
       the ontology allows; of several, the one with the highest weight.
    2. If exactly one of the clusters is an observer (see ``is_observer``),
       an ``OBSERVER`` link from it to the other.
    3. If locations are involved: ``WITHIN`` between two locations,
       otherwise ``LOCATED`` pointing at the location.
    4. ``UNRELATED`` from anchor to other.

    Raises:
        ValueError: if either cluster cannot be fetched.
    """
    anchor = fetch_cluster(conn, anchor_id)
    other = fetch_cluster(conn, other_id)
    if anchor is None or other is None:
        raise ValueError("Invalid clusters for link prediction!")

    # Existing links that are meaningful and valid under the ontology.
    usable: List[Link] = []
    for link in get_links(conn, anchor_id, other_id):
        if link.type == LinkType.UNRELATED:
            continue
        src = pick_cluster(link.source_cluster, anchor, other)
        tgt = pick_cluster(link.target_cluster, anchor, other)
        if can_have_link(src, tgt, link.type):
            usable.append(link)
    if usable:
        # Ascending by weight; the last entry is the preferred link.
        usable.sort(key=lambda l: ontology.get_link_type(l.type).weight)
        best = usable[-1]
        return LinkPrediction(
            source=pick_cluster(best.source_cluster, anchor, other),
            target=pick_cluster(best.target_cluster, anchor, other),
            type=best.type,
        )

    # Heuristic: a cluster that is an observer on most of its links (e.g. a
    # media organisation or a journalist) is assumed to observe here as well.
    anchor_observer = is_observer(conn, anchor.id)
    other_observer = is_observer(conn, other.id)
    if anchor_observer and not other_observer:
        return LinkPrediction(source=anchor, target=other, type=LinkType.OBSERVER)
    if other_observer and not anchor_observer:
        return LinkPrediction(source=other, target=anchor, type=LinkType.OBSERVER)

    # Heuristic: locations can only enter into a few kinds of links.
    anchor_is_loc = anchor.type == "LOC"
    other_is_loc = other.type == "LOC"
    if anchor_is_loc and other_is_loc:
        return LinkPrediction(source=anchor, target=other, type="WITHIN")
    if anchor_is_loc:
        return LinkPrediction(source=other, target=anchor, type="LOCATED")
    if other_is_loc:
        return LinkPrediction(source=anchor, target=other, type="LOCATED")

    return LinkPrediction(source=anchor, target=other, type=LinkType.UNRELATED)
98 |
--------------------------------------------------------------------------------
/storyweb/logic/stories.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Optional
3 | from sqlalchemy.sql import select, delete, update, insert, func
4 |
5 | from storyweb.db import Conn
6 | from storyweb.db import story_table
7 | from storyweb.db import story_article_table
8 | from storyweb.logic.util import count_stmt
9 | from storyweb.models import Story, StoryMutation, Listing, ListingResponse
10 |
11 | log = logging.getLogger(__name__)
12 |
13 |
def list_stories(
    conn: Conn, listing: Listing, query: Optional[str], article: Optional[str]
) -> ListingResponse[Story]:
    """List stories, optionally filtered, one page at a time.

    Args:
        conn: database connection to run the queries on.
        listing: paging information (limit/offset).
        query: case-insensitive substring filter on the title; blank values
            are ignored.
        article: keep only stories that contain this article; blank values
            are ignored.

    Returns:
        The requested page, ordered by story ID, together with the total
        number of matching stories.
    """
    stmt = select(story_table)
    if query is not None and query.strip():
        stmt = stmt.where(story_table.c.title.ilike(f"%{query}%"))
    if article is not None and article.strip():
        stmt = stmt.join(
            story_article_table,
            story_article_table.c.story == story_table.c.id,
        )
        stmt = stmt.where(story_article_table.c.article == article)
    total = count_stmt(conn, stmt, story_table.c.id)
    # LIMIT/OFFSET needs a fixed order, otherwise pages may overlap or skip rows.
    stmt = stmt.order_by(story_table.c.id)
    stmt = stmt.limit(listing.limit).offset(listing.offset)
    cursor = conn.execute(stmt)
    results = [Story.parse_obj(r) for r in cursor.fetchall()]
    return ListingResponse[Story](
        total=total,
        debug_msg=str(stmt),
        limit=listing.limit,
        offset=listing.offset,
        results=results,
    )
37 |
38 |
def fetch_story(conn: Conn, story_id: int) -> Optional[Story]:
    """Load one story by ID; returns ``None`` when no row matches."""
    lookup = select(story_table).where(story_table.c.id == story_id).limit(1)
    row = conn.execute(lookup).fetchone()
    return None if row is None else Story.parse_obj(row)
48 |
49 |
def create_story(conn: Conn, data: StoryMutation) -> Story:
    """Insert a story and return it as read back from the database.

    Raises:
        Exception: if the new row cannot be fetched afterwards.
    """
    creation = insert(story_table).values(title=data.title, summary=data.summary)
    result = conn.execute(creation)
    new_id = result.inserted_primary_key[0]
    created = fetch_story(conn, new_id)
    if created is None:
        raise Exception("Story was not saved.")
    return created
58 |
59 |
def update_story(conn: Conn, data: StoryMutation, story_id: int) -> Story:
    """Overwrite title and summary of a story and return the stored result.

    Raises:
        Exception: if the story cannot be fetched after the update.
    """
    change = (
        update(story_table)
        .where(story_table.c.id == story_id)
        .values(title=data.title, summary=data.summary)
    )
    conn.execute(change)
    updated = fetch_story(conn, story_id)
    if updated is None:
        raise Exception("Story was not saved.")
    return updated
69 |
70 |
def delete_story(conn: Conn, story_id: int) -> None:
    """Delete a story, removing its article associations first."""
    conn.execute(
        delete(story_article_table).where(story_article_table.c.story == story_id)
    )
    conn.execute(delete(story_table).where(story_table.c.id == story_id))
78 |
79 |
def toggle_story_article(
    conn: Conn, story: int, article: str, delete_existing: bool = True
) -> None:
    """Flip the membership of an article in a story.

    When no association row exists, one is inserted. When one exists it is
    deleted, unless ``delete_existing`` is false, in which case nothing changes.
    """
    t = story_article_table.alias("t")
    count_query = select(func.count(t.c.story)).filter(
        t.c.story == story, t.c.article == article
    )
    already_linked = conn.execute(count_query).scalar_one() > 0

    if not already_linked:
        add_link = insert(story_article_table).values(story=story, article=article)
        conn.execute(add_link)
        return

    if delete_existing:
        drop_link = delete(t).filter(t.c.story == story, t.c.article == article)
        conn.execute(drop_link)
96 |
--------------------------------------------------------------------------------
/storyweb/logic/util.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from sqlalchemy.sql import Select, Selectable, ColumnElement
3 | from sqlalchemy.sql import func
4 |
5 | from storyweb.db import Conn
6 |
7 | log = logging.getLogger(__name__)
8 |
9 |
def count_stmt(conn: Conn, stmt: Select, col: Selectable | ColumnElement) -> int:
    """Return ``COUNT(col)`` for the rows selected by ``stmt``.

    The column list of ``stmt`` is swapped for a single count expression; the
    rest of the statement is reused as-is.
    """
    counting = stmt.with_only_columns(func.count(col))
    return conn.execute(counting).scalar_one()
14 |
--------------------------------------------------------------------------------
/storyweb/models.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import Generic, List, Optional, TypeVar
3 | from pydantic import BaseModel, Field
4 | from pydantic.generics import GenericModel
5 |
6 | R = TypeVar("R", bound=BaseModel)
7 |
8 |
class Response(GenericModel):
    """Base response model carrying a status and an optional debug message."""

    # Status marker; defaults to "ok".
    status: str = Field("ok")
    # Optional free-form debugging text; defaults to None.
    debug_msg: Optional[str] = Field(None)
12 |
13 |
class ListingResponse(Response, Generic[R]):
    """Paged listing response, generic over the result model ``R``."""

    # Total row count reported alongside the page; defaults to 0.
    total: int = Field(0)
    # Page size used for the query (required, no default).
    limit: int = Field()
    # Offset of the first returned result; defaults to 0.
    offset: int = Field(0)
    # The results of the current page.
    results: List[R]
19 |
20 |
class Listing(BaseModel):
    """Paging and sorting parameters passed to listing queries."""

    # Maximum number of rows to return.
    limit: int
    # Number of rows to skip.
    offset: int
    # Sort direction as a string; accepted values are not visible here.
    sort_direction: str
    # Optional name of the field to sort on.
    sort_field: Optional[str]
26 |
27 |
class Article(BaseModel):
    """Summary record of an imported article (without its full text)."""

    id: str
    # Source site of the article.
    site: str
    url: str
    title: Optional[str]
    language: Optional[str]
    # The parse pipeline stores the number of distinct tags found here.
    tags: Optional[int]
    # The parse pipeline stores the total number of tag mentions here.
    mentions: Optional[int]
36 |
37 |
class ArticleDetails(Article):
    """An ``Article`` extended with its full body text."""

    text: str
40 |
41 |
class StoryMutation(BaseModel):
    """Request body used to create or update a story."""

    # Titles must be at least four characters long.
    title: str = Field(min_length=4)
    summary: Optional[str]
45 |
46 |
class StoryArticleToggle(BaseModel):
    """Request body naming the article whose story membership is toggled."""

    # Identifier of the article.
    article: str
49 |
50 |
class StoryArticleImportUrl(BaseModel):
    """Request body carrying the URL of an article to import into a story."""

    url: str
53 |
54 |
class Story(BaseModel):
    """A story record: numeric id, title and optional summary."""

    id: int
    title: str
    summary: Optional[str]
59 |
60 |
class Sentence(BaseModel):
    """One sentence of an article, addressed by article id and sequence number."""

    # Identifier of the article the sentence belongs to.
    article: str
    # Position of the sentence within the article.
    sequence: int
    text: str
65 |
66 |
class ClusterBase(BaseModel):
    """Fields shared by cluster-like models: id, type and display label."""

    id: str
    type: str
    label: str
71 |
72 |
class Tag(ClusterBase):
    """An entity tag found in one article, plus the cluster it is assigned to."""

    # Identifier of the cluster; the parse pipeline initially sets it to the tag id.
    cluster: str
    # Identifier of the article the tag was found in.
    article: str
    # Normalised form of the name; the parse pipeline builds it from sorted slug tokens.
    fingerprint: str
    # Number of mentions of this tag in the article.
    count: int
    # The parse pipeline stores count divided by the article's total mentions.
    frequency: float
    cluster_type: Optional[str]
    cluster_label: Optional[str]
81 |
82 |
class TagSentence(BaseModel):
    """Association between a tag and a sentence of an article."""

    # Identifier of the tag.
    tag: str
    # Identifier of the article.
    article: str
    # Sequence number of the sentence within the article.
    sentence: int
87 |
88 |
class Cluster(ClusterBase):
    """A cluster together with an article count."""

    # Number of articles associated with the cluster.
    articles: int
91 |
92 |
class ClusterDetails(Cluster):
    """A ``Cluster`` extended with a list of labels."""

    labels: List[str]
95 |
96 |
class ClusterPair(BaseModel):
    """A pair of clusters with an article count and any link types between them."""

    left: ClusterBase
    right: ClusterBase
    # Article count for the pair.
    articles: int
    # Names of link types attached to the pair; empty by default.
    link_types: List[str] = []
102 |
103 |
class RelatedCluster(ClusterBase):
    """A cluster listed as related to another one."""

    # Article count for the relation.
    articles: int
    # Names of link types attached to the relation; empty by default.
    link_types: List[str] = []
107 |
108 |
class SimilarCluster(ClusterBase):
    """A cluster listed as similar to another one."""

    # Items the two clusters have in common.
    common: List[str]
    # Count of common items.
    common_count: int
112 |
113 |
class LinkBase(BaseModel):
    """Minimal description of a link: source, target and link type."""

    source: str
    target: str
    # Name of the link type.
    type: str
118 |
119 |
class Link(LinkBase):
    """A stored link, with cluster ids, optional user and timestamp."""

    source_cluster: str
    target_cluster: str
    user: Optional[str]
    timestamp: datetime
125 |
126 |
class LinkPrediction(BaseModel):
    """A predicted link between two fully described clusters."""

    source: ClusterDetails
    target: ClusterDetails
    # Name of the predicted link type.
    type: str
131 |
132 |
class MergeRequest(BaseModel):
    """Request body for merging clusters: an anchor id and the other ids."""

    anchor: str
    other: List[str]
136 |
137 |
class ExplodeRequest(BaseModel):
    """Request body naming the cluster to explode."""

    cluster: str
140 |
141 |
class UntagRequest(BaseModel):
    """Request body naming a cluster and the article to untag from it."""

    cluster: str
    article: str
145 |
146 |
class Site(BaseModel):
    """A source site with its article count."""

    site: str
    # Number of articles for the site; defaults to 0.
    articles: int = 0
150 |
--------------------------------------------------------------------------------
/storyweb/ontology.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from pydantic import BaseModel
3 | from pydantic_yaml import YamlModel
4 | from typing import List, Optional
5 |
6 | from storyweb.clean import most_common
7 |
8 |
class ClusterTypeModel(BaseModel):
    """Declarative description of one cluster type, as listed in the ontology."""

    # Short identifier, e.g. "PER".
    name: str
    label: str
    plural: str
    # Name of the parent cluster type, if any.
    parent: Optional[str]
    color: str
    icon: str
    # Name of the corresponding FtM schema (presumably FollowTheMoney; confirm).
    ftm: str
17 |
18 |
class LinkTypeModel(BaseModel):
    """Declarative description of one link type, as listed in the ontology."""

    name: str
    # Whether the link has a direction; defaults to False.
    directed: bool = False
    label: str
    # Verb phrase describing the link, e.g. "is the same as".
    phrase: str
    # Names of the cluster types at either end of the link.
    source_type: str
    target_type: str
    # Optional name of the corresponding FtM schema.
    ftm: Optional[str]
    weight: int
28 |
29 |
class OntologyModel(YamlModel):
    """Root model of the ontology file: all cluster types and all link types."""

    cluster_types: List[ClusterTypeModel]
    link_types: List[LinkTypeModel]
33 |
34 |
class ClusterType(object):
    """Runtime wrapper around a ``ClusterTypeModel`` bound to its ontology."""

    PERSON = "PER"
    ORGANIZATION = "ORG"
    LOCATION = "LOC"

    def __init__(self, ontology: "Ontology", model: ClusterTypeModel):
        self.ontology = ontology
        self.model = model
        # Mirror the descriptive model fields as plain attributes.
        self.name = model.name
        self.label = model.label
        self.plural = model.plural
        self.color = model.color
        self.icon = model.icon
        self.ftm = model.ftm

    @property
    def parent(self) -> Optional["ClusterType"]:
        """The parent cluster type, looked up in the ontology; None for roots."""
        parent_name = self.model.parent
        if parent_name is None:
            return None
        return self.ontology.get_cluster_type(parent_name)

    def is_a(self, name: str) -> bool:
        """Check whether this type, or any of its ancestors, is named ``name``."""
        current: Optional["ClusterType"] = self
        while current is not None:
            if name == current.name:
                return True
            current = current.parent
        return False

    def pick(self, names: List[str]) -> str:
        """Given a set of categories, pick the most descriptive one."""
        # TODO: does this want to be a proper class-based type system (ftm?) at
        # some point? An earlier, now disabled, heuristic preferred LOCATION,
        # then a generic entity when both PERSON and ORGANIZATION were present,
        # then PERSON, then ORGANIZATION. For now the most frequent name wins.
        return most_common(names)
82 |
83 |
class LinkType(object):
    """Runtime wrapper around a ``LinkTypeModel`` bound to its ontology."""

    SAME = "SAME"
    UNRELATED = "UNRELATED"
    OBSERVER = "OBSERVER"

    def __init__(self, ontology: "Ontology", model: LinkTypeModel):
        self.ontology, self.model = ontology, model
        self.ftm, self.weight = model.ftm, model.weight

    @property
    def source_type(self) -> "ClusterType":
        """Cluster type at the source end of the link, resolved via the ontology."""
        type_name = self.model.source_type
        return self.ontology.get_cluster_type(type_name)

    @property
    def target_type(self) -> "ClusterType":
        """Cluster type at the target end of the link, resolved via the ontology."""
        type_name = self.model.target_type
        return self.ontology.get_cluster_type(type_name)
102 |
103 |
class Ontology(object):
    """Lookup tables for the cluster and link types of an ``OntologyModel``."""

    def __init__(self, model: OntologyModel):
        """Index the model's types by name and check the required ones exist.

        Raises:
            ValueError: if the ``SAME`` link type or any of the ``LOC``, ``PER``
                or ``ORG`` cluster types is missing from the model.
        """
        self.model = model
        self.node_types = {ct.name: ClusterType(self, ct) for ct in model.cluster_types}
        self.link_types = {lt.name: LinkType(self, lt) for lt in model.link_types}

        # Explicit checks rather than `assert`: assertions are stripped when
        # Python runs with -O, which would silently skip this validation.
        if LinkType.SAME not in self.link_types:
            raise ValueError(f"Ontology lacks required link type: {LinkType.SAME}")
        required_nodes = (
            ClusterType.LOCATION,
            ClusterType.PERSON,
            ClusterType.ORGANIZATION,
        )
        for required in required_nodes:
            if required not in self.node_types:
                raise ValueError(f"Ontology lacks required cluster type: {required}")

    def get_cluster_type(self, name: str) -> ClusterType:
        """Return the cluster type called ``name``; raises KeyError if unknown."""
        return self.node_types[name]

    def get_link_type(self, name: str) -> LinkType:
        """Return the link type called ``name``; raises KeyError if unknown."""
        return self.link_types[name]

    @classmethod
    def load(cls) -> "Ontology":
        """Build the ontology from the ``ontology.yml`` file next to this module."""
        path = Path(__file__).parent / "ontology.yml"
        model = OntologyModel.parse_file(path)
        # Instantiate via `cls` so subclasses get an instance of their own type.
        return cls(model)
126 |
127 |
# Module-level ontology instance, loaded from ontology.yml at import time.
ontology = Ontology.load()
129 |
--------------------------------------------------------------------------------
/storyweb/ontology.yml:
--------------------------------------------------------------------------------
1 | cluster_types:
2 | - name: ANY
3 | label: "Thing"
4 | plural: "Things"
5 | color: "#9D3F9D"
6 | icon: "hat"
7 | ftm: Thing
8 | - name: ENT
9 | label: "Entity"
10 | plural: "Entities"
11 | parent: ANY
12 | color: "#9D3F9D"
13 | icon: "people"
14 | ftm: LegalEntity
15 | - name: LOC
16 | label: "Location"
17 | plural: "Locations"
18 | parent: ANY
19 | color: "#29a634"
20 | icon: "mountain"
21 | ftm: Address
22 | - name: PER
23 | label: "Person"
24 | plural: "People"
25 | parent: ENT
26 | color: "#9d3f9d"
27 | icon: "person"
28 | ftm: Person
29 | - name: ORG
30 | label: "Organization"
31 | plural: "Organizations"
32 | parent: ENT
33 | color: "#2965cc"
34 | icon: "office"
35 | ftm: Organization
36 | link_types:
37 | - name: SAME
38 | label: "Same as"
39 | directed: false
40 | phrase: "is the same as"
41 | source_type: ANY
42 | target_type: ANY
43 | weight: 100
44 | - name: OBSERVER
45 | directed: true
46 | label: "Observer"
47 | phrase: "writes about"
48 | source_type: ENT
49 | target_type: ANY
50 | weight: 20
51 | - name: UNRELATED
52 | directed: false
53 | label: "Unrelated"
54 | phrase: "has nothing to do with"
55 | source_type: ANY
56 | target_type: ANY
57 | weight: 0
58 | - name: ASSOCIATE
59 | directed: false
60 | label: "Associate"
61 | phrase: "is an associate of"
62 | source_type: PER
63 | target_type: PER
64 | ftm: Associate
65 | weight: 30
66 | - name: FAMILY
67 | directed: false
68 | label: "Family"
69 | phrase: "is related to"
70 | source_type: PER
71 | target_type: PER
72 | ftm: Family
73 | weight: 40
74 | - name: ANTAGONIST
75 | directed: false
76 | label: "Antagonist"
77 | phrase: "is in conflict with"
78 | source_type: ENT
79 | target_type: ENT
80 | weight: 50
81 | - name: OWNER
82 | directed: true
83 | label: "Owner"
84 | phrase: "owns"
85 | source_type: ENT
86 | target_type: ORG
87 | ftm: Ownership
88 | weight: 40
89 | - name: MANAGER
90 | directed: true
91 | label: "Manager"
92 | phrase: "manages or directs"
93 | source_type: ENT
94 | target_type: ORG
95 | ftm: Directorship
96 | weight: 40
  - name: EMPLOYEE
    directed: true
    # NOTE(review): the label reads "Employer" although the name is EMPLOYEE and
    # the phrase ("works for") describes the source; the sibling types OWNER,
    # MANAGER and MEMBER label the source role. Confirm this is intended.
    label: "Employer"
    phrase: "works for"
    source_type: PER
    target_type: ORG
    ftm: Employment
    weight: 40
105 | - name: MEMBER
106 | directed: true
107 | label: "Member"
108 | phrase: "is part of"
109 | source_type: PER
110 | target_type: ORG
111 | ftm: Membership
112 | weight: 40
113 | - name: BUSINESS
114 | directed: false
115 | label: "Business activity"
116 | phrase: "does business with"
117 | source_type: ENT
118 | target_type: ENT
119 | weight: 30
120 | - name: LOCATED
121 | directed: true
122 | label: "Located"
123 | phrase: "is located in"
124 | source_type: ENT
125 | target_type: LOC
126 | weight: 20
127 | - name: WITHIN
128 | directed: true
129 | label: "Within"
130 | phrase: "is located in"
131 | source_type: LOC
132 | target_type: LOC
133 | weight: 20
134 | - name: INDIRECT
135 | directed: false
136 | label: "Indirect link"
137 | phrase: "is indirectly linked to"
138 | source_type: ANY
139 | target_type: ANY
140 | weight: 5
141 | - name: OTHER
142 | directed: false
143 | label: "Other link"
144 | phrase: "is linked to"
145 | source_type: ANY
146 | target_type: ANY
147 | ftm: UnknownLink
148 | weight: 7
149 |
--------------------------------------------------------------------------------
/storyweb/parse/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import requests
3 | from typing import Optional
4 | from articledata import URL
5 |
6 | from storyweb.db import Conn
7 | from storyweb.parse.extract import extract
8 | from storyweb.parse.pipeline import load_one_article
9 |
10 | log = logging.getLogger(__name__)
11 |
12 |
def import_article_by_url(
    conn: Conn, url: str, timeout: float = 30.0
) -> Optional[str]:
    """Download an article, extract its content and store it.

    Args:
        conn: database connection handed on to the parse pipeline.
        url: address of the article to fetch.
        timeout: seconds to wait for the remote server before giving up.

    Returns:
        The id returned by ``load_one_article``, or ``None`` when the page
        cannot be fetched or no article can be extracted from it.
    """
    # NOTE(review): `url` is fetched as given; if it comes from an API client
    # this allows requests to internal hosts (SSRF). Consider restricting it.
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        res = requests.get(url, timeout=timeout)
        res.raise_for_status()
    except Exception as exc:
        # Deliberately broad: any fetch failure is reported as "no article".
        log.exception("Cannot fetch article text: %r", exc)
        return None

    url_obj = URL(url)
    article = extract(url_obj, res.content)
    if article is None:
        return None

    return load_one_article(conn, article)
27 |
--------------------------------------------------------------------------------
/storyweb/parse/extract.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from datetime import datetime
3 | from typing import Optional, Dict, Any
4 | from articledata import URL, Article
5 | from trafilatura import bare_extraction
6 |
7 | from storyweb.parse.language import detect_language
8 |
9 | log = logging.getLogger(__name__)
10 |
11 |
def extract(url: URL, html: Any) -> Optional[Article]:
    """Build an ``Article`` from downloaded HTML.

    The article starts out with placeholder values (the URL as title, empty
    text, language "xxx") which are replaced by whatever the extractor and the
    language detector can determine.

    Args:
        url: parsed URL the document was fetched from.
        html: raw document content handed to ``bare_extraction``.

    Returns:
        The populated article.
    """
    log.info("Parsing: %r", url)
    article = Article(
        id=url.id,
        url=url.url,
        title=url.url,
        site=url.domain,
        bylines=[],
        language="xxx",
        locale="xx",
        text="",
        extracted_at=datetime.utcnow().isoformat(),
    )

    extracted: Optional[Dict[str, Any]] = bare_extraction(
        html, url=url.url, include_comments=False
    )
    if extracted is not None:
        # `dict.get(key, default)` only falls back when the key is absent; a key
        # present with a None/empty value must not wipe out the placeholders.
        article.title = extracted.get("title") or article.title
        article.date = extracted.get("date")
        article.text = extracted.get("text") or article.text
        author = extracted.get("author")
        if author is not None:
            article.bylines.append(author)

    lang = detect_language(article.text)
    if lang is not None:
        article.language = lang
    return article
39 |
--------------------------------------------------------------------------------
/storyweb/parse/language.py:
--------------------------------------------------------------------------------
1 | from functools import cache
2 | from typing import Optional
3 | import fasttext
4 | import languagecodes
5 | from normality import collapse_spaces
6 | from pathlib import Path
7 |
# Path of the fastText model file (lid.176.ftz) stored next to this module.
model_path = Path(__file__).parent / "lid.176.ftz"
9 |
10 |
@cache
def get_model():
    """Load the fastText language-identification model, once per process."""
    try:
        # Silence fastText's load-time warning output,
        # see https://github.com/facebookresearch/fastText/issues/1056
        fasttext.FastText.eprint = lambda *args, **kwargs: None
    except AttributeError:
        # Best effort only: the patch target may not exist in every fastText
        # release. A bare `except:` would also swallow KeyboardInterrupt.
        pass
    return fasttext.load_model(model_path.as_posix())
19 |
20 |
def detect_language(text: Optional[str]) -> Optional[str]:
    """Guess the language of ``text`` as an ISO 639 three-letter code.

    Only the first 10000 characters of the whitespace-collapsed text are used.

    Returns:
        The three-letter code, or ``None`` when there is no text, the model
        gives no prediction, or the predicted code cannot be mapped.
    """
    text = collapse_spaces(text)
    if not text:
        # Nothing to classify; also avoids loading the model needlessly.
        return None
    model = get_model()
    # predict() returns a (labels, probabilities) pair, so the labels tuple is
    # what must be checked for emptiness, not the pair itself.
    labels, _ = model.predict(text[:10000])
    if not labels:
        return None
    lang = labels[0].replace("__label__", "")
    return languagecodes.iso_639_alpha3(lang)
35 |
--------------------------------------------------------------------------------
/storyweb/parse/lid.176.ftz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/parse/lid.176.ftz
--------------------------------------------------------------------------------
/storyweb/parse/pipeline.py:
--------------------------------------------------------------------------------
1 | import spacy
2 | import logging
3 | import hashlib
4 | from spacy.tokens import Span, Doc
5 | from pathlib import Path
6 | from typing import Dict, Generator, List, Optional, Set, Tuple
7 | from functools import cache
8 | from normality import slugify
9 | from articledata import Article
10 | from pydantic import ValidationError
11 |
12 | from storyweb.db import engine, Conn
13 | from storyweb.clean import clean_entity_name, most_common, pick_name
14 | from storyweb.models import ArticleDetails, Sentence, Tag, TagSentence
15 | from storyweb.logic.articles import save_extracted
16 | from storyweb.ontology import ClusterType
17 |
18 | log = logging.getLogger(__name__)
19 |
# Maps NER labels emitted by the spaCy models to cluster type names.
# Entities with any other label are ignored by extract_tag().
NLP_TYPES = {
    "PERSON": ClusterType.PERSON,
    "PER": ClusterType.PERSON,
    "ORG": ClusterType.ORGANIZATION,
    "GPE": ClusterType.LOCATION,
}
# Maps three-letter language codes to spaCy model names; "xxx" is the entry
# load_nlp() falls back to for languages not listed here.
NLP_MODELS = {
    "eng": "en_core_web_sm",
    # "en_core_web_trf",
    "deu": "de_core_news_sm",
    "rus": "ru_core_news_sm",
    "xxx": "xx_ent_wiki_sm",
}
33 |
34 |
@cache
def load_nlp(language: str):
    """Return the cached spaCy pipeline for ``language``.

    Languages without an entry in ``NLP_MODELS`` share the "xxx" pipeline.
    """
    model_name = NLP_MODELS.get(language)
    if model_name is None:
        return load_nlp("xxx")
    spacy.prefer_gpu()
    # Only NER is needed, so the remaining components are switched off.
    unused_components = [
        "tok2vec",
        "tagger",
        "parser",
        "attribute_ruler",
        "lemmatizer",
    ]
    pipeline = spacy.load(model_name, disable=unused_components)
    pipeline.add_pipe("sentencizer")
    return pipeline
47 |
48 |
def read_raw_articles(path: Path) -> Generator[Tuple[str, Article], None, None]:
    """Yield ``(text, article)`` pairs from a file with one article per line.

    Lines that fail validation are logged and skipped, as are articles
    without an id and articles whose language is not "eng".
    """
    with open(path, "rb") as fh:
        for line_no, line in enumerate(fh, start=1):
            try:
                article = Article.parse_raw(line)
            except ValidationError as ve:
                # No article object exists when parsing fails, so the line
                # number is reported rather than an article id.
                log.warning("Article validation [line %d]: %s", line_no, ve)
                continue
            if article.id is None:
                continue
            if article.language != "eng":
                continue
            yield (article.text, article)
61 |
62 |
def extract_tag(ent: Span) -> Optional[Tuple[str, str, str]]:
    """Turn a spaCy entity into a ``(label, type, fingerprint)`` triple.

    Returns ``None`` for entity labels not listed in ``NLP_TYPES``, for names
    that clean or slugify to nothing, and for person names without a space.
    """
    tag_type = NLP_TYPES.get(ent.label_)
    if tag_type is None:
        return None
    label = clean_entity_name(ent.text)
    if label is None:
        return None
    slug = slugify(label, sep="-")
    if slug is None:
        return None
    if tag_type == ClusterType.PERSON and " " not in label:
        return None
    # Sorting the slug tokens makes the fingerprint independent of word order.
    fingerprint = "-".join(sorted(slug.split("-")))
    return (label, tag_type, fingerprint)
75 |
76 |
def _load_article(conn: Conn, doc: Doc, raw: Article) -> str:
    """Derive tags and sentences from a parsed document and store them.

    Entities are grouped by fingerprint. Each group becomes one ``Tag`` whose
    id is the SHA-1 of "<article id>><fingerprint>". Only sentences holding at
    least one usable entity are kept. Returns the article id.
    """
    log.info("Article [%s, %s]: %r", raw.id, raw.language, raw.title)
    article = ArticleDetails(
        id=raw.id,
        site=raw.site,
        url=raw.url,
        title=raw.title,
        language=raw.language,
        text=raw.text,
    )

    sentences: List[Sentence] = []
    labels_by_fp: Dict[str, List[str]] = {}
    types_by_fp: Dict[str, List[str]] = {}
    sentences_by_fp: Dict[str, Set[int]] = {}
    for seq, sent in enumerate(doc.sents):
        found = [tag for tag in map(extract_tag, sent.ents) if tag is not None]
        for label, type_, fp in found:
            labels_by_fp.setdefault(fp, []).append(label)
            types_by_fp.setdefault(fp, []).append(type_)
            sentences_by_fp.setdefault(fp, set()).add(seq)
        if found:
            sentences.append(
                Sentence(article=article.id, sequence=seq, text=sent.text)
            )

    article.tags = len(labels_by_fp)
    article.mentions = sum(len(group) for group in labels_by_fp.values())

    tags: List[Tag] = []
    tag_sentence_objs: List[TagSentence] = []
    for fp, labels in labels_by_fp.items():
        tag_id = hashlib.sha1(f"{article.id}>{fp}".encode("utf-8")).hexdigest()
        type_ = most_common(types_by_fp[fp])
        label = pick_name(labels)
        tags.append(
            Tag(
                id=tag_id,
                # A new tag starts out as its own cluster.
                cluster=tag_id,
                article=article.id,
                fingerprint=fp,
                type=type_,
                label=label,
                count=len(labels),
                frequency=float(len(labels)) / article.mentions,
                cluster_type=type_,
                cluster_label=label,
            )
        )
        tag_sentence_objs.extend(
            TagSentence(tag=tag_id, article=article.id, sentence=seq)
            for seq in sentences_by_fp.get(fp, [])
        )

    save_extracted(conn, article, sentences, tag_sentence_objs, tags)
    return article.id
139 |
140 |
def load_articles(path: Path) -> None:
    """Run the English pipeline over an article file and store every article.

    Each article is saved inside its own ``engine.begin()`` block.
    """
    pipeline = load_nlp("eng")
    parsed = pipeline.pipe(read_raw_articles(path), batch_size=20, as_tuples=True)
    for doc, raw_article in parsed:
        with engine.begin() as conn:
            _load_article(conn, doc, raw_article)
147 |
148 |
def load_one_article(conn: Conn, article: Article) -> str:
    """Parse one article with the pipeline for its language and store it.

    Returns the id of the stored article.
    """
    pipeline = load_nlp(article.language)
    parsed_doc = pipeline(article.text)
    return _load_article(conn, parsed_doc, article)
153 |
--------------------------------------------------------------------------------
/storyweb/routes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/storyweb/routes/__init__.py
--------------------------------------------------------------------------------
/storyweb/routes/articles.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | from fastapi import APIRouter, Depends, Path, Query
3 | from fastapi.exceptions import HTTPException
4 |
5 | from storyweb.db import Conn
6 | from storyweb.logic.articles import fetch_article, list_articles, list_sites
7 | from storyweb.routes.util import get_conn, get_listing
8 | from storyweb.models import (
9 | Article,
10 | ArticleDetails,
11 | Listing,
12 | ListingResponse,
13 | Site,
14 | )
15 |
16 | router = APIRouter()
17 |
18 |
@router.get("/sites", response_model=ListingResponse[Site])
def sites_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
):
    """List all the source sites from which articles (refs) have been imported."""
    sites = list_sites(conn, listing)
    return sites
26 |
27 |
@router.get("/articles", response_model=ListingResponse[Article])
def articles_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    site: Optional[str] = Query(None),
    story: Optional[int] = Query(None),
    q: Optional[str] = Query(None),
    cluster: List[str] = Query([]),
):
    """List articles, optionally filtered by site, story, query and clusters."""
    # Blank or whitespace-only cluster ids are dropped before querying.
    clusters = [cid for cid in cluster if cid is not None and cid.strip()]
    return list_articles(
        conn, listing, site=site, story=story, query=q, clusters=clusters
    )
46 |
47 |
@router.get("/articles/{article_id}", response_model=ArticleDetails)
def article_view(
    conn: Conn = Depends(get_conn),
    article_id: str = Path(),
):
    """Return one article with its text, or respond 404 if it does not exist."""
    article = fetch_article(conn, article_id)
    if article is not None:
        return article
    raise HTTPException(404)
57 |
--------------------------------------------------------------------------------
/storyweb/routes/clusters.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | from fastapi import APIRouter, Depends, Path, Query
3 | from fastapi.exceptions import HTTPException
4 |
5 | from storyweb.db import Conn
6 | from storyweb.logic.clusters import (
7 | fetch_cluster,
8 | list_clusters,
9 | list_related,
10 | list_similar,
11 | )
12 | from storyweb.routes.util import get_conn, get_listing
13 | from storyweb.models import (
14 | Cluster,
15 | ClusterDetails,
16 | Listing,
17 | ListingResponse,
18 | RelatedCluster,
19 | SimilarCluster,
20 | )
21 |
22 | router = APIRouter()
23 |
24 |
@router.get("/clusters", response_model=ListingResponse[Cluster])
def route_cluster_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    q: Optional[str] = Query(None),
    article: Optional[str] = Query(None),
    story: Optional[str] = Query(None),
    types: List[str] = Query([]),
):
    """List clusters, optionally filtered by query, article, story and types."""
    # NOTE(review): `story` is a string here, while the story routes use integer
    # story ids; confirm which one list_clusters expects.
    filters = dict(query=q, article=article, story=story, types=types)
    return list_clusters(conn, listing, **filters)
42 |
43 |
@router.get("/clusters/{cluster}", response_model=ClusterDetails)
def route_cluster_view(conn: Conn = Depends(get_conn), cluster: str = Path()):
    """Return one cluster with its details, or respond 404 if it is unknown."""
    details = fetch_cluster(conn, cluster)
    if details is not None:
        return details
    raise HTTPException(404)
50 |
51 |
@router.get(
    "/clusters/{cluster}/similar",
    response_model=ListingResponse[SimilarCluster],
)
def route_cluster_similar(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    cluster: str = Path(),
):
    """List the clusters similar to the given cluster."""
    similar = list_similar(conn, listing, cluster)
    return similar
61 |
62 |
@router.get(
    "/clusters/{cluster}/related",
    response_model=ListingResponse[RelatedCluster],
)
def route_cluster_related(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    cluster: str = Path(),
    linked: Optional[bool] = Query(None),
    types: List[str] = Query([]),
):
    """List the clusters related to the given one, filtered by link state and types."""
    related = list_related(conn, listing, cluster, linked=linked, types=types)
    return related
80 |
--------------------------------------------------------------------------------
/storyweb/routes/links.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from fastapi import APIRouter, Depends, Query
3 |
4 | from storyweb.db import Conn
5 | from storyweb.logic.clusters import (
6 | fetch_cluster,
7 | merge_cluster,
8 | explode_cluster,
9 | untag_article,
10 | )
11 | from storyweb.logic.links import (
12 | create_link,
13 | list_links,
14 | untag_article,
15 | )
16 | from storyweb.logic.predict import link_predict
17 | from storyweb.routes.util import get_conn, get_listing
18 | from storyweb.models import (
19 | ClusterDetails,
20 | Link,
21 | LinkBase,
22 | LinkPrediction,
23 | Listing,
24 | ListingResponse,
25 | MergeRequest,
26 | ExplodeRequest,
27 | UntagRequest,
28 | )
29 |
30 | router = APIRouter()
31 |
32 |
@router.get("/links", response_model=ListingResponse[Link])
def links_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    cluster: List[str] = Query([]),
):
    """List links, optionally restricted to the given clusters."""
    # Blank or whitespace-only cluster ids are dropped before querying.
    clusters = [cid for cid in cluster if cid is not None and cid.strip()]
    return list_links(conn, listing, clusters)
41 |
42 |
@router.post("/links", response_model=Link)
def links_save(
    link: LinkBase,
    conn: Conn = Depends(get_conn),
):
    """Create a link of the given type between a source and a target."""
    # Intended workflow:
    # * make a link (any type)
    # * see all sentences that mention both tags/identities
    # * pick a relationship type
    return create_link(conn, link.source, link.target, link.type)
53 |
54 |
@router.get("/links/_predict", response_model=LinkPrediction)
def link_predict_(
    conn: Conn = Depends(get_conn),
    anchor: str = Query(),
    other: str = Query(),
):
    """Return the predicted link between the ``anchor`` and ``other`` clusters."""
    prediction = link_predict(conn, anchor, other)
    return prediction
62 |
63 |
@router.post("/links/_merge", response_model=ClusterDetails)
def merge_cluster_save(
    data: MergeRequest,
    conn: Conn = Depends(get_conn),
):
    """Merge the listed clusters into the anchor and return the resulting cluster."""
    merged_id = merge_cluster(conn, data.anchor, data.other)
    return fetch_cluster(conn, merged_id)
71 |
72 |
@router.post("/links/_explode", response_model=ClusterDetails)
def explode_cluster_save(
    data: ExplodeRequest,
    conn: Conn = Depends(get_conn),
):
    """Explode the given cluster and return the cluster reported by the operation."""
    remaining_id = explode_cluster(conn, data.cluster)
    return fetch_cluster(conn, remaining_id)
80 |
81 |
@router.post("/links/_untag", response_model=ClusterDetails)
def untag_article_save(
    data: UntagRequest,
    conn: Conn = Depends(get_conn),
):
    """Untag an article from a cluster and return the cluster reported back."""
    # NOTE(review): this module imports `untag_article` from both
    # storyweb.logic.clusters and storyweb.logic.links; the later import
    # (storyweb.logic.links) is the one bound here. Confirm that is intended.
    cluster = untag_article(conn, data.cluster, data.article)
    return fetch_cluster(conn, cluster)
89 |
--------------------------------------------------------------------------------
/storyweb/routes/stories.py:
--------------------------------------------------------------------------------
1 | from normality import slugify
2 | from typing import List, Optional
3 | from fastapi import APIRouter, Depends, Path, Query
4 | from fastapi.responses import PlainTextResponse
5 | from fastapi.exceptions import HTTPException
6 |
7 | from storyweb.db import Conn
8 | from storyweb.logic.stories import (
9 | list_stories,
10 | fetch_story,
11 | create_story,
12 | update_story,
13 | delete_story,
14 | toggle_story_article,
15 | )
16 | from storyweb.logic.clusters import list_story_pairs
17 | from storyweb.logic.graph import generate_graph_gexf, generate_graph_ftm
18 | from storyweb.logic.links import story_merge
19 | from storyweb.parse import import_article_by_url
20 | from storyweb.routes.util import get_conn, get_listing
21 | from storyweb.models import (
22 | StoryMutation,
23 | StoryArticleToggle,
24 | StoryArticleImportUrl,
25 | Story,
26 | ClusterPair,
27 | Listing,
28 | ListingResponse,
29 | )
30 |
31 | router = APIRouter()
32 |
33 |
@router.get("/stories", response_model=ListingResponse[Story])
def story_index(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    q: Optional[str] = Query(None),
    article: Optional[str] = Query(None),
):
    """List stories, optionally filtered by title query or contained article."""
    stories = list_stories(conn, listing, query=q, article=article)
    return stories
42 |
43 |
@router.post("/stories", response_model=Story)
def story_create(story: StoryMutation, conn: Conn = Depends(get_conn)):
    """Create a story from the submitted title and summary."""
    created = create_story(conn, story)
    return created
47 |
48 |
@router.get("/stories/{story_id}", response_model=Story)
def story_view(
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Return one story, or respond 404 if it does not exist."""
    story = fetch_story(conn, story_id)
    if story is not None:
        return story
    raise HTTPException(404)
58 |
59 |
@router.post("/stories/{story_id}/articles", response_model=Story)
def story_article_toggle(
    data: StoryArticleToggle,
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Toggle an article's membership in a story; 404 if the story is unknown."""
    existing = fetch_story(conn, story_id)
    if existing is None:
        raise HTTPException(404)
    toggle_story_article(conn, story_id, data.article)
    # The story record fetched before the toggle is what gets returned.
    return existing
71 |
72 |
@router.post("/stories/{story_id}/articles/import-url", response_model=Story)
def story_article_import_url(
    data: StoryArticleImportUrl,
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Import an article from a URL and attach it to a story.

    Responds 404 for an unknown story and 400 when the import yields no article.
    """
    existing = fetch_story(conn, story_id)
    if existing is None:
        raise HTTPException(404)
    imported_id = import_article_by_url(conn, data.url)
    if imported_id is None:
        raise HTTPException(400)
    story_merge(conn, story_id, imported_id)
    # delete_existing=False: an article already in the story stays attached.
    toggle_story_article(conn, story_id, imported_id, delete_existing=False)
    return existing
88 |
89 |
@router.get("/stories/{story_id}/pairs", response_model=ListingResponse[ClusterPair])
def story_pairs(
    conn: Conn = Depends(get_conn),
    listing: Listing = Depends(get_listing),
    story_id: int = Path(),
    linked: Optional[bool] = Query(None),
    types: List[str] = Query([]),
):
    """List cluster pairs for a story; 404 if the story is unknown."""
    if fetch_story(conn, story_id) is None:
        raise HTTPException(404)
    pairs = list_story_pairs(conn, listing, story_id, linked=linked, types=types)
    return pairs
102 |
103 |
@router.get("/stories/{story_id}/gexf", response_class=PlainTextResponse)
def story_gexf(
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Download the graph of a story as a GEXF attachment; 404 if unknown."""
    story = fetch_story(conn, story_id)
    if story is None:
        raise HTTPException(404)
    # slugify() can return None (e.g. for a title with nothing sluggable);
    # fall back to an id-based name instead of serving "None.gexf".
    filename = slugify(story.title, sep="_") or f"story_{story_id}"
    text = generate_graph_gexf(conn, story_id=story_id)
    return PlainTextResponse(
        content=text,
        media_type="text/xml",
        headers={"Content-Disposition": f"attachment; filename={filename}.gexf"},
    )
119 |
120 |
@router.get("/stories/{story_id}/ftm", response_class=PlainTextResponse)
def story_ftm(
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Download the graph of a story as an FtM JSON attachment; 404 if unknown."""
    story = fetch_story(conn, story_id)
    if story is None:
        raise HTTPException(404)
    # slugify() can return None (e.g. for a title with nothing sluggable);
    # fall back to an id-based name instead of serving "None.ftm.json".
    filename = slugify(story.title, sep="_") or f"story_{story_id}"
    text = generate_graph_ftm(conn, story_id=story_id)
    return PlainTextResponse(
        content=text,
        media_type="application/json+ftm",
        headers={"Content-Disposition": f"attachment; filename={filename}.ftm.json"},
    )
136 |
137 |
@router.post("/stories/{story_id}", response_model=Story)
def story_update(
    data: StoryMutation,
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Apply a ``StoryMutation`` to an existing story and return the result.

    Raises:
        HTTPException: 404 when no story with ``story_id`` can be fetched.
    """
    # Refuse to update a story that does not exist.
    if fetch_story(conn, story_id) is None:
        raise HTTPException(404)
    updated = update_story(conn, data, story_id)
    return updated
146 |
147 |
@router.delete("/stories/{story_id}")
def story_delete(
    conn: Conn = Depends(get_conn),
    story_id: int = Path(),
):
    """Delete the story identified by ``story_id``.

    Returns ``None`` on success.

    Raises:
        HTTPException: 404 when no story with ``story_id`` can be fetched.
    """
    existing = fetch_story(conn, story_id)
    if existing is None:
        raise HTTPException(404)
    delete_story(conn, story_id)
    return None
158 |
--------------------------------------------------------------------------------
/storyweb/routes/system.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends
2 | from fastapi.responses import PlainTextResponse
3 |
4 | from storyweb.db import Conn
5 | from storyweb.ontology import OntologyModel, ontology
6 | from storyweb.logic.graph import generate_graph_gexf, generate_graph_ftm
7 | from storyweb.routes.util import get_conn
8 |
9 | router = APIRouter()
10 |
11 |
@router.get("/ontology", response_model=OntologyModel)
def ontology_model() -> OntologyModel:
    """Serve the ``model`` attribute of the imported ``ontology`` object."""
    model = ontology.model
    return model
15 |
16 |
@router.get("/gexf", response_class=PlainTextResponse)
def all_gexf(
    conn: Conn = Depends(get_conn),
):
    """Download the graph built by ``generate_graph_gexf`` as a GEXF attachment.

    No story filter is passed to the generator; the file is always offered as
    ``storyweb.gexf``.
    """
    text = generate_graph_gexf(conn)
    return PlainTextResponse(
        content=text,
        media_type="text/xml",
        # Plain literal: the header has no interpolated parts.
        headers={"Content-Disposition": "attachment; filename=storyweb.gexf"},
    )
27 |
28 |
@router.get("/ftm", response_class=PlainTextResponse)
def all_ftm(conn: Conn = Depends(get_conn)):
    """Download the graph built by ``generate_graph_ftm`` as an FtM JSON attachment.

    No story filter is passed to the generator; the file is always offered as
    ``storyweb.ftm.json``.
    """
    text = generate_graph_ftm(conn)
    return PlainTextResponse(
        content=text,
        media_type="application/json+ftm",
        # Plain literal: the header has no interpolated parts.
        headers={"Content-Disposition": "attachment; filename=storyweb.ftm.json"},
    )
37 |
--------------------------------------------------------------------------------
/storyweb/routes/util.py:
--------------------------------------------------------------------------------
1 | from typing import Generator, Optional
2 | from fastapi import Query
3 |
4 | from storyweb.db import engine, Conn
5 | from storyweb.models import Listing
6 |
7 |
def get_conn() -> Generator[Conn, None, None]:
    """Yield a connection wrapped in an ``engine.begin()`` block.

    Intended as a request dependency: the ``with`` block stays open while the
    caller uses the yielded connection and is exited once the generator is
    resumed or closed.
    """
    transaction = engine.begin()
    with transaction as connection:
        yield connection
12 |
13 |
def get_listing(
    limit: int = Query(50, description="Number of objects to return", ge=0, le=5000),
    offset: int = Query(0, description="Skip the first N objects in response", ge=0),
    sort: Optional[str] = Query(
        None, description="Sort criterion, format: field:direction"
    ),
) -> Listing:
    """Build a ``Listing`` from the paging and sorting query parameters.

    ``sort`` has the form ``field:direction``. The part after the last colon is
    taken as the direction; anything other than ``asc`` (case-insensitive,
    surrounding whitespace ignored) means ``desc``. Without a colon the whole
    value is used as the sort field and the direction is ``desc``.

    ``limit`` and ``offset`` are validated as non-negative so that invalid
    paging values are rejected at the API boundary.
    """
    sort_field = sort
    direction = "desc"
    if sort is not None and ":" in sort:
        # Split on the last colon so a field name may itself contain colons.
        sort_field, requested = sort.rsplit(":", 1)
        if requested.lower().strip() == "asc":
            direction = "asc"
    return Listing(
        limit=limit,
        offset=offset,
        sort_direction=direction,
        sort_field=sort_field,
    )
32 |
--------------------------------------------------------------------------------
/storyweb/server.py:
--------------------------------------------------------------------------------
import logging
from storyweb.app import app

# Configure the root logger at INFO level when this module is imported.
# NOTE(review): `app` is imported but unused here; presumably it is re-exported
# so a server can be pointed at `storyweb.server:app` — confirm.
logging.basicConfig(level=logging.INFO)
5 |
--------------------------------------------------------------------------------
/storyweb/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 |
# Database URL taken from the environment. The module refuses to import
# without it so misconfiguration is reported at start-up.
DB_URL = os.environ.get("STORYWEB_DB_URL")
# An empty value (e.g. `STORYWEB_DB_URL=` in an env file) is as unusable as a
# missing one, so treat both as "not configured".
if not DB_URL:
    raise RuntimeError("No $STORYWEB_DB_URL is configured!")
6 |
--------------------------------------------------------------------------------