├── .dockerignore ├── .editorconfig ├── .github ├── dependabot.yml └── workflows │ └── docker.yml ├── .gitignore ├── DESIGN.md ├── Dockerfile ├── LICENSE ├── Makefile ├── QUERIES.md ├── README.md ├── RESEARCH.md ├── contrib ├── link_classification_experiments.ipynb ├── link_classification_experiments_2.ipynb ├── occrp-experiment │ ├── DH_2022_NLP_Workshop.ipynb │ └── crawl.py └── tagged_sentences_20230203.csv ├── docker-compose.yml ├── docs ├── CNAME ├── index.html └── scribble.png ├── frontend ├── .gitignore ├── package-lock.json ├── package.json ├── public │ ├── favicon.ico │ └── index.html ├── src │ ├── App.tsx │ ├── components │ │ ├── ArticleClusters.tsx │ │ ├── ArticleCorefList.tsx │ │ ├── ArticleDrawer.tsx │ │ ├── ArticlePreview.tsx │ │ ├── ArticleStoryEditor.tsx │ │ ├── ArticleText.tsx │ │ ├── ClusterArticles.tsx │ │ ├── ClusterButtonGroup.tsx │ │ ├── ClusterDrawer.tsx │ │ ├── Footer.tsx │ │ ├── Navbar.tsx │ │ ├── Pagination.tsx │ │ ├── PairLink.tsx │ │ ├── RelatedListing.tsx │ │ ├── ScreenContent.tsx │ │ ├── ScreenHeading.tsx │ │ ├── SettingsDialog.tsx │ │ ├── SimilarListing.tsx │ │ ├── StoryArticleImportDialog.tsx │ │ ├── StoryArticles.tsx │ │ ├── StoryCreateDialog.tsx │ │ ├── StoryDeleteDialog.tsx │ │ ├── StoryGraph.tsx │ │ ├── StoryLinkerBanner.tsx │ │ ├── StoryNomNom.tsx │ │ ├── StoryPairs.tsx │ │ ├── StoryUpdateDialog.tsx │ │ └── util.tsx │ ├── constants.ts │ ├── hooks.ts │ ├── index.tsx │ ├── logic.ts │ ├── react-app-env.d.ts │ ├── router.tsx │ ├── screens │ │ ├── ArticleIndex.tsx │ │ ├── ClusterIndex.tsx │ │ ├── ClusterView.tsx │ │ ├── Home.tsx │ │ ├── Layout.tsx │ │ ├── Linker.tsx │ │ ├── LinkerRelated.tsx │ │ ├── StoryIndex.tsx │ │ ├── StoryLinker.tsx │ │ └── StoryView.tsx │ ├── selectors.ts │ ├── services │ │ ├── articles.ts │ │ ├── clusters.ts │ │ ├── config.ts │ │ ├── links.ts │ │ ├── ontology.ts │ │ ├── sites.ts │ │ └── stories.ts │ ├── store.ts │ ├── styles │ │ ├── App.scss │ │ ├── Article.module.scss │ │ ├── Cluster.module.scss │ │ ├── Footer.module.scss │ │ ├── Layout.module.scss │ │ ├── Linker.module.scss │ │ ├── Navbar.module.scss │ │ ├── Story.module.scss │ │ ├── index.scss │ │ ├── util.module.scss │ │ └── variables.scss │ ├── types.ts │ └── util.ts └── tsconfig.json ├── setup.py ├── sources.json ├── stories.md └── storyweb ├── __init__.py ├── app.py ├── clean.py ├── cli.py ├── db.py ├── logic ├── __init__.py ├── articles.py ├── clusters.py ├── graph.py ├── links.py ├── predict.py ├── stories.py └── util.py ├── models.py ├── ontology.py ├── ontology.yml ├── parse ├── __init__.py ├── extract.py ├── language.py ├── lid.176.ftz └── pipeline.py ├── routes ├── __init__.py ├── articles.py ├── clusters.py ├── links.py ├── stories.py ├── system.py └── util.py ├── server.py └── settings.py /.dockerignore: -------------------------------------------------------------------------------- 1 | frontend/node_modules 2 | .mypy_cache 3 | __pycache__ 4 | storyweb.egg-info 5 | .envrc -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | 2 | [*.{ts,tsx,js,jsx}] 3 | indent_style = space 4 | indent_size = 2 5 | charset = utf-8 -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: [] 3 | # - package-ecosystem: pip 4 | # open-pull-requests-limit: 99 5 | # directory: "/" 6 | # schedule: 7 | # interval: 
weekly 8 | # - package-ecosystem: npm 9 | # open-pull-requests-limit: 99 10 | # directory: "/frontend" 11 | # schedule: 12 | # interval: weekly 13 | # - package-ecosystem: docker 14 | # open-pull-requests-limit: 99 15 | # directory: "/" 16 | # schedule: 17 | # interval: weekly 18 | # - package-ecosystem: "github-actions" 19 | # open-pull-requests-limit: 99 20 | # directory: "/" 21 | # schedule: 22 | # interval: weekly 23 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: docker 2 | 3 | on: [push] 4 | 5 | permissions: 6 | packages: write 7 | 8 | jobs: 9 | docker-build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | # - name: Set up QEMU 14 | # uses: docker/setup-qemu-action@v2 15 | - name: Docker meta 16 | id: meta 17 | uses: docker/metadata-action@v4 18 | with: 19 | images: ghcr.io/opensanctions/storyweb 20 | tags: | 21 | type=ref,event=branch 22 | type=semver,pattern={{version}} 23 | type=sha 24 | - name: Set up Docker Buildx 25 | uses: docker/setup-buildx-action@v2 26 | with: 27 | install: true 28 | - name: Debug information 29 | run: | 30 | docker --version 31 | docker-compose --version 32 | echo "${GITHUB_REF}" 33 | - name: Login to GitHub Container Registry 34 | uses: docker/login-action@v2 35 | with: 36 | registry: ghcr.io 37 | username: ${{ github.actor }} 38 | password: ${{ secrets.GITHUB_TOKEN }} 39 | - name: Build and push release 40 | uses: docker/build-push-action@v4 41 | with: 42 | context: . 43 | # platforms: linux/amd64,linux/arm64 44 | push: true 45 | tags: ${{ steps.meta.outputs.tags }} 46 | labels: ${{ steps.meta.outputs.labels }} 47 | cache-from: type=gha 48 | cache-to: type=gha,mode=max 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.code-workspace 6 | *.gexf 7 | .DS_Store 8 | .envrc 9 | data/ 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /DESIGN.md: -------------------------------------------------------------------------------- 1 | 2 | # Workflow ideas for StoryWeb 3 | 4 | We want to go from a corpus of media reports to a knowledge graph for a specific set of journalistic stories (called it a `Scandal` for now, I guess `StoryLine` also works). 5 | 6 | 7 | ## Step 1: Crawling the reporting 8 | 9 | * Contact a bunch of GIJN member orgs to see if I may. Maybe offer a formalised quid pro quo deal ("I can parse your stories, you get story graph data") 10 | * Build a news crawler in async Python, store everything to a SQL database that allows for incremental crawls. 11 | * Output articles with metadata (https://schema.org/Article) as a JSONL file. 12 | * Requires identifying which pages contain articles 13 | * Requires extracting article metadata and body (e.g. via `newspaper`, `trafilatura`) 14 | 15 | 16 | ## Step 2: Extract named entities 17 | 18 | * Run a competition between spaCy and SparkNLP, decide if we always want to run both of if it's a per-language decision. 
19 | * Find a disk format for annotated articles, probably going to need: 20 | * Every extracted entity and their tag type, span 21 | * Every sentence and their spans 22 | 23 | 24 | ## Step 3: Build a co-occurrence matrix 25 | 26 | Get everything into a massive SQL table a la: 27 | 28 | * `article_url`, `sentence_no`, `tag_type`, `tag_label`, `tag_normalised` 29 | 30 | e.g.: 31 | 32 | * `https://rise.md/...`,`6`,`PER`,`Vladimir Plahotniuc`,`vladimir-plahotniuc` 33 | * `https://rise.md/...`,`16`,`PER`,`Vlad Plahotniuc`,`vlad-plahotniuc` 34 | * `https://rise.md/...`,`4`,`LOC`,`Moldova`,`md` 35 | * `https://rise.md/...`,`4`,`ORG`,`Democratic Party`,`democratic-party` 36 | * `https://istories.ru/...`,`1`,`PER`,`Владимир Плахотнюк`,`vladimir-plahotnuk` 37 | * `https://istories.ru/...`,`5`,`PER`,`Плахотнюк`,`plahotnuk` 38 | * `https://istories.ru/...`,`17`,`PER`,`Владимир Плахотнюк`,`vladimir-plahotnuk` 39 | 40 | 41 | ## Step 4: Build an entity loom 42 | 43 | The core of an interactive graph entity identity building tool could be an interactive loop like this: 44 | 45 | * Pick a particularly namey-looking tag that occurs a lot. 46 | * Show it to a user and prompt them to decide: 47 | 1. This is a new entity's name, make a new ID (shortuuid) 48 | 2. This is another surface form of an existing entity, show top 5 search results (ask if it's a strong or weak alias) 49 | * Focus the user process on the (new) entity 50 | * Show co-occurring other tags, including place and date tags 51 | * Maybe: show any sentence in which both the tag and an alias of the entity occur 52 | * For each tag, prompt the user to say if it's a strong/weak alias, context or related entity or unrelated tag 53 | * Allow the user to finish working on this entity and start with a new one 54 | * Start over. 55 | 56 | Resulting table: 57 | 58 | `entity_id`, `tag_type`, `tag_label`, `tag_normalised`, `role` 59 | 60 | where `role` is one of: 61 | 62 | * `alias` (e.g. `Vladimir Plahotniuc`) 63 | * `weak_alias` (e.g. `Plahotniuc's`, `the Oligarch`) 64 | * `context` (e.g. `Moldova` for Plahotniuc) 65 | * `related` (e.g. `Democratic Party` for Plahotniuc) 66 | * `unrelated` (e.g. `European Union` for Plahotniuc) 67 | 68 | This process can probably later be partially automated, eg. if one of the related labels already is part of an existing entity, or by doing string similarity on the aliases. 69 | 70 | ### How to disambiguate? 71 | 72 | This doesn't yet allow us to say that there are two separate `Markus Braun` - one maybe an actor mentioned in a gossip piece, the other the CEO of Wirecard. We basically need a way to fork an entity and say: this alias, in this article - make it part of another entity! 73 | 74 | ### Clustering 75 | 76 | What I'm describing here is really a clustering process. Need to do some research on what scikit-learn-level machine learning for clustering looks like and how well it might apply. 77 | 78 | ## Step 5: Build a relationship loom 79 | 80 | Similar process as above: take two entities from Step 4 that co-occur in multiple articles, show the user any sentences that mention both and then propose to them to classify their relationship (or do it based on a keyword list, and merely double-check directionality). 81 | 82 | Categories (tbd): 83 | 84 | * Family 85 | * Personal associate 86 | * Business associate 87 | * Nemesis, opponent, adversary, antagonist (word?) 88 | * Owner 89 | * In control of (Director, etc.) 
90 | * Participant 91 | * Member/Employee 92 | * Payment, debt, business relationship 93 | 94 | 95 | ### Can we model events? 96 | 97 | Media reporting is all about events, do we want to reify them? How can we label events, maybe by deriving key words from the headline? 98 | 99 | 100 | ## Step 6: Reconcile entities 101 | 102 | This can maybe already happen in `nomenklatura`: 103 | 104 | * https://github.com/opensanctions/nomenklatura/blob/master/README.md 105 | 106 | 107 | ## Step 7: Visualise, profit! 108 | 109 | * What can we compute on the output using NetworkX? 110 | * https://sayari-analytics.github.io/trellis/ 111 | 112 | 113 | ## Credits 114 | 115 | * Thanks to [Heleen](https://twitter.com/heleenemanuel) and [Johan](https://johanschuijt.nl/) :) -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:19 AS frontend 2 | 3 | RUN mkdir -p /fe 4 | WORKDIR /fe 5 | COPY frontend /fe 6 | RUN npm install 7 | RUN npm run build 8 | 9 | FROM ubuntu:23.04 10 | ENV DEBIAN_FRONTEND noninteractive 11 | 12 | LABEL org.opencontainers.image.title "StoryWeb" 13 | LABEL org.opencontainers.image.licenses MIT 14 | LABEL org.opencontainers.image.source https://github.com/opensanctions/storyweb 15 | 16 | RUN apt-get -qq -y update \ 17 | && apt-get -qq -y upgrade \ 18 | && apt-get -qq -y install locales ca-certificates tzdata curl python3-pip \ 19 | python3-icu python3-cryptography libicu-dev pkg-config postgresql-client-common \ 20 | postgresql-client libpq-dev \ 21 | && apt-get -qq -y autoremove \ 22 | && apt-get clean \ 23 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 24 | 25 | RUN localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \ 26 | && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \ 27 | && dpkg-reconfigure -f noninteractive tzdata 28 | 29 | ENV LANG='en_US.UTF-8' \ 30 | TZ="UTC" \ 31 | API_URL="/api/1" 32 | 33 | RUN pip install -U pip setuptools wheel 34 | RUN pip install spacy 35 | RUN python3 -m spacy download en_core_web_sm 36 | RUN python3 -m spacy download de_core_news_sm 37 | RUN python3 -m spacy download xx_ent_wiki_sm 38 | RUN python3 -m spacy download ru_core_news_sm 39 | 40 | RUN mkdir -p /storyweb 41 | WORKDIR /storyweb 42 | COPY . /storyweb 43 | RUN pip install --no-cache-dir -e /storyweb 44 | COPY --from=frontend /fe/build /storyweb/frontend/build 45 | 46 | CMD ["uvicorn", "--host", "0.0.0.0", "storyweb.server:app"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022-2023, Friedrich Lindenberg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | data: data/articles.ijson 3 | 4 | clean: 5 | rm -rf data/articles 6 | 7 | data/articles: 8 | mkdir -p data/articles 9 | 10 | data/articles/%.ijson: data/articles 11 | curl -o data/articles/$*.ijson -s https://data.opensanctions.org/contrib/mediacrawl/$*.ijson 12 | 13 | fetch: data/articles/occrp.ijson \ 14 | data/articles/icij.ijson \ 15 | data/articles/dossier_at.ijson \ 16 | data/articles/daphne_foundation.ijson \ 17 | data/articles/istories_media.ijson \ 18 | data/articles/amabhungane.ijson 19 | 20 | data/articles.ijson: fetch 21 | cat data/articles/* >data/articles.ijson 22 | 23 | load: data/articles.ijson 24 | storyweb import data/articles.ijson 25 | 26 | serve: 27 | uvicorn --reload storyweb.server:app 28 | 29 | reset: 30 | dropdb storyweb 31 | createdb -E utf-8 storyweb 32 | storyweb init -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 
 5 | # StoryWeb 
 6 | 
 7 | StoryWeb is a project that aims to extract networks of entities from journalistic reporting. The idea is to reverse-engineer stories into structured graphs of the persons and companies involved, and to capture the relationships between them. 
 8 | 
 9 | https://storyweb.opensanctions.org 
 10 | 
 11 | StoryWeb consumes news articles as input data. Individual articles can be imported via the web interface, and articles can also be imported in bulk using the [`articledata`](https://github.com/pudo/articledata) micro-format. One producer of `articledata` files is [`mediacrawl`](https://github.com/opensanctions/mediacrawl), which can be used to crawl news websites and harvest all of their articles. 
 12 | 
 13 |
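As a rough sketch of the bulk path, the `fetch` and `load` targets in the project `Makefile` download pre-crawled `articledata` dumps and feed them to the `storyweb import` command. The example below mirrors those targets and assumes a fully installed instance (see the installation section further down); `occrp.ijson` is one of the dumps referenced in the `Makefile`:

```bash
# Download one of the pre-crawled articledata dumps referenced in the Makefile
curl -o occrp.ijson -s https://data.opensanctions.org/contrib/mediacrawl/occrp.ijson

# Import all articles from the dump into the StoryWeb database
storyweb import occrp.ijson
```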

14 | 15 |

16 | 
 17 | ## Installation 
 18 | 
 19 | Storyweb can be run as a Python web application from a developer's machine, or via a Docker container. We recommend using Docker for any production deployment and as a quick means to get the application running if you don't intend to change its code. 
 20 | 
 21 | ### Running in Docker mode 
 22 | 
 23 | You can start up a Docker instance by running the following commands in an empty directory: 
 24 | 
 25 | ```bash 
 26 | wget https://raw.githubusercontent.com/opensanctions/storyweb/main/docker-compose.yml 
 27 | docker-compose up 
 28 | ``` 
 29 | 
 30 | This will make the storyweb user interface available on port 8000 of the host machine. 
 31 | 
 32 | ### Running in development mode 
 33 | 
 34 | Before installing storyweb on the host machine, we recommend setting up a Python virtual environment of some form (venv, virtualenv, etc.). 
 35 | 
 36 | As a first step, let's install the `spaCy` models that are used to extract person and company names from the given articles: 
 37 | 
 38 | ```bash 
 39 | pip install spacy 
 40 | python3 -m spacy download en_core_web_sm 
 41 | python3 -m spacy download de_core_news_sm 
 42 | python3 -m spacy download xx_ent_wiki_sm 
 43 | python3 -m spacy download ru_core_news_sm 
 44 | ``` 
 45 | 
 46 | Next, we'll install the application itself and its dependencies. Run the following command inside a git checkout of the storyweb repository: 
 47 | 
 48 | ```bash 
 49 | pip install -e ".[dev]" 
 50 | ``` 
 51 | 
 52 | You also need to have a PostgreSQL server running somewhere (e.g. on the same machine, perhaps installed via homebrew or apt). Create a fresh database on that server and point storyweb to it like this: 
 53 | 
 54 | ```bash 
 55 | export STORYWEB_DB_URL=postgresql://storyweb:storyweb@db/storyweb 
 56 | # Create the database tables: 
 57 | storyweb init 
 58 | ``` 
 59 | 
 60 | You now have the application configured, and you can explore the commands exposed by the `storyweb` command-line tool: 
 61 | 
 62 | ``` 
 63 | Usage: storyweb [OPTIONS] COMMAND [ARGS]... 
 64 | 
 65 | Storyweb CLI 
 66 | 
 67 | Options: 
 68 | --help Show this message and exit. 
 69 | 
 70 | Commands: 
 71 | auto-merge Automatically merge on fingerprints 
 72 | compute Run backend computations 
 73 | graph Export an entity graph 
 74 | import Import articles into the DB 
 75 | import-url Load a single news story by URL 
 76 | init Initialize the database 
 77 | ``` 
 78 | 
 79 | The `import` command listed here will accept any data file in the `articledata` format, which is emitted by the `mediacrawl` tool. 
 80 | 
 81 | #### Running the backend API 
 82 | 
 83 | Finally, you can run the backend API using `uvicorn`: 
 84 | 
 85 | ```bash 
 86 | uvicorn --reload --host 0.0.0.0 storyweb.server:app 
 87 | ``` 
 88 | 
 89 | This will boot up the API server on port 8000 of the local host and enable hot reloads whenever the code changes during development. 
 90 | 
 91 | #### Installing and running the frontend 
 92 | 
 93 | Once you have the API running, you can install and run the development server for the frontend. Storyweb uses React and Redux Toolkit internally and uses a Webpack dev server to dynamically re-build the frontend during development. 
 94 | 
 95 | ```bash 
 96 | cd frontend/ 
 97 | npm install 
 98 | npm run dev 
 99 | ``` 
 100 | 
 101 | Remember that you need to run `npm run dev` whenever you do frontend development. 
 102 | 
 103 | ## License and credits 
 104 | 
 105 | Thanks to [Heleen Emanuel](https://twitter.com/heleenemanuel) and [Tobias Sterbak](https://tobiassterbak.com/) for their advice on the design and implementation of StoryWeb.
106 | 107 | This project receives financial support from the German Federal Ministry for Education and Research (Bundesministerium für Bildung und Forschung, BMBF) under the grant identifier `01IS22S42`. The full responsibility for the content of this publication remains with its authors. 108 | 109 | The software is licensed under the MIT license, see `LICENSE` in this repository. -------------------------------------------------------------------------------- /RESEARCH.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Bad Will Hunting 4 | 5 | * https://blogs.lse.ac.uk/polis/2022/09/15/bad-will-hunting-the-story-so-far/ 6 | 7 | * SparkNLP: https://nlp.johnsnowlabs.com/ 8 | * Rosette Text Analytics: https://www.rosette.com/capability/entity-extractor/ 9 | 10 | # Selecting articles from a corpus 11 | 12 | * https://github.com/asreview/asreview 13 | 14 | Tips from Tobias: 15 | 16 | * https://maartengr.github.io/BERTopic/index.html#quick-start 17 | * Dimensionality reduction: https://umap-learn.readthedocs.io/en/latest/basic_usage.html 18 | 19 | * One-class classification: https://en.wikipedia.org/wiki/One-class_classification 20 | * Get confidence scores for classification? 21 | * Or: fuzzying out names for other names and see if it can disambiguate 22 | 23 | 24 | 25 | * https://github.com/microsoft/spacy-ann-linker -------------------------------------------------------------------------------- /contrib/occrp-experiment/crawl.py: -------------------------------------------------------------------------------- 1 | import json 2 | import asyncio 3 | import trafilatura 4 | from datetime import datetime 5 | from typing import Optional 6 | import aiohttp 7 | from lxml import html 8 | from urllib.parse import urlparse, urljoin 9 | from sqlmodel import Field, Session, SQLModel, create_engine, select 10 | 11 | 12 | queue = asyncio.Queue() 13 | seen = set() 14 | engine = create_engine("sqlite:///crawl.sqlite3") 15 | 16 | 17 | class Page(SQLModel, table=True): 18 | url: Optional[str] = Field(primary_key=True) 19 | text: Optional[str] 20 | is_article: bool 21 | crawled_at: datetime 22 | 23 | 24 | class Article(SQLModel, table=True): 25 | url: Optional[str] = Field(primary_key=True) 26 | title: Optional[str] 27 | author: Optional[str] 28 | date: Optional[str] 29 | text: Optional[str] 30 | 31 | 32 | async def clean_url(url): 33 | parsed = urlparse(url) 34 | if parsed.scheme != "https": 35 | return 36 | if parsed.hostname not in ["occrp.org", "www.occrp.org"]: 37 | return None 38 | if parsed.path.startswith("/ru/"): 39 | return None 40 | parsed = parsed._replace(query=None) 41 | parsed = parsed._replace(fragment=None) 42 | url = parsed.geturl() 43 | return url 44 | 45 | 46 | async def crawl_url(url): 47 | url = await clean_url(url) 48 | if url is None: 49 | return 50 | if url in seen: 51 | return 52 | seen.add(url) 53 | await queue.put(url) 54 | 55 | 56 | async def get_page(db: Session, session: aiohttp.ClientSession, url: str): 57 | statement = select(Page).where(Page.url == url) 58 | page = db.exec(statement).first() 59 | if page is not None: 60 | return page 61 | 62 | async with session.get(url) as response: 63 | content_type = response.headers.get("Content-Type") 64 | text = None 65 | if response.status == 200: 66 | if content_type is None or "html" in content_type.lower(): 67 | # print("CONTENT_TYPE", content_type) 68 | data = await response.read() 69 | # print("FETCHED", url, response.headers.get("Content-Type")) 70 | try: 71 | text = 
data.decode("utf-8") 72 | except UnicodeDecodeError as exc: 73 | # text = None 74 | pass 75 | page = Page( 76 | url=url, 77 | text=text, 78 | is_article=False, 79 | crawled_at=datetime.utcnow(), 80 | ) 81 | db.add(page) 82 | db.commit() 83 | return page 84 | 85 | 86 | def is_article(doc): 87 | if doc.find('.//article//li[@class="authors"]') is not None: 88 | return True 89 | if doc.find('.//aside[@class="byline"]') is not None: 90 | return True 91 | if doc.find('.//section[@class="blog"]') is not None: 92 | return True 93 | if doc.find('.//div[@class="occrp-story"]') is not None: 94 | return True 95 | return False 96 | 97 | 98 | async def extract_article(db: Session, page: Page, doc): 99 | extract = trafilatura.bare_extraction(doc) 100 | statement = select(Article).where(Article.url == page.url) 101 | article = db.exec(statement).first() 102 | if article is None: 103 | article = Article(url=page.url) 104 | title = extract.get("title") 105 | if title is not None: 106 | title = title.replace(" - OCCRP", "") 107 | article.title = title.strip() 108 | article.date = extract.get("date") 109 | article.text = extract.get("text") 110 | article.author = extract.get("author") 111 | # print(list(extract.keys())) 112 | print("ARTICLE", page.url, extract.get("title")) 113 | db.add(article) 114 | db.commit() 115 | 116 | 117 | async def worker(session: aiohttp.ClientSession): 118 | while True: 119 | with Session(engine) as db: 120 | url = await queue.get() 121 | try: 122 | page = await get_page(db, session, url) 123 | if page is not None and page.text is not None: 124 | doc = html.fromstring(page.text) 125 | # article = trafilatura.bare_extraction(doc) 126 | # print(article) 127 | for link in doc.findall(".//a"): 128 | next_url = link.get("href") 129 | if next_url is None: 130 | continue 131 | next_url = urljoin(url, next_url) 132 | await crawl_url(next_url) 133 | # print(link) 134 | if is_article(doc): 135 | await extract_article(db, page, doc) 136 | # print("NO ARTICLE", url) 137 | # print(url, doc, queue.qsize()) 138 | 139 | except Exception as exc: 140 | print("EXCEPTION", exc) 141 | queue.task_done() 142 | 143 | 144 | async def crawl(): 145 | SQLModel.metadata.create_all(engine) 146 | headers = {"User-Agent": "pudo from the hood"} 147 | async with aiohttp.ClientSession(headers=headers) as session: 148 | await crawl_url("https://occrp.org") 149 | tasks = [] 150 | for _ in range(10): 151 | task = asyncio.create_task(worker(session)) 152 | tasks.append(task) 153 | 154 | await queue.join() 155 | for task in tasks: 156 | task.cancel() 157 | await asyncio.gather(*tasks, return_exceptions=True) 158 | 159 | 160 | async def export(): 161 | with open("articles.json", "w") as fh: 162 | with Session(engine) as db: 163 | statement = select(Article) 164 | articles = db.exec(statement).all() 165 | data = [a.dict() for a in articles] 166 | json.dump(data, fh) 167 | 168 | 169 | def main(): 170 | asyncio.run(export()) 171 | 172 | 173 | if __name__ == "__main__": 174 | main() 175 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | db: 5 | image: postgres:15 6 | expose: 7 | - "5432" 8 | container_name: db 9 | environment: 10 | POSTGRES_USER: storyweb 11 | POSTGRES_PASSWORD: storyweb 12 | POSTGRES_DB: storyweb 13 | ulimits: 14 | memlock: 15 | soft: -1 16 | hard: -1 17 | volumes: 18 | - db-data:/var/lib/postgresql/data 19 | # - 
"./schema.sql:/docker-entrypoint-initdb.d/storyweb-schema.sql" 20 | deploy: 21 | restart_policy: 22 | condition: on-failure 23 | 24 | app: 25 | build: . 26 | image: ghcr.io/opensanctions/storyweb:main 27 | command: bash -c 'while !0.2%", 52 | "not dead", 53 | "not op_mini all" 54 | ], 55 | "development": [ 56 | "last 1 chrome version", 57 | "last 1 firefox version", 58 | "last 1 safari version" 59 | ] 60 | }, 61 | "devDependencies": { 62 | "@types/react-helmet": "^6.1.6" 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensanctions/storyweb/39cd544769b1c5b783f0919a03a838332add4d0c/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | StoryWeb 10 | 11 | 12 | 13 |
14 | 15 | 16 | -------------------------------------------------------------------------------- /frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { RouterProvider } from 'react-router-dom'; 3 | import { Provider } from 'react-redux' 4 | import { FocusStyleManager } from "@blueprintjs/core" 5 | 6 | import { store } from './store'; 7 | import { router } from './router'; 8 | 9 | import './styles/App.scss'; 10 | 11 | FocusStyleManager.onlyShowFocusOnTabs(); 12 | 13 | function App() { 14 | return ( 15 | 16 | 17 | 18 | ); 19 | } 20 | 21 | export default App; 22 | -------------------------------------------------------------------------------- /frontend/src/components/ArticleClusters.tsx: -------------------------------------------------------------------------------- 1 | import { HTMLTable } from "@blueprintjs/core"; 2 | import { Link } from "react-router-dom"; 3 | import { useFetchClusterListingQuery } from "../services/clusters"; 4 | import { IArticle } from "../types"; 5 | import { getClusterLink } from "../util"; 6 | import { ClusterLabel, ClusterTypeIcon, ErrorSection, SectionLoading } from "./util"; 7 | 8 | type ArticleClustersProps = { 9 | article: IArticle 10 | } 11 | 12 | export default function ArticleClusters({ article }: ArticleClustersProps) { 13 | const query = { article: article.id, limit: 100 }; 14 | const { data: clusters, isLoading, error: clustersError } = useFetchClusterListingQuery(query); 15 | if (clustersError) { 16 | return 17 | } 18 | if (isLoading || clusters === undefined) { 19 | return ; 20 | } 21 | 22 | return ( 23 | 24 | 25 | 26 | 27 | Name 28 | 29 | 30 | 31 | 32 | {clusters.results.map((cluster) => 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | )} 43 | 44 | 45 | ) 46 | }; -------------------------------------------------------------------------------- /frontend/src/components/ArticleCorefList.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import { Menu, MenuItem } from '@blueprintjs/core'; 3 | 4 | import { useFetchArticleListingQuery } from "../services/articles" 5 | import ArticleDrawer from "./ArticleDrawer"; 6 | import { IArticle } from "../types"; 7 | import { ARTICLE_ICON } from "../constants"; 8 | 9 | type ArticleCorefListProps = { 10 | clusters: string[] 11 | tags: string[][] 12 | } 13 | 14 | export default function ArticleCorefList({ clusters, tags }: ArticleCorefListProps) { 15 | const [articleId, setArticleId] = useState() 16 | const articleParams = { cluster: clusters }; 17 | const { data, isLoading } = useFetchArticleListingQuery(articleParams); 18 | if (isLoading || data === undefined) { 19 | return null; 20 | } 21 | 22 | const onClick = (e: React.MouseEvent, article: IArticle) => { 23 | e.preventDefault(); 24 | setArticleId(article.id); 25 | } 26 | 27 | return ( 28 | <> 29 | 30 | {data.results.map((a) => ( 31 | onClick(e, a)} 35 | text={a.title} active={a.id === articleId} /> 36 | )) 37 | } 38 | 39 | setArticleId(undefined)} 41 | articleId={articleId} 42 | tags={tags} 43 | /> 44 | 45 | ) 46 | }; -------------------------------------------------------------------------------- /frontend/src/components/ArticleDrawer.tsx: -------------------------------------------------------------------------------- 1 | import { Drawer, Tab, Tabs } from "@blueprintjs/core" 2 | import { SyntheticEvent, useEffect, useState } from "react" 3 | import { ARTICLE_ICON } from "../constants" 4 | import 
{ useFetchArticleQuery } from "../services/articles" 5 | import { useFetchClusterListingQuery } from "../services/clusters" 6 | import ArticleText from "./ArticleText" 7 | import { ErrorSection, NumericTag, SectionLoading } from "./util" 8 | 9 | import styles from '../styles/Article.module.scss' 10 | import ArticleClusters from "./ArticleClusters" 11 | 12 | type ArticleDrawerInnerProps = { 13 | articleId: string, 14 | tags?: string[][] 15 | isOpen: boolean, 16 | onClose: (event: SyntheticEvent) => void 17 | onClosed: (node: HTMLElement) => void 18 | } 19 | 20 | function ArticleDrawerInner({ articleId, tags, isOpen, onClose, onClosed }: ArticleDrawerInnerProps) { 21 | const { data: article, error: articleError } = useFetchArticleQuery(articleId); 22 | const clustersQuery = { article: articleId, limit: 0 }; 23 | const { data: clusters } = useFetchClusterListingQuery(clustersQuery); 24 | const realTags = tags ? tags : [] 25 | const realIsOpen = isOpen && articleId.trim().length > 1; 26 | 27 | return ( 28 | 39 |
40 | {(article === undefined) && ( 41 | 42 | )} 43 | {(articleError !== undefined) && ( 44 | 45 | )} 46 | {article && ( 47 | 48 | 54 | } 55 | /> 56 | 61 | Extracted entities 62 | 63 | 64 | } 65 | panel={ 66 | 67 | } 68 | /> 69 | 70 | )} 71 | 72 |
73 |
74 | ) 75 | } 76 | 77 | type ArticleDrawerProps = { 78 | articleId?: string, 79 | tags?: string[][] 80 | onClose: (event: SyntheticEvent) => void 81 | } 82 | 83 | export default function ArticleDrawer({ articleId, tags, onClose }: ArticleDrawerProps) { 84 | const isOpen = !!articleId; 85 | const [activeArticleId, setActiveArticleId] = useState(articleId); 86 | 87 | useEffect(() => { 88 | if (!!articleId && articleId !== activeArticleId) { 89 | setActiveArticleId(articleId); 90 | } 91 | }, [articleId, activeArticleId]) 92 | 93 | const onClosed = () => { 94 | setActiveArticleId(undefined); 95 | } 96 | 97 | if (activeArticleId === undefined) { 98 | return null; 99 | } 100 | 101 | return ( 102 | 109 | ); 110 | } -------------------------------------------------------------------------------- /frontend/src/components/ArticlePreview.tsx: -------------------------------------------------------------------------------- 1 | import { AnchorButton } from "@blueprintjs/core"; 2 | import { useFetchArticleQuery } from "../services/articles" 3 | import ArticleText from "./ArticleText"; 4 | 5 | type ArticlePreviewProps = { 6 | articleId: string, 7 | tags: string[][] 8 | } 9 | 10 | export default function ArticlePreview({ articleId, tags }: ArticlePreviewProps) { 11 | const { data, isLoading } = useFetchArticleQuery(articleId); 12 | if (isLoading || data === undefined) { 13 | return null; 14 | } 15 | 16 | return ( 17 |
18 |

19 | 20 | {data.title} 21 |

22 | 23 |
24 | ) 25 | }; -------------------------------------------------------------------------------- /frontend/src/components/ArticleStoryEditor.tsx: -------------------------------------------------------------------------------- 1 | import { Button, Intent, Menu, MenuItem } from "@blueprintjs/core"; 2 | import { Popover2, Classes as Popover2Classes } from "@blueprintjs/popover2"; 3 | import { MouseEvent } from "react"; 4 | import { useFetchStoryListingQuery, useToggleStoryArticleMutation } from "../services/stories"; 5 | import { IArticle, IStory } from "../types"; 6 | 7 | type ArticleStoryEditorContentProps = { 8 | article: IArticle 9 | } 10 | 11 | function ArticleStoryEditorContent({ article }: ArticleStoryEditorContentProps) { 12 | const { data: allListing } = useFetchStoryListingQuery({ limit: 100 }); 13 | const { data: linkedListing } = useFetchStoryListingQuery({ limit: 100, article: article.id }); 14 | const [toggleStoryArticle] = useToggleStoryArticleMutation(); 15 | 16 | 17 | const linkedIds = linkedListing?.results.map((s) => s.id) || []; 18 | const onToggleAssign = async (e: MouseEvent, story: IStory) => { 19 | await toggleStoryArticle({ story: story.id, article: article.id }).unwrap(); 20 | } 21 | if (allListing === undefined) { 22 | return null; 23 | } 24 | 25 | return ( 26 | 27 | {allListing.results.map((story) => 28 | onToggleAssign(e, story)} 32 | intent={linkedIds.indexOf(story.id) === -1 ? Intent.NONE : Intent.SUCCESS} 33 | icon={linkedIds.indexOf(story.id) === -1 ? "small-minus" : "small-tick"} 34 | /> 35 | )} 36 | 37 | ); 38 | } 39 | 40 | 41 | type ArticleStoryManagerProps = { 42 | article: IArticle 43 | inList: boolean 44 | } 45 | 46 | export default function ArticleStoryEditor({ article, inList }: ArticleStoryManagerProps) { 47 | return ( 48 | } 50 | interactionKind="click" 51 | popoverClassName={Popover2Classes.POPOVER2_CONTENT_SIZING} 52 | placement="auto" 53 | > 54 | 30 | 31 | ) 32 | } -------------------------------------------------------------------------------- /frontend/src/components/ClusterDrawer.tsx: -------------------------------------------------------------------------------- 1 | import { Drawer, Tab, Tabs } from "@blueprintjs/core" 2 | import { SyntheticEvent, useEffect, useState } from "react" 3 | import { useFetchArticleListingQuery } from "../services/articles" 4 | import { useFetchClusterQuery, useFetchRelatedClusterListingQuery, useFetchSimilarClusterListingQuery } from "../services/clusters" 5 | import { ErrorSection, NumericTag, SectionLoading } from "./util" 6 | 7 | import styles from '../styles/Cluster.module.scss' 8 | import { useFetchOntologyQuery } from "../services/ontology" 9 | import { useNodeTypes } from "../selectors" 10 | import RelatedListing from "./RelatedListing" 11 | import SimilarListing from "./SimilarListing" 12 | import ClusterArticles from "./ClusterArticles" 13 | 14 | type ClusterDrawerInnerProps = { 15 | clusterId: string, 16 | isOpen: boolean, 17 | onClose: (event: SyntheticEvent) => void 18 | onClosed: (node: HTMLElement) => void 19 | } 20 | 21 | function ClusterDrawerInner({ clusterId, isOpen, onClose }: ClusterDrawerInnerProps) { 22 | const nodeTypes = useNodeTypes(); 23 | const { data: cluster, error: clusterError } = useFetchClusterQuery(clusterId); 24 | const relatedQuery = { clusterId: clusterId || '', params: { types: nodeTypes } }; 25 | const { data: related } = useFetchRelatedClusterListingQuery(relatedQuery) 26 | const similarQuery = { clusterId: clusterId || '', params: {} }; 27 | const { data: similar } = 
useFetchSimilarClusterListingQuery(similarQuery); 28 | const articleQuery = { cluster: clusterId }; 29 | const { data: articles } = useFetchArticleListingQuery(articleQuery); 30 | const { data: ontology } = useFetchOntologyQuery(); 31 | const meta = ontology?.cluster_types.find((t) => t.name === cluster?.type); 32 | const icon = meta?.icon || 'hat'; 33 | const realIsOpen = isOpen && clusterId.trim().length > 1; 34 | 35 | return ( 36 | 47 |
48 | {(cluster === undefined) && ( 49 | 50 | )} 51 | {(clusterError !== undefined) && ( 52 | 53 | )} 54 | {cluster && ( 55 | 56 | 59 | Co-occurring 60 | 61 | 62 | } 63 | panel={ 64 | 65 | } 66 | /> 67 | 70 | Similar 71 | 72 | 73 | } 74 | disabled={similar?.total === 0} 75 | panel={ 76 | 77 | } 78 | /> 79 | 82 | Articles 83 | 84 | 85 | } 86 | panel={ 87 | 88 | } 89 | /> 90 | 91 | )} 92 |
93 |
94 | ) 95 | } 96 | 97 | 98 | 99 | type ClusterDrawerProps = { 100 | clusterId?: string, 101 | onClose: (event: SyntheticEvent) => void 102 | } 103 | 104 | export default function ClusterDrawer({ clusterId, onClose }: ClusterDrawerProps) { 105 | const isOpen = !!clusterId; 106 | const [activeClusterId, setActiveClusterId] = useState(clusterId); 107 | 108 | useEffect(() => { 109 | if (!!clusterId && clusterId !== activeClusterId) { 110 | setActiveClusterId(clusterId); 111 | } 112 | }, [clusterId, activeClusterId]) 113 | 114 | const onClosed = () => { 115 | setActiveClusterId(undefined); 116 | } 117 | 118 | if (activeClusterId === undefined) { 119 | return null; 120 | } 121 | 122 | return ( 123 | 129 | ); 130 | } -------------------------------------------------------------------------------- /frontend/src/components/Footer.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | import styles from '../styles/Footer.module.scss'; 4 | 5 | export default class Footer extends React.Component { 6 | render() { 7 | return ( 8 |
9 |
10 | StoryWeb prototype 11 |
12 |
13 | ) 14 | } 15 | } -------------------------------------------------------------------------------- /frontend/src/components/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import classnames from 'classnames'; 3 | import { Link } from 'react-router-dom'; 4 | import { Navbar, Alignment, Icon, Button } from '@blueprintjs/core'; 5 | import SettingsDialog from './SettingsDialog'; 6 | import { ARTICLE_ICON, CLUSTER_ICON, SITE, STORY_ICON } from '../constants'; 7 | 8 | import styles from "../styles/Navbar.module.scss"; 9 | 10 | export default function NavbarSection() { 11 | const [showSettings, setShowSettings] = useState(false); 12 | return ( 13 | // 'bp4-dark', 14 | 15 |
16 | 17 | 18 | {SITE} 19 | 20 | 21 | 22 | 23 | Stories 24 | 25 | 26 | 27 | Entities 28 | 29 | 30 | 31 | Articles 32 | 33 | 34 | 35 | 36 | setShowSettings(false)} /> 37 | 38 |
39 |
40 | ) 41 | } -------------------------------------------------------------------------------- /frontend/src/components/Pagination.tsx: -------------------------------------------------------------------------------- 1 | import { Button, ButtonGroup } from "@blueprintjs/core"; 2 | import { useSearchParams } from "react-router-dom"; 3 | 4 | import { IListingResponse } from "../types"; 5 | import styles from '../styles/util.module.scss'; 6 | import { Numeric } from "./util"; 7 | 8 | 9 | type PaginationProps = { 10 | prefix: string 11 | response: IListingResponse 12 | } 13 | 14 | export default function Pagination({ prefix, response }: PaginationProps) { 15 | const [params, setParams] = useSearchParams(); 16 | 17 | const setOffset = (e: React.MouseEvent, newOffset: number) => { 18 | e.preventDefault(); 19 | const oldParams = Object.fromEntries(params.entries()); 20 | const key = `${prefix}.offset`; 21 | setParams({ ...oldParams, [key]: newOffset + '' }); 22 | } 23 | 24 | const nextOffset = response.offset + response.limit; 25 | const upper = Math.min(response.total, nextOffset); 26 | const hasPrev = response.offset > 0; 27 | const hasNext = response.total > nextOffset; 28 | const prevOffset = Math.max(0, response.offset - response.limit) 29 | return ( 30 | 31 | 35 | 57 | 64 | 65 | {listing.total < 1 && ( 66 | 71 | )} 72 | {listing.total > 0 && ( 73 | <> 74 | 75 | 76 | 77 | Name 78 | Common tags 79 | Count 80 | Same 81 | 82 | 83 | 84 | {listing.results.map((similar) => ( 85 | 86 | 87 | 88 | {similar.label} 89 | 90 | 91 | )} /> 92 | 93 | 94 | {similar.common_count} 95 | 96 | 97 | toggleOne(similar.id)} 100 | disabled={isUpdating} 101 | /> 102 | 103 | 104 | ))} 105 | 106 | 107 | 108 | )} 109 | 110 | 111 | ) 112 | } -------------------------------------------------------------------------------- /frontend/src/components/StoryArticleImportDialog.tsx: -------------------------------------------------------------------------------- 1 | import { Button, Classes, Dialog, FormGroup, InputGroup } from "@blueprintjs/core"; 2 | import { FormEvent, MouseEvent, useState } from "react"; 3 | import { ARTICLE_ICON } from "../constants"; 4 | import { useImportStoryArticleMutation } from "../services/stories"; 5 | import { SectionLoading } from "./util"; 6 | 7 | type StoryArticleImportProps = { 8 | storyId: number 9 | isOpen: boolean 10 | onClose: () => void 11 | } 12 | 13 | export default function StoryArticleImportDialog({ storyId, isOpen, onClose }: StoryArticleImportProps) { 14 | const [url, setUrl] = useState(''); 15 | const [importArticle, { isLoading: isCreating }] = useImportStoryArticleMutation(); 16 | const hasUrl = url.trim().length > 10; 17 | 18 | const onImport = async (e: MouseEvent | FormEvent) => { 19 | e.preventDefault(); 20 | if (hasUrl && !isCreating) { 21 | await importArticle({ story: storyId, url }).unwrap(); 22 | setUrl('') 23 | onClose() 24 | } 25 | } 26 | 27 | return ( 28 | 29 | {isCreating && ( 30 | 31 | )} 32 | {!isCreating && ( 33 |
34 |
35 | 40 | setUrl(e.target.value)} /> 41 | 42 |
43 |
44 |
45 | 46 |
47 |
48 |
49 | )} 50 |
51 | ) 52 | } -------------------------------------------------------------------------------- /frontend/src/components/StoryArticles.tsx: -------------------------------------------------------------------------------- 1 | import { Button, HTMLTable } from "@blueprintjs/core"; 2 | import { MouseEvent } from "react"; 3 | import { Link, useSearchParams } from "react-router-dom"; 4 | import { ARTICLE_THRESHOLD } from "../constants"; 5 | import { useFetchArticleListingQuery } from "../services/articles"; 6 | import { useToggleStoryArticleMutation } from "../services/stories"; 7 | import { IArticle, IStory } from "../types"; 8 | import { useListingPagination } from "../util"; 9 | import ArticleDrawer from "./ArticleDrawer"; 10 | import Pagination from "./Pagination"; 11 | import StoryNomNom from "./StoryNomNom"; 12 | import { ErrorSection, SectionLoading } from "./util"; 13 | 14 | type StoryArticlesProps = { 15 | story: IStory, 16 | } 17 | 18 | export default function StoryArticles({ story }: StoryArticlesProps) { 19 | const [params, setParams] = useSearchParams(); 20 | const articleId = params.get('article') || undefined; 21 | const page = useListingPagination('pairs'); 22 | const { data: articles, error, isLoading } = useFetchArticleListingQuery({ ...page, story: story.id }); 23 | const [toggleStoryArticle] = useToggleStoryArticleMutation(); 24 | 25 | if (error !== undefined) { 26 | return 27 | } 28 | if (articles === undefined || isLoading) { 29 | return 30 | } 31 | 32 | const onRemoveArticle = async (article: IArticle) => { 33 | if (story !== undefined) { 34 | await toggleStoryArticle({ story: story.id, article: article.id }).unwrap() 35 | } 36 | } 37 | 38 | const setPreviewArticle = (articleId?: string) => { 39 | const paramsObj = Object.fromEntries(params.entries()); 40 | setParams({ ...paramsObj, article: articleId || '' }); 41 | } 42 | 43 | const onPreviewArticle = (event: MouseEvent, article: IArticle) => { 44 | event.preventDefault(); 45 | setPreviewArticle(article.id) 46 | } 47 | 48 | return ( 49 | <> 50 | {(articles.total < ARTICLE_THRESHOLD) && ( 51 | 52 | )} 53 | {articles.results.length > 0 && ( 54 | <> 55 | 56 | 57 | 58 | Title 59 | Site 60 | Remove 61 | 62 | 63 | 64 | {articles.results.map((article) => ( 65 | 66 | 67 | onPreviewArticle(e, article)} 70 | > 71 | {article.title} 72 | 73 | 74 | {article.site} 75 | 76 |