├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── enhancement_proposal.md │ └── question.md └── pull_request_template.md ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── __attic__ ├── docs │ ├── README.md │ ├── design │ │ ├── 00_Header.jpg │ │ ├── 1.1_Colour_.jpg │ │ ├── 1.2_Typography.jpg │ │ ├── 1.3_Header.jpg │ │ ├── 1.4_Footer.jpg │ │ ├── 1.5_Breadcrumbs_2_levels_.jpg │ │ ├── 1.5_Breadcrumbs_3_levels_.jpg │ │ ├── 1.6_CTA_Links_.jpg │ │ ├── 1.7_Search.jpg │ │ ├── 1.8_Tables.jpg │ │ ├── design-system.md │ │ ├── mockups.md │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556632095254_00_Header2x.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556633077597_1.2_Typography.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556639329062_1.1_Colour+palletex2.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641211609_1.7_Search.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641251273_1.5_Breadcrumbs_3+levels.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641260288_1.5_Breadcrumbs_2+levels.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641267412_1.4_Footer.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641277300_1.3_Header.jpg │ │ ├── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556642898755_1.6_Buttons.jpg │ │ └── s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556805892800_1.8_Tables.jpg │ ├── problems.md │ └── s3-layout.md └── pipeline │ └── reach-evaluator │ ├── Dockerfile │ ├── evaluator_task.py │ └── requirements.txt ├── argo ├── 00-namespace.yaml ├── README.md ├── argo.yaml ├── elasticsearch.yaml ├── postgres.yaml ├── psqlinit.yaml ├── reach-msf.yaml ├── reach-populate-pg.yaml └── secrets │ ├── minikube │ 
└── argo │ │ └── aws │ │ └── .gitkeep │ └── sync_secrets.py ├── base ├── Dockerfile ├── elastic │ ├── __init__.py │ ├── common.py │ ├── count.py │ ├── epmc_metadata.py │ ├── fulltext_docs.py │ ├── fuzzy_matched_citations.py │ ├── import_refs_from_s3.py │ └── import_sections_from_s3.py ├── hooks │ ├── s3hook.py │ └── sentry.py ├── requirements.txt ├── safe_import.py └── tests │ ├── common.py │ ├── mock_sites │ ├── gov │ │ ├── 1.html │ │ ├── 2.html │ │ └── 3.html │ ├── msf │ │ └── 1.html │ ├── nice │ │ ├── 1.html │ │ └── 2.html │ ├── parliament │ │ ├── 1.html │ │ └── 2.html │ ├── unicef │ │ ├── 1.html │ │ └── 2.html │ └── who │ │ ├── 1.html │ │ └── 2.html │ ├── pdfs │ ├── test_pdf.pdf │ ├── test_pdf_multipage.pdf │ └── test_pdf_page_number.pdf │ └── xml │ └── test_xml.xml ├── buildspec.yml ├── docker-compose.yaml ├── docs ├── antora.yml └── modules │ └── ROOT │ ├── nav.adoc │ └── pages │ └── index.adoc ├── export_wellcome_env.py ├── pipeline ├── reach-es-extractor │ ├── Dockerfile │ ├── Dockerfile.test │ ├── extract_refs_task.py │ ├── refparse │ │ ├── README.md │ │ ├── __init__.py │ │ ├── algo_evaluation │ │ │ ├── compare_found_sections.py │ │ │ ├── data_evaluate │ │ │ │ └── .gitkeep │ │ │ ├── evaluate_find_section.py │ │ │ ├── evaluate_match_references.py │ │ │ ├── evaluate_parse.py │ │ │ ├── evaluate_settings.py │ │ │ ├── evaluate_split_section.py │ │ │ ├── evaluation.md │ │ │ ├── exploratory │ │ │ │ ├── investigate_match_thresholds.py │ │ │ │ ├── negative_cosines_hist_2019-07-01-1211.png │ │ │ │ ├── negative_cosines_len_scatter_2019-07-01-1211.png │ │ │ │ ├── thresholds_F1Score_negative_heatmap_2019-07-01-1211.png │ │ │ │ ├── thresholds_Precision_negative_heatmap_2019-07-01-1211.png │ │ │ │ ├── thresholds_Recall_negative_heatmap_2019-07-01-1211.png │ │ │ │ └── title_lengths_2019-07-01-1211.png │ │ │ └── results │ │ │ │ └── .gitkeep │ │ ├── evaluate_algo.py │ │ ├── merge_results.py │ │ ├── parse_latest.py │ │ ├── reference_parser_models │ │ │ └── 
reference_parser_pipeline.pkl │ │ ├── refparse.py │ │ ├── settings.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_config_multitask.ini │ │ │ ├── test_exact_match.py │ │ │ ├── test_fuzzy_match.py │ │ │ └── test_split_parse.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── exact_match.py │ │ │ ├── file_manager.py │ │ │ ├── fuzzy_match.py │ │ │ ├── parse.py │ │ │ ├── s3.py │ │ │ └── serialiser.py │ └── requirements.txt ├── reach-es-indexer │ ├── Dockerfile │ ├── index_task.py │ └── requirements.txt ├── reach-fuzzy-matcher │ ├── Dockerfile │ ├── fuzzymatcher_task.py │ └── requirements.txt ├── reach-parser │ ├── Dockerfile │ ├── Dockerfile.test │ ├── __init__.py │ ├── normalizer │ │ ├── __init__.py │ │ └── title_normalizer.py │ ├── parser_task.py │ ├── pdf_parser │ │ ├── __init__.py │ │ ├── main.py │ │ ├── objects │ │ │ ├── PdfObjects.py │ │ │ └── __init__.py │ │ ├── pdf_parse.py │ │ ├── resources │ │ │ ├── keywords.txt │ │ │ └── section_keywords.txt │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_pdf_objects.py │ │ │ └── test_pdf_parser_tools.py │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── dbTools.py │ │ │ └── extraction.py │ └── requirements.txt └── reach-scraper │ ├── Dockerfile │ ├── Dockerfile.test │ ├── README.md │ ├── __init__.py │ ├── docker-compose.yaml │ ├── pg_exists.py │ ├── pg_isready.py │ ├── requirements.txt │ ├── scrapy.cfg │ ├── spider_task.py │ └── wsf_scraping │ ├── __init__.py │ ├── contracts.py │ ├── feed_storage.py │ ├── filter.py │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ ├── spiders │ ├── __init__.py │ ├── acme_spider.py │ ├── base_spider.py │ ├── gov_spider.py │ ├── msf_spider.py │ ├── nice_spider.py │ ├── parliament_spider.py │ ├── unicef_spider.py │ └── who_iris_spider.py │ └── tests │ ├── __init__.py │ ├── test_gov_spider.py │ ├── test_msf_spider.py │ ├── test_nice_spider.py │ ├── test_parliament_spider.py │ ├── test_scraper_spiders.py │ ├── test_unicef_spider.py │ └── test_who_spider.py ├── 
requirements.txt ├── test_target ├── README.md ├── inner_page.html ├── page.html └── target_server.py └── web ├── .babelrc ├── .dockerignore ├── .eslintrc.json ├── Dockerfile ├── Makefile ├── bin └── update_vendor.sh ├── config ├── docker.config.toml └── local.config.toml ├── package-lock.json ├── package.json ├── requirements.txt ├── setup.py └── web ├── __init__.py ├── api.py ├── config.py ├── db.py ├── docs ├── .gitignore ├── Makefile ├── README.md ├── build │ ├── doctrees │ │ ├── api.doctree │ │ ├── environment.pickle │ │ ├── index.doctree │ │ └── intro.doctree │ └── html │ │ ├── .buildinfo │ │ ├── _static │ │ ├── basic.css │ │ ├── css │ │ │ ├── badge_only.css │ │ │ └── theme.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── fonts │ │ │ ├── Inconsolata-Bold.ttf │ │ │ ├── Inconsolata-Regular.ttf │ │ │ ├── Inconsolata.ttf │ │ │ ├── Lato-Bold.ttf │ │ │ ├── Lato-Regular.ttf │ │ │ ├── Lato │ │ │ │ ├── lato-bold.eot │ │ │ │ ├── lato-bold.ttf │ │ │ │ ├── lato-bold.woff │ │ │ │ ├── lato-bold.woff2 │ │ │ │ ├── lato-bolditalic.eot │ │ │ │ ├── lato-bolditalic.ttf │ │ │ │ ├── lato-bolditalic.woff │ │ │ │ ├── lato-bolditalic.woff2 │ │ │ │ ├── lato-italic.eot │ │ │ │ ├── lato-italic.ttf │ │ │ │ ├── lato-italic.woff │ │ │ │ ├── lato-italic.woff2 │ │ │ │ ├── lato-regular.eot │ │ │ │ ├── lato-regular.ttf │ │ │ │ ├── lato-regular.woff │ │ │ │ └── lato-regular.woff2 │ │ │ ├── RobotoSlab-Bold.ttf │ │ │ ├── RobotoSlab-Regular.ttf │ │ │ ├── RobotoSlab │ │ │ │ ├── roboto-slab-v7-bold.eot │ │ │ │ ├── roboto-slab-v7-bold.ttf │ │ │ │ ├── roboto-slab-v7-bold.woff │ │ │ │ ├── roboto-slab-v7-bold.woff2 │ │ │ │ ├── roboto-slab-v7-regular.eot │ │ │ │ ├── roboto-slab-v7-regular.ttf │ │ │ │ ├── roboto-slab-v7-regular.woff │ │ │ │ └── roboto-slab-v7-regular.woff2 │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ └── fontawesome-webfont.woff2 │ │ ├── jquery-3.5.1.js │ │ 
├── jquery.js │ │ ├── js │ │ │ ├── modernizr.min.js │ │ │ └── theme.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ └── underscore.js │ │ ├── api.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── intro.html │ │ ├── objects.inv │ │ ├── search.html │ │ └── searchindex.js ├── make.bat ├── requirements.txt └── source │ ├── api.md │ ├── conf.py │ ├── index.rst │ └── intro.md ├── src ├── css │ ├── about.less │ ├── contact.less │ ├── footer.less │ ├── header.less │ ├── home.less │ ├── icons.less │ ├── results.less │ ├── search.less │ ├── style.less │ ├── variables.less │ └── wellcome-bold-webfont.woff2 ├── favicon │ ├── android-icon-144x144.png │ ├── android-icon-192x192.png │ ├── android-icon-36x36.png │ ├── android-icon-48x48.png │ ├── android-icon-72x72.png │ ├── android-icon-96x96.png │ ├── apple-icon-114x114.png │ ├── apple-icon-120x120.png │ ├── apple-icon-144x144.png │ ├── apple-icon-152x152.png │ ├── apple-icon-180x180.png │ ├── apple-icon-57x57.png │ ├── apple-icon-60x60.png │ ├── apple-icon-72x72.png │ ├── apple-icon-76x76.png │ ├── apple-icon-precomposed.png │ ├── apple-icon.png │ ├── browserconfig.xml │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon-96x96.png │ ├── favicon.ico │ ├── manifest.json │ ├── ms-icon-144x144.png │ ├── ms-icon-150x150.png │ ├── ms-icon-310x310.png │ └── ms-icon-70x70.png ├── images │ ├── Icon_ New-window.svg │ ├── Icon_About_Accuracy_100px.svg │ ├── Icon_About_Open-source_100px.svg │ ├── Icon_About_Transparent_100px.svg │ ├── Icon_Arrow_down.svg │ ├── Icon_Chevron_Double.svg │ ├── Icon_Chevron_Down.svg │ ├── Icon_Download_16px.svg │ ├── Icon_How_Download_160px.svg │ ├── Icon_How_Extract_160px.svg │ ├── Icon_How_Match_160px.svg │ ├── Icon_Info.svg │ ├── Icon_Menu_16px.svg │ ├── Icon_Policy_24px.svg │ ├── Icon_Research_24px.svg │ ├── Icon_Scroll-arow.svg │ ├── Icon_Search_16px.svg │ ├── Icon_Sort-by_16px.svg │ ├── 
Icon_new_window.svg │ ├── Illustration_Glass.svg │ ├── Illustration_Papers.svg │ ├── Image_Product-shot.png │ ├── Shape_01.svg │ ├── Shape_02.svg │ ├── Wellcome_logo.svg │ ├── reach_alpha_branding.svg │ ├── reach_site_view.png │ ├── wave.svg │ ├── wellcome-logo.svg │ └── white-wave.svg ├── js │ ├── app.js │ ├── citationsTable.js │ ├── clearSearch.js │ ├── home.js │ ├── policyTable.js │ ├── resultsCommon.js │ ├── templates │ │ └── no_results.js │ └── v.contact.js ├── vendor │ └── spectre-0.5.8 │ │ ├── spectre-exp.css │ │ ├── spectre-exp.min.css │ │ ├── spectre-icons.css │ │ ├── spectre-icons.min.css │ │ ├── spectre.css │ │ └── spectre.min.css └── w-avatar-pitch-1.svg ├── templates ├── about.html ├── base.html ├── contact.html ├── how-it-works.html ├── index.html ├── privacy.html ├── results │ ├── citations.html │ └── policy-docs.html └── search │ ├── citations.html │ └── policy-docs.html ├── tests ├── test_search_api.py └── test_template.py ├── utils.py ├── views ├── __init__.py ├── api │ ├── __init__.py │ ├── api_search_citations.py │ ├── api_search_policies.py │ └── utils.py ├── apidocs.py ├── contact.py ├── opt_search.py ├── robotstxt.py ├── search │ ├── __init__.py │ ├── citations.py │ ├── export_citations.py │ ├── export_policies.py │ └── policies.py ├── search_exports.py └── template.py └── wsgi.py /.dockerignore: -------------------------------------------------------------------------------- 1 | **/*.pyc 2 | **/__pycache__/* 3 | .git 4 | .gitignore 5 | .idea 6 | .pytest_cache 7 | 8 | Dockerfile 9 | 10 | __attic__ 11 | 12 | argo 13 | 14 | web/build/web/static/* 15 | 16 | reach/refparse/algo_evaluation/data_evaluate/* 17 | 18 | **/env 19 | **/venv 20 | **/docs 21 | pull_request_template.md 22 | CONTRIBUTING.md 23 | 24 | web/node_modules/* 25 | web/package.json 26 | web/package-lock.json 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | --- 8 | Checklist: 9 | 10 | * [ ] I've included the version 11 | * [ ] I've included reproduction steps 12 | * [ ] I've included any config 13 | * [ ] I've included the logs 14 | 15 | 16 | ## What Happened 17 | 18 | ## What you expected to happen 19 | 20 | ## How to reproduce it (as minimally and precisely as possible) 21 | 22 | ## Anything else we should know 23 | 24 | ## Environment 25 | 26 | * [ ] Production 27 | * [ ] Staging 28 | * [ ] Local 29 | 30 | ## Error Message / Logs 31 | 32 | --- 33 | 34 | ## Message from Maintainers 35 | 36 | If you are impacted by this bug please add a :thumbsup: reaction to this issue! 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement_proposal.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement proposal 3 | about: Propose an enhancement for this project 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | --- 8 | # Description of feature/ functionality 9 | 10 | (Be sure to include the reasoning if this is not part of a bigger project. Link to spec, notion page, Zeplin etc) 11 | 12 | ## Risks & dependencies 13 | 14 | (Consider customer facing, internal and deployment) 15 | 16 | ## Acceptance Criteria 17 | (What needs to happen for this ticket to be closed?) 18 | 19 | ## Estimation of dev task size 20 | 21 | - [ ] Small 22 | - [ ] Medium 23 | - [ ] Large 24 | 25 | ## Who needs to test this? 
26 | 27 | - [ ] Dev 28 | - [ ] UI 29 | - [ ] UX 30 | - [ ] Data science 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask a question regarding this project 4 | title: '' 5 | labels: 'question' 6 | assignees: '' 7 | --- 8 | 9 | 10 | # Summary 11 | 12 | What do you want to know about this project? 13 | 14 | # Motivation 15 | 16 | Why do you need to know this, any examples or use cases you could include? 17 | 18 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | Please include a summary of the changes this PR introduces for the codebase. 4 | Also specify if some sections need special attention, and why you want to introduce this change. 5 | 6 | Make sure to split changes across multiple pull requests, as we won't review bundled pull requests. 7 | 8 | Finally, make sure your PR follows our code of conduct before posting (Check our [contributing guidelines](CONTRIBUTING.md) if you're not sure). 9 | 10 | ## Type of change 11 | 12 | Please delete options that are not relevant. 13 | 14 | - [ ] :bug: Bug fix (Add `Fix #(issue)` to your PR) 15 | - [ ] :sparkles: New feature 16 | - [ ] :fire: Breaking change 17 | - [ ] :memo: Documentation update 18 | 19 | # How Has This Been Tested? 20 | 21 | Please describe the tests that you ran to verify your changes. Provide instructions so we can run the tests. 
Please also list any relevant details for your test configuration: 22 | 23 | # Checklist: 24 | 25 | - [ ] My code follows the style guidelines of this project (pep8 AND pyflakes) 26 | - [ ] I have commented my code, particularly in hard-to-understand areas 27 | - [ ] If needed, I changed related parts of the documentation 28 | - [ ] I included tests in my PR 29 | - [ ] New and existing unit tests pass locally with my changes 30 | - [ ] Any dependent changes have been merged and published in downstream modules 31 | - [ ] If my PR aims to fix an issue, I referenced it using `#(issue)` 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *__pycache__ 3 | *.DS_Store 4 | .idea 5 | 6 | *.egg-info 7 | **/.cache/* 8 | 9 | *.csv 10 | *.pdf 11 | *.txt 12 | 13 | !package.json 14 | !base/tests/pdfs/* 15 | !keywords.txt 16 | !section_keywords.txt 17 | !**/requirements.* 18 | 19 | **/node_modules 20 | **/epmc-metadata.json.gz 21 | *env 22 | 23 | argo/secrets/minikube/argo/aws/* 24 | !argo/secrets/minikube/argo/aws/.gitkeep 25 | 26 | web/build/web/static/* 27 | !web/build/web/static/.gitkeep 28 | !web/docs/* 29 | venv 30 | .env 31 | package.lock 32 | web/config/dev.config.toml 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Wellcome Trust 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the 
following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /__attic__/docs/README.md: -------------------------------------------------------------------------------- 1 | # Reach documentation 2 | 3 | Directories: 4 | 5 | - [design](./design) 6 | -------------------------------------------------------------------------------- /__attic__/docs/design/00_Header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/00_Header.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.1_Colour_.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.1_Colour_.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.2_Typography.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.2_Typography.jpg 
-------------------------------------------------------------------------------- /__attic__/docs/design/1.3_Header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.3_Header.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.4_Footer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.4_Footer.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.5_Breadcrumbs_2_levels_.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.5_Breadcrumbs_2_levels_.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.5_Breadcrumbs_3_levels_.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.5_Breadcrumbs_3_levels_.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.6_CTA_Links_.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.6_CTA_Links_.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.7_Search.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.7_Search.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/1.8_Tables.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/1.8_Tables.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/design-system.md: -------------------------------------------------------------------------------- 1 | # Reach – Design System 2 | Created by Data Labs at Wellcome Trust 3 | 4 | 5 | ![](00_Header.jpg) 6 | 7 | 8 | 9 | 10 | # 1.1. Intro 11 | ---------- 12 | 13 | The present document provides a high level overview of the Reach brand and style. [Spectre.CSS](https://picturepan2.github.io/spectre/index.html) is used as the CSS framework, but the UI components should be adapted to provide a unique user experience to the website. 14 | 15 | 16 | 17 | # 1.2. Colour Palette 18 | ---------- 19 | 20 | The web colour palette is taken largely from the Wellcome brand book ([Data Viz section](https://company-57536.frontify.com/d/gFEfjydViLRJ/wellcome-brand-book#/visuals/dataviz-elements-and-rationale)) with certain additions to accommodate the web environment. 21 | 22 | 23 | ![](1.1_Colour_.jpg) 24 | 25 | 26 | 27 | 28 | # 1.3. Typography 29 | ---------- 30 | 31 | Wellcome's brand fonts (Wellcome & Helvetica Neue) are used alternatively throughout the website with no exceptions. 32 | 33 | 34 | ![](1.2_Typography.jpg) 35 | 36 | 37 | 38 | 39 | # 1.4. Grid System 40 | ---------- 41 | 42 | The grid system follows [Spectre CSS framework](https://picturepan2.github.io/spectre/layout/responsive.html). 
43 | 44 | 45 | | XS | 320 – 480px | padding left/right 11px | 46 | | --- | ------------ | ------------------------------------------------------ | 47 | | SM | 481 – 600px | TBC | 48 | | MD | 601 – 840px | TBC | 49 | | LG | 961 – 1280px | TBC | 50 | | XL | 961 – 1280px | 12 columns : gutter 22px : padding left/right 11px | 51 | | XXL | > 1280px | 12 columns : gutter 22px | 52 | 53 | 54 | 55 | 56 | # 1.5. UI Components 57 | 58 | 59 | ## 1.5.1. Buttons 60 | ---------- 61 | 62 | 63 | ![](1.6_CTA_Links_.jpg) 64 | 65 | 66 | 67 | 68 | ## 1.5.2. Header 69 | ---------- 70 | ![](1.3_Header.jpg) 71 | 72 | 73 | 74 | ## 1.5.3. Footer 75 | ---------- 76 | ![](1.4_Footer.jpg) 77 | 78 | 79 | 80 | ## 1.5.6. Breadcrumbs > 2 levels 81 | ---------- 82 | 83 | In the breadcrumb trail, the breadcrumb corresponding to the current page **should not be a link**. 84 | 85 | 86 | ![](1.5_Breadcrumbs_2_levels_.jpg) 87 | 88 | 89 | 90 | 91 | ## 1.5.7. Breadcrumbs > 3 levels (Search results pages) 92 | ---------- 93 | 94 | The **search term is excluded from breadcrumbs** (to avoid long pages on mobile) 95 | 96 | 97 | ![](1.5_Breadcrumbs_3_levels_.jpg) 98 | 99 | 100 | 101 | 102 | ## 1.5.8. Search 103 | ---------- 104 | 105 | 106 | ![](1.7_Search.jpg) 107 | 108 | 109 | 110 | ## 1.5.9. Tables 111 | ---------- 112 | 113 | 114 | ![](1.8_Tables.jpg) 115 | 116 | 117 | -------------------------------------------------------------------------------- /__attic__/docs/design/mockups.md: -------------------------------------------------------------------------------- 1 | # Internal assets 2 | 3 | We've put what we can into the public repo. However, some things from 4 | design are elsewhere, since it was easier for design to use tools like 5 | Google Drive & Invision. 6 | 7 | All raw design assets are kept in the [Data Labs Team Drive under "User 8 | Experience/UX & 9 | UI](https://drive.google.com/drive/u/0/folders/1kN5-MbDUGK1YdSw430T_mDhdQdPzGCms). 10 | Most are in Sketch format. 
11 | 12 | UI devs working at Wellcome should also be to view these files (at least 13 | as of time of upload) in 14 | [invision](https://projects.invisionapp.com/d/main?origin=v7#/projects/prototypes/17303255). 15 | 16 | -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556632095254_00_Header2x.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556632095254_00_Header2x.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556633077597_1.2_Typography.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556633077597_1.2_Typography.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556639329062_1.1_Colour+palletex2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556639329062_1.1_Colour+palletex2.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641211609_1.7_Search.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641211609_1.7_Search.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641251273_1.5_Breadcrumbs_3+levels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641251273_1.5_Breadcrumbs_3+levels.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641260288_1.5_Breadcrumbs_2+levels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641260288_1.5_Breadcrumbs_2+levels.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641267412_1.4_Footer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641267412_1.4_Footer.jpg -------------------------------------------------------------------------------- 
/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641277300_1.3_Header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556641277300_1.3_Header.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556642898755_1.6_Buttons.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556642898755_1.6_Buttons.jpg -------------------------------------------------------------------------------- /__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556805892800_1.8_Tables.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/__attic__/docs/design/s_80E0FAE63D8FEACBC2D84BC148149813EA5AB8AA2E1FDA99090E7199EBBFE5D9_1556805892800_1.8_Tables.jpg -------------------------------------------------------------------------------- /__attic__/docs/problems.md: -------------------------------------------------------------------------------- 1 | # Reach Output Problems 2 | 3 | The 10 most cited Wellcome Trust publications as predicted by the Reach tool were looked at. In total these 10 publications were cited 154 times in 62 different policy documents. We looked into these citations and found issues with 103 of them. The issues fell into 6 categories and are given in the table below. 
4 | 5 | | GitHub Issue Number | Problem | Example | Solution | Proportion of problematic citations | 6 | | --- | --- | --- | --- | --- | 7 | | [#180](https://github.com/wellcometrust/reach/issues/180) | Text found in wrongly identified references section | A table with the row name "Treatment of severe malaria", was identified as a reference title since this table was at the end of the references section and got included in the scraped. | Improve extracting section | 64/103 | 8 | | [#181](https://github.com/wellcometrust/reach/issues/181) | Text found not in a reference during the exact text search | "attention deficit hyperactivity disorder" was found in the text of several documents and identified as a match to a paper with the same name. | Length threshold to exact matcher | 20/103 | 9 | | [#182](https://github.com/wellcometrust/reach/issues/182) | Reference repeated in the policy document | A citation for "Disability-adjusted life years (DALYs) for 291 diseases and injuries in 21 regions, 1990-2010: a systematic analysis for the Global Burden of Disease Study 2010" came up in two references sections of a policy document | Deduplicate repeats or decide to keep them in | 8/103 | 10 | | [#183](https://github.com/wellcometrust/reach/issues/183) | Duplicate reference found even though no duplicate found in policy document | A citation for "Trends in adult body-mass index in 200 countries from 1975 to 2014: a pooled analysis of 1698 population-based measurement studies with 1377-1396" only occurred once in a policy document, but the Reach output said it came up twice. | ? | 4/103 | 11 | | - | False positive - parsed reference matched to a similar but different reference | The Reach tool identified a publication entitled "Attention deficit hyperactivity disorder" from 2006, however in the policy document this reference was to a similarly titled paper from 1998. 
| Increase text similarity and length thresholds | 4/103 | 12 | | [#180](https://github.com/wellcometrust/reach/issues/180) | Text found in a reference during the exact text search | A citation for "Attention deficit hyperactivity disorder" was in the references section of a policy document, however it was only found in the exact text search and not the fuzzy match search. | Improve extracting section | 3/103 | 13 | -------------------------------------------------------------------------------- /__attic__/pipeline/reach-evaluator/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.evaluator.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.evaluator.txt 9 | 10 | 11 | COPY ./evaluator_task.py /opt/reach/evaluator_task.py 12 | 13 | # Give execution rights to the entrypoint Python script 14 | RUN chmod +x /opt/reach/evaluator_task.py 15 | 16 | ENTRYPOINT ["/opt/reach/evaluator_task.py"] 17 | -------------------------------------------------------------------------------- /__attic__/pipeline/reach-evaluator/requirements.txt: -------------------------------------------------------------------------------- 1 | https://datalabs-public.s3.eu-west-2.amazonaws.com/reach_evaluator/reach_evaluator-2020.1.1-py3-none-any.whl -------------------------------------------------------------------------------- /argo/00-namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: argo 5 | -------------------------------------------------------------------------------- /argo/README.md: -------------------------------------------------------------------------------- 1 | # Argo & Reach 2 | Reach's pipeline is deployed in production using Argo. 3 | These files are for local runs and development. 
4 | 5 | ## How to run Reach's workflows 6 | To run this pipeline locally, you'll need: 7 | 8 | - Docker 9 | - Minikube 10 | - Python >= 3.6 11 | - The Argo cli (recommended but optionnal) 12 | 13 | If it is the first time you use minikube with your AWS account, please configure your ECR credentials: 14 | ``` 15 | minikube addons enable registry-creds 16 | minikube addons configure registry-creds 17 | ``` 18 | 19 | To build the required images, go to the root folder and run the following: 20 | ``` 21 | make docker-build 22 | ``` 23 | 24 | To install Argo to your selected cluster (this will install Argo to a namespace `argo`, so make sure it's available before running these commands or change it beforehand): 25 | ``` 26 | kubectl apply -f argo/00-namespace.yaml 27 | kubectl apply -f argo/argo.yaml 28 | kubectl apply -f argo/elasticsearch.yaml 29 | kubectl apply -f argo/psqlinit.yaml 30 | kubectl apply -f argo/postgres.yamls 31 | ``` 32 | 33 | 34 | You can then run your workflows as follows: 35 | ``` 36 | # this is the example workflow for WHO IRIS 37 | argo submit -n argo argo/reach-who.yaml 38 | ``` 39 | 40 | ## Using this infrastructure with the web application 41 | Reach's web application only relies on Postgresql. 
To expose it and make it usable locally (or within the `docker-compose` local deployment), run: 42 | ``` 43 | kubectl port-forward -n argo postgres-0 5432:5432 44 | ``` 45 | -------------------------------------------------------------------------------- /argo/postgres.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: argo-postgres-volume 5 | namespace: argo 6 | labels: 7 | type: local 8 | spec: 9 | storageClassName: manual 10 | capacity: 11 | storage: 5Gi 12 | accessModes: 13 | - ReadWriteOnce 14 | hostPath: 15 | path: "/mnt/data" 16 | --- 17 | apiVersion: v1 18 | kind: PersistentVolumeClaim 19 | metadata: 20 | name: argo-postgres-claim 21 | namespace: argo 22 | spec: 23 | storageClassName: manual 24 | accessModes: 25 | - ReadWriteOnce 26 | resources: 27 | requests: 28 | storage: 5Gi 29 | --- 30 | apiVersion: apps/v1 31 | kind: StatefulSet 32 | metadata: 33 | name: postgres 34 | namespace: argo 35 | annotations: 36 | kubernetes.io/change-cause: N/A 37 | spec: 38 | selector: 39 | matchLabels: 40 | app: postgres 41 | serviceName: "postgres" 42 | replicas: 1 43 | template: 44 | metadata: 45 | labels: 46 | app: postgres 47 | spec: 48 | containers: 49 | - name: postgresql 50 | image: postgres:12.2-alpine 51 | ports: 52 | - containerPort: 5432 53 | env: 54 | - name: POSTGRES_PASSWORD 55 | value: development 56 | - name: POSTGRES_DB 57 | value: warehouse 58 | volumeMounts: 59 | - name: argo-postgres-claim 60 | mountPath: /var/lib/postgresql/datalabs 61 | - name: psqlinit 62 | mountPath: /docker-entrypoint-initdb.d 63 | volumes: 64 | - name: argo-postgres-claim 65 | persistentVolumeClaim: 66 | claimName: argo-postgres-claim 67 | - name: psqlinit 68 | configMap: 69 | name: psqlinit 70 | --- 71 | apiVersion: v1 72 | kind: Service 73 | metadata: 74 | name: postgres 75 | namespace: argo 76 | labels: 77 | app: postgres 78 | spec: 79 | ports: 80 | - port: 5432 81 | 
targetPort: 5432 82 | protocol: TCP 83 | selector: 84 | app: postgres 85 | -------------------------------------------------------------------------------- /argo/psqlinit.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | create.sql: |2 4 | 5 | CREATE SCHEMA IF NOT EXISTS warehouse; 6 | 7 | CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; 8 | 9 | kind: ConfigMap 10 | metadata: 11 | creationTimestamp: "2020-05-15T15:42:31Z" 12 | managedFields: 13 | - apiVersion: v1 14 | fieldsType: FieldsV1 15 | fieldsV1: 16 | f:data: 17 | .: {} 18 | f:create.sql: {} 19 | manager: kubectl 20 | operation: Update 21 | time: "2020-05-15T15:42:31Z" 22 | name: psqlinit 23 | namespace: argo 24 | selfLink: /api/v1/namespaces/argo/configmaps/psqlinit 25 | -------------------------------------------------------------------------------- /argo/secrets/minikube/argo/aws/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/argo/secrets/minikube/argo/aws/.gitkeep -------------------------------------------------------------------------------- /base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a basic Python image, but current Debian 2 | FROM python:3.6-slim-stretch 3 | 4 | # Build UTF8 locale to avoid encoding issues with Scrapy encoding 5 | # C.UTF-8 is the new en_US.UTF-8. 
6 | ENV LC_ALL=C.UTF-8 7 | ENV LANG=C.UTF-8 8 | ENV LANGUAGE=C.UTF-8 9 | 10 | WORKDIR /opt/reach 11 | 12 | COPY ./requirements.txt /opt/reach/requirements.txt 13 | 14 | # Poppler is needed to run pdftotext convertion 15 | RUN apt-get update -yqq && \ 16 | apt-get install -yqq --no-install-recommends \ 17 | build-essential \ 18 | libpoppler-cpp-dev \ 19 | poppler-utils \ 20 | locales && \ 21 | apt-get -q clean && \ 22 | locale-gen C.UTF-8 && \ 23 | pip install -U pip && \ 24 | python3 -m pip install -r /opt/reach/requirements.txt && \ 25 | apt-get remove --purge -y build-essential 26 | 27 | 28 | COPY ./safe_import.py /opt/reach/safe_import.py 29 | COPY ./hooks /opt/reach/hooks 30 | COPY ./elastic /opt/reach/elastic 31 | COPY ./tests /opt/reach/tests 32 | -------------------------------------------------------------------------------- /base/elastic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/base/elastic/__init__.py -------------------------------------------------------------------------------- /base/elastic/count.py: -------------------------------------------------------------------------------- 1 | """ 2 | Minimal CLI for counting records in ES. 3 | """ 4 | 5 | from . import common 6 | 7 | if __name__ == '__main__': 8 | parser = common.create_argument_parser(__doc__.strip()) 9 | parser.add_argument('index_name') 10 | args = parser.parse_args() 11 | es = common.es_from_args(args) 12 | print(common.count_es(es, args.index_name)) 13 | 14 | -------------------------------------------------------------------------------- /base/elastic/epmc_metadata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inserts EPMC metadata into Elasticsearch. 
3 | 4 | Sample URL for testing: 5 | 6 | s3://datalabs-staging/airflow/output/open-research/epmc-metadata/epmc-metadata.json.gz 7 | """ 8 | 9 | import json 10 | import logging 11 | import functools 12 | 13 | from . import common 14 | 15 | CHUNK_SIZE = 1000 # tuned for small(ish) size of pub metadata 16 | 17 | 18 | def to_es_action(es_index, line): 19 | d = json.loads(line) 20 | return { 21 | "_index": es_index, 22 | "doc": d, 23 | } 24 | 25 | 26 | def clean_es(es, es_index, organisation): 27 | """ Ensure an empty index exists. """ 28 | common.recreate_index(es, es_index) 29 | 30 | 31 | def insert_file(f, es, es_index, organisation, max_items=None): 32 | """ 33 | Inserts EPMC metadata from a json.gz file into Elasticsearch. 34 | 35 | Args: 36 | f: json.gz file object 37 | es: a living connection to elacticsearch 38 | max_items: maximum number of records to insert, or None 39 | """ 40 | logging.info( 41 | 'epmc_metadata.insert_file: f=%s es=%s max_items=%s', 42 | f, es, max_items) 43 | to_es_func = functools.partial(to_es_action, es_index) 44 | return common.insert_actions( 45 | es, 46 | common.yield_actions(f, to_es_func, max_items), 47 | CHUNK_SIZE, 48 | ) 49 | 50 | 51 | if __name__ == '__main__': 52 | def insert_func(f, es, max_items=None): 53 | return insert_file(f, es, 'policy-test-epmc-metadata', 54 | max_items=max_items) 55 | count = common.insert_from_argv( 56 | __doc__.strip(), clean_es, insert_file) 57 | logging.info('Imported %d pubs into ES', count) 58 | -------------------------------------------------------------------------------- /base/hooks/sentry.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import os 3 | 4 | import sentry_sdk 5 | 6 | 7 | def init_sentry_sdk(sentry_dsn): 8 | kwargs = { 9 | 'integrations': [], # we'll add celery & flask eventually here 10 | 'default_integrations': True, 11 | } 12 | sentry_sdk.init(sentry_dsn) 13 | 14 | 15 | def report_exception(f): 16 | """ 
Minimal decorator for reporting exceptions that occur within a 17 | function. Does not support generators.""" 18 | @wraps(f) 19 | def wrapped_f(*args, **kwargs): 20 | try: 21 | return f(*args, **kwargs) 22 | except: 23 | sentry_sdk.capture_exception() 24 | raise 25 | 26 | return wrapped_f 27 | 28 | 29 | # SENTRY_DSN must be present at import time. If we don't have it then, 30 | # we won't have it later either. 31 | init_sentry_sdk(os.environ['SENTRY_DSN']) 32 | -------------------------------------------------------------------------------- /base/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | sentry-sdk 3 | elasticsearch 4 | -------------------------------------------------------------------------------- /base/safe_import.py: -------------------------------------------------------------------------------- 1 | """ 2 | Prevents multiple threads from trying to import at the same time and 3 | hitting an import lock. Implemented because airflow's web server 4 | regularly re-imports all DAGs and all tasks therein -- and 5 | unfortunately, our tasks import so many dependencies that reloading them 6 | takes enough time (by some random distribution) that the Airflow times 7 | out imports, resulting in an endless stream of sentry reports from 8 | within gunicorn. 9 | 10 | So, we've moved "slow" imports, especially those pulling in ML libraries 11 | such as scipy or even pandas, into the execute() method of our tasks. 12 | 13 | This is almost always something you should NEVER do, because imports can 14 | only be trusted not to lock if they're done from the main thread and on 15 | module load. (And an import lock in Python tends not to (or never?) 16 | resolve itself.) But, not much choice, at least for now. And, it turns 17 | out that in our execution model, the celery executor spawns subprocesses 18 | to run each task. So, we shouldn't ever have an issue. 19 | 20 | Hope isn't a strategy, though. 
So, here's a context manager to use, so 21 | that we'll know if we were going to hit an import lock. 22 | 23 | Sample usage:: 24 | 25 | @report_exception 26 | def execute(self): 27 | with safe_import: 28 | from reach.rainbowpony import pony_ai 29 | 30 | # do things with pony_ai here. 31 | 32 | """ 33 | 34 | from contextlib import contextmanager 35 | from threading import Lock 36 | 37 | # Not a re-entrant lock b/c we believe imports of this sort should 38 | # only happen once from the calling thread. 39 | SAFE_IMPORT_LOCK = Lock() 40 | 41 | @contextmanager 42 | def safe_import(): 43 | """ 44 | Context manager for ensuring that only one thread is importing 45 | at a time. If two threads enter this context, the second will fail 46 | with an exception so that we can't get caught in an import lock. 47 | """ 48 | acquired = SAFE_IMPORT_LOCK.acquire(blocking=False) 49 | try: 50 | if not acquired: 51 | # NB: we could, instead, just wait here. But the invariant 52 | # we're expecting is that, thanks to how the celery executor 53 | # works, only one call to execute() should happen at a time, 54 | # because only one thread should ever be running. 
55 | raise Exception('Multiple imports attempted at once!') 56 | yield 57 | finally: 58 | if acquired: 59 | SAFE_IMPORT_LOCK.release() 60 | -------------------------------------------------------------------------------- /base/tests/common.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | 4 | def get_path(p): 5 | return os.path.join( 6 | os.path.dirname(__file__), 7 | p 8 | ) 9 | 10 | TEST_PDF = get_path('pdfs/test_pdf.pdf') 11 | TEST_PDF_MULTIPAGE = get_path('pdfs/test_pdf_multipage.pdf') 12 | TEST_PDF_PAGE_NUMBER = get_path('pdfs/test_pdf_page_number.pdf') 13 | TEST_XML = get_path('xml/test_xml.xml') 14 | -------------------------------------------------------------------------------- /base/tests/mock_sites/parliament/1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Search results 5 | 6 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /base/tests/mock_sites/parliament/2.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | The Animal Feed (Amendment) (EU Exit) Regulations 2019 7 | 8 | 9 | PDF table of contents 10 | 11 | -------------------------------------------------------------------------------- /base/tests/mock_sites/unicef/1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Publications Archives - UNICEF DATA 5 | 6 | 7 |
8 |

9 | Including Everyone: Strengthening the collection and use of data about persons with disabilities in humanitarian situations 10 |

11 |
12 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /base/tests/mock_sites/unicef/2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | A Right to be Heard - Listening to children and young people on the move - UNICEF DATA 5 | 6 | 7 |

Download

8 | 9 | 10 | -------------------------------------------------------------------------------- /base/tests/mock_sites/who/1.html: -------------------------------------------------------------------------------- 1 | 2 | IRIS Home 3 | 4 | 22 | 23 | -------------------------------------------------------------------------------- /base/tests/pdfs/test_pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/base/tests/pdfs/test_pdf.pdf -------------------------------------------------------------------------------- /base/tests/pdfs/test_pdf_multipage.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/base/tests/pdfs/test_pdf_multipage.pdf -------------------------------------------------------------------------------- /base/tests/pdfs/test_pdf_page_number.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/base/tests/pdfs/test_pdf_page_number.pdf -------------------------------------------------------------------------------- /base/tests/xml/test_xml.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Test Page 1 6 | All bold line. 7 | Partly bold line. 8 | All italic line. 9 | Partly italic line. 10 | 11 | 12 | 13 | 14 | TestPage 2 15 | All bold line 16 | Partly bold line. 17 | All italic line 18 | Partly italic line. 
19 | 20 | 21 | -------------------------------------------------------------------------------- /buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.1 2 | 3 | phases: 4 | build: 5 | commands: 6 | - "echo resolved source version: $CODEBUILD_RESOLVED_SOURCE_VERSION" 7 | - "echo source version: $CODEBUILD_SOURCE_VERSION" 8 | - make docker-push-all 9 | - make push-web 10 | 11 | artifacts: 12 | files: [] 13 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | # This will handle the deployment of a local web application, postgresql 3 | # database and elasticsearch single-node cluster 4 | 5 | services: 6 | web: 7 | build: 8 | context: ./web 9 | dockerfile: Dockerfile 10 | image: uk.ac.wellcome/reach:latest 11 | ports: 12 | - 127.0.0.1:8081:8081 13 | environment: 14 | AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}" 15 | AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}" 16 | SENTRY_DSN: "${SENTRY_DSN}" 17 | STATIC_ROOT: /opt/reach/build/web/static 18 | DOCS_STATIC_ROOT: /opt/reach/web/docs/build/html/_static 19 | DB_HOST: "host.docker.internal" 20 | DB_PORT: 5432 21 | DB_NAME: "warehouse" 22 | DB_USER: "postgres" 23 | DB_PASSWORD: "development" 24 | 25 | command: 26 | - gunicorn 27 | - --bind=0.0.0.0:8081 28 | - --reload 29 | - web:application 30 | volumes: 31 | - ./web/web:/opt/reach/web/ 32 | deploy: 33 | resources: 34 | limits: 35 | memory: "64M" 36 | -------------------------------------------------------------------------------- /docs/antora.yml: -------------------------------------------------------------------------------- 1 | name: reach 2 | title: Reach 3 | version: "0.0.1" 4 | nav: 5 | - modules/ROOT/nav.adoc 6 | 7 | -------------------------------------------------------------------------------- /docs/modules/ROOT/nav.adoc: 
-------------------------------------------------------------------------------- 1 | * xref:index.adoc[] 2 | -------------------------------------------------------------------------------- /docs/modules/ROOT/pages/index.adoc: -------------------------------------------------------------------------------- 1 | = Reach 2 | -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.extracter.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.extracter.txt 9 | 10 | 11 | COPY ./extract_refs_task.py /opt/reach/extract_refs_task.py 12 | COPY ./refparse /opt/reach/refparse 13 | 14 | # Give execution rights to the entrypoint Python script 15 | RUN chmod +x /opt/reach/extract_refs_task.py 16 | 17 | ENTRYPOINT ["/opt/reach/extract_refs_task.py"] 18 | -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.extracter.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.extracter.txt 9 | 10 | 11 | COPY ./extract_refs_task.py /opt/reach/extract_refs_task.py 12 | COPY ./refparse /opt/reach/refparse 13 | 14 | # Give execution rights to the entrypoint Python script 15 | RUN chmod +x /opt/reach/extract_refs_task.py 16 | -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/__init__.py -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/compare_found_sections.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Utility for comparing predicted and actual reference sections. 4 | 5 | Takes the scarpe_data.csv produced by evaluate_algo.py, and produces an 6 | interactive dashboard through which actual and predicted references sections 7 | can be compared. 8 | 9 | Requires streamlit>=0.47.3 10 | 11 | pip3 install streamlit 12 | streamlit run compare_found_sections.py 13 | """ 14 | 15 | import numpy as np 16 | import pandas as pd 17 | import streamlit as st 18 | 19 | # Load scrape_date produced by evaluate_algo.py 20 | 21 | data = pd.read_csv("./scrape_data.csv") 22 | 23 | # Drop examples for which no comparison can be made 24 | 25 | data.dropna(subset=["Predicted text", "Actual text"], inplace=True) 26 | 27 | # Add sidebar 28 | 29 | st.sidebar.title("Reference section explorer") 30 | 31 | # Create selector for file hash in sidebar. 
32 | 33 | pdf_file = st.sidebar.selectbox("pdf file", data["File"].to_list()) 34 | 35 | lev = data.loc[data["File"] == pdf_file, ["lev_distance"]].iloc[0]["lev_distance"] 36 | comment = st.sidebar.text_area("Comment about the prediction") 37 | actual = data.loc[data["File"] == pdf_file, ["Actual text"]].iloc[0]["Actual text"] 38 | predicted = data.loc[data["File"] == pdf_file, ["Predicted text"]].iloc[0]["Predicted text"] 39 | 40 | # Produce a line which can easily be copied and pasted into a markdown table 41 | 42 | st.write("Copy the line below into a markdown table:") 43 | st.write(f"|{pdf_file}|{len(actual)}|{len(predicted)}|{np.round(lev, 2)}|{comment}|") 44 | 45 | st.table(data.loc[data["File"] == pdf_file, ["Actual text" ,"Predicted text"]]) 46 | -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/data_evaluate/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/data_evaluate/.gitkeep -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/evaluate_settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | from refparse.settings import BaseSettings 3 | 4 | class TestSettings(BaseSettings): 5 | 6 | FOLDER_PREFIX = os.path.join( 7 | os.path.dirname(__file__), 8 | "../algo_evaluation/data_evaluate" 9 | ) 10 | LOG_FILE_PREFIX = './algo_evaluation/results' 11 | 12 | # Variables for find section evaluation data 13 | LEVENSHTEIN_DIST_SCRAPER_THRESHOLD = 0.3 14 | SCRAPE_DATA_PDF_FOLDER_NAME = "pdfs" 15 | SCRAPE_DATA_REF_PDF_FOLDER_NAME = "pdf_sections" 16 | SCRAPE_DATA_PROVIDERS_FILE_NAME = "pdf_providers.csv" 17 | 18 | # Variables for split section evaluation data 19 | 
SPLIT_SECTION_SIMILARITY_THRESHOLD = 40 20 | NUM_REFS_FILE_NAME = "split_section_test_data.csv" 21 | NUM_REFS_TEXT_FOLDER_NAME = "scraped_references_sections" 22 | 23 | # Variables for parse evaluation data 24 | LEVENSHTEIN_DIST_PARSE_THRESHOLD = 0.3 25 | MODEL_FILE_TYPE = 'pickle' 26 | MODEL_FILE_PREFIX = './reference_parser_models/' 27 | MODEL_FILE_NAME = 'reference_parser_pipeline.pkl' 28 | PARSE_REFERENCE_FILE_NAME = "actual_reference_structures_sample.csv" 29 | 30 | # Variables for match evaluation data 31 | EVAL_PUB_DATA_FILE_NAME = "epmc-metadata.json" 32 | EVAL_MATCH_NUMBER = 100000 33 | EVAL_SAMPLE_MATCH_NUMBER = 1000 34 | LENGTH_THRESHOLD = 50 35 | MATCH_THRESHOLD = 0.8 36 | 37 | settings = TestSettings() 38 | -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/negative_cosines_hist_2019-07-01-1211.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/negative_cosines_hist_2019-07-01-1211.png -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/negative_cosines_len_scatter_2019-07-01-1211.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/negative_cosines_len_scatter_2019-07-01-1211.png -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/thresholds_F1Score_negative_heatmap_2019-07-01-1211.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/thresholds_F1Score_negative_heatmap_2019-07-01-1211.png -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/thresholds_Precision_negative_heatmap_2019-07-01-1211.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/thresholds_Precision_negative_heatmap_2019-07-01-1211.png -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/thresholds_Recall_negative_heatmap_2019-07-01-1211.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/thresholds_Recall_negative_heatmap_2019-07-01-1211.png -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/title_lengths_2019-07-01-1211.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/exploratory/title_lengths_2019-07-01-1211.png -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/algo_evaluation/results/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-es-extractor/refparse/algo_evaluation/results/.gitkeep -------------------------------------------------------------------------------- /pipeline/reach-es-extractor/refparse/parse_latest.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code lets you run the reference parser with the 3 | latest scraped documents for an input organisations. 4 | e.g. 5 | python parse_latest.py msf 6 | which will parse and match the latest msf scrape in S3 7 | with the uber wellcome publications stored in S3 8 | """ 9 | 10 | from argparse import ArgumentParser 11 | from urllib.parse import urlparse 12 | import os 13 | import logging 14 | 15 | import boto3 16 | 17 | from .refparse import parse_references, create_argparser 18 | from .settings import settings 19 | 20 | parser = ArgumentParser(description=__doc__.strip()) 21 | 22 | ORG_NAMES = ( 23 | 'gov_uk', 24 | 'msf', 25 | 'nice', 26 | 'parliament', 27 | 'unicef', 28 | 'who_iris' 29 | ) 30 | 31 | if __name__ == "__main__": 32 | logger = settings.logger 33 | logger.setLevel(logging.INFO) 34 | 35 | parser = create_argparser(__doc__.strip()) 36 | parser.add_argument('org_name', choices=ORG_NAMES) 37 | 38 | args = parser.parse_args() 39 | org = args.org_name 40 | 41 | s3prefix = os.path.join(settings.SCRAPER_RESULTS_BASEDIR, org) 42 | u = urlparse(s3prefix) 43 | bucket_name, prefix = u.netloc, u.path[1:] 44 | 45 | s3 = boto3.resource('s3') 46 | bucket = s3.Bucket(bucket_name) 47 | 48 | # Get the most recently scraped filename 49 | key_name, obj = max( 50 | (obj.key, obj) for obj in bucket.objects.filter(Prefix=prefix).all() 51 | ) 52 | 53 | if args.output_url.startswith('file://'): 54 | # The output subfolder will be the name of the organisation 55 | # and the date of scrape (which is the name of the file) 56 | output_url = '{}/{}_{}'.format( 57 | args.output_url, 58 | org, 59 | 
import os
import logging


class BaseSettings:
    """Default (DEV) configuration for the reference parser."""

    logger = logging.getLogger(__name__)

    DEBUG = True

    # Thresholds used by the structuring / fuzzy-matching steps.
    PREDICTION_PROBABILITY_THRESHOLD = 0.75
    FUZZYMATCH_SIMILARITY_THRESHOLD = 0.8

    BUCKET = "datalabs-data"

    SCRAPER_RESULTS_BASEDIR = "s3://{}/scraper-results".format(BUCKET)
    SCRAPER_RESULTS_DIR = SCRAPER_RESULTS_BASEDIR
    SCRAPER_RESULTS_FILENAME = ''

    LOCAL_OUTPUT_DIR = 'local_output'
    STRUCTURED_REFS_FILENAME = 'structured_references.json'
    MATCHED_REFS_FILENAME = 'matched_references.json'

    MIN_CHAR_LIMIT = 20
    MATCH_TITLE_LENGTH_THRESHOLD = 40

    # Legacy reference class names expected by downstream consumers.
    REF_CLASSES = [
        'Authors', 'Journal', 'Volume', 'Issue', 'Pagination', 'Title',
        'PubYear',
    ]
    # Component names emitted by the Deep Reference Parser.
    DRP_REF_COMPONENTS = ['title', 'year', 'author']
    # Maps Deep Reference Parser component names onto the legacy names;
    # the legacy names are kept to avoid possible downstream errors.
    COMPONENT_NAME_MAP = {'title': 'Title', 'year': 'PubYear', 'author': 'Authors'}


class ProdSettings(BaseSettings):
    """Production configuration: S3-backed, no debug."""

    DEBUG = False
    S3 = True


class LocalSettings(BaseSettings):
    """Local development: scraper results come from the local filesystem."""

    DEBUG = True
    S3 = False
    SCRAPER_RESULTS_DIR = "scraper-results"


# The active settings class is chosen via the REF_PARSER_SETTINGS
# environment variable (DEV / LOCAL / PROD); defaults to LOCAL.
settings_mode = {
    'DEV': BaseSettings,
    'LOCAL': LocalSettings,
    'PROD': ProdSettings,
}
settings = settings_mode[os.environ.get('REF_PARSER_SETTINGS', 'LOCAL')]
import unittest
import pytest

from refparse.utils import structure_reference


class TestStructure(unittest.TestCase):
    """Checks for structure_reference(), which folds (token, label) pairs
    into a dict keyed by the legacy reference class names."""

    # Shared fixture: a typical MSF reference split into labelled tokens.
    MSF_REFERENCE = [
        ('Medecins', 'author'), ('Sans', 'author'), ('Frontières', 'author'),
        ('.', 'o'), ('TB', 'title'), ('Spot', 'title'), ('Report', 'title'),
        ('.', 'o'), ('2011', 'year'),
    ]

    def test_empty_components(self):
        structured = structure_reference([])
        self.assertEqual(structured.get('Title'), '', "Should be ''")

    def test_size(self):
        structured = structure_reference([])
        self.assertEqual(len(structured), 7, "Should be 7 classes predicted")

    def test_string_component(self):
        structured = structure_reference(self.MSF_REFERENCE)
        self.assertEqual(isinstance(structured['Title'], str), True, "Should be a string")

    def test_normal_components(self):
        structured = structure_reference(self.MSF_REFERENCE)
        self.assertEqual(structured['Title'], 'TB Spot Report', "Should be 'TB Spot Report'")

    def test_split_title(self):
        # Title tokens separated by a non-title token are still joined.
        components = [('TB', 'title'), ('Spot', 'author'), ('Report', 'title')]
        structured = structure_reference(components)
        self.assertEqual(structured['Title'], 'TB Report', "Should be 'TB Report'")
import re

# Compiled once at import time; clean_text() runs over every document
# section and every publication title. Raw-string patterns replace the
# original "\s{1,}" literal, which was an invalid escape sequence in a
# non-raw string (a DeprecationWarning, and an error in future Pythons).
_WHITESPACE_RE = re.compile(r"\s+")
_NON_ALPHANUM_RE = re.compile(r"[^A-Za-z0-9 ]")


class ExactMatcher:
    """Matches academic publications to policy documents by looking for
    the publication title, verbatim, inside the documents' section text.

    Args:
        sectioned_documents: iterable of objects with ``id`` and
            ``section`` attributes (a document and its section text).
        title_length_threshold: publication titles shorter than this
            (after cleaning) are skipped entirely, since short titles
            match too easily.
    """

    def __init__(self, sectioned_documents, title_length_threshold):
        self.texts = [
            (doc.id, self.clean_text(doc.section))
            for doc in sectioned_documents
        ]
        self.title_length_threshold = title_length_threshold

    def clean_text(self, string):
        """
        Input:
            -A string
        Output:
            -A string, lower-cased, with white space normalised to single
             spaces and non-alphanumeric characters removed
        Cleans up text such that it can easily be searched
        """
        # \s+ also covers newlines, so the separate "\n" substitution of
        # the original implementation is no longer needed.
        string = _WHITESPACE_RE.sub(" ", string)
        string = _NON_ALPHANUM_RE.sub("", string)
        return string.lower()

    def match(self, publication):
        """
        Input:
            publication: dict that contains title and uber_id of an
                academic publication
        Yields:
            matched_reference: dict that links an academic publication
                with a policy document
        """
        publication_title = self.clean_text(publication['title'])
        # Very short titles produce spurious exact matches; yield nothing.
        if len(publication_title) < self.title_length_threshold:
            return

        for doc_id, text in self.texts:
            if publication_title in text:
                yield {
                    'Document id': doc_id,
                    'Matched title': publication_title,
                    'Matched publication id': publication['uber_id'],
                    'Match algorithm': 'Exact match'
                }
import datetime
import re

from refparse.settings import settings


def structure_reference(reference_components):
    """Collapse (token, component) predictions into a single reference dict.

    Args:
        reference_components: list of ``(token, component)`` tuples as
            emitted by the deep reference parser,
            e.g. ``[('TB', 'title'), ('2011', 'year')]``.

    Returns:
        dict keyed by the legacy reference class names
        (``settings.REF_CLASSES``). Classes that were not predicted map to
        ``''``, which keeps the schema stable for possible downstream
        consumers.

    TO DO: Evaluate how often the same component type is predicted
    but not next to one another,
    e.g. components = ['title', 'title', 'year', 'title']
    """
    # Start with every class empty so callers always see the full schema.
    # (The original implementation also built unused ref_tokens /
    # ref_components lists here; they have been removed.)
    structured_reference = {ref_class: '' for ref_class in settings.REF_CLASSES}
    for component in settings.DRP_REF_COMPONENTS:
        structured_reference[
            settings.COMPONENT_NAME_MAP.get(component, component)
        ] = ' '.join(
            token for token, predicted in reference_components
            if predicted == component
        )

    return structured_reference
# Field size limits applied before serialisation — presumably the
# downstream storage column widths (TODO confirm against the schema).
_MAX_TITLE_LEN = 1024
_MAX_FIELD_LEN = 256


def _clip(value, limit):
    """Truncate ``value`` to ``limit`` characters if it is a non-empty
    string; return it unchanged otherwise."""
    if value and isinstance(value, str):
        return value[:limit]
    return value


def serialise_matched_reference(data, current_timestamp):
    """Serialise the data matched by the model.

    Args:
        data: dict with 'WT_Ref_Id', 'Cosine_Similarity' and 'Document id'.
        current_timestamp: creation timestamp recorded on the row.
    """
    return {
        'publication_id': data['WT_Ref_Id'],
        'cosine_similarity': data['Cosine_Similarity'],
        'datetime_creation': current_timestamp,
        'document_hash': data['Document id'],
    }


def serialise_reference(data, current_timestamp):
    """Serialise the data parsed by the model.

    String fields are truncated (title to 1024 characters, everything else
    to 256) and a non-integer 'PubYear' becomes ``None``. Unlike the
    original implementation, the caller's ``data`` dict is not mutated and
    a missing 'PubYear' key no longer raises KeyError; 'Document id'
    remains required.
    """
    title = data.get('Title')
    if title and len(title) > _MAX_TITLE_LEN:
        title = title[:_MAX_TITLE_LEN]

    pub_year = data.get('PubYear')
    if not isinstance(pub_year, int):
        pub_year = None

    return {
        'author': _clip(data.get('Authors'), _MAX_FIELD_LEN),
        'issue': _clip(data.get('Issue'), _MAX_FIELD_LEN),
        'journal': _clip(data.get('Journal'), _MAX_FIELD_LEN),
        'pub_year': pub_year,
        'pagination': _clip(data.get('Pagination'), _MAX_FIELD_LEN),
        'title': title,
        'file_hash': _clip(data['Document id'], _MAX_FIELD_LEN),
        'datetime_creation': current_timestamp,
        'volume': _clip(data.get('Volume'), _MAX_FIELD_LEN),
    }
-------------------------------------------------------------------------------- 1 | FROM reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.indexer.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.indexer.txt 9 | 10 | 11 | COPY ./index_task.py /opt/reach/index_task.py 12 | 13 | # Give execution rights to the entrypoint Python script 14 | RUN chmod +x /opt/reach/index_task.py 15 | 16 | ENTRYPOINT ["/opt/reach/index_task.py"] 17 | -------------------------------------------------------------------------------- /pipeline/reach-es-indexer/requirements.txt: -------------------------------------------------------------------------------- 1 | elasticsearch 2 | -------------------------------------------------------------------------------- /pipeline/reach-fuzzy-matcher/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.fuzzymatcher.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.fuzzymatcher.txt 9 | 10 | 11 | COPY ./fuzzymatcher_task.py /opt/reach/fuzzymatcher_task.py 12 | 13 | # Give execution rights to the entrypoint Python script 14 | RUN chmod +x /opt/reach/fuzzymatcher_task.py 15 | 16 | ENTRYPOINT ["/opt/reach/fuzzymatcher_task.py"] 17 | -------------------------------------------------------------------------------- /pipeline/reach-fuzzy-matcher/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-fuzzy-matcher/requirements.txt -------------------------------------------------------------------------------- /pipeline/reach-parser/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 
reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.parser.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.parser.txt 9 | 10 | 11 | COPY ./parser_task.py /opt/reach/parser_task.py 12 | COPY ./pdf_parser /opt/reach/pdf_parser 13 | COPY ./normalizer /opt/reach/normalizer 14 | 15 | # Give execution rights to the entrypoint Python script 16 | RUN chmod +x /opt/reach/parser_task.py 17 | 18 | ENTRYPOINT ["/opt/reach/parser_task.py"] 19 | -------------------------------------------------------------------------------- /pipeline/reach-parser/Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.parser.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.parser.txt 9 | 10 | 11 | COPY ./parser_task.py /opt/reach/parser_task.py 12 | COPY ./pdf_parser /opt/reach/pdf_parser 13 | COPY ./normalizer /opt/reach/normalizer 14 | 15 | # Give execution rights to the entrypoint Python script 16 | RUN chmod +x /opt/reach/parser_task.py 17 | -------------------------------------------------------------------------------- /pipeline/reach-parser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-parser/__init__.py -------------------------------------------------------------------------------- /pipeline/reach-parser/normalizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-parser/normalizer/__init__.py -------------------------------------------------------------------------------- 
class ParsePdfOperator:
    """Parses every scraped PDF for an organisation and writes the parser
    output to S3. (The original docstring — "Pulls data from the
    dimensions.ai" — was a copy-paste error.)

    Args:
        organisation: The organisation whose documents should be parsed.
        src_s3_dir: s3:// URL of the directory holding the scraped PDFs.
        dst_s3_key: s3:// URL the parser output is written to.
    """

    def __init__(self, organisation, src_s3_dir, dst_s3_key):
        self.organisation = organisation
        self.src_s3_dir = src_s3_dir
        self.dst_s3_key = dst_s3_key

        self.client = s3hook.S3Hook()

    @report_exception
    def execute(self):
        """Run the PDF parser over every document in ``src_s3_dir``.

        Raises:
            ValueError: if either S3 location is not an s3:// URL.
        """
        os.environ.setdefault(
            'SCRAPY_SETTINGS_MODULE',
            'scraper.wsf_scraping.settings'
        )
        # Fail fast with an actionable message instead of the original
        # bare ValueError.
        if not self.src_s3_dir.startswith('s3://'):
            raise ValueError(
                "src_s3_dir must start with 's3://', got %r" % self.src_s3_dir
            )
        if not self.dst_s3_key.startswith('s3://'):
            raise ValueError(
                "dst_s3_key must start with 's3://', got %r" % self.dst_s3_key
            )

        pdf_parser_main.parse_all_pdf(
            self.organisation,
            self.src_s3_dir,
            self.dst_s3_key,
        )
68 | ) 69 | 70 | args = arg_parser.parse_args() 71 | 72 | # Create an intermediate folder in s3 for raw parser output 73 | parser_dst_key = args.dst_s3_key.replace( 74 | "_normalized", 75 | "_raw", 76 | ) 77 | 78 | parser = ParsePdfOperator( 79 | args.organisation, 80 | args.src_s3_dir, 81 | parser_dst_key 82 | ) 83 | parser.execute() 84 | 85 | normalizer = PolicyNameNormalizerOperator( 86 | args.organisation, 87 | parser_dst_key, 88 | args.dst_s3_key 89 | ) 90 | 91 | normalizer.normalize() 92 | -------------------------------------------------------------------------------- /pipeline/reach-parser/pdf_parser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-parser/pdf_parser/__init__.py -------------------------------------------------------------------------------- /pipeline/reach-parser/pdf_parser/objects/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-parser/pdf_parser/objects/__init__.py -------------------------------------------------------------------------------- /pipeline/reach-parser/pdf_parser/resources/keywords.txt: -------------------------------------------------------------------------------- 1 | # 1.Wellcome places 2 | 3 | Sanger 4 | Wellcome 5 | MOP 6 | FARR 7 | Crick 8 | Hilleman Institute 9 | Night Star 10 | Syncona 11 | 12 | # 2. Coalitions 13 | 14 | UPD 15 | CARI 16 | O’Neil review 17 | Cepi 18 | AESA 19 | Gavi 20 | India Alliance 21 | CarbX 22 | GLOPID/R 23 | Science Media Centre 24 | NC3R 25 | MQ 26 | National Stem Learning Centre 27 | 28 | # 3. Programmes 29 | 30 | Deltas 31 | H3 Africa 32 | 10000 Genome 33 | Genomics England 34 | HRCS 35 | 36 | # 4. Individuals we fund 37 | 38 | 39 | # 5. 
import unittest

from lxml import etree

from pdf_parser.pdf_parse import parse_pdf_document
from pdf_parser.tools.extraction import (_find_elements,
                                         _flatten_text,
                                         _flatten_fontspec)
from tests.common import TEST_PDF, TEST_XML


class TestTools(unittest.TestCase):
    """Exercises section-element extraction against the test PDF fixture."""

    def setUp(self):
        self.test_file = open(TEST_PDF, 'rb')
        self.pdf_file_object, _, _, errors = parse_pdf_document(self.test_file)
        assert not errors

    def tearDown(self):
        self.test_file.close()

    def test_element_finder(self):
        # The fixture PDF contains no 'Reference' section.
        elements = _find_elements(self.pdf_file_object, 'Reference')
        self.assertEqual(elements, [])


class TestFlattenTools(unittest.TestCase):
    """Exercises the XML flattening helpers against the test XML fixture."""

    def setUp(self):
        self.test_file = open(TEST_XML, 'r')
        tree = etree.parse(self.test_file)
        self.fontspecs = tree.xpath('//fontspec')
        self.texts = tree.xpath('//text')

    def tearDown(self):
        # Previously commented out, leaking one file handle per test.
        self.test_file.close()

    def test_flatten_text(self):
        text = _flatten_text(self.texts[0])
        self.assertEqual(text, "Test Page 1")
        self.assertIs(type(text), str)

    def test_flatten_texts(self):
        """ Ensure that _flatten_text adequately captures text with formatting.
        """
        texts = [_flatten_text(i) for i in self.texts]
        self.assertIs(type(texts), list)
        self.assertIs(len(texts), 10)
        self.assertEqual(texts[1], 'All bold line.')
        self.assertEqual(texts[2], 'Partly bold line.')
        self.assertEqual(texts[3], 'All italic line.')
        self.assertEqual(texts[4], 'Partly italic line.')

    def test_flatten_fontspec(self):
        font_map = _flatten_fontspec(self.fontspecs)
        self.assertEqual(len(font_map), 2)
        self.assertIs(type(font_map), dict)
./scrapy.cfg /etc/reach/scrapy.cfg 12 | COPY ./spider_task.py /opt/reach/spider_task.py 13 | COPY ./wsf_scraping /opt/reach/wsf_scraping 14 | 15 | # Give execution rights to the entrypoint Python script 16 | RUN chmod +x /opt/reach/spider_task.py 17 | 18 | ENTRYPOINT ["/opt/reach/spider_task.py"] 19 | -------------------------------------------------------------------------------- /pipeline/reach-scraper/Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM reach.base 2 | 3 | WORKDIR /opt/reach 4 | 5 | COPY ./requirements.txt /opt/reach/requirements.scraper.txt 6 | 7 | RUN pip install -U pip && \ 8 | python3 -m pip install -r /opt/reach/requirements.scraper.txt 9 | 10 | 11 | COPY ./scrapy.cfg /etc/reach/scrapy.cfg 12 | COPY ./spider_task.py /opt/reach/spider_task.py 13 | COPY ./wsf_scraping /opt/reach/wsf_scraping 14 | 15 | # Give execution rights to the entrypoint Python script 16 | RUN chmod +x /opt/reach/spider_task.py 17 | -------------------------------------------------------------------------------- /pipeline/reach-scraper/README.md: -------------------------------------------------------------------------------- 1 | # scraper 2 | 3 | A web scraper tool to get data for evaluating Wellcome impact. 
4 | 5 | ## What do we scrape 6 | 7 | 8 | | Organisation | What is scraped | Years | 9 | |--------------|-------------------------------------------------------------------------------------|-------------| 10 | | WHO | Everything on apps.who.int/iris | 2012 - 2019 | 11 | | NICE | All the Guidances and evidences | 2000 - 2019 | 12 | | MSF | All the reports and activity reports | 2007 - 2019 | 13 | | GOV | Everything from gov.uk/government/publications | 1945 - 2019 | 14 | | UNICEF | Everything from data.unicef.org/resources/resource-type/[publications and guidance] | 2010 - 2019 | 15 | | Parliament | Everything from search-material.parliament.uk | 1984 - 2019 | 16 | 17 | 18 | 19 | ## Output Formatting 20 | 21 | The outputed file is meant to contain a number a different fields, which 22 | can vary depending on the scraper provider. 23 | 24 | It will always have the following attributes, though: 25 | 26 | |Unique|Attribute|Description| 27 | |------|---------|-----------| 28 | | |title | a string containing the document title| 29 | |* |uri | the url of the document| 30 | | |pdf | the name of the file| 31 | | |sections | a json object of section names, containing the text extracted from matching sections| 32 | | |keywords | a json object of keywords, containing the text extracted from matching text| 33 | |* |hash | a md5 digest of the file| 34 | | |provider | the provider from where the file has been downloaded| 35 | | |date_scraped | the date (YYYYMMDD) when the article has been scraped| 36 | 37 | Some providers will have additional parameters: 38 | 39 | ### WHO 40 | 41 | |Attribute|Description| 42 | |---------|-----------| 43 | |year | the publication year of the document| 44 | |types | an array containing the WHO type associated with the document| 45 | |subjects | an array containing the WHO subjects of the document| 46 | |authors | an array containing the authors (from WHO)| 47 | 48 | ### Nice 49 | 50 | |Attribute|Description| 51 | |---------|-----------| 52 | |year 
| the publication year of the document| 53 | 54 | ### Parliament 55 | 56 | |Attribute|Description| 57 | |---------|-----------| 58 | |year | the publication year of the document| 59 | |types | the type of the document | 60 | -------------------------------------------------------------------------------- /pipeline/reach-scraper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/pipeline/reach-scraper/__init__.py -------------------------------------------------------------------------------- /pipeline/reach-scraper/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | 3 | x-airflow-image: &airflow-image 4 | 160358319781.dkr.ecr.eu-west-1.amazonaws.com/uk.ac.wellcome/reach:latest 5 | 6 | x-env: &env 7 | AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}" 8 | AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}" 9 | AWS_SESSION_TOKEN: "${AWS_SESSION_TOKEN}" 10 | 11 | SENTRY_DSN: "${SENTRY_DSN}" 12 | 13 | services: 14 | scraper_msf: 15 | image: uk.ac.wellcome/reach/scraper:latest 16 | environment: *env 17 | entrypoint: 18 | - /opt/scraper/spider_task.py 19 | - s3://datalabs-dev/scraper/split-container/ 20 | - msf 21 | -------------------------------------------------------------------------------- /pipeline/reach-scraper/pg_exists.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Tests for whether something exists in postgres or not. 
def check_table(dsn, tablename):
    """Return 0 (shell-style success) if ``tablename`` exists in the
    ``public`` schema of the database at ``dsn``, else 1.

    The connection is closed explicitly: psycopg2's ``with connection``
    block only wraps a transaction — it commits/rolls back but does NOT
    close the connection (the original leaked it; harmless for a
    short-lived script, but wrong).
    """
    con = psycopg2.connect(dsn, connect_timeout=CONNECT_TIMEOUT)
    try:
        with con:
            with con.cursor() as c:
                c.execute(
                    'SELECT 1 FROM pg_tables '
                    'WHERE schemaname = %s AND tablename = %s',
                    ('public', tablename)
                )
                if c.fetchone() == (1,):
                    return 0
        return 1
    finally:
        con.close()
def test_connection(dsn):
    """Open a short-lived connection to ``dsn`` and run a trivial query.

    Raises ``psycopg2.OperationalError`` if postgres is not reachable.
    """
    with psycopg2.connect(dsn, connect_timeout=CONNECT_TIMEOUT) as con:
        with con.cursor() as c:
            c.execute('SELECT 1')


def pg_isready(dsn, timeout, success_secs):
    """Poll postgres until it has been continuously ready for
    ``success_secs`` seconds, or ``timeout`` seconds have elapsed.

    Requiring a sustained window of successful queries (rather than a
    single one) accommodates init scripts that restart the database
    shortly after it first accepts connections (see module docstring).

    Returns:
        0 if postgres stayed ready for a full ``success_secs`` window,
        1 if the overall ``timeout`` expired first.
    """
    start = time.time()
    while time.time() - start < timeout:
        try:
            # Inner loop: postgres must keep answering for the whole
            # success window; any OperationalError restarts the window
            # via the outer loop.
            success_start = time.time()
            while time.time() - success_start < success_secs:
                if time.time() - start > timeout:
                    # Overall deadline expired mid-window.
                    return 1
                test_connection(dsn)
                logging.debug('pg_isready: successful connect')
                time.sleep(POLL_WAIT)
            return 0
        except psycopg2.OperationalError as e:
            logging.debug('pg_isready: %s', e)
            time.sleep(POLL_WAIT)
    return 1
class AjaxContract(Contract):
    """Add headers to a contract request so that it becomes an AJAX request.

    Sets ``X-Requested-With: XMLHttpRequest`` plus a nice.org.uk referer —
    presumably to mirror the NICE spider's AJAX pagination requests
    (confirm against the spider).
    """
    name = "ajax"

    def adjust_request_args(self, kwargs):
        # Called by scrapy's contract machinery with the request kwargs;
        # must return the (possibly modified) kwargs dict.
        headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'referer': 'https://www.nice.org.uk/guidance/published'
        }
        kwargs['headers'] = headers
        return kwargs
class ManifestFeedStorage(BlockingFeedStorage):
    """Scrapy feed storage that keeps the scrape manifest in Amazon S3.

    It is given the information about the PDF files scraped in the
    pipeline and processes it to update the manifest file in S3. The
    PDF files themselves are saved to S3 in the pipeline.py file.
    """

    def __init__(self, url):
        """Initialise the feed storage with the destination feed URI.

        Args:
            url: S3 key URL under which the manifest is stored.
        """
        self.logger = logging.getLogger(__name__)
        self.dst_key_url = url
        # Set in open(); needed later to namespace the manifest by spider.
        self.spider = None

    def open(self, spider):
        """Called automatically by scrapy to receive items returned by
        the pipeline. Initialises the object with an S3 file-system
        backend and records the spider (whose name identifies the
        organisation).

        Should always return the parent class's open() result.
        """
        self.spider = spider
        self.file_system = S3Hook()
        return super(ManifestFeedStorage, self).open(spider)

    @report_exception
    def _store_in_thread(self, data_file):
        """
        Uploads our manifest file to S3.

        Called in Twisted's thread pool using
        twisted.internet.deferToThread. Thus the explicit exception
        reporting above.

        Args:
            data_file: file object holding the serialized feed output
                (provided by BlockingFeedStorage).
        """
        self.logger.info('Updating the manifest at {dst_key_url}'.format(
            dst_key_url=self.dst_key_url,
        ))
        try:
            self.file_system.update_manifest(
                data_file,
                self.dst_key_url,
                self.spider.name
            )
        except Exception as e:
            # If it went bad, we need to inform the spider back in
            # Twisted space, so that eventually the calling airflow task
            # can find out, too.
            self.logger.error('ManifestFeedStorage error: %s', e)
            result = threads.blockingCallFromThread(
                reactor,
                self.spider.crawler.signals.send_catch_log,
                signal=manifest_storage_error,
                exception=e
            )
            self.logger.info('send_catch_log: %s', result)
            raise
# -*- coding: utf-8 -*-
import logging
import os
from datetime import datetime

# Get feed configuration from environment variable. Default to debug
FEED_CONFIG = os.environ.get('SCRAPY_FEED_CONFIG', 'DEBUG')
BOT_NAME = 'wsf_scraper'

SPIDER_MODULES = ['wsf_scraping.spiders']
NEWSPIDER_MODULE = 'wsf_scraping.spiders'

# Custom contracts for spider testing
SPIDER_CONTRACTS = {
    'wsf_scraping.contracts.AjaxContract': 10,
}
ITEM_PIPELINES = {
    'wsf_scraping.pipelines.WsfScrapingPipeline': 10,
}
FEED_STORAGES = {
    'manifests3': 'wsf_scraping.feed_storage.ManifestFeedStorage',
    'local': 'wsf_scraping.feed_storage.ManifestFeedStorage',
}

SPIDER_MIDDLEWARES = {
    'wsf_scraping.middlewares.ReachDisallowedHostMiddleware': 450,
}

LOG_LEVEL = 'INFO'
LOG_FORMATTER = 'wsf_scraping.middlewares.PoliteLogFormatter'

# Set pdfminer log to WARNING
logging.basicConfig()
logging.getLogger("pdfminer").setLevel(logging.WARNING)

DUPEFILTER_CLASS = 'scrapy.dupefilters.RFPDupeFilter'
# Use a physical (disk-backed) queue: slower, but adds reliability
DEPTH_PRIORITY = 1
SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleFifoDiskQueue'

# Crawl responsibly by identifying yourself (and your website)
USER_AGENT = 'Wellcome Reach Scraper (datalabs-ops@wellcome.ac.uk)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 5
CONCURRENT_REQUESTS_PER_DOMAIN = 5
RETRY_ENABLED = True
RETRY_TIMES = 3
# 0 disables scrapy's download size warning/limit (scraped PDFs can be large)
DOWNLOAD_WARNSIZE = 0
DOWNLOAD_MAXSIZE = 0
DOWNLOAD_TIMEOUT = 20
DOWNLOAD_FAIL_ON_DATALOSS = True
DOWNLOAD_DELAY = 0.25

HTTPCACHE_ENABLED = False

AUTOTHROTTLE_ENABLED = True
AUTOTHROTTLE_START_DELAY = 0.1
AUTOTHROTTLE_MAX_DELAY = 0.5
AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0

# Disable cookies
COOKIES_ENABLED = False

# Maximum number of articles to scrape; -1 means no limit
MAX_ARTICLE = int(os.environ.get('MAX_ARTICLE', '-1'))

# who_iris and who_iris_single_page dedicated settings
WHO_IRIS_RPP = 250
WHO_IRIS_LIMIT = False
if 'WHO_IRIS_YEARS' in os.environ:
    WHO_IRIS_YEARS = [
        int(x) for x in os.environ['WHO_IRIS_YEARS'].split(',')
    ]
else:
    WHO_IRIS_YEARS = list(range(2012, datetime.now().year + 1))

# nice dedicated settings
NICE_GET_HISTORY = False
NICE_GET_EVIDENCES = False

KEYWORDS_CONTEXT = 0

# Jsonlines are cleaner for big feeds
FEED_FORMAT = 'jsonlines'
FEED_EXPORT_ENCODING = 'utf-8'
FEED_TEMPDIR = '/tmp/'

# By default, log the results in a local folder
FEED_URI = os.environ.get('SCRAPY_FEED_URI', 'local:///tmp/%(name)s')

DATABASE_URL = os.environ.get('DATABASE_URL')
import scrapy
from .base_spider import BaseSpider


class MsfSpider(BaseSpider):
    """Spider for the MSF UK website's activity-report and report listings."""

    name = 'msf'

    def start_requests(self):
        """Set up the initial request to the website to scrape."""

        urls = [
            'https://www.msf.org.uk/activity-reports',
            'https://www.msf.org.uk/reports',
        ]

        for url in urls:
            # The two listing pages have different markup, so they get
            # different parse callbacks.
            callback = self.parse
            if "/reports" in url:
                callback = self.parse_reports

            self.logger.info('Initial url: %s', url)
            yield scrapy.Request(
                url=url,
                errback=self.on_error,
                callback=callback,
            )

    def parse(self, response):
        """ Parse activity-reports pages.

        @url https://www.msf.org.uk/activity-reports
        @returns items 0 0
        @returns requests 10
        """

        # TODO: Can pull document title from image alt or title properties

        doc_links = list(response.css('.field-item p'))

        for item in doc_links:
            url = item.xpath('.//a[@class="btn"]/@href').extract_first()
            image_alt = item.xpath('.//img[@class="media-element file-default"]/@alt').extract_first()

            if self._is_valid_pdf_url(url):
                data_dict = {
                    'source_page': response.url,
                    'page_title': response.xpath('/html/head/title/text()').extract_first(),
                    'title': image_alt
                }
                yield scrapy.Request(
                    url=response.urljoin(url),
                    errback=self.on_error,
                    callback=self.save_pdf,
                    meta={'data_dict': data_dict}
                )

    def parse_reports(self, response):
        """ Parse the reports listing page.

        Args:
            response: the reports page response.
        Yields:
            One request per valid PDF link, handled by save_pdf.
        """

        doc_links = list(response.css('.field-item a'))

        page_title = response.xpath('/html/head/title/text()').extract_first()

        for item in doc_links:
            url = item.xpath('@href').extract_first()
            if self._is_valid_pdf_url(url):
                # BUGFIX: build a fresh dict per request. The previous
                # code mutated one shared data_dict across iterations,
                # so by the time scrapy processed the queued requests
                # every one of them carried the *last* link's title.
                data_dict = {
                    'source_page': response.url,
                    'page_title': page_title,
                    'title': item.xpath('text()').extract_first(),
                }
                yield scrapy.Request(
                    url=response.urljoin(url),
                    errback=self.on_error,
                    callback=self.save_pdf,
                    dont_filter=True,
                    meta={'data_dict': data_dict}
                )
class Crawler:
    """Bare-bones stand-in for scrapy's Crawler, used by the spider tests."""

    class Stats:
        """Stats stub: every lookup reports that nothing was recorded."""

        @staticmethod
        def get_value(*args):
            # Accept and ignore any arguments; tests only need a None back.
            return None

    # A single shared instance is fine — the stub holds no state.
    stats = Stats()
'content-type': b'application/pdf' 33 | } 34 | request = Request('http://foo.bar/documents/document.pdf', meta=meta) 35 | self.pdf_response = Response( 36 | 'http://foo.bar/documents/document.pdf', 37 | body=self.test_file.read(), 38 | request=request, 39 | headers=headers 40 | ) 41 | 42 | def tearDown(self): 43 | self.test_file.close() 44 | 45 | def test_base_spider(self): 46 | """Tests if, given a pdf-like response containing a data_dict metadata, 47 | the save_pdf method does: 48 | - Create a NamedTemporaryFile 49 | - Return an item 50 | """ 51 | 52 | res = self.spider.save_pdf(self.pdf_response) 53 | self.assertTrue(res) 54 | self.assertTrue('foo' == res['title']) 55 | -------------------------------------------------------------------------------- /test_target/README.md: -------------------------------------------------------------------------------- 1 | # Scrape Target 2 | 3 | A basic HTTP endpoint serving HTML pages for testing the scraper 4 | 5 | -------------------------------------------------------------------------------- /test_target/inner_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Inner Page 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 |
13 | 14 |

Some Page Title

15 | 16 |

Spicy jalapeno bacon ipsum dolor amet buffalo leberkas spare ribs chuck ball tip short ribs hamburger. Capicola drumstick chicken, swine turkey picanha frankfurter jowl shank landjaeger. Rump leberkas beef ribs bacon flank shankle. Pastrami porchetta tongue spare ribs ball tip shoulder strip steak doner ham hock sausage. Prosciutto cupim shoulder, ham hock pork chop capicola pig andouille shank pork loin salami doner pork belly.

17 | 18 |

Spicy jalapeno bacon ipsum dolor amet buffalo leberkas spare ribs chuck ball tip short ribs hamburger. Capicola drumstick chicken, swine turkey picanha frankfurter jowl shank landjaeger. Rump leberkas beef ribs bacon flank shankle. Pastrami porchetta tongue spare ribs ball tip shoulder strip steak doner ham hock sausage. Prosciutto cupim shoulder, ham hock pork chop capicola pig andouille shank pork loin salami doner pork belly.

19 | 20 |

Spicy jalapeno bacon ipsum dolor amet buffalo leberkas spare ribs chuck ball tip short ribs hamburger. Capicola drumstick chicken, swine turkey picanha frankfurter jowl shank landjaeger. Rump leberkas beef ribs bacon flank shankle. Pastrami porchetta tongue spare ribs ball tip shoulder strip steak doner ham hock sausage. Prosciutto cupim shoulder, ham hock pork chop capicola pig andouille shank pork loin salami doner pork belly.

21 | 22 |
23 | 24 | Download Now 25 | 26 |
27 | 28 |
29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /test_target/page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Demo Page 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 |
13 | 14 |

Some Page Title

15 | 16 |

Spicy jalapeno bacon ipsum dolor amet buffalo leberkas spare ribs chuck ball tip short ribs hamburger. Capicola drumstick chicken, swine turkey picanha frankfurter jowl shank landjaeger. Rump leberkas beef ribs bacon flank shankle. Pastrami porchetta tongue spare ribs ball tip shoulder strip steak doner ham hock sausage. Prosciutto cupim shoulder, ham hock pork chop capicola pig andouille shank pork loin salami doner pork belly.

17 | 18 |

Spicy jalapeno bacon ipsum dolor amet buffalo leberkas spare ribs chuck ball tip short ribs hamburger. Capicola drumstick chicken, swine turkey picanha frankfurter jowl shank landjaeger. Rump leberkas beef ribs bacon flank shankle. Pastrami porchetta tongue spare ribs ball tip shoulder strip steak doner ham hock sausage. Prosciutto cupim shoulder, ham hock pork chop capicola pig andouille shank pork loin salami doner pork belly.

19 | 20 |

Spicy jalapeno bacon ipsum dolor amet buffalo leberkas spare ribs chuck ball tip short ribs hamburger. Capicola drumstick chicken, swine turkey picanha frankfurter jowl shank landjaeger. Rump leberkas beef ribs bacon flank shankle. Pastrami porchetta tongue spare ribs ball tip shoulder strip steak doner ham hock sausage. Prosciutto cupim shoulder, ham hock pork chop capicola pig andouille shank pork loin salami doner pork belly.

21 | 22 |
23 | 24 | View Page 25 | View Page 2 26 | View Page 3 27 | 28 |
29 | 30 |
import os

import tornado.ioloop
import tornado.web

PORT = 8888

class MainHandler(tornado.web.RequestHandler):
    """Serves the scrape-target landing page."""
    def get(self):
        self.render("page.html")

class PageHandler(tornado.web.RequestHandler):
    """Serves the inner page that every /page* route points at."""
    def get(self):
        self.render("inner_page.html")

class RobotHandler(tornado.web.RequestHandler):
    """Serves robots.txt so robots.txt-obeying crawlers will proceed."""
    def get(self):
        self.render("robots.txt")

def make_app():
    """Build the tornado Application with all test-target routes."""
    # Idiom fix: os.path.dirname is clearer and more portable than
    # manually splitting the absolute path on "/".
    basedir = os.path.dirname(os.path.abspath(__file__))
    return tornado.web.Application([
        (r"/", MainHandler,),
        (r"/robots.txt", RobotHandler,),
        (r"/page", PageHandler,),
        (r"/page2", PageHandler,),
        (r"/page3", PageHandler,),
        (r"/static/(.*)", tornado.web.StaticFileHandler, {'path': basedir}),
    ])

if __name__ == "__main__":
    app = make_app()
    app.listen(PORT)
    print("### Test Scrape Target")
    # Consistency fix: derive the banner from PORT instead of repeating
    # the value as a hard-coded literal.
    print("    localhost:%d" % PORT)
    tornado.ioloop.IOLoop.current().start()
# Build/dev helper targets for the Reach web front-end.
PYTHON := ${PWD}/venv/bin/python
GUNICORN := ${PWD}/venv/bin/gunicorn
STATIC_ROOT := ${PWD}/build/web/static
DOCS_STATIC_ROOT := ${PWD}/docs/build/html/_static
SENTRY_DSN := ""
CMD_ARGS := "--bind=127.0.0.1 --workers=1 --reload"
CONFIG_FILE := ${PWD}/config/dev.config.toml
APP_OUT_DIR := ${STATIC_ROOT}/js
# BUGFIX: this line previously read `APP_OUT_DIR := ${CSS_OUT_DIR}/css`,
# which clobbered APP_OUT_DIR and left CSS_OUT_DIR (used by the
# watch-styles target) undefined/empty.
CSS_OUT_DIR := ${STATIC_ROOT}/css


.PHONY: setup
setup:
	python3 -m venv --copies venv
	./venv/bin/pip install -r requirements.txt

.PHONY: run-server
run-server:
	CONFIG_FILE=${PWD}/config/dev.config.toml ${GUNICORN} web:application

.PHONY: watch
watch: watch-styles watch-app

.PHONY: watch-app
watch-app:
	parcel watch web/src/js/app.js --out-dir ${APP_OUT_DIR}

.PHONY: watch-styles
watch-styles:
	parcel watch web/src/css/style.less --out-dir ${CSS_OUT_DIR}


.PHONY: run
run: run-server watch
4 | 5 | DESTDIR=$1 6 | if [ -z "$DESTDIR" ]; then 7 | echo "Usage: $0 /path/to/static/vendor" >&2 8 | exit 1 9 | fi 10 | 11 | 12 | SPECTRE_VERSION=0.5.8 13 | 14 | 15 | # Spectre CSS 16 | mkdir -p $DESTDIR/spectre-${SPECTRE_VERSION} 17 | curl -L https://github.com/picturepan2/spectre/archive/v${SPECTRE_VERSION}.tar.gz \ 18 | | tar -C $DESTDIR/spectre-${SPECTRE_VERSION} \ 19 | -xzf - \ 20 | --strip-components 2 \ 21 | spectre-${SPECTRE_VERSION}/dist 22 | -------------------------------------------------------------------------------- /web/config/docker.config.toml: -------------------------------------------------------------------------------- 1 | debug = true 2 | static_root = "../build/web/static" 3 | docs_static_root = "docs/build/html/_static" 4 | 5 | [database] 6 | db_port = 5432 7 | db_name = "" 8 | db_host = "" 9 | db_user = "" 10 | db_password = "" 11 | min_conns = 1 12 | max_conns = 30 13 | 14 | 15 | [sentry] 16 | dsn = "" 17 | 18 | [analytics] 19 | ga_code = null 20 | hotjar_code = null 21 | 22 | [github] 23 | github_token = "" 24 | github_user = "" 25 | -------------------------------------------------------------------------------- /web/config/local.config.toml: -------------------------------------------------------------------------------- 1 | debug = true 2 | static_root = "../build/web/static" 3 | docs_static_root = "docs/build/html/_static" 4 | 5 | [database] 6 | db_port = 5432 7 | db_name = "" 8 | db_host = "" 9 | db_user = "" 10 | db_password = "" 11 | min_conns = 1 12 | max_conns = 30 13 | 14 | [sentry] 15 | dsn = "" 16 | 17 | [analytics] 18 | ga_code = "" 19 | hotjar_code = "" 20 | 21 | [github] 22 | github_token = "" 23 | github_user = "" 24 | -------------------------------------------------------------------------------- /web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "reach-web", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "main.js", 6 | "scripts": { 7 | 
"test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "MIT", 11 | "devDependencies": { 12 | "@babel/core": "^7.10.5", 13 | "@babel/plugin-transform-arrow-functions": "^7.10.4", 14 | "@babel/plugin-transform-for-of": "^7.10.4", 15 | "@babel/plugin-transform-typeof-symbol": "^7.10.4", 16 | "@babel/preset-env": "^7.10.4" 17 | }, 18 | "dependencies": { 19 | "core-js": "^3.6.5", 20 | "less": "^3.12.2" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /web/requirements.txt: -------------------------------------------------------------------------------- 1 | falcon 2 | gunicorn 3 | jinja2 4 | sentry-sdk 5 | psycopg2-binary 6 | toml 7 | uuid 8 | requests 9 | -------------------------------------------------------------------------------- /web/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | # Current directory must be data-labs repo before running setup.py! 
import psycopg2
from contextlib import contextmanager

from psycopg2.extras import RealDictCursor
from psycopg2.pool import ThreadedConnectionPool

from web import config as conf

# Lazily-created, process-wide connection pool (see create_pool()).
pool = None
# NOTE(review): these module constants appear unused — pool sizing is
# taken from conf.CONFIG.min_conns / max_conns instead. Kept for
# backward compatibility with any external importers.
MIN_CONNS = 1
MAX_CONNS = 30

def create_pool():
    """Create the global connection pool on first use and return it.

    Pool sizing and connection parameters come from the application
    config (conf.CONFIG).
    """
    global pool

    if pool is None:
        pool = ThreadedConnectionPool(
            conf.CONFIG.min_conns,
            conf.CONFIG.max_conns,
            database=conf.CONFIG.db_name,
            user=conf.CONFIG.db_user,
            password=conf.CONFIG.db_password,
            host=conf.CONFIG.db_host,
            port=conf.CONFIG.db_port,
        )
    return pool

@contextmanager
def get_db_connection():
    """Yields a database connection from the pool.

    The connection is returned to the pool when the block exits, even
    if the caller raises.
    """
    connection = None
    try:
        if pool is None:
            create_pool()
        connection = pool.getconn()
        yield connection
    finally:
        # BUGFIX: only return a connection we actually obtained. The old
        # code called pool.putconn(connection) unconditionally, which
        # itself raised (masking the original exception) whenever
        # create_pool() or getconn() had failed and connection was None.
        if connection is not None:
            pool.putconn(connection)

@contextmanager
def get_db_cur(commit=False, name=None):
    """ Yields a cursor against the database

    Args:
        commit: Whether to commit at the end of a transaction
        name: Optional name, making this a server-side (named) cursor
    """
    with get_db_connection() as connection:
        cursor = connection.cursor(cursor_factory=RealDictCursor, name=name)
        try:
            yield cursor
            if commit:
                connection.commit()
        finally:
            cursor.close()
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /web/web/docs/README.md: -------------------------------------------------------------------------------- 1 | # Reach API Documentation 2 | 3 | ## How to contribute 4 | 5 | ### Requirements: 6 | - Python > 3.6 7 | - Virtualenv 8 | - Pip 9 | 10 | ### Install the documentation stack: 11 | 12 | ``` 13 | virtualenv env -p python3 14 | source env/bin/activate 15 | pip install -r requirements.txt 16 | ``` 17 | 18 | ### Build the documentation 19 | 20 | Sphinx accepts two types of files: 21 | - `.rst`: re:Structured files 22 | - `.md`: Markdown formatted files 23 | 24 | While both are allowed, for consistency, writing .md files is recommended. 25 | Once all files are written, add them by name to index.rst and run `make html`. 26 | -------------------------------------------------------------------------------- /web/web/docs/build/doctrees/api.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/doctrees/api.doctree -------------------------------------------------------------------------------- /web/web/docs/build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /web/web/docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/doctrees/index.doctree 
-------------------------------------------------------------------------------- /web/web/docs/build/doctrees/intro.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/doctrees/intro.doctree -------------------------------------------------------------------------------- /web/web/docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 06336bc7aab796cbac45a22359d55abd 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | VERSION: '2020.01.01', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '.txt', 11 | NAVIGATION_WITH_KEYS: false 12 | }; -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Inconsolata-Bold.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Inconsolata-Bold.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Inconsolata-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Inconsolata-Regular.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Inconsolata.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Inconsolata.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato-Bold.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato-Regular.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bold.eot: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bold.eot -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bold.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bold.woff -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bold.woff2 -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.eot -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-bolditalic.woff2 -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-italic.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-italic.eot -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-italic.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-italic.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-italic.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-italic.woff -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-italic.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-italic.woff2 -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-regular.eot -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-regular.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-regular.woff -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/Lato/lato-regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/Lato/lato-regular.woff2 -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab-Bold.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab-Regular.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.eot -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-bold.woff2 -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.eot -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/RobotoSlab/roboto-slab-v7-regular.woff2 -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /web/web/docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /web/web/docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/docs/build/html/objects.inv -------------------------------------------------------------------------------- /web/web/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /web/web/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx 2 | recommonmark 3 | sphinx_rtd_theme 4 | -------------------------------------------------------------------------------- /web/web/docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'reach-web-api' 21 | copyright = '2020, Datalabs' 22 | author = 'Datalabs' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '2020.01.01' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 
33 | extensions = [ 34 | 'recommonmark', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # List of patterns, relative to source directory, that match files and 41 | # directories to ignore when looking for source files. 42 | # This pattern also affects html_static_path and html_extra_path. 43 | exclude_patterns = [] 44 | 45 | source_suffix = ['.md', '.rst'] 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | 49 | # The theme to use for HTML and HTML Help pages. See the documentation for 50 | # a list of builtin themes. 51 | # 52 | html_theme = 'sphinx_rtd_theme' 53 | 54 | # Add any paths that contain custom static files (such as style sheets) here, 55 | # relative to this directory. They are copied after the builtin static files, 56 | # so a file named "default.css" will overwrite the builtin "default.css". 57 | html_static_path = ['_static'] 58 | -------------------------------------------------------------------------------- /web/web/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. rech-web-api documentation master file, created by 2 | sphinx-quickstart on Tue Jan 21 17:40:24 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Reach's web API documentation! 7 | ========================================= 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | intro 14 | api 15 | -------------------------------------------------------------------------------- /web/web/docs/source/intro.md: -------------------------------------------------------------------------------- 1 | # Wellcome Reach 2 | 3 | Wellcome Reach is an open source service for discovering how research 4 | publications are cited in global policy documents, including those 5 | produced by policy organizations such as the WHO, MSF, and the UK 6 | government. Key parts of it include: 7 | 8 | 1. Web scrapers for pulling PDF "policy documents" from policy 9 | organizations, 10 | 1. A reference parser for extracting references from these documents, 11 | 1. A task for sourcing publications from Europe PMC (EPMC), 12 | 1. A task for matching policy document references to EPMC publications, 13 | 1. An Airflow installation for automating the above tasks, and 14 | 1. A web application for searching and retrieving data from the datasets 15 | produced above. 16 | 17 | Wellcome Reach is written in Python and developed using docker-compose. 18 | It's deployed into Kubernetes. 19 | 20 | Although parts of the Wellcome Reach have been in use at Wellcome since 21 | mid-2018, the project has only been open source since March 2019. Given 22 | these early days, please be patient as various parts of it are made 23 | accessible to external users. All issues and pull requests are welcome. 
24 | 25 | 26 | ## Further reading 27 | - [Github repository](https://github.com/wellcometrust/reach) 28 | -------------------------------------------------------------------------------- /web/web/src/css/about.less: -------------------------------------------------------------------------------- 1 | @import "variables"; 2 | 3 | #about-page, 4 | #how-it-works-page { 5 | h3.category-title { 6 | font-weight: bold; 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /web/web/src/css/contact.less: -------------------------------------------------------------------------------- 1 | .contact-form { 2 | margin-bottom: 0; 3 | margin-left: auto; 4 | margin-right: auto; 5 | 6 | .ctc-form-field { 7 | display: flex; 8 | flex-direction: column; 9 | margin-bottom: 18px; 10 | } 11 | 12 | 13 | label { 14 | flex: 1; 15 | max-width: 30%; 16 | min-width: 30%; 17 | display: block; 18 | font-size: 1rem; 19 | letter-spacing: 0.5px; 20 | line-height: 1.5rem; 21 | color: #292929; 22 | font-family: Helvetica Neue, Helvetica, Arial, sans-serif; 23 | margin-bottom: 0.5rem; 24 | } 25 | 26 | .ctc-form-input { 27 | flex: 2; 28 | display: block; 29 | 30 | input[type="text"], input[type="email"] { 31 | width: 100%; 32 | height: 2.75rem; 33 | border: 1px solid #CCCCCC; 34 | color: #333; 35 | text-indent: 3px; 36 | } 37 | 38 | textarea { 39 | width: 100%; 40 | resize: none; 41 | height: 200px; 42 | overflow-y: auto; 43 | border: 1px solid #CCCCCC; 44 | color: #333333; 45 | padding: 4px; 46 | } 47 | } 48 | 49 | 50 | .ctc-controls { 51 | display: flex; 52 | flex-direction: row; 53 | justify-content: flex-end; 54 | align-items: flex-end; 55 | 56 | .ctc-note { 57 | flex: 1; 58 | display: flex; 59 | justify-content: flex-start; 60 | align-items: flex-start; 61 | flex-direction: column; 62 | 63 | 64 | p { 65 | margin: 0; 66 | padding: 0; 67 | font-size: 12px; 68 | color: #CCC; 69 | } 70 | } 71 | 72 | .ctct-submit { 73 | flex: 0; 74 | white-space: nowrap; 75 | 
display: inline-block; 76 | width: 9.875rem; 77 | height: 2.75rem; 78 | border-radius: 2px; 79 | background-color: #006272; 80 | color: white; 81 | padding-left: 21px; 82 | padding-right: 21px; 83 | border: none; 84 | cursor: pointer; 85 | transition: all 0.3s ease-in-out; 86 | 87 | &:disabled { 88 | background: #CCC; 89 | 90 | &:hover { 91 | background: #CCC !important; 92 | } 93 | } 94 | 95 | &:hover { 96 | background-color: #005361; 97 | } 98 | } 99 | 100 | } 101 | 102 | } 103 | 104 | 105 | 106 | #ctc-result-success { 107 | display: none; 108 | text-align: center; 109 | } 110 | 111 | #ctc-result-failure { 112 | display: none; 113 | text-align: center; 114 | } 115 | -------------------------------------------------------------------------------- /web/web/src/css/footer.less: -------------------------------------------------------------------------------- 1 | @import "variables"; 2 | 3 | footer { 4 | /* height: 6.25rem; */ 5 | /* line-height: 6.25rem; */ 6 | padding: 2.625rem 0; 7 | font-size: @smallFontSize; 8 | width: 100%; 9 | } 10 | 11 | footer.home { 12 | position: relative; 13 | background-color: white; 14 | } 15 | 16 | #wellcome-logo-container{ 17 | 18 | display: inline-block; 19 | margin-right: @smallFontSize; 20 | 21 | #wellcome-logo { 22 | height: 1.6rem; 23 | width: 1.6rem; 24 | vertical-align: middle; 25 | } 26 | 27 | } 28 | 29 | footer a, footer p { 30 | font-size: @smallFontSize; 31 | text-decoration: none; 32 | display: inline-block; 33 | } 34 | 35 | footer p { 36 | a { 37 | text-decoration: underline; 38 | } 39 | } 40 | 41 | /* Offsets the grid */ 42 | @media screen and (min-width: 780px) { 43 | footer { 44 | text-align: left; 45 | } 46 | } 47 | 48 | @media screen and (max-width: 780px) { 49 | footer { 50 | text-align: center; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /web/web/src/css/header.less: -------------------------------------------------------------------------------- 1 | @import 
"variables"; 2 | 3 | 4 | 5 | @media screen and (min-width: 1280px) { 6 | header.navbar { 7 | padding: 0 5.75rem; 8 | } 9 | 10 | } 11 | 12 | @media screen and (max-width: 1280px) { 13 | header.navbar { 14 | padding: 0 @smallPadding; 15 | } 16 | 17 | } 18 | 19 | header.navbar { 20 | color: white; 21 | background-color: @cyanDark; 22 | height: 3.75rem; 23 | 24 | img { 25 | color: white; 26 | font-weight: normal; 27 | margin: 0; 28 | height: 3.5rem; 29 | } 30 | 31 | a { 32 | white-space: nowrap; 33 | text-decoration: none; 34 | font-weight: normal; 35 | } 36 | 37 | #navbar-links a { 38 | margin: 0 @smallPadding; 39 | line-height: 3.5rem; 40 | } 41 | 42 | #navbar-links a:hover { 43 | color: @cyanLight; 44 | } 45 | 46 | #navbar-links a.active { 47 | border-bottom: .25rem solid white; 48 | padding-top: 0.25rem; 49 | line-height: 3.25rem; 50 | } 51 | 52 | } 53 | 54 | header.navbar.home { 55 | height: 5rem; 56 | 57 | #navbar-links a { 58 | line-height: 4.75rem; 59 | } 60 | 61 | img { 62 | margin-top: 8px; 63 | height: 3.5rem; 64 | } 65 | 66 | } 67 | 68 | .btn.cta-link { 69 | background-color: @cyanDark; 70 | border-radius: 28px; 71 | font-size: @smallFontSize; 72 | line-height: 2.75rem !important; 73 | height: 2.75rem; 74 | border: 1px solid white; 75 | text-decoration: none; 76 | color: white; 77 | margin: 0; 78 | padding: 0 @smallPadding; 79 | text-align: center; 80 | font-weight: normal; 81 | font-stretch: normal; 82 | font-style: normal; 83 | letter-spacing: normal; 84 | } 85 | 86 | .btn.cta-link:hover { 87 | background-color: @cyanPrimary; 88 | color: white !important; 89 | } 90 | 91 | /* Override Spectre default */ 92 | .breadcrumb .breadcrumb-item:not(:last-child) a { 93 | color: @cyanPrimary; 94 | } 95 | 96 | 97 | .breadcrumb .breadcrumb-item:not(:first-child)::before { 98 | color: @greyDark; 99 | content: ">"; 100 | padding-right: .4rem; 101 | } 102 | 103 | .breadcrumb .breadcrumb-item { 104 | color: @greyDark; 105 | font-size: @smallFontSize; 106 | } 107 | 108 | 
#breadcrumbs { 109 | background: white; 110 | border-bottom: 1px solid @greyLight; 111 | } 112 | 113 | 114 | @media screen and (min-width: 1280px) { 115 | #breadcrumbs { 116 | padding: 0 @guidePadding; 117 | } 118 | } 119 | 120 | @media screen and (max-width: 1280px) { 121 | #breadcrumbs { 122 | padding: 0 1.375rem; 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /web/web/src/css/home.less: -------------------------------------------------------------------------------- 1 | @import "variables"; 2 | 3 | body.home { 4 | background-image: none; 5 | } 6 | 7 | .bg2 { 8 | background-color: @backgroundTint2; 9 | } 10 | 11 | @media screen and (min-width: 840px) { 12 | 13 | } 14 | 15 | @media screen and (max-width: 840px) { 16 | 17 | } 18 | 19 | /* 20 | * Page structures 21 | */ 22 | section#hero, section#about-us { 23 | display: flex; 24 | } 25 | 26 | .hero-picture { 27 | position: relative; 28 | 29 | .img-container { 30 | background-image: url(../images/reach_site_view.png); 31 | background-repeat: no-repeat; 32 | background-size: 100%; 33 | box-shadow: inset 0 -100px 20px -25px @backgroundTint1, 34 | 10px -10px 10px -15px black, 35 | -10px -10px 10px -15px black; 36 | min-height: 40vh; 37 | } 38 | } 39 | 40 | section#hero { 41 | background-color: @backgroundTint2; 42 | background-image: linear-gradient(-177deg, @backgroundTint2 70%, @backgroundTint1 calc(70% + 2px)); 43 | color: white; 44 | /* .container { 45 | min-height: 50%; 46 | } */ 47 | } 48 | 49 | #hero-picture-container { 50 | position: relative; 51 | bottom: 0; 52 | /* height: 90%; 53 | 54 | .column, .column .hero-picture { 55 | height: 90%; 56 | } */ 57 | } 58 | 59 | section#hero h1 { 60 | font-family: Wellcome, Helvetica, Arial, sans-serif; 61 | } 62 | 63 | section#scroll-arrow { 64 | position: absolute; 65 | width: 100%; 66 | bottom: 0; 67 | 68 | .container { 69 | position: relative; 70 | bottom: 0; 71 | } 72 | } 73 | 74 | 75 | section#hero { 76 | 
min-height: 85vh; 77 | } 78 | 79 | section#home-header { 80 | height: 5vh; 81 | min-height: 5rem; 82 | background-color: @backgroundTint2; 83 | } 84 | 85 | 86 | section#about-reach { 87 | background-image: url(../images/Shape_01.svg); 88 | background-repeat: no-repeat; 89 | background-size: cover; 90 | 91 | a { 92 | font-size: @baseFontSize; 93 | } 94 | } 95 | 96 | section#about-us { 97 | background-image: url(../images/Shape_02.svg); 98 | background-repeat: no-repeat; 99 | background-size: cover; 100 | } 101 | 102 | .home-hero-view { 103 | height: 100vh; 104 | } 105 | 106 | #about-us h3, #about-reach h3 { 107 | font-weight: bold; 108 | } 109 | -------------------------------------------------------------------------------- /web/web/src/css/icons.less: -------------------------------------------------------------------------------- 1 | @import "variables"; 2 | 3 | .icn.icn-search::before { 4 | color: white; 5 | fill: white; 6 | margin-bottom: -10px; 7 | content: url(../images/Icon_Search_16px.svg); 8 | } 9 | 10 | .icn.icn-download::before { 11 | color: white; 12 | fill: white; 13 | margin-bottom: -4px; 14 | content: url(../images/Icon_Download_16px.svg); 15 | } 16 | 17 | .icn.icn-research-paper::before { 18 | color: black; 19 | fill: black; 20 | margin-bottom: -4px; 21 | content: url(../images/Icon_Research_24px.svg); 22 | } 23 | 24 | .icn.icn-sort { 25 | color: grey; 26 | fill: grey; 27 | margin-bottom: -3px; 28 | padding: 4px 4px 0 4px; 29 | content: url(../images/Icon_Chevron_Double.svg); 30 | } 31 | 32 | .icn.icn-sorted { 33 | margin-bottom: -1px; 34 | margin-left: 4px; 35 | padding: 4px 4px 0 4px; 36 | content: url(../images/Icon_Arrow_down.svg); 37 | } 38 | 39 | .icn.icn-sorted-asc { 40 | transform: rotate(180deg); 41 | } 42 | 43 | .icn.icn-info { 44 | margin-bottom: -4px; 45 | margin-right: 4px; 46 | content: url(../images/Icon_Info.svg); 47 | } 48 | 49 | .icn.icn-new-page { 50 | margin-bottom: -3px; 51 | margin-right: -4px; 52 | content: 
url(../images/Icon_new_window.svg); 53 | } 54 | 55 | .icn.icn-chevron-left { 56 | margin-bottom: 2px; 57 | transform: rotate(90deg); 58 | filter: invert(25%) sepia(95%) saturate(911%) hue-rotate(153deg) brightness(93%) contrast(101%); 59 | content: url(../images/Icon_Chevron_Down.svg); 60 | } 61 | 62 | .icn.icn-chevron-right { 63 | margin-bottom: 2px; 64 | filter: invert(25%) sepia(95%) saturate(911%) hue-rotate(153deg) brightness(93%) contrast(101%); 65 | transform: rotate(270deg); 66 | content: url(../images/Icon_Chevron_Down.svg); 67 | } 68 | 69 | .icn-down { 70 | transform: rotate(0deg); 71 | color: @greyDark; 72 | } 73 | .icn-up { 74 | transform: rotate(180deg); 75 | color: @cyanLight; 76 | } 77 | -------------------------------------------------------------------------------- /web/web/src/css/search.less: -------------------------------------------------------------------------------- 1 | @import "variables"; 2 | 3 | .search-box { 4 | background: @backgroundTint1; 5 | } 6 | 7 | .search-tips { 8 | margin: @mediumPadding 0; 9 | } 10 | 11 | .search-tips .btn.help { 12 | background-color: @cyanPrimary; 13 | margin-top: 0; 14 | } 15 | 16 | .insights { 17 | padding: @mediumPadding 0; 18 | } 19 | 20 | .feedback-box { 21 | background-color: @backgroundTint2; 22 | padding: @smallPadding; 23 | } 24 | 25 | .form-label { 26 | margin-bottom: 0.5rem; 27 | font-size: .875rem; 28 | color: #292929; 29 | } 30 | 31 | /* Help tooltip */ 32 | .help { 33 | width: 22px; 34 | height: 22px; 35 | padding: 0; 36 | margin-left: @smallFontSize; 37 | color: white; 38 | font-size: 14px; 39 | font-weight: bold; 40 | border-radius: 50%; 41 | line-height: 22px; 42 | text-align: center; 43 | background-color: @cyanPrimary; 44 | } 45 | 46 | .popover-container { 47 | display: block; 48 | opacity: 1; 49 | transform: translate(-50%, -100%) scale(1); 50 | } 51 | 52 | .popover-container svg.arrow { 53 | margin: -1em auto 0 auto; 54 | width: 2em; 55 | height: 1em; 56 | } 57 | 58 | .popover-container 
.card { 59 | color: @greyDark; 60 | } 61 | -------------------------------------------------------------------------------- /web/web/src/css/variables.less: -------------------------------------------------------------------------------- 1 | /* 2 | * Colors and typefaces 3 | */ 4 | 5 | 6 | /* 7 | * Colors 8 | */ 9 | @cyanLight: #009BB2; 10 | @cyanPrimary: #006272; 11 | @cyanDark: #005361; 12 | @cyanFocused: #E5EFF1; 13 | 14 | @backgroundTint1: #F1FCFD; 15 | @backgroundTint2: #DCF4F9; 16 | @backgroundTint3: #BEEBF4; 17 | 18 | @greyLight: #CCCCCC; 19 | @greyLink: #767676; 20 | @greyDark: #292929; 21 | @greyTable: #E6E6E6; 22 | 23 | 24 | /* 25 | * Fonts 26 | */ 27 | 28 | :root { 29 | font-size: 16px; 30 | } 31 | 32 | @h1FontSize: 2rem; 33 | @h2FontSize: 1.5rem; 34 | @h3FontSize: 1.25rem; 35 | @h4FontSize: 1rem; 36 | @baseFontSize: 1rem; 37 | @smallFontSize: 0.875rem; 38 | 39 | @font-face { 40 | font-family: Wellcome; 41 | /* NB: this will be inlined by postcss-url */ 42 | src: url("./wellcome-bold-webfont.woff2") format("woff2"); 43 | } 44 | 45 | /* 46 | * Common spacings 47 | */ 48 | 49 | @guidePadding: 5.75rem; 50 | @heroPadding: 2.5rem; 51 | @mediumPadding: 1.5rem; 52 | @smallPadding: 1rem; 53 | -------------------------------------------------------------------------------- /web/web/src/css/wellcome-bold-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/css/wellcome-bold-webfont.woff2 -------------------------------------------------------------------------------- /web/web/src/favicon/android-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/android-icon-144x144.png -------------------------------------------------------------------------------- 
/web/web/src/favicon/android-icon-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/android-icon-192x192.png -------------------------------------------------------------------------------- /web/web/src/favicon/android-icon-36x36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/android-icon-36x36.png -------------------------------------------------------------------------------- /web/web/src/favicon/android-icon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/android-icon-48x48.png -------------------------------------------------------------------------------- /web/web/src/favicon/android-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/android-icon-72x72.png -------------------------------------------------------------------------------- /web/web/src/favicon/android-icon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/android-icon-96x96.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-114x114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-114x114.png 
-------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-120x120.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-144x144.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-152x152.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-180x180.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-57x57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-57x57.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-60x60.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-60x60.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-72x72.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-76x76.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon-precomposed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon-precomposed.png -------------------------------------------------------------------------------- /web/web/src/favicon/apple-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/apple-icon.png -------------------------------------------------------------------------------- /web/web/src/favicon/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | #ffffff -------------------------------------------------------------------------------- /web/web/src/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /web/web/src/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /web/web/src/favicon/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/favicon-96x96.png -------------------------------------------------------------------------------- /web/web/src/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/favicon.ico -------------------------------------------------------------------------------- /web/web/src/favicon/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "App", 3 | "icons": [ 4 | { 5 | "src": "\/android-icon-36x36.png", 6 | "sizes": "36x36", 7 | "type": "image\/png", 8 | "density": "0.75" 9 | }, 10 | { 11 | "src": "\/android-icon-48x48.png", 12 | "sizes": "48x48", 13 | "type": "image\/png", 14 | "density": "1.0" 15 | }, 16 | { 17 | "src": "\/android-icon-72x72.png", 18 | "sizes": "72x72", 19 | "type": "image\/png", 20 | "density": "1.5" 21 | }, 22 | { 23 | "src": "\/android-icon-96x96.png", 24 | "sizes": "96x96", 25 | "type": "image\/png", 26 | "density": "2.0" 27 | }, 28 | { 29 | "src": "\/android-icon-144x144.png", 30 | "sizes": "144x144", 31 | "type": "image\/png", 32 | "density": "3.0" 33 | }, 
34 | { 35 | "src": "\/android-icon-192x192.png", 36 | "sizes": "192x192", 37 | "type": "image\/png", 38 | "density": "4.0" 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /web/web/src/favicon/ms-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/ms-icon-144x144.png -------------------------------------------------------------------------------- /web/web/src/favicon/ms-icon-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/ms-icon-150x150.png -------------------------------------------------------------------------------- /web/web/src/favicon/ms-icon-310x310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/ms-icon-310x310.png -------------------------------------------------------------------------------- /web/web/src/favicon/ms-icon-70x70.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/favicon/ms-icon-70x70.png -------------------------------------------------------------------------------- /web/web/src/images/Icon_ New-window.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon/ New-window 5 | Created with Sketch. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_About_Accuracy_100px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_About_Accuracy_100px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_About_Open-source_100px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_About_Open-source_100px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_About_Transparent_100px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_About_Transparent_100px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Arrow_down.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon/Arrow/down 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Chevron_Double.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon/Chevron/Double 5 | Created with Sketch. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Chevron_Down.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon/Chevron/Down 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Download_16px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_Download_16px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Info.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon/Add 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Menu_16px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_Menu_16px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Policy_24px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_Policy_24px 5 | Created with Sketch. 
6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Research_24px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_Research_24px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Scroll-arow.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_Scroll-arow_ 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Search_16px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_Search_16px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_Sort-by_16px.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon_Sort-by_16px 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /web/web/src/images/Icon_new_window.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Icon/ New-window 5 | Created with Sketch. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /web/web/src/images/Image_Product-shot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/images/Image_Product-shot.png -------------------------------------------------------------------------------- /web/web/src/images/Shape_01.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Shape_01 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /web/web/src/images/Shape_02.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Shape_02 5 | Created with Sketch. 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /web/web/src/images/reach_site_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wellcometrust/reach/1aa42c7d8aaf0a91d033af8448a33f37563b0365/web/web/src/images/reach_site_view.png -------------------------------------------------------------------------------- /web/web/src/images/wave.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /web/web/src/images/wellcome-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /web/web/src/images/white-wave.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | 13 | 14 | 15 | 16 | 17 
| -------------------------------------------------------------------------------- /web/web/src/js/app.js: -------------------------------------------------------------------------------- 1 | import "core-js/stable"; 2 | import "core-js/stable/array"; 3 | 4 | import clearSearch from './clearSearch.js'; 5 | import policyTable from './policyTable.js'; 6 | import citationsTable from './citationsTable.js'; 7 | import contact from "./v.contact"; 8 | import home from './home.js'; 9 | 10 | document.addEventListener('DOMContentLoaded', function(event) { 11 | String.prototype.toTitleCase = function() { 12 | let lower = this.valueOf().toLowerCase(); 13 | return lower.replace(/^\w/, c => c.toUpperCase());; 14 | }; 15 | 16 | clearSearch(); 17 | policyTable(); 18 | citationsTable(); 19 | home(); 20 | contact(); 21 | 22 | // Tracking 23 | const headerLinks = document.getElementsByClassName('navbar'); 24 | headerLinks.forEach(item => { 25 | item.addEventListener('click', (e) => { 26 | if (e.target.tagName == "A") { 27 | gtag('event', 'Internal click', { 28 | event_category: 'Header', 29 | event_label: e.target.innerHTML 30 | }); 31 | } 32 | }); 33 | }); 34 | 35 | const footerLinks = document.getElementsByTagName('footer'); 36 | footerLinks.forEach(item => { 37 | item.addEventListener('click', (e) => { 38 | if (e.target.tagName == "A") { 39 | gtag('event', 'Internal click', { 40 | event_category: 'Footer', 41 | event_label: e.target.innerHTML 42 | }); 43 | } 44 | }); 45 | }); 46 | 47 | const resultsContactLink = document.getElementById('search-results-contact'); 48 | if (resultsContactLink) { 49 | resultsContactLink.addEventListener('click', (e) => { 50 | let source = (e.target.getAttribute('data-from') == "citations")? 
"Discover citations":"Browse pol docs"; 51 | gtag('event', 'Click', { 52 | event_category: source, 53 | event_label: 'Email: search results' 54 | }); 55 | }); 56 | } 57 | }); 58 | -------------------------------------------------------------------------------- /web/web/src/js/clearSearch.js: -------------------------------------------------------------------------------- 1 | const clearSearch = (reach) => { 2 | let clearButton = document.getElementById('search-clear'); 3 | let searchInput = document.getElementById('search-term'); 4 | if (clearButton) { 5 | clearButton.addEventListener('click', () => { 6 | searchInput.value = ''; 7 | }); 8 | 9 | } 10 | }; 11 | 12 | export default clearSearch; 13 | -------------------------------------------------------------------------------- /web/web/src/js/home.js: -------------------------------------------------------------------------------- 1 | const home = () => { 2 | const startButton = document.getElementById('start-button'); 3 | if (startButton) { 4 | 5 | startButton.addEventListener("click", (e) => { 6 | e.preventDefault(); 7 | document.getElementById('discover-reach').scrollIntoView({behavior: "smooth", block: "start"}); 8 | }); 9 | } 10 | } 11 | 12 | export default home; 13 | -------------------------------------------------------------------------------- /web/web/src/js/templates/no_results.js: -------------------------------------------------------------------------------- 1 | const getNoResultsTemplate = (term, source) => { 2 | 3 | let noResultsTitle = ``; 4 | let formLabel = ``; 5 | let formAction = ``; 6 | let formSubmit = ``; 7 | 8 | if (source == 'policies') { 9 | noResultsTitle = `Your search for "${term}" in policy documents did not return any results`; 10 | formLabel = `Search by topic, research area or policy document title`; 11 | formAction = `/search/policy-docs`; 12 | formSubmit = `Browse policy documents`; 13 | } else { 14 | noResultsTitle = `Your search for "${term}" in citations did not return any 
results`; 15 | formLabel = `Search by scientific publication title, topic or journal`; 16 | formAction = `/search/citations`; 17 | formSubmit = `Discover citations`; 18 | } 19 | 20 | const template = ` 21 |
22 |
23 |
24 |
25 |

${noResultsTitle}

26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |

${formLabel}

36 |
37 |
38 |
39 |
40 | 41 | 42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |

Search tips

51 |
    52 |
  • Check your spelling
  • 53 |
  • Broaden your search by using fewer words or more general terms
  • 54 |
  • Try searching by topic, area of work or institute
  • 55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 | 67 |
68 |
69 |
70 |
71 |
72 | `; 73 | 74 | return template; 75 | }; 76 | 77 | export default getNoResultsTemplate; 78 | -------------------------------------------------------------------------------- /web/web/src/w-avatar-pitch-1.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xml -------------------------------------------------------------------------------- /web/web/templates/search/citations.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | 4 | {% block header %} 5 | 6 | 17 | 18 | {% endblock %} 19 | 20 | {% block main %} 21 | 22 | 30 | 31 |
32 |
33 |
34 |
35 |
36 |
37 |

Discover scientific publications that
have been cited in policy documents

38 |
39 |
40 |
41 |
42 |
43 |
44 |

Reach uses machine learning to find where health policy
organisations are using scientific research.

45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |

Search by scientific publication title, topic or journal

53 |
54 |
55 | 56 | 57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 | {% endblock %} 65 | -------------------------------------------------------------------------------- /web/web/templates/search/policy-docs.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | 4 | {% block header %} 5 | 6 | 17 | 18 | {% endblock %} 19 | 20 | {% block main %} 21 | 22 | 30 | 31 |
32 |
33 |
34 |
35 |
36 |
37 |

Browse our collection of
over 129k policy documents

38 |
39 |
40 |
41 |
42 |
43 |
44 |

We source policy documents from UNICEF, Médecins Sans Frontières (MSF),
45 | National Institute for Clinical Excellence (NICE), the World Health Organisation (WHO),
46 | the UK government and the UK parliament.

47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |

Search by topic, research area or policy document title

55 |
56 |
57 | 58 | 59 |
60 |
61 |
62 |
63 |
64 |
65 |
# {% endblock %}
# --------------------------------------------------------------------------
# /web/web/tests/test_template.py
# --------------------------------------------------------------------------
from reach.web.views import template


def test_to_template_names():
    """Table-driven check of the URL-path -> Jinja template-name mapping."""
    cases = [
        ('/', ('index.html',)),
        ('/foo', ('foo.html', 'foo/index.html')),
        ('/foo.html', ('foo.html', 'foo/index.html')),
        ('/foo/gar', ('foo/gar.html', 'foo/gar/index.html')),
        # Templates starting with '_' hold macros and must not be served.
        ('/_macros.html', tuple()),
    ]
    for path, expected in cases:
        assert expected == template.to_template_names(path)

# --------------------------------------------------------------------------
# /web/web/utils.py
# --------------------------------------------------------------------------
import collections
import time

import falcon

# Bundle of rate-limit settings passed from the hook factory to _rate_db().
Argument = collections.namedtuple("Argument", ('resource', 'window_size',
                                               'per_second', 'error_message'))


class _RateLimitDB(object):
    """In-process sliding-window call log, keyed by user then resource.

    State is a class-level nested defaultdict mapping
    user -> resource name -> list of call timestamps.
    """

    _RATE_LIMIT_DB = collections.defaultdict(
        lambda: collections.defaultdict(list)
    )

    @staticmethod
    def filter(user, resource_name, window_size):
        # Drop timestamps that have fallen out of the sliding window.
        p = _RateLimitDB._RATE_LIMIT_DB[user][resource_name]
        t = time.time()
        exp_int = t - window_size
        p = [s for s in p if s >= exp_int]
        _RateLimitDB._RATE_LIMIT_DB[user][resource_name] = p

    @staticmethod
    def add_call(user, resource_name):
        # Record the current call's timestamp.
        _RateLimitDB._RATE_LIMIT_DB[user][resource_name].append(
            time.time()
        )

    @staticmethod
    def check_for(user, argument):
        """Record one call and return True when the caller is over the limit.

        The rate is the average calls-per-second over the window
        (count / window_size) compared against argument.per_second.
        """
        _RateLimitDB.filter(user, argument.resource, argument.window_size)
        _RateLimitDB.add_call(user, argument.resource)
        p = len(_RateLimitDB._RATE_LIMIT_DB[user][argument.resource])
        return (p / argument.window_size) > argument.per_second


def _rate_db(req, resp, argument):
    # Identify callers by forwarded host so limits apply per client behind
    # a proxy. FIX: removed a stray debug print("RATE_LIMITED") left here;
    # raising the exception is the signal.
    if _RateLimitDB.check_for(req.forwarded_host, argument):
        resp.status = falcon.HTTP_429
        raise falcon.HTTPTooManyRequests(argument.error_message)


def rate_limit(per_second=30, resource=u'default', window_size=10,
               error_message="429 Too Many Requests"):
    """Falcon before-hook factory enforcing a sliding-window rate limit.

    Args:
        per_second: maximum average calls per second over the window.
        resource: logical bucket name so distinct endpoints get
            independent counters.
        window_size: window length in seconds.
        error_message: message for the raised HTTPTooManyRequests.

    Returns:
        A hook callable with Falcon's (req, resp, resource, params)
        signature.
    """
    arg = Argument(resource, window_size, per_second, error_message)

    def hook(req, resp, resource, params):
        _rate_db(req, resp, arg)

    return hook

# --------------------------------------------------------------------------
# /web/web/views/__init__.py
# --------------------------------------------------------------------------
from .search import SearchCitations
from .search import SearchPolicies
from .search import ExportCitationsSearch
from .search import ExportPoliciesSearch

from .api import ApiSearchCitations
from .api import ApiSearchPolicies

from .contact import ContactView

# --------------------------------------------------------------------------
# /web/web/views/api/__init__.py
# --------------------------------------------------------------------------
from .api_search_citations import ApiSearchCitations
from .api_search_policies import ApiSearchPolicies

# --------------------------------------------------------------------------
# /web/web/views/api/utils.py
# --------------------------------------------------------------------------
import json
import datetime
import uuid


class JSONEncoder(json.JSONEncoder):
    """json.JSONEncoder that serialises dates/datetimes as ISO-8601 strings,
    UUIDs as strings, and stringifies anything else json cannot handle.
    """

    def default(self, obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        if isinstance(obj, uuid.UUID):
            return str(obj)
        # Catch-all: stringify any other unserialisable value. FIX: the
        # original had a trailing `return json.JSONEncoder.default(self,
        # obj)` after this unconditional return — unreachable dead code,
        # now removed (behavior unchanged).
        return str(obj)

# --------------------------------------------------------------------------
# /web/web/views/apidocs.py:
# --------------------------------------------------------------------------
import os
import jinja2
import falcon


# NOTE(review): "Ressource" is a typo but the class name is part of the
# public interface — kept as-is so callers don't break.
class APIDocRessource(object):
    """
    Serves HTML templates. Note that templates are read from the FS for
    every request.
    """

    def __init__(self, template_dir, context=None):
        self.env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(template_dir),
            autoescape=jinja2.select_autoescape(['html']),
        )
        if context is not None:
            self.context = context
        else:
            self.context = {}

    def render_template(self, resp, tname):
        # Strip the 'api/docs' mount prefix so '/api/docs/foo' maps to the
        # 'foo' template. NOTE(review): a request path of exactly
        # '/api/docs' yields '' here and to_template_names() raises
        # ValueError — confirm the router never produces that path.
        tname = to_template_names(tname.replace('api/docs', ''))
        try:
            template = self.env.select_template(tname)
            resp.body = template.render(**self.context)
            resp.content_type = 'text/html'
        except jinja2.TemplateNotFound:
            resp.status = falcon.HTTP_404
            return

    def on_get(self, req, resp, name):
        self.render_template(resp, req.path)


def to_template_names(path):
    """
    Maps HTTP request paths to Jinja template paths.

    Args:
        path: path portion of HTTP GET request

    Returns:
        Tuple of file paths that Jinja should search for.

    Raises:
        ValueError: if path does not start with '/'.
    """

    if not path.startswith('/'):
        raise ValueError
    path = path[1:]  # remove leading /, jinja won't want it

    if os.path.basename(path).startswith('_'):
        # Macros are kept in templates starting with _; don't allow
        # access to them.
        return tuple()

    if path == '':
        return ('index.html',)

    if path.endswith('/'):
        return (
            path[:-1] + '.html',
            os.path.join(path, 'index.html'),
        )

    if path.endswith('.html'):
        return (
            path,
            os.path.join(path[:-5], 'index.html'),
        )

    return (
        path + '.html',
        os.path.join(path, 'index.html'),
    )

# --------------------------------------------------------------------------
# /web/web/views/robotstxt.py
# --------------------------------------------------------------------------
""" Serve GET /robots.txt. """

# Allow all indexing following launch in July 2020
# (cf.
# https://medium.com/wellcome-data-labs/introducing-reach-find-and-track-research-being-put-into-action-dec2a2fca93b)
ROBOTS_TXT = \
"""User-agent: *
Allow: /
"""


class RobotsTxtResource(object):
    """Static resource answering GET /robots.txt with an allow-all policy."""

    def on_get(self, req, resp):
        resp.body = ROBOTS_TXT
        resp.content_type = 'text/plain'

# --------------------------------------------------------------------------
# /web/web/views/search/__init__.py
# --------------------------------------------------------------------------
from .citations import SearchCitations
from .policies import SearchPolicies
from .export_citations import ExportCitationsSearch
from .export_policies import ExportPoliciesSearch

# --------------------------------------------------------------------------
# /web/web/views/search/citations.py
# --------------------------------------------------------------------------
import logging
import json

import falcon

from web.db import get_db_cur
from web.views import template

logger = logging.getLogger(__name__)


class SearchCitations(template.TemplateResource):
    """ Search through publications returning a list of publications with
    inlined policies that have cited the publication, search rank.
    """
    def __init__(self, template_dir, context=None):
        super(SearchCitations, self).__init__(template_dir, context)

    def on_get(self, req, resp):
        logger.info("Requesting some citations")

        # No query parameters: render the empty search form.
        if not req.params:
            super(SearchCitations, self).render_template(
                resp,
                "/search/citations",
            )
            return

        term = req.params.get("terms", "")

        # Pass the search term through to the results template.
        self.context.update(dict(
            term=term
        ))

        super(SearchCitations, self).render_template(
            resp,
            "/results/citations",
        )

# --------------------------------------------------------------------------
# /web/web/views/search/policies.py
# --------------------------------------------------------------------------
import logging
import json

import falcon

from web.views import template

logger = logging.getLogger(__name__)


class SearchPolicies(template.TemplateResource):
    """ Search through policy documents, rendering either the search form
    (no query) or the results template with the requested term.
    """
    def __init__(self, template_dir, context=None):
        super(SearchPolicies, self).__init__(template_dir, context)

    def on_get(self, req, resp):
        logger.info("Requesting some policies")

        # No query parameters: render the empty search form.
        if not req.params:
            super(SearchPolicies, self).render_template(
                resp,
                "/search/policy-docs",
            )
            return

        # NOTE(review): default here is None while SearchCitations uses ""
        # — confirm the results template treats both the same before
        # unifying.
        term = req.params.get("terms", None)

        self.context.update(dict(
            term=term
        ))

        super(SearchPolicies, self).render_template(
            resp,
            "/results/policy-docs",
        )

# --------------------------------------------------------------------------
# /web/web/views/template.py
# --------------------------------------------------------------------------
import os
import jinja2
import falcon


class TemplateResource(object):
    """
    Serves HTML templates. Note that templates are read from the FS for
    every request.
    """

    def __init__(self, template_dir, context=None):
        # Imported here (not at module level), presumably to avoid an
        # import cycle with web.config — TODO confirm.
        from web.config import CONFIG
        self.env = jinja2.Environment(
            loader=jinja2.FileSystemLoader(template_dir),
            autoescape=jinja2.select_autoescape(['html']),
        )

        # Analytics identifiers exposed to every template.
        self.env.globals.update(ga_code=CONFIG.ga_code)
        self.env.globals.update(hotjar_code=CONFIG.hotjar_code)

        if context is not None:
            self.context = context
        else:
            self.context = {}

    def render_template(self, resp, tname):
        tname = to_template_names(tname)
        try:
            template = self.env.select_template(tname)
            resp.body = template.render(**self.context)
            resp.content_type = 'text/html'
        except jinja2.TemplateNotFound:
            resp.status = falcon.HTTP_404
            return

    def on_get(self, req, resp):
        self.render_template(resp, req.path)


def to_template_names(path):
    """
    Maps HTTP request paths to Jinja template paths.

    Args:
        path: path portion of HTTP GET request

    Returns:
        Tuple of file paths that Jinja should search for.

    Raises:
        ValueError: if path does not start with '/'.
    """

    if not path.startswith('/'):
        raise ValueError
    path = path[1:]  # remove leading /, jinja won't want it

    if os.path.basename(path).startswith('_'):
        # Macros are kept in templates starting with _; don't allow
        # access to them.
        return tuple()

    if path == '':
        return ('index.html',)

    if path.endswith('/'):
        return (
            path[:-1] + '.html',
            os.path.join(path, 'index.html'),
        )

    if path.endswith('.html'):
        return (
            path,
            os.path.join(path[:-5], 'index.html'),
        )

    return (
        path + '.html',
        os.path.join(path, 'index.html'),
    )

# --------------------------------------------------------------------------
# /web/web/wsgi.py
# --------------------------------------------------------------------------
import os

import toml

from . import api


class Configuration:
    def __init__(self):
        """
        Parses webapp configuration from the environment. Key variables:

        - ELASTICSEARCH_HOST
        - ELASTICSEARCH_EXPLAIN
        - ELASTICSEARCH_POLICYDOCS_INDEX
        - ELASTICSEARCH_CITATIONS_INDEX
        - STATIC_ROOT
        """

        # FIX: use .get() — os.environ['DATABASE_URL'] raised KeyError for
        # a missing variable, so the explicit error below was unreachable.
        self.database_url = os.environ.get('DATABASE_URL')
        if not self.database_url:
            raise Exception(
                "Database URL not found. DATABASE_URL=%r" %
                self.database_url
            )

        self.static_root = os.environ.get('STATIC_ROOT')
        if not self.static_root or not os.path.isdir(self.static_root):
            raise Exception(
                "No static directory found. STATIC_ROOT=%r" %
                self.static_root
            )

        self.docs_static_root = os.environ.get('DOCS_STATIC_ROOT')
        if not self.docs_static_root or not os.path.isdir(
            self.docs_static_root
        ):
            raise Exception(
                "No docs static directory found. DOCS_STATIC_ROOT=%r" %
                self.docs_static_root
            )


def parse_config_file():
    """Load the TOML file named by CONFIG_FILE, or return {} if unset.

    Relative paths are resolved against this module's directory.
    """
    config_path = os.environ.get("CONFIG_FILE", None)

    if config_path is None:
        return {}

    if not config_path.startswith("/"):
        # FIX: os.path.basedir does not exist (AttributeError at runtime);
        # os.path.dirname(__file__) is the intended module directory.
        config_path = os.path.join(os.path.dirname(__file__), config_path)

    config_data = toml.load(config_path)

    return config_data


config = parse_config_file()
application = api.create_api(config)
# --------------------------------------------------------------------------