├── .coveragerc ├── .dockerignore ├── .editorconfig ├── .gitattributes ├── .github └── workflows │ ├── lambda.yml │ └── test.yml ├── .gitignore ├── .isort.cfg ├── .pylintrc ├── .python-version ├── .slugignore ├── CONTRIBUTORS.txt ├── Procfile ├── README.md ├── compose ├── local │ ├── django │ │ ├── Dockerfile │ │ ├── celery │ │ │ ├── beat │ │ │ │ └── start │ │ │ ├── flower │ │ │ │ └── start │ │ │ └── worker │ │ │ │ └── start │ │ └── start │ └── vue │ │ └── Dockerfile └── production │ ├── aws │ ├── Dockerfile │ └── maintenance │ │ ├── download │ │ └── upload │ ├── django │ ├── Dockerfile │ ├── celery │ │ ├── beat │ │ │ └── start │ │ ├── flower │ │ │ └── start │ │ └── worker │ │ │ └── start │ ├── entrypoint │ └── start │ ├── postgres │ ├── Dockerfile │ └── maintenance │ │ ├── _sourced │ │ ├── constants.sh │ │ ├── countdown.sh │ │ ├── messages.sh │ │ └── yes_no.sh │ │ ├── backup │ │ ├── backups │ │ └── restore │ └── traefik │ ├── Dockerfile │ └── traefik.toml ├── config ├── __init__.py ├── aws │ └── lambda │ │ ├── build.sh │ │ ├── build_document_conversion.sh │ │ ├── build_info_and_image.sh │ │ ├── build_ocr.sh │ │ ├── build_sidekick.sh │ │ ├── build_utils.sh │ │ ├── cloud-requirements.txt │ │ ├── codeship_deploy_lambdas.sh │ │ ├── deploy.sh │ │ ├── deploy_topics.sh │ │ ├── info-and-image-requirements.txt │ │ ├── libreoffice │ │ ├── README.md │ │ └── lo.tar.gz │ │ ├── ocr_libraries │ │ ├── libgomp.so.1 │ │ ├── libjbig.so.2.0 │ │ ├── libjpeg.so.62 │ │ ├── liblept.so.5 │ │ ├── libpng15.so.15 │ │ ├── libtesseract.so.5 │ │ ├── libtiff.so.5 │ │ └── libwebp.so.4 │ │ ├── replace_params.py │ │ └── template_params.yaml ├── celery_app.py ├── gunicorn.conf ├── languages │ ├── choices.py │ └── languages.tsv ├── nginx.conf.erb ├── settings │ ├── __init__.py │ ├── base.py │ ├── local.py │ ├── production.py │ └── test.py ├── solr │ ├── lang │ │ ├── contractions_ca.txt │ │ ├── contractions_fr.txt │ │ ├── contractions_ga.txt │ │ ├── contractions_it.txt │ │ ├── hyphenations_ga.txt │ │ 
├── stemdict_nl.txt │ │ ├── stoptags_ja.txt │ │ ├── stopwords_ar.txt │ │ ├── stopwords_bg.txt │ │ ├── stopwords_ca.txt │ │ ├── stopwords_cz.txt │ │ ├── stopwords_da.txt │ │ ├── stopwords_de.txt │ │ ├── stopwords_el.txt │ │ ├── stopwords_en.txt │ │ ├── stopwords_es.txt │ │ ├── stopwords_eu.txt │ │ ├── stopwords_fa.txt │ │ ├── stopwords_fi.txt │ │ ├── stopwords_fr.txt │ │ ├── stopwords_ga.txt │ │ ├── stopwords_gl.txt │ │ ├── stopwords_hi.txt │ │ ├── stopwords_hu.txt │ │ ├── stopwords_hy.txt │ │ ├── stopwords_id.txt │ │ ├── stopwords_it.txt │ │ ├── stopwords_ja.txt │ │ ├── stopwords_lv.txt │ │ ├── stopwords_nl.txt │ │ ├── stopwords_no.txt │ │ ├── stopwords_pt.txt │ │ ├── stopwords_ro.txt │ │ ├── stopwords_ru.txt │ │ ├── stopwords_sv.txt │ │ ├── stopwords_th.txt │ │ ├── stopwords_tr.txt │ │ └── userdict_ja.txt │ ├── lib │ │ └── solr-plugins-1.0.0-SNAPSHOT.jar │ ├── managed-schema │ ├── params.json │ ├── protwords.txt │ ├── solrconfig.local.xml │ ├── solrconfig.xml │ ├── stopwords.txt │ └── synonyms.txt ├── urls.py └── wsgi.py ├── docs ├── Makefile ├── __init__.py ├── api │ ├── api.md │ └── search.md ├── conf.py ├── index.rst ├── lambda.gv └── make.bat ├── documentcloud ├── __init__.py ├── addons │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── choices.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_addonrun_dismissed.py │ │ ├── 0003_addon_error.py │ │ ├── 0004_addon_access.py │ │ ├── 0005_auto_20220330_1908.py │ │ ├── 0006_auto_20220404_1747.py │ │ ├── 0007_auto_20220407_1311.py │ │ ├── 0008_rename_github_token_addon__github_token.py │ │ ├── 0009_alter_addon_parameters.py │ │ ├── 0010_auto_20220411_1515.py │ │ ├── 0011_auto_20220411_2039.py │ │ ├── 0012_alter_addon_organization.py │ │ ├── 0013_githubinstallation_removed.py │ │ ├── 0014_alter_addon__user.py │ │ ├── 0015_auto_20220419_1824.py │ │ ├── 0016_remove_addon__github_token.py │ │ ├── 0017_alter_addonrun_run_id.py │ │ ├── 0018_alter_addon_organization.py │ │ ├── 0019_auto_20220505_1845.py │ │ ├── 
0020_alter_addonevent_event.py │ │ ├── 0021_addonevent_scratch.py │ │ ├── 0022_auto_20221019_1746.py │ │ ├── 0023_auto_20230321_1452.py │ │ ├── 0024_alter_addonrun_created_at.py │ │ ├── 0025_addonrun_credits_spent.py │ │ ├── 0026_addondisablelog.py │ │ ├── 0027_visualaddon.py │ │ └── __init__.py │ ├── models.py │ ├── querysets.py │ ├── rules.py │ ├── serializers.py │ ├── signals.py │ ├── tasks.py │ ├── tests │ │ ├── __init__.py │ │ ├── factories.py │ │ └── test_views.py │ └── views.py ├── common │ ├── __init__.py │ ├── access_choices.py │ ├── apps.py │ ├── environment │ │ ├── __init__.py │ │ ├── aws │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── httpsub.py │ │ │ ├── processing_token.py │ │ │ ├── pubsub.py │ │ │ └── storage.py │ │ ├── gcp │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── httpsub.py │ │ │ ├── processing_token.py │ │ │ ├── pubsub.py │ │ │ └── storage.py │ │ ├── local │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── httpsub.py │ │ │ ├── processing_token.py │ │ │ ├── pubsub.py │ │ │ └── storage.py │ │ └── minio │ │ │ ├── __init__.py │ │ │ └── storage.py │ ├── extensions.py │ ├── path.py │ ├── redis_fields.py │ ├── serverless │ │ ├── __init__.py │ │ ├── error_handling.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_error_handling.py │ │ └── utils.py │ ├── session.py │ ├── tests │ │ └── test_utils.py │ ├── utils.py │ └── wikidata.py ├── conftest.py ├── core │ ├── __init__.py │ ├── apps.py │ ├── authentication.py │ ├── choices.py │ ├── fields.py │ ├── filters.py │ ├── mail.py │ ├── management │ │ └── commands │ │ │ ├── import.py │ │ │ └── upload_languages.py │ ├── middleware.py │ ├── pagination.py │ ├── permissions.py │ ├── rules.py │ ├── signals.py │ ├── templatetags │ │ └── markdown.py │ ├── tests.py │ ├── utils.py │ ├── versioning.py │ └── views.py ├── documents │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── choices.py │ ├── constants.py │ ├── decorators.py │ ├── entity_extraction.py │ ├── fields.py │ ├── local_tasks.py │ ├── management 
│ │ └── commands │ │ │ └── solr_reindex.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20190925_1816.py │ │ ├── 0003_auto_20190925_1848.py │ │ ├── 0004_section.py │ │ ├── 0005_entity_entitydate.py │ │ ├── 0006_auto_20191021_1518.py │ │ ├── 0007_documenterror.py │ │ ├── 0008_auto_20191106_2010.py │ │ ├── 0008_auto_20191107_2031.py │ │ ├── 0009_merge_20191112_1553.py │ │ ├── 0010_auto_20191211_2057.py │ │ ├── 0011_auto_20200128_1418.py │ │ ├── 0012_auto_20200205_1535.py │ │ ├── 0013_auto_20200210_1849.py │ │ ├── 0014_auto_20200210_1900.py │ │ ├── 0015_auto_20200211_1651.py │ │ ├── 0015_auto_20200213_1650.py │ │ ├── 0016_merge_20200213_2145.py │ │ ├── 0017_auto_20200226_1902.py │ │ ├── 0018_auto_20200311_1936.py │ │ ├── 0018_auto_20200405_1736.py │ │ ├── 0019_auto_20200405_1736.py │ │ ├── 0020_merge_20200407_1320.py │ │ ├── 0021_auto_20200429_0121.py │ │ ├── 0022_auto_20200430_1411.py │ │ ├── 0023_auto_20200525_1942.py │ │ ├── 0024_auto_20200805_2006.py │ │ ├── 0025_auto_20200805_2031.py │ │ ├── 0026_auto_20200805_2051.py │ │ ├── 0027_auto_20200807_1659.py │ │ ├── 0028_auto_20200925_2001.py │ │ ├── 0028_deleteddocument.py │ │ ├── 0029_merge_20201001_1908.py │ │ ├── 0030_auto_20201211_1452.py │ │ ├── 0031_auto_20201215_1859.py │ │ ├── 0032_auto_20201222_1942.py │ │ ├── 0033_auto_20201223_0115.py │ │ ├── 0034_auto_20201223_0116.py │ │ ├── 0035_auto_20201223_0116.py │ │ ├── 0036_auto_20201223_1830.py │ │ ├── 0037_entity_description.py │ │ ├── 0038_auto_20201223_2111.py │ │ ├── 0039_auto_20210113_1550.py │ │ ├── 0040_auto_20210216_2032.py │ │ ├── 0041_auto_20210316_1424.py │ │ ├── 0042_auto_20210317_0147.py │ │ ├── 0043_document_cache_dirty.py │ │ ├── 0044_auto_20210422_2056.py │ │ ├── 0045_auto_20211102_1709.py │ │ ├── 0046_auto_20220307_1434.py │ │ ├── 0047_alter_document_original_extension.py │ │ ├── 0048_note_solr_dirty.py │ │ ├── 0049_document_delayed_index.py │ │ ├── 0050_document_noindex.py │ │ ├── 0051_auto_20230214_1451.py │ │ ├── 
0051_auto_20230303_1923.py │ │ ├── 0052_merge_0051_auto_20230214_1451_0051_auto_20230303_1923.py │ │ ├── 0053_auto_20230622_1623.py │ │ └── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── document.py │ │ ├── entity.py │ │ └── note.py │ ├── modifications.py │ ├── oembed.py │ ├── processing │ │ ├── __init__.py │ │ ├── document_conversion │ │ │ ├── cloud-requirements.txt │ │ │ ├── common │ │ │ ├── libreoffice │ │ │ │ ├── README.md │ │ │ │ ├── install.sh │ │ │ │ └── lo.tar.gz │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── info_and_image │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── cloud-requirements.txt │ │ │ ├── common │ │ │ ├── graft.py │ │ │ ├── graft_adapter.py │ │ │ ├── libpdfium.so2 │ │ │ ├── main.py │ │ │ ├── pdfium.py │ │ │ └── requirements.txt │ │ ├── ocr │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── cloud-requirements.txt │ │ │ ├── common │ │ │ ├── main.py │ │ │ ├── requirements.txt │ │ │ ├── tess.py │ │ │ └── tesseract │ │ │ │ ├── liblept.so.5 │ │ │ │ ├── libtesseract.so.5 │ │ │ │ └── tessdata │ │ │ │ └── pdf.ttf │ │ ├── sidekick │ │ │ ├── __init__.py │ │ │ ├── common │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── imagediff.py │ │ │ ├── images │ │ │ │ ├── imagediff_alteration_orange_square.png │ │ │ │ ├── imagediff_alteration_red_scratch.png │ │ │ │ ├── imagediff_alteration_small_redaction.png │ │ │ │ ├── imagediff_illustrator_page.png │ │ │ │ ├── imagediff_pdfium_page.png │ │ │ │ ├── imagediff_preview_page.png │ │ │ │ ├── redaction_0_0.png │ │ │ │ ├── redaction_diagonal.png │ │ │ │ ├── redaction_sentence.png │ │ │ │ └── redaction_unredacted.png │ │ │ ├── pdfs │ │ │ │ ├── doc_3.pdf │ │ │ │ ├── doc_3_modified.pdf │ │ │ │ ├── doc_3_overlaid.pdf │ │ │ │ ├── doc_3_pg0.png │ │ │ │ ├── doc_3_pg0_CC.png │ │ │ │ ├── doc_3_pg1.png │ │ │ │ ├── doc_3_pg1_180.png │ │ │ │ ├── doc_3_pg1_CW.png │ │ │ │ ├── doc_3_pg1_redacted.png │ │ │ │ ├── doc_3_pg2.png │ │ │ │ ├── doc_3_pg2_CW.png │ │ │ │ ├── 
output_test2.pdf │ │ │ │ ├── output_test3.pdf │ │ │ │ ├── output_test4.pdf │ │ │ │ ├── output_test5.pdf │ │ │ │ ├── output_test6.pdf │ │ │ │ ├── shakespeare.pdf │ │ │ │ └── shakespeare.png │ │ │ ├── pipeline_tests │ │ │ │ ├── __init__.py │ │ │ │ ├── fake_pdf.py │ │ │ │ ├── mocks.py │ │ │ │ ├── test_fakepdf.py │ │ │ │ └── test_pipeline.py │ │ │ ├── report_generator.py │ │ │ ├── report_test_case.py │ │ │ ├── reports.html │ │ │ ├── test_imagediff.py │ │ │ ├── test_pdf_processor.py │ │ │ ├── textdiff.py │ │ │ └── texts │ │ │ │ └── pg2.txt │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── cloud-requirements.txt │ │ │ ├── common │ │ │ ├── main.py │ │ │ └── requirements.txt │ ├── querysets.py │ ├── rules │ │ ├── __init__.py │ │ ├── document_errors.py │ │ ├── documents.py │ │ ├── entities.py │ │ ├── notes.py │ │ └── sections.py │ ├── search.py │ ├── search_escape.py │ ├── serializers.py │ ├── solr.py │ ├── tasks.py │ ├── tests │ │ ├── __init__.py │ │ ├── factories.py │ │ ├── search_data.py │ │ ├── test_models.py │ │ ├── test_modifications.py │ │ ├── test_oembed.py │ │ ├── test_rules.py │ │ ├── test_search.py │ │ ├── test_serializers.py │ │ └── test_views.py │ └── views.py ├── drf_bulk │ ├── __init__.py │ ├── apps.py │ ├── migrations │ │ └── __init__.py │ ├── models.py │ ├── routers.py │ ├── serializers.py │ └── views.py ├── entities │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── choices.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20230201_1554.py │ │ ├── 0003_auto_20230201_1814.py │ │ ├── 0004_alter_entity_access.py │ │ ├── 0005_auto_20230206_1943.py │ │ ├── 0006_entity_metadata.py │ │ ├── 0007_entityoccurrence.py │ │ ├── 0008_alter_entity_metadata.py │ │ ├── 0009_auto_20230214_1515.py │ │ ├── 0010_auto_20230214_1531.py │ │ ├── 0011_alter_entity_name.py │ │ ├── 0012_alter_entity_wikidata_id.py │ │ ├── 0013_auto_20230223_1503.py │ │ ├── 0014_auto_20230223_1614.py │ │ └── __init__.py │ ├── models.py │ ├── querysets.py │ ├── rules.py │ ├── serializers.py │ 
├── tests │ │ ├── factories.py │ │ └── test_views.py │ └── views.py ├── flatpages │ ├── serializers.py │ └── views.py ├── oembed │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── decorators.py │ ├── migrations │ │ └── __init__.py │ ├── models.py │ ├── oembed.py │ ├── registry.py │ ├── tests │ │ ├── __init__.py │ │ └── test_views.py │ ├── urls.py │ ├── utils.py │ └── views.py ├── organizations │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── exceptions.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20200128_1418.py │ │ ├── 0003_auto_20200214_1640.py │ │ ├── 0004_auto_20200306_2000.py │ │ ├── 0005_auto_20200526_1332.py │ │ ├── 0006_auto_20200526_1336.py │ │ ├── 0007_auto_20200526_1338.py │ │ ├── 0008_auto_20200526_1940.py │ │ ├── 0009_organization_entitlement.py │ │ ├── 0010_auto_20200527_1732.py │ │ ├── 0011_remove_organization_plan.py │ │ ├── 0012_auto_20200925_2001.py │ │ ├── 0013_auto_20211102_1707.py │ │ ├── 0014_auto_20221025_1350.py │ │ ├── 0015_aicreditlog.py │ │ ├── 0016_alter_aicreditlog_options.py │ │ ├── 0017_organization_merged.py │ │ └── __init__.py │ ├── models.py │ ├── querysets.py │ ├── rules.py │ ├── serializers.py │ ├── tests │ │ ├── __init__.py │ │ ├── factories.py │ │ ├── test_models.py │ │ ├── test_rules.py │ │ └── test_views.py │ └── views.py ├── projects │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── choices.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20200128_1418.py │ │ ├── 0003_auto_20200210_1548.py │ │ ├── 0004_auto_20200210_2050.py │ │ ├── 0005_collaboration_access.py │ │ ├── 0006_auto_20200214_1641.py │ │ ├── 0007_auto_20200311_1936.py │ │ ├── 0007_auto_20200406_0048.py │ │ ├── 0008_merge_20200407_1320.py │ │ ├── 0009_auto_20200407_1320.py │ │ ├── 0010_auto_20200429_0121.py │ │ ├── 0011_auto_20210216_2032.py │ │ ├── 0012_auto_20210407_1801.py │ │ └── __init__.py │ ├── models.py │ ├── oembed.py │ ├── querysets.py │ ├── rules.py │ ├── serializers.py │ ├── tests │ │ ├── __init__.py │ │ ├── 
factories.py │ │ ├── test_rules.py │ │ └── test_views.py │ └── views.py ├── sidekick │ ├── __init__.py │ ├── apps.py │ ├── choices.py │ ├── lego.py │ ├── local_tasks.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20210723_2029.py │ │ └── __init__.py │ ├── models.py │ ├── routers.py │ ├── rules.py │ ├── serializers.py │ ├── sidekick.py │ ├── signals.py │ ├── tasks.py │ ├── tests │ │ ├── __init__.py │ │ └── test_views.py │ └── views.py ├── static │ ├── css │ │ └── project.css │ ├── fonts │ │ └── .gitkeep │ ├── images │ │ └── favicons │ │ │ └── favicon.ico │ ├── js │ │ └── project.js │ └── sass │ │ ├── custom_bootstrap_vars.scss │ │ └── project.scss ├── statistics │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20200807_1430.py │ │ ├── 0003_auto_20210323_1522.py │ │ └── __init__.py │ ├── models.py │ ├── rules.py │ ├── serializers.py │ ├── tasks.py │ ├── tests.py │ └── views.py ├── templates │ ├── addons │ │ ├── dashboard.html │ │ ├── email │ │ │ ├── base_disabled.html │ │ │ └── disabled.html │ │ └── scraper.html │ ├── admin │ │ ├── addons │ │ │ └── addon │ │ │ │ └── change_form.html │ │ └── users │ │ │ └── user │ │ │ └── change_list.html │ ├── core │ │ └── email │ │ │ ├── base.html │ │ │ ├── import.html │ │ │ ├── mailkey.html │ │ │ ├── mailkey_delete.html │ │ │ └── upcoming.html │ ├── flatpages │ │ └── default.html │ └── oembed │ │ ├── document.html │ │ ├── note.html │ │ ├── page.html │ │ └── project.html └── users │ ├── __init__.py │ ├── admin.py │ ├── apps.py │ ├── managers.py │ ├── migrations │ ├── 0001_initial.py │ ├── 0002_auto_20200128_1418.py │ ├── 0003_auto_20200214_1640.py │ ├── 0004_auto_20200306_2000.py │ ├── 0005_auto_20200523_1534.py │ ├── 0006_auto_20200925_2001.py │ ├── 0007_auto_20211102_1707.py │ ├── 0008_user_active_addons.py │ ├── 0009_user_mailkey.py │ ├── 0010_user_bio_alter_user_email_alter_user_username.py │ ├── 0011_user_pinned_projects.py │ ├── 
0012_default_pinned_projects.py │ └── __init__.py │ ├── models.py │ ├── querysets.py │ ├── rules.py │ ├── serializers.py │ ├── signals.py │ ├── tests │ ├── __init__.py │ ├── factories.py │ ├── test_rules.py │ └── test_views.py │ └── views.py ├── initialize_dotenvs.py ├── license.md ├── local.yml ├── locale └── README.rst ├── manage.py ├── merge_production_dotenvs_in_dotenv.py ├── postdeploy.sh ├── production.yml ├── pytest.ini ├── requirements.txt ├── requirements ├── base.in ├── base.txt ├── local.in ├── local.txt ├── production.in └── production.txt ├── setup.cfg └── tasks.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | include = documentcloud/* 3 | omit = *migrations*, *tests* 4 | plugins = 5 | django_coverage_plugin 6 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .* 2 | !.coveragerc 3 | !.env 4 | !.pylintrc 5 | 6 | # Ignore generated files 7 | **/*.pyc 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.{py,rst,ini}] 12 | indent_style = space 13 | indent_size = 4 14 | 15 | [*.py] 16 | line_length=120 17 | known_first_party=documentcloud 18 | multi_line_output=3 19 | default_section=THIRDPARTY 20 | 21 | [*.{html,css,scss,json,yml}] 22 | indent_style = space 23 | indent_size = 2 24 | 25 | [*.md] 26 | trim_trailing_whitespace = false 27 | 28 | [Makefile] 29 | indent_style = tab 30 | 31 | [nginx.conf] 32 | indent_style = space 33 | indent_size = 2 34 | -------------------------------------------------------------------------------- /.gitattributes: 
-------------------------------------------------------------------------------- 1 | * text=auto 2 | *.tar.gz filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /.github/workflows/lambda.yml: -------------------------------------------------------------------------------- 1 | name: Post-Deploy Lambda 2 | 3 | on: 4 | deployment: 5 | 6 | jobs: 7 | deploy-lambdas: 8 | runs-on: ubuntu-latest 9 | # DEPLOYMENT_ENVIRONMENT is not a default runner variable; populate it from the deployment event so the staging/production branch below can actually match. 10 | env: 11 | DEPLOYMENT_ENVIRONMENT: ${{ github.event.deployment.environment }} 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Show deployment info 16 | run: | 17 | echo "Deployment environment: $DEPLOYMENT_ENVIRONMENT" 18 | 19 | - name: Run Lambda deploy 20 | run: | 21 | if [[ "$DEPLOYMENT_ENVIRONMENT" == "documentcloud-staging" ]]; then 22 | echo "Deploying staging lambda updates" 23 | bash config/aws/lambda/codeship_deploy_lambdas.sh staging-lambda --staging 24 | else 25 | echo "Deploying production lambda updates" 26 | bash config/aws/lambda/codeship_deploy_lambdas.sh prod-lambda 27 | fi 28 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | line_length=88 3 | force_to_top= 4 | skip= 5 | skip_glob=*/migrations/* 6 | known_future_library=future 7 | known_django=django,celery,rest_framework 8 | known_first_party=documentcloud 9 | indent=' ' 10 | multi_line_output=3 11 | length_sort=False 12 | include_trailing_comma=True 13 | combine_as_imports=True 14 | default_section=THIRDPARTY 15 | import_heading_django=Django 16 | import_heading_stdlib=Standard Library 17 | import_heading_thirdparty=Third Party 18 | import_heading_firstparty=DocumentCloud 19 | import_heading_localfolder=Local 20 | sections=FUTURE,DJANGO,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER 21 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | 
[MASTER] 2 | # https://stackoverflow.com/a/39207275/10952222 3 | init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.join(os.path.dirname(find_pylintrc()), 'documentcloud'))" 4 | load-plugins=pylint_django, pylint_celery 5 | ignore=migrations 6 | 7 | [FORMAT] 8 | max-line-length=88 9 | good-names=pk,x,y,i,x1,x2,y1,y2 10 | max-args=6 11 | 12 | [MESSAGES CONTROL] 13 | enable=useless-suppression 14 | disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,raise-missing-from,django-not-configured 15 | 16 | [TYPECHECK] 17 | generated-members=REQUEST,acl_users,aq_parent,"[a-zA-Z]+_set{1,2}",save,delete 18 | 19 | [BASIC] 20 | const-rgx=(([A-Za-z_][A-Za-z0-9_]*)|(__.*__))$ 21 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10 2 | -------------------------------------------------------------------------------- /.slugignore: -------------------------------------------------------------------------------- 1 | config/aws/ 2 | documentcloud/documents/processing/ 3 | -------------------------------------------------------------------------------- /CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | Mitchell Kotler 2 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | release: ./postdeploy.sh 2 | web: bin/start-nginx gunicorn -c config/gunicorn.conf config.wsgi:application 3 | worker: REMAP_SIGTERM=SIGQUIT celery --app=config.celery_app worker --loglevel=info 4 | solr_worker: REMAP_SIGTERM=SIGQUIT celery --app=config.celery_app worker --loglevel=info -Q solr,celery 5 | beat: REMAP_SIGTERM=SIGQUIT celery 
--app=config.celery_app beat --loglevel=info -------------------------------------------------------------------------------- /compose/local/django/celery/beat/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | 7 | rm -f './celerybeat.pid' 8 | celery -A config.celery_app beat -l INFO 9 | -------------------------------------------------------------------------------- /compose/local/django/celery/flower/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | 7 | celery flower \ 8 | --app=config.celery_app \ 9 | --broker="${CELERY_BROKER_URL}" \ 10 | --basic_auth="${CELERY_FLOWER_USER}:${CELERY_FLOWER_PASSWORD}" 11 | -------------------------------------------------------------------------------- /compose/local/django/celery/worker/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | 7 | celery -A config.celery_app worker -l INFO 8 | -------------------------------------------------------------------------------- /compose/local/django/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | 8 | python manage.py migrate 9 | python manage.py runserver_plus 0.0.0.0:80 10 | -------------------------------------------------------------------------------- /compose/local/vue/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:10.9-slim 2 | 3 | RUN printf "deb http://archive.debian.org/debian/ jessie main\ndeb-src http://archive.debian.org/debian/ jessie main\ndeb http://security.debian.org jessie/updates main\ndeb-src http://security.debian.org jessie/updates main" > /etc/apt/sources.list 4 | 5 | # From 
https://daten-und-bass.io/blog/getting-started-with-vue-cli-on-docker/ 6 | RUN apt-get -y update \ 7 | && apt-get install -y git 8 | 9 | RUN npm install -g @vue/cli 10 | 11 | RUN apt-get autoremove -y \ 12 | && apt-get autoclean -y \ 13 | && apt-get clean -y \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | EXPOSE 8080 5000 17 | 18 | USER node 19 | 20 | CMD cd /frontend && npm install && npm run serve -- --port 5000 21 | 22 | -------------------------------------------------------------------------------- /compose/production/aws/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM garland/aws-cli-docker:1.15.47 2 | 3 | COPY ./compose/production/aws/maintenance /usr/local/bin/maintenance 4 | COPY ./compose/production/postgres/maintenance/_sourced /usr/local/bin/maintenance/_sourced 5 | 6 | RUN chmod +x /usr/local/bin/maintenance/* 7 | 8 | RUN mv /usr/local/bin/maintenance/* /usr/local/bin \ 9 | && rmdir /usr/local/bin/maintenance 10 | -------------------------------------------------------------------------------- /compose/production/aws/maintenance/download: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ### Download a file from your Amazon S3 bucket to the postgres /backups folder 4 | ### 5 | ### Usage: 6 | ### $ docker-compose -f production.yml run --rm awscli <1> 7 | 8 | set -o errexit 9 | set -o pipefail 10 | set -o nounset 11 | 12 | working_dir="$(dirname ${0})" 13 | source "${working_dir}/_sourced/constants.sh" 14 | source "${working_dir}/_sourced/messages.sh" 15 | 16 | export AWS_ACCESS_KEY_ID="${DJANGO_AWS_ACCESS_KEY_ID}" 17 | export AWS_SECRET_ACCESS_KEY="${DJANGO_AWS_SECRET_ACCESS_KEY}" 18 | export AWS_STORAGE_BUCKET_NAME="${DJANGO_AWS_STORAGE_BUCKET_NAME}" 19 | 20 | 21 | aws s3 cp s3://${AWS_STORAGE_BUCKET_NAME}${BACKUP_DIR_PATH}/${1} ${BACKUP_DIR_PATH}/${1} 22 | 23 | message_success "Finished downloading ${1}." 
24 | 25 | -------------------------------------------------------------------------------- /compose/production/aws/maintenance/upload: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ### Upload the /backups folder to Amazon S3, then empty the local folder 4 | ### 5 | ### Usage: 6 | ### $ docker-compose -f production.yml run --rm awscli upload 7 | 8 | set -o errexit 9 | set -o pipefail 10 | set -o nounset 11 | 12 | working_dir="$(dirname "${0}")" 13 | source "${working_dir}/_sourced/constants.sh" 14 | source "${working_dir}/_sourced/messages.sh" 15 | 16 | export AWS_ACCESS_KEY_ID="${DJANGO_AWS_ACCESS_KEY_ID}" 17 | export AWS_SECRET_ACCESS_KEY="${DJANGO_AWS_SECRET_ACCESS_KEY}" 18 | export AWS_STORAGE_BUCKET_NAME="${DJANGO_AWS_STORAGE_BUCKET_NAME}" 19 | 20 | 21 | message_info "Upload the backups directory to S3 bucket ${AWS_STORAGE_BUCKET_NAME}" 22 | 23 | aws s3 cp "${BACKUP_DIR_PATH}" "s3://${AWS_STORAGE_BUCKET_NAME}${BACKUP_DIR_PATH}" --recursive 24 | 25 | message_info "Cleaning the directory ${BACKUP_DIR_PATH}" 26 | 27 | rm -rf "${BACKUP_DIR_PATH:?}"/* # ':?' aborts rather than expanding to 'rm -rf /*' if the path is ever empty 28 | 29 | message_success "Finished uploading and cleaning." 
30 | 31 | -------------------------------------------------------------------------------- /compose/production/django/celery/beat/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | 8 | celery -A config.celery_app beat -l INFO 9 | -------------------------------------------------------------------------------- /compose/production/django/celery/flower/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o nounset 5 | 6 | 7 | celery flower \ 8 | --app=config.celery_app \ 9 | --broker="${CELERY_BROKER_URL}" \ 10 | --basic_auth="${CELERY_FLOWER_USER}:${CELERY_FLOWER_PASSWORD}" 11 | -------------------------------------------------------------------------------- /compose/production/django/celery/worker/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | 8 | celery -A config.celery_app worker -l INFO 9 | -------------------------------------------------------------------------------- /compose/production/django/entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | 8 | 9 | # N.B. If only .env files supported variable expansion... 
10 | export CELERY_BROKER_URL="${REDIS_URL}" 11 | 12 | 13 | if [ -z "${POSTGRES_USER}" ]; then 14 | base_postgres_image_default_user='postgres' 15 | export POSTGRES_USER="${base_postgres_image_default_user}" 16 | fi 17 | export DATABASE_URL="postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}" 18 | 19 | postgres_ready() { 20 | python << END 21 | import sys 22 | 23 | import psycopg2 24 | 25 | try: 26 | psycopg2.connect( 27 | dbname="${POSTGRES_DB}", 28 | user="${POSTGRES_USER}", 29 | password="${POSTGRES_PASSWORD}", 30 | host="${POSTGRES_HOST}", 31 | port="${POSTGRES_PORT}", 32 | ) 33 | except psycopg2.OperationalError: 34 | sys.exit(-1) 35 | sys.exit(0) 36 | 37 | END 38 | } 39 | until postgres_ready; do 40 | >&2 echo 'Waiting for PostgreSQL to become available...' 41 | sleep 1 42 | done 43 | >&2 echo 'PostgreSQL is available' 44 | 45 | exec "$@" 46 | -------------------------------------------------------------------------------- /compose/production/django/start: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | 7 | 8 | python /app/manage.py collectstatic --noinput 9 | /usr/local/bin/gunicorn config.wsgi --bind 0.0.0.0:5000 --chdir=/app 10 | -------------------------------------------------------------------------------- /compose/production/postgres/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:15.2 2 | 3 | COPY ./compose/production/postgres/maintenance /usr/local/bin/maintenance 4 | RUN chmod +x /usr/local/bin/maintenance/* 5 | RUN mv /usr/local/bin/maintenance/* /usr/local/bin \ 6 | && rmdir /usr/local/bin/maintenance 7 | -------------------------------------------------------------------------------- /compose/production/postgres/maintenance/_sourced/constants.sh: -------------------------------------------------------------------------------- 1 
| #!/usr/bin/env bash 2 | 3 | 4 | BACKUP_DIR_PATH='/backups' 5 | BACKUP_FILE_PREFIX='backup' 6 | -------------------------------------------------------------------------------- /compose/production/postgres/maintenance/_sourced/countdown.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | countdown() { 5 | declare desc="A simple countdown. Source: https://superuser.com/a/611582" 6 | local seconds="${1}" 7 | local d=$(($(date +%s) + "${seconds}")) 8 | while [ "$d" -ge `date +%s` ]; do 9 | echo -ne "$(date -u --date @$(($d - `date +%s`)) +%H:%M:%S)\r"; 10 | sleep 0.1 11 | done 12 | } 13 | -------------------------------------------------------------------------------- /compose/production/postgres/maintenance/_sourced/messages.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | message_newline() { 5 | echo 6 | } 7 | 8 | message_debug() 9 | { 10 | echo -e "DEBUG: ${@}" 11 | } 12 | 13 | message_welcome() 14 | { 15 | echo -e "\e[1m${@}\e[0m" 16 | } 17 | 18 | message_warning() 19 | { 20 | echo -e "\e[33mWARNING\e[0m: ${@}" 21 | } 22 | 23 | message_error() 24 | { 25 | echo -e "\e[31mERROR\e[0m: ${@}" 26 | } 27 | 28 | message_info() 29 | { 30 | echo -e "\e[37mINFO\e[0m: ${@}" 31 | } 32 | 33 | message_suggestion() 34 | { 35 | echo -e "\e[33mSUGGESTION\e[0m: ${@}" 36 | } 37 | 38 | message_success() 39 | { 40 | echo -e "\e[32mSUCCESS\e[0m: ${@}" 41 | } 42 | -------------------------------------------------------------------------------- /compose/production/postgres/maintenance/_sourced/yes_no.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | yes_no() { 5 | declare desc="Prompt for confirmation. \$\"\{1\}\": confirmation message." 6 | local arg1="${1}" 7 | 8 | local response= 9 | read -r -p "${arg1} (y/[n])? 
" response 10 | if [[ "${response}" =~ ^[Yy]$ ]] 11 | then 12 | exit 0 13 | else 14 | exit 1 15 | fi 16 | } 17 | -------------------------------------------------------------------------------- /compose/production/postgres/maintenance/backup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | ### Create a database backup. 5 | ### 6 | ### Usage: 7 | ### $ docker-compose -f .yml (exec |run --rm) postgres backup 8 | 9 | 10 | set -o errexit 11 | set -o pipefail 12 | set -o nounset 13 | 14 | 15 | working_dir="$(dirname ${0})" 16 | source "${working_dir}/_sourced/constants.sh" 17 | source "${working_dir}/_sourced/messages.sh" 18 | 19 | 20 | message_welcome "Backing up the '${POSTGRES_DB}' database..." 21 | 22 | 23 | if [[ "${POSTGRES_USER}" == "postgres" ]]; then 24 | message_error "Backing up as 'postgres' user is not supported. Assign 'POSTGRES_USER' env with another one and try again." 25 | exit 1 26 | fi 27 | 28 | export PGHOST="${POSTGRES_HOST}" 29 | export PGPORT="${POSTGRES_PORT}" 30 | export PGUSER="${POSTGRES_USER}" 31 | export PGPASSWORD="${POSTGRES_PASSWORD}" 32 | export PGDATABASE="${POSTGRES_DB}" 33 | 34 | backup_filename="${BACKUP_FILE_PREFIX}_$(date +'%Y_%m_%dT%H_%M_%S').sql.gz" 35 | pg_dump | gzip > "${BACKUP_DIR_PATH}/${backup_filename}" 36 | 37 | 38 | message_success "'${POSTGRES_DB}' database backup '${backup_filename}' has been created and placed in '${BACKUP_DIR_PATH}'." 39 | -------------------------------------------------------------------------------- /compose/production/postgres/maintenance/backups: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | ### View backups. 
5 | ### 6 | ### Usage: 7 | ### $ docker-compose -f .yml (exec |run --rm) postgres backups 8 | 9 | 10 | set -o errexit 11 | set -o pipefail 12 | set -o nounset 13 | 14 | 15 | working_dir="$(dirname ${0})" 16 | source "${working_dir}/_sourced/constants.sh" 17 | source "${working_dir}/_sourced/messages.sh" 18 | 19 | 20 | message_welcome "These are the backups you have got:" 21 | 22 | ls -lht "${BACKUP_DIR_PATH}" 23 | -------------------------------------------------------------------------------- /compose/production/traefik/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM traefik:alpine 2 | RUN mkdir -p /etc/traefik/acme 3 | RUN touch /etc/traefik/acme/acme.json 4 | RUN chmod 600 /etc/traefik/acme/acme.json 5 | COPY ./compose/production/traefik/traefik.toml /etc/traefik 6 | -------------------------------------------------------------------------------- /compose/production/traefik/traefik.toml: -------------------------------------------------------------------------------- 1 | logLevel = "INFO" 2 | defaultEntryPoints = ["http", "https"] 3 | 4 | # Entrypoints, http and https 5 | [entryPoints] 6 | # http should be redirected to https 7 | [entryPoints.http] 8 | address = ":80" 9 | [entryPoints.http.redirect] 10 | entryPoint = "https" 11 | # https is the default 12 | [entryPoints.https] 13 | address = ":443" 14 | [entryPoints.https.tls] 15 | 16 | # Enable ACME (Let's Encrypt): automatic SSL 17 | [acme] 18 | # Email address used for registration 19 | email = "mitch@muckrock.com" 20 | storage = "/etc/traefik/acme/acme.json" 21 | entryPoint = "https" 22 | onDemand = false 23 | OnHostRule = true 24 | # Use a HTTP-01 acme challenge rather than TLS-SNI-01 challenge 25 | [acme.httpChallenge] 26 | entryPoint = "http" 27 | 28 | [file] 29 | [backends] 30 | [backends.django] 31 | [backends.django.servers.server1] 32 | url = "http://django:5000" 33 | 34 | [frontends] 35 | [frontends.django] 36 | backend = "django" 37 | 
passHostHeader = true 38 | [frontends.django.headers] 39 | HostsProxyHeaders = ['X-CSRFToken'] 40 | [frontends.django.routes.dr1] 41 | rule = "Host:documentcloud.org" 42 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | # This will make sure the app is always imported when 2 | # Django starts so that shared_task will use this app. 3 | from .celery_app import app as celery_app 4 | 5 | __all__ = ("celery_app",) 6 | -------------------------------------------------------------------------------- /config/aws/lambda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Run info_and_image build script 6 | ./build_info_and_image.sh 7 | 8 | # Run ocr build script 9 | ./build_ocr.sh 10 | 11 | # Run document conversion build script 12 | ./build_document_conversion.sh 13 | 14 | # Run sidekick build script 15 | ./build_sidekick.sh 16 | 17 | # Run utils build script 18 | ./build_utils.sh 19 | -------------------------------------------------------------------------------- /config/aws/lambda/build_document_conversion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | CODE_DIR=awsbin/document_conversion 6 | 7 | # Clear the code directory if it already exists 8 | [ -d "$CODE_DIR" ] && rm -Rf $CODE_DIR 9 | # Make the code directory if it does not exist 10 | [ -d "$CODE_DIR" ] || mkdir -p $CODE_DIR 11 | 12 | # Copy the code from the Django app 13 | cp -Lr ../../../documentcloud/documents/processing/document_conversion/* $CODE_DIR 2>/dev/null || : 14 | 15 | # Copy in LibreOffice binary compiled for AWS 16 | cp libreoffice/lo.tar.gz $CODE_DIR/libreoffice/lo.tar.gz 17 | 18 | # Set AWS requirements 19 | cp cloud-requirements.txt $CODE_DIR/cloud-requirements.txt 20 | 
-------------------------------------------------------------------------------- /config/aws/lambda/build_info_and_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | CODE_DIR=awsbin/info_and_image 6 | 7 | # Clear the code directory if it already exists 8 | [ -d "$CODE_DIR" ] && rm -Rf $CODE_DIR 9 | # Make the code directory if it does not exist 10 | [ -d "$CODE_DIR" ] || mkdir -p $CODE_DIR 11 | 12 | # Copy the code from the Django app 13 | cp -Lr ../../../documentcloud/documents/processing/info_and_image/* $CODE_DIR 2>/dev/null || : 14 | 15 | # Set AWS requirements 16 | cp info-and-image-requirements.txt $CODE_DIR/cloud-requirements.txt 17 | -------------------------------------------------------------------------------- /config/aws/lambda/build_ocr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | CODE_DIR="awsbin/ocr" 6 | OCR_DIRECTORY="../../../documentcloud/documents/processing/ocr" 7 | 8 | # Clear the code directory if it already exists 9 | [ -d "$CODE_DIR" ] && rm -Rf $CODE_DIR 10 | # Make the code directory if it does not exist (rsync will not create missing parent directories) 11 |
[ -d "$CODE_DIR" ] || mkdir -p $CODE_DIR 12 | 13 | # Copy the code from the Django app, excluding tesseract data 14 | rsync -aL "${OCR_DIRECTORY}/" $CODE_DIR --exclude tesseract 15 | 16 | # Sub in Amazon Linux compiled Tesseract libraries 17 | [ -f $CODE_DIR/tesseract ] && rm -r $CODE_DIR/tesseract 18 | cp -r ocr_libraries/ $CODE_DIR/tesseract 2>/dev/null || : 19 | mkdir $CODE_DIR/tesseract/tessdata 20 | 21 | # Set AWS requirements 22 | cp cloud-requirements.txt $CODE_DIR/cloud-requirements.txt 23 | -------------------------------------------------------------------------------- /config/aws/lambda/build_sidekick.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | CODE_DIR=awsbin/sidekick 6 | 7 | # Clear the code directory if it already exists 8 | [ -d "$CODE_DIR" ] && rm -Rf $CODE_DIR 9 | # Make the code directory if it does not exist 10 | [ -d "$CODE_DIR" ] || mkdir -p $CODE_DIR 11 | 12 | # Copy the code from the Django app 13 | cp -Lr ../../../documentcloud/documents/processing/sidekick/* $CODE_DIR 2>/dev/null || : 14 | 15 | # Set AWS requirements 16 | cp cloud-requirements.txt $CODE_DIR/cloud-requirements.txt 17 | -------------------------------------------------------------------------------- /config/aws/lambda/build_utils.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | CODE_DIR=awsbin/utils 6 | 7 | # Clear the code directory if it already exists 8 | [ -d "$CODE_DIR" ] && rm -Rf $CODE_DIR 9 | # Make the code directory if it does not exist 10 | [ -d "$CODE_DIR" ] || mkdir -p $CODE_DIR 11 | 12 | # Copy the code from the Django app 13 | cp -Lr ../../../documentcloud/documents/processing/utils/* $CODE_DIR 2>/dev/null || : 14 | 15 | # Set AWS requirements 16 | cp cloud-requirements.txt $CODE_DIR/cloud-requirements.txt 17 | --------------------------------------------------------------------------------
/config/aws/lambda/cloud-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.10.14 2 | smart-open==1.8.4 3 | -------------------------------------------------------------------------------- /config/aws/lambda/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Build and deploy the lambda function 6 | python3 replace_params.py $2 && \ 7 | ./build.sh && \ 8 | sam build && \ 9 | sam package --output-template-file packaged.yaml --s3-bucket cloud-functions-test && \ 10 | sam deploy --template-file packaged.yaml --s3-bucket cloud-functions-test --region us-east-1 --capabilities CAPABILITY_IAM --stack-name $1 --no-fail-on-empty-changeset 11 | -------------------------------------------------------------------------------- /config/aws/lambda/deploy_topics.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Deploy all the AWS topics needed. This should only have to be run once. 
4 | 5 | set -e 6 | 7 | # Create one OCR extraction SNS topic per deployment environment 8 | envs="staging prod" 9 | 10 | for env in $envs 11 | do 12 | aws sns create-topic --name "ocr-extraction-${env}" 13 | done 14 | -------------------------------------------------------------------------------- /config/aws/lambda/info-and-image-requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.21.21 2 | smart-open==1.8.4 3 | pdfplumber==0.5.28 4 | pikepdf==5.4.0 5 | -------------------------------------------------------------------------------- /config/aws/lambda/libreoffice/README.md: -------------------------------------------------------------------------------- 1 | `lo.tar.gz` downloaded from https://github.com/vladgolubev/serverless-libreoffice/releases/tag/v6.4.0.1 2 | -------------------------------------------------------------------------------- /config/aws/lambda/libreoffice/lo.tar.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:869f9d2b449f95d648a7b0b6360d34512b45b00ab365cfd2243854e4b7fa148b 3 | size 139374362 4 | -------------------------------------------------------------------------------- /config/aws/lambda/ocr_libraries/libgomp.so.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/libgomp.so.1 -------------------------------------------------------------------------------- /config/aws/lambda/ocr_libraries/libjbig.so.2.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/libjbig.so.2.0 --------------------------------------------------------------------------------
/config/aws/lambda/ocr_libraries/libjpeg.so.62: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/libjpeg.so.62 -------------------------------------------------------------------------------- /config/aws/lambda/ocr_libraries/liblept.so.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/liblept.so.5 -------------------------------------------------------------------------------- /config/aws/lambda/ocr_libraries/libpng15.so.15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/libpng15.so.15 -------------------------------------------------------------------------------- /config/aws/lambda/ocr_libraries/libtesseract.so.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/libtesseract.so.5 -------------------------------------------------------------------------------- /config/aws/lambda/ocr_libraries/libtiff.so.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/libtiff.so.5 -------------------------------------------------------------------------------- /config/aws/lambda/ocr_libraries/libwebp.so.4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/aws/lambda/ocr_libraries/libwebp.so.4 -------------------------------------------------------------------------------- /config/celery_app.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from celery import Celery 3 | from django.conf import settings 4 | 5 | # Standard Library 6 | import os 7 | import ssl 8 | 9 | # set the default Django settings module for the 'celery' program. 10 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local") 11 | 12 | if settings.CELERY_BROKER_URL.startswith("rediss:"): 13 | app = Celery( 14 | "documentcloud", 15 | broker_use_ssl={"ssl_cert_reqs": ssl.CERT_NONE}, 16 | redis_backend_use_ssl={"ssl_cert_reqs": ssl.CERT_NONE}, 17 | ) 18 | else: 19 | app = Celery("documentcloud") 20 | 21 | # Using a string here means the worker doesn't have to serialize 22 | # the configuration object to child processes. 23 | # - namespace='CELERY' means all celery-related configuration keys 24 | # should have a `CELERY_` prefix. 25 | app.config_from_object("django.conf:settings", namespace="CELERY") 26 | 27 | # Load task modules from all registered Django app configs. 
28 | app.autodiscover_tasks() 29 | 30 | if "scout_apm.django" in settings.INSTALLED_APPS: 31 | import scout_apm.celery 32 | 33 | scout_apm.celery.install(app) 34 | -------------------------------------------------------------------------------- /config/gunicorn.conf: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | 4 | def pre_fork(server, worker): 5 | f = '/tmp/app-initialized' 6 | open(f, 'w').close() 7 | 8 | bind = 'unix:///tmp/nginx.socket' 9 | workers = int(os.environ.get('GUNICORN_WORKERS', 3)) 10 | threads = int(os.environ.get('GUNICORN_THREADS', 1)) 11 | loglevel = os.environ.get('GUNICORN_LOGLEVEL', 'info') 12 | preload_app = os.environ.get('GUNICORN_PRELOAD', 'False').lower() == 'true' 13 | max_requests = 50 14 | max_requests_jitter = 5 15 | -------------------------------------------------------------------------------- /config/languages/choices.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Standard Library 3 | import re 4 | 5 | non_word_pattern = re.compile(r"\W+") 6 | 7 | file_ = open("./languages.tsv") 8 | next(file_) # skip headers 9 | 10 | for line in file_: 11 | iso, ocr_code, name = line.strip().split("\t", 2) 12 | attr_name = non_word_pattern.sub("_", name.lower()) 13 | print( 14 | ' {} = ChoiceItem("{}", _("{}"), ocr_code="{}")'.format( 15 | attr_name, iso, name, ocr_code 16 | ) 17 | ) 18 | -------------------------------------------------------------------------------- /config/settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/settings/__init__.py -------------------------------------------------------------------------------- /config/solr/lang/contractions_ca.txt: -------------------------------------------------------------------------------- 
1 | # Set of Catalan contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | d 4 | l 5 | m 6 | n 7 | s 8 | t 9 | -------------------------------------------------------------------------------- /config/solr/lang/contractions_fr.txt: -------------------------------------------------------------------------------- 1 | # Set of French contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | l 4 | m 5 | t 6 | qu 7 | n 8 | s 9 | j 10 | d 11 | c 12 | jusqu 13 | quoiqu 14 | lorsqu 15 | puisqu 16 | -------------------------------------------------------------------------------- /config/solr/lang/contractions_ga.txt: -------------------------------------------------------------------------------- 1 | # Set of Irish contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | d 4 | m 5 | b 6 | -------------------------------------------------------------------------------- /config/solr/lang/contractions_it.txt: -------------------------------------------------------------------------------- 1 | # Set of Italian contractions for ElisionFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | c 4 | l 5 | all 6 | dall 7 | dell 8 | nell 9 | sull 10 | coll 11 | pell 12 | gl 13 | agl 14 | dagl 15 | degl 16 | negl 17 | sugl 18 | un 19 | m 20 | t 21 | s 22 | v 23 | d 24 | -------------------------------------------------------------------------------- /config/solr/lang/hyphenations_ga.txt: -------------------------------------------------------------------------------- 1 | # Set of Irish hyphenations for StopFilter 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | h 4 | n 5 | t 6 | -------------------------------------------------------------------------------- /config/solr/lang/stemdict_nl.txt: 
-------------------------------------------------------------------------------- 1 | # Set of overrides for the dutch stemmer 2 | # TODO: load this as a resource from the analyzer and sync it in build.xml 3 | fiets fiets 4 | bromfiets bromfiets 5 | ei eier 6 | kind kinder 7 | -------------------------------------------------------------------------------- /config/solr/lang/stopwords_el.txt: -------------------------------------------------------------------------------- 1 | # Lucene Greek Stopwords list 2 | # Note: by default this file is used after GreekLowerCaseFilter, 3 | # so when modifying this file use 'σ' instead of 'ς' 4 | ο 5 | η 6 | το 7 | οι 8 | τα 9 | του 10 | τησ 11 | των 12 | τον 13 | την 14 | και 15 | κι 16 | κ 17 | ειμαι 18 | εισαι 19 | ειναι 20 | ειμαστε 21 | ειστε 22 | στο 23 | στον 24 | στη 25 | στην 26 | μα 27 | αλλα 28 | απο 29 | για 30 | προσ 31 | με 32 | σε 33 | ωσ 34 | παρα 35 | αντι 36 | κατα 37 | μετα 38 | θα 39 | να 40 | δε 41 | δεν 42 | μη 43 | μην 44 | επι 45 | ενω 46 | εαν 47 | αν 48 | τοτε 49 | που 50 | πωσ 51 | ποιοσ 52 | ποια 53 | ποιο 54 | ποιοι 55 | ποιεσ 56 | ποιων 57 | ποιουσ 58 | αυτοσ 59 | αυτη 60 | αυτο 61 | αυτοι 62 | αυτων 63 | αυτουσ 64 | αυτεσ 65 | αυτα 66 | εκεινοσ 67 | εκεινη 68 | εκεινο 69 | εκεινοι 70 | εκεινεσ 71 | εκεινα 72 | εκεινων 73 | εκεινουσ 74 | οπωσ 75 | ομωσ 76 | ισωσ 77 | οσο 78 | οτι 79 | -------------------------------------------------------------------------------- /config/solr/lang/stopwords_eu.txt: -------------------------------------------------------------------------------- 1 | # example set of basque stopwords 2 | al 3 | anitz 4 | arabera 5 | asko 6 | baina 7 | bat 8 | batean 9 | batek 10 | bati 11 | batzuei 12 | batzuek 13 | batzuetan 14 | batzuk 15 | bera 16 | beraiek 17 | berau 18 | berauek 19 | bere 20 | berori 21 | beroriek 22 | beste 23 | bezala 24 | da 25 | dago 26 | dira 27 | ditu 28 | du 29 | dute 30 | edo 31 | egin 32 | ere 33 | eta 34 | eurak 35 | ez 36 | gainera 37 | gu 38 | gutxi 39 
| guzti 40 | haiei 41 | haiek 42 | haietan 43 | hainbeste 44 | hala 45 | han 46 | handik 47 | hango 48 | hara 49 | hari 50 | hark 51 | hartan 52 | hau 53 | hauei 54 | hauek 55 | hauetan 56 | hemen 57 | hemendik 58 | hemengo 59 | hi 60 | hona 61 | honek 62 | honela 63 | honetan 64 | honi 65 | hor 66 | hori 67 | horiei 68 | horiek 69 | horietan 70 | horko 71 | horra 72 | horrek 73 | horrela 74 | horretan 75 | horri 76 | hortik 77 | hura 78 | izan 79 | ni 80 | noiz 81 | nola 82 | non 83 | nondik 84 | nongo 85 | nor 86 | nora 87 | ze 88 | zein 89 | zen 90 | zenbait 91 | zenbat 92 | zer 93 | zergatik 94 | ziren 95 | zituen 96 | zu 97 | zuek 98 | zuen 99 | zuten 100 | -------------------------------------------------------------------------------- /config/solr/lang/stopwords_ga.txt: -------------------------------------------------------------------------------- 1 | 2 | a 3 | ach 4 | ag 5 | agus 6 | an 7 | aon 8 | ar 9 | arna 10 | as 11 | b' 12 | ba 13 | beirt 14 | bhúr 15 | caoga 16 | ceathair 17 | ceathrar 18 | chomh 19 | chtó 20 | chuig 21 | chun 22 | cois 23 | céad 24 | cúig 25 | cúigear 26 | d' 27 | daichead 28 | dar 29 | de 30 | deich 31 | deichniúr 32 | den 33 | dhá 34 | do 35 | don 36 | dtí 37 | dá 38 | dár 39 | dó 40 | faoi 41 | faoin 42 | faoina 43 | faoinár 44 | fara 45 | fiche 46 | gach 47 | gan 48 | go 49 | gur 50 | haon 51 | hocht 52 | i 53 | iad 54 | idir 55 | in 56 | ina 57 | ins 58 | inár 59 | is 60 | le 61 | leis 62 | lena 63 | lenár 64 | m' 65 | mar 66 | mo 67 | mé 68 | na 69 | nach 70 | naoi 71 | naonúr 72 | ná 73 | ní 74 | níor 75 | nó 76 | nócha 77 | ocht 78 | ochtar 79 | os 80 | roimh 81 | sa 82 | seacht 83 | seachtar 84 | seachtó 85 | seasca 86 | seisear 87 | siad 88 | sibh 89 | sinn 90 | sna 91 | sé 92 | sí 93 | tar 94 | thar 95 | thú 96 | triúr 97 | trí 98 | trína 99 | trínár 100 | tríocha 101 | tú 102 | um 103 | ár 104 | é 105 | éis 106 | í 107 | ó 108 | ón 109 | óna 110 | ónár 111 | 
-------------------------------------------------------------------------------- /config/solr/lang/stopwords_hy.txt: -------------------------------------------------------------------------------- 1 | # example set of Armenian stopwords. 2 | այդ 3 | այլ 4 | այն 5 | այս 6 | դու 7 | դուք 8 | եմ 9 | են 10 | ենք 11 | ես 12 | եք 13 | է 14 | էի 15 | էին 16 | էինք 17 | էիր 18 | էիք 19 | էր 20 | ըստ 21 | թ 22 | ի 23 | ին 24 | իսկ 25 | իր 26 | կամ 27 | համար 28 | հետ 29 | հետո 30 | մենք 31 | մեջ 32 | մի 33 | ն 34 | նա 35 | նաև 36 | նրա 37 | նրանք 38 | որ 39 | որը 40 | որոնք 41 | որպես 42 | ու 43 | ում 44 | պիտի 45 | վրա 46 | և 47 | -------------------------------------------------------------------------------- /config/solr/lang/userdict_ja.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This is a sample user dictionary for Kuromoji (JapaneseTokenizer) 3 | # 4 | # Add entries to this file in order to override the statistical model in terms 5 | # of segmentation, readings and part-of-speech tags. Notice that entries do 6 | # not have weights since they are always used when found. This is by-design 7 | # in order to maximize ease-of-use. 8 | # 9 | # Entries are defined using the following CSV format: 10 | # text,token 1 ... token n,reading 1 ... reading n,part-of-speech tag 11 | # 12 | # Notice that a single half-width space separates tokens and readings, and 13 | # that the number of tokens and readings must match exactly. 14 | # 15 | # Also notice that multiple entries with the same text is undefined. 16 | # 17 | # Whitespace only lines are ignored. Comments are not allowed on entry lines.
18 | # 19 | 20 | # Custom segmentation for kanji compounds 21 | 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 22 | 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 23 | 24 | # Custom segmentation for compound katakana 25 | トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 26 | ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 27 | 28 | # Custom reading for former sumo wrestler 29 | 朝青龍,朝青龍,アサショウリュウ,カスタム人名 30 | -------------------------------------------------------------------------------- /config/solr/lib/solr-plugins-1.0.0-SNAPSHOT.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/config/solr/lib/solr-plugins-1.0.0-SNAPSHOT.jar -------------------------------------------------------------------------------- /config/solr/params.json: -------------------------------------------------------------------------------- 1 | {"params":{ 2 | "query":{ 3 | "defType":"edismax", 4 | "q.alt":"*:*", 5 | "rows":"10", 6 | "fl":"*,score", 7 | "":{"v":0} 8 | }, 9 | "facets":{ 10 | "facet":"on", 11 | "facet.mincount": "1", 12 | "":{"v":0} 13 | }, 14 | "velocity":{ 15 | "wt": "velocity", 16 | "v.template":"browse", 17 | "v.layout": "layout", 18 | "":{"v":0} 19 | } 20 | }} -------------------------------------------------------------------------------- /config/solr/protwords.txt: -------------------------------------------------------------------------------- 1 | # The ASF licenses this file to You under the Apache License, Version 2.0 2 | # (the "License"); you may not use this file except in compliance with 3 | # the License. You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | #----------------------------------------------------------------------- 14 | # Use a protected word file to protect against the stemmer reducing two 15 | # unrelated words to the same base word. 16 | 17 | # Some non-words that normally won't be encountered, 18 | # just to test that they won't be stemmed. 19 | dontstems 20 | zwhacky 21 | 22 | -------------------------------------------------------------------------------- /config/solr/stopwords.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | a 17 | an 18 | and 19 | are 20 | as 21 | at 22 | be 23 | but 24 | by 25 | for 26 | if 27 | in 28 | into 29 | is 30 | it 31 | no 32 | not 33 | of 34 | on 35 | or 36 | such 37 | that 38 | the 39 | their 40 | then 41 | there 42 | these 43 | they 44 | this 45 | to 46 | was 47 | will 48 | with 49 | -------------------------------------------------------------------------------- /docs/__init__.py: -------------------------------------------------------------------------------- 1 | # Included so that Django's startproject command runs against the docs directory 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. DocumentCloud documentation master file, created by 2 | sphinx-quickstart. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | DocumentCloud Project Documentation 7 | ==================================================================== 8 | 9 | Table of Contents: 10 | 11 | ..
toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | Indices & Tables 16 | ================ 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /documentcloud/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "2" 2 | __version_info__ = tuple( 3 | int(num) if num.isdigit() else num 4 | for num in __version__.replace("-", ".", 1).split(".") 5 | ) 6 | -------------------------------------------------------------------------------- /documentcloud/addons/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/addons/__init__.py -------------------------------------------------------------------------------- /documentcloud/addons/apps.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.apps import AppConfig 3 | 4 | 5 | class AddOnsConfig(AppConfig): 6 | default_auto_field = "django.db.models.BigAutoField" 7 | name = "documentcloud.addons" 8 | 9 | def ready(self): 10 | # pylint: disable=unused-import 11 | # load signals 12 | # DocumentCloud 13 | import documentcloud.addons.signals 14 | -------------------------------------------------------------------------------- /documentcloud/addons/choices.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.utils.translation import gettext_lazy as _ 3 | 4 | # Third Party 5 | from djchoices import ChoiceItem, DjangoChoices 6 | 7 | 8 | class Event(DjangoChoices): 9 | # `api` specifies if this attribute should be accessible via the API 10 | disabled = ChoiceItem(0, _("Disabled"), api=True) 11 | hourly = ChoiceItem(1, _("Hourly"), api=True) 12 | daily = ChoiceItem(2, _("Daily"), api=True) 13 | weekly = 
ChoiceItem(3, _("Weekly"), api=True) 14 | upload = ChoiceItem(4, _("Upload"), api=True) 15 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0002_addonrun_dismissed.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-02-23 19:00 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0001_initial'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='addonrun', 15 | name='dismissed', 16 | field=models.BooleanField(default=False, help_text='If this run has been dismissed from view and should no longer be shown to the user', verbose_name='dismissed'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0003_addon_error.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-03-14 20:21 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0002_addonrun_dismissed'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='addon', 15 | name='error', 16 | field=models.BooleanField(default=False, help_text='There was an error with the configuration file', verbose_name='error'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0004_addon_access.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-03-30 15:03 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0003_addon_error'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 
def make_add_ons_public(apps, schema_editor):
    """Data migration step: set the access level of every add-on to public.

    `apps` is the historical app registry handed to `RunPython` callables;
    `schema_editor` is accepted to satisfy that signature but is not used.
    """
    # 0 is public
    public_access = 0
    addon_model = apps.get_model("addons", "AddOn")
    addon_model.objects.update(access=public_access)
field=models.CharField(help_text="The add-on's GitHub repository", max_length=140, unique=True, verbose_name='repository'), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0008_rename_github_token_addon__github_token.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-07 13:12 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0007_auto_20220407_1311'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RenameField( 14 | model_name='addon', 15 | old_name='github_token', 16 | new_name='_github_token', 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0009_alter_addon_parameters.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-07 13:30 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0008_rename_github_token_addon__github_token'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='addon', 15 | name='parameters', 16 | field=models.JSONField(default={}, help_text='The parameters for this add-on', verbose_name='parameters'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0012_alter_addon_organization.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-12 17:53 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | 
migrations.swappable_dependency(settings.SQUARELET_ORGANIZATION_MODEL), 12 | ('addons', '0011_auto_20220411_2039'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name='addon', 18 | name='organization', 19 | field=models.ForeignKey(help_text='The organization this add-on was created within', null=True, on_delete=django.db.models.deletion.PROTECT, related_name='addons', to=settings.SQUARELET_ORGANIZATION_MODEL, verbose_name='organization'), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0013_githubinstallation_removed.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-12 18:37 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0012_alter_addon_organization'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='githubinstallation', 15 | name='removed', 16 | field=models.BooleanField(default=False, help_text='This installation was removed', verbose_name='removed'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0014_alter_addon__user.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-12 18:38 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | migrations.swappable_dependency(settings.AUTH_USER_MODEL), 12 | ('addons', '0013_githubinstallation_removed'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name='addon', 18 | name='_user', 19 | field=models.ForeignKey(db_column='user', help_text='The user who created this add-on', null=True, 
on_delete=django.db.models.deletion.PROTECT, related_name='addons', to=settings.AUTH_USER_MODEL, verbose_name='user'), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0015_auto_20220419_1824.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-19 18:24 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('addons', '0014_alter_addon__user'), 11 | ] 12 | 13 | operations = [ 14 | migrations.RemoveField( 15 | model_name='addon', 16 | name='_user', 17 | ), 18 | migrations.AlterField( 19 | model_name='addon', 20 | name='github_account', 21 | field=models.ForeignKey(help_text='The GitHub account that added this add-on', on_delete=django.db.models.deletion.PROTECT, related_name='addons', to='addons.githubaccount', verbose_name='github account'), 22 | ), 23 | migrations.AlterField( 24 | model_name='addon', 25 | name='github_installation', 26 | field=models.ForeignKey(help_text='The GitHub installation that contains this add-on', on_delete=django.db.models.deletion.PROTECT, related_name='addons', to='addons.githubinstallation', verbose_name='github installation'), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0016_remove_addon__github_token.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-19 18:29 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0015_auto_20220419_1824'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name='addon', 15 | name='_github_token', 16 | ), 17 | ] 18 | 
-------------------------------------------------------------------------------- /documentcloud/addons/migrations/0017_alter_addonrun_run_id.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-20 13:58 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0016_remove_addon__github_token'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='addonrun', 15 | name='run_id', 16 | field=models.BigIntegerField(help_text='The GitHub Action run_id for this run', null=True, unique=True, verbose_name='run_id'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0018_alter_addon_organization.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-04-20 14:18 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | migrations.swappable_dependency(settings.SQUARELET_ORGANIZATION_MODEL), 12 | ('addons', '0017_alter_addonrun_run_id'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name='addon', 18 | name='organization', 19 | field=models.ForeignKey(blank=True, help_text='The organization this add-on was created within', null=True, on_delete=django.db.models.deletion.PROTECT, related_name='addons', to=settings.SQUARELET_ORGANIZATION_MODEL, verbose_name='organization'), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0020_alter_addonevent_event.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-05-05 19:49 2 | 3 | from 
django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0019_auto_20220505_1845'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='addonevent', 15 | name='event', 16 | field=models.IntegerField(choices=[(0, 'Disabled'), (1, 'Hourly'), (2, 'Daily'), (3, 'Weekly'), (4, 'Upload')], help_text='The event to trigger the add-on run', verbose_name='event'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0021_addonevent_scratch.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-05-09 14:36 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0020_alter_addonevent_event'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='addonevent', 15 | name='scratch', 16 | field=models.JSONField(default=dict, help_text='Field to store data for add-on between events', verbose_name='scratch'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0022_auto_20221019_1746.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-10-19 17:46 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0021_addonevent_scratch'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='addonrun', 15 | name='comment', 16 | field=models.CharField(default='', help_text='A comment from the user on how this run went', max_length=255, verbose_name='comment'), 17 | ), 18 | migrations.AddField( 19 | model_name='addonrun', 20 | name='rating', 21 | field=models.SmallIntegerField(choices=[(-1, 
'Thumbs Down'), (0, ''), (1, 'Thumbs Up')], default=0, help_text='A rating from the user on how this run went', verbose_name='rating'), 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0023_auto_20230321_1452.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-03-21 14:52 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('addons', '0022_auto_20221019_1746'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='addon', 15 | name='default', 16 | field=models.BooleanField(default=False, help_text='This add-on is enabled by default', verbose_name='default'), 17 | ), 18 | migrations.AddField( 19 | model_name='addon', 20 | name='featured', 21 | field=models.BooleanField(default=False, help_text='This add-on is featured in the browse add-on dialog', verbose_name='featured'), 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /documentcloud/addons/migrations/0024_alter_addonrun_created_at.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.2 on 2023-08-18 19:23 2 | 3 | from django.db import migrations 4 | import django.utils.timezone 5 | import documentcloud.core.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("addons", "0023_auto_20230321_1452"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="addonrun", 17 | name="created_at", 18 | field=documentcloud.core.fields.AutoCreatedField( 19 | db_index=True, 20 | default=django.utils.timezone.now, 21 | editable=False, 22 | help_text="Timestamp of when the add-on was ran", 23 | verbose_name="created at", 24 | ), 25 | ), 26 | ] 27 | 
@receiver(
    user_update, dispatch_uid="documentcloud.addons.signals.update_github_account"
)
def update_github_account(user, data, **_kwargs):
    """Store GitHub account details whenever a user update is received.

    Every entry of `data["social_accounts"]` whose provider is `github_app`
    is written to a `GitHubAccount` row keyed by the entry's `uid`, recording
    the user, the first token (or an empty string when there are none) and
    the GitHub login name (or an empty string when it is absent).
    """
    logger.info("Update GitHub Account information")
    for account in data.get("social_accounts", []):
        # Only GitHub app accounts are relevant here
        if account["provider"] == "github_app":
            tokens = account["tokens"]
            token = tokens[0]["token"] if tokens else ""
            login = account["extra_data"].get("login", "")
            GitHubAccount.objects.update_or_create(
                uid=account["uid"],
                defaults={"user": user, "token": token, "name": login},
            )
def processing_auth(func):
    """Authenticate a function by ensuring the processing token matches.

    The decorated function must receive the event as its first positional
    argument, and the event must contain a "headers" mapping.  The call is
    only forwarded to `func` when the `Authorization` header equals
    `processing-token <PROCESSING_TOKEN>`.

    Raises:
        Exception: with the message "Authentication Failed." when the header
            is missing, is not a string, or does not match.
    """
    # Imported here so the decorator does not depend on additional
    # module-level imports
    import hmac
    from functools import wraps

    @wraps(func)
    def authenticate_token(*args, **kwargs):
        event = args[0]
        headers = event["headers"]

        expected = f"processing-token {PROCESSING_TOKEN}"
        provided = headers.get(AUTHORIZATION)
        if not isinstance(provided, str):
            # A missing or malformed header can never match the expected value
            provided = ""

        # Compare in constant time so response timing does not leak how much
        # of the token was guessed correctly.  Encoding to bytes lets
        # compare_digest accept non-ASCII header values.
        if not hmac.compare_digest(
            provided.encode("utf8"), expected.encode("utf8")
        ):
            raise Exception("Authentication Failed.")

        # If all passes, auth succeeded
        return func(*args, **kwargs)

    return authenticate_token
def processing_auth(_func):
    """Placeholder for processing-token authentication on GCP.

    The returned wrapper never calls the decorated function: every
    invocation raises `NotImplementedError`.
    """

    def authenticate_token(*args, **kwargs):
        # Token authentication has not been written for this environment yet
        message = "Need to implement on GCP"
        raise NotImplementedError(message)

    return authenticate_token
def get_pubsub_data(data):
    """Decode the payload of a pubsub request.

    `data["data"]` holds base64-encoded, UTF-8 JSON text; the parsed JSON
    value is returned.
    """
    encoded_payload = data["data"]
    json_bytes = base64.b64decode(encoded_payload)
    json_text = json_bytes.decode("utf-8")
    return json.loads(json_text)
class MinIOStorage(AwsStorage):
    """`AwsStorage` variant configured for a MinIO server."""

    def __init__(self, resource_kwargs=None, minio=True):
        """Initialise the storage.

        When `resource_kwargs` is not supplied, the connection settings are
        read from the `MINIO_URL`, `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY`
        environment variables.  Both arguments are passed on to
        `AwsStorage.__init__`.
        """
        if resource_kwargs is None:
            # Fall back to the MinIO settings from the environment
            resource_kwargs = dict(
                endpoint_url=env.str("MINIO_URL"),
                aws_access_key_id=env.str("MINIO_ACCESS_KEY"),
                aws_secret_access_key=env.str("MINIO_SECRET_KEY"),
                config=Config(signature_version="s3v4"),
                region_name="us-east-1",
            )
        super().__init__(resource_kwargs, minio)
def graft_page(positions, pdf_page):
    """Graft words with position information onto a PDF page

    Args:
        positions: iterable of dicts with the keys "text", "x1", "x2" and
            "y2".  The coordinates are scaled by the page width/height
            before use, so they are expected as fractions of the page size.
        pdf_page: the page to write to; it must provide `rect.width`,
            `rect.height` and `insert_text`.

    Each word is inserted at (x1, y2) with a font size chosen so that its
    rendered width approximately fills the span from x1 to x2, and with a
    fill opacity of 0.  Words whose measured text length is zero (for
    example an empty string) are skipped: there is nothing to draw, and the
    font size computation would otherwise divide by zero.
    """

    default_fontsize = 15
    # The page dimensions do not change while text is being inserted
    page_width = pdf_page.rect.width
    page_height = pdf_page.rect.height

    for position in positions:
        word_text = position["text"]
        # Width of the word when rendered at the reference font size
        text_length = pymupdf.get_text_length(
            word_text,
            fontsize=default_fontsize,
        )
        if not text_length:
            continue
        width = (position["x2"] - position["x1"]) * page_width
        # Scale the reference font size by the ratio of target to measured width
        fontsize_optimal = math.floor((width / text_length) * default_fontsize)
        pdf_page.insert_text(
            point=pymupdf.Point(
                position["x1"] * page_width,
                position["y2"] * page_height,
            ),
            text=word_text,
            fontsize=fontsize_optimal,
            fill_opacity=0,
        )
class AutoCreatedField(models.DateTimeField):
    """
    A DateTimeField that automatically populates itself at
    object creation.
    Unless the caller overrides them, sets editable=False and
    default=django.utils.timezone.now.
    """

    def __init__(self, *args, **kwargs):
        # Caller-supplied options take precedence; the two defaults only
        # fill in keys that were not passed explicitly
        options = {"editable": False, "default": now, **kwargs}
        super().__init__(*args, **options)
def skip_if_not_obj(func):
    """Decorator for predicates taking `(user, obj)`.

    When `obj` is None the wrapped predicate is not called and None is
    returned; otherwise the predicate's own result is returned.
    """

    @wraps(func)
    def inner(user, obj):
        return None if obj is None else func(user, obj)

    return inner
"user") or request.user.is_anonymous 27 | return request.method.lower() in ["get", "options"] and anonymous 28 | -------------------------------------------------------------------------------- /documentcloud/core/templatetags/markdown.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.template import Library 3 | from django.template.defaultfilters import stringfilter 4 | from django.utils.safestring import mark_safe 5 | 6 | # Third Party 7 | import markdown 8 | 9 | register = Library() 10 | 11 | 12 | @register.filter(name="markdown") 13 | @stringfilter 14 | def markdown_filter(text): 15 | """Take the provided markdown-formatted text and convert it to HTML.""" 16 | extensions = [ 17 | # for smart quotes 18 | "markdown.extensions.smarty", 19 | # for adding IDs to all headings for intra document linking 20 | "markdown.extensions.toc", 21 | # for table support 22 | "markdown.extensions.tables", 23 | # for better code block support 24 | "markdown.extensions.fenced_code", 25 | ] 26 | return mark_safe( 27 | markdown.markdown(text, extensions=extensions, output_format="html") 28 | ) 29 | -------------------------------------------------------------------------------- /documentcloud/core/versioning.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from rest_framework import versioning 3 | from rest_framework.utils.urls import remove_query_param, replace_query_param 4 | 5 | 6 | class QueryParameterVersioning(versioning.QueryParameterVersioning): 7 | # pylint: disable=redefined-builtin 8 | def reverse( 9 | self, viewname, args=None, kwargs=None, request=None, format=None, **extra 10 | ): 11 | url = super().reverse(viewname, args, kwargs, request, format, **extra) 12 | if request.version == self.default_version: 13 | return remove_query_param(url, self.version_param) 14 | if request.version is not None: 15 | return replace_query_param(url, self.version_param, 
class ChoiceField(serializers.ChoiceField):
    """Serializer choice field that speaks in labels instead of stored values.

    Only choices whose ``api`` attribute is truthy are offered. Incoming data
    is matched against the choice labels and converted to the stored value;
    outgoing values are converted back to their label.
    """

    def __init__(self, choices, **kwargs):
        # label -> stored value, restricted to the choices exposed via the API
        label_to_value = {
            label: choice.value
            for label, choice in choices._fields.items()
            if choice.api
        }
        self.choice_map = label_to_value
        value_label_pairs = [(value, label) for label, value in label_to_value.items()]
        super().__init__(value_label_pairs, **kwargs)

    def to_representation(self, value):
        """Return the label for ``value``; blank, None and unknown values pass through."""
        return value if value in ("", None) else self.choices.get(value, value)

    # pylint: disable=inconsistent-return-statements
    def to_internal_value(self, data):
        """Translate a submitted label into its stored value.

        An empty string is accepted as-is when ``allow_blank`` is set; any
        label missing from ``choice_map`` is reported via ``invalid_choice``.
        """
        if data == "" and self.allow_blank:
            return ""

        label = str(data)
        if label in self.choice_map:
            return self.choice_map[label]
        self.fail("invalid_choice", input=data)
model_name='document', 19 | name='page_spec', 20 | field=models.TextField(blank=True, help_text='A cached and compressed specification of each pages dimensions', verbose_name='page specification'), 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0007_documenterror.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2019-10-24 18:15 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | import django.utils.timezone 6 | import documentcloud.core.fields 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('documents', '0006_auto_20191021_1518'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='DocumentError', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('created_at', documentcloud.core.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, help_text='Timestamp of when the error occured', verbose_name='created at')), 21 | ('message', models.TextField(help_text='The error message', verbose_name='message')), 22 | ('document', models.ForeignKey(help_text='The document this page belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='errors', to='documents.Document', verbose_name='document')), 23 | ], 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0008_auto_20191106_2010.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2019-11-06 20:10 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0007_documenterror'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 
| model_name='document', 15 | name='status', 16 | field=models.IntegerField(choices=[(0, 'Success'), (1, 'Readable'), (2, 'Pending'), (3, 'Error'), (4, 'No file')], default=4, help_text='The processing status of this document', verbose_name='status'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0008_auto_20191107_2031.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2019-11-07 20:31 2 | 3 | import django.contrib.postgres.fields.jsonb 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('documents', '0007_documenterror'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddField( 15 | model_name='document', 16 | name='data', 17 | field=django.contrib.postgres.fields.jsonb.JSONField(default=dict), 18 | ), 19 | migrations.AlterField( 20 | model_name='document', 21 | name='status', 22 | field=models.IntegerField(choices=[(0, 'Success'), (1, 'Readable'), (2, 'Pending'), (3, 'Error'), (4, 'No file')], default=4, help_text='The processing status of this document', verbose_name='status'), 23 | ), 24 | ] 25 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0009_merge_20191112_1553.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2019-11-12 15:53 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0008_auto_20191106_2010'), 10 | ('documents', '0008_auto_20191107_2031'), 11 | ] 12 | 13 | operations = [ 14 | ] 15 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0010_auto_20191211_2057.py: 
-------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2019-12-11 20:57 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0009_merge_20191112_1553'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='document', 15 | name='solr_dirty', 16 | field=models.BooleanField(default=False, help_text='Tracks if the Solr Index is out of date with the SQL model', verbose_name='solr dirty'), 17 | ), 18 | migrations.AlterField( 19 | model_name='document', 20 | name='status', 21 | field=models.IntegerField(choices=[(0, 'Success'), (1, 'Readable'), (2, 'Pending'), (3, 'Error'), (4, 'No file'), (5, 'Deleted')], default=4, help_text='The processing status of this document', verbose_name='status'), 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0012_auto_20200205_1535.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-05 15:35 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0011_auto_20200128_1418'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='note', 15 | name='content', 16 | field=models.TextField(blank=True, help_text='The contents of the note', verbose_name='content'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0013_auto_20200210_1849.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-10 18:49 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', 
'0012_auto_20200205_1535'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='page', 15 | name='page_number', 16 | field=models.PositiveIntegerField(db_index=True, help_text='The page number', verbose_name='page number'), 17 | ), 18 | migrations.AlterField( 19 | model_name='section', 20 | name='page_number', 21 | field=models.PositiveIntegerField(help_text='Which page this section appears on', verbose_name='page number'), 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0014_auto_20200210_1900.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-10 19:00 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0013_auto_20200210_1849'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterUniqueTogether( 14 | name='section', 15 | unique_together={('document', 'page_number')}, 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0015_auto_20200211_1651.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-11 16:51 2 | 3 | # Django 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("documents", "0014_auto_20200210_1900")] 10 | 11 | operations = [ 12 | migrations.RunSQL( 13 | "ALTER SEQUENCE documents_document_id_seq RESTART WITH 20000000" 14 | ), 15 | migrations.RunSQL("ALTER SEQUENCE documents_note_id_seq RESTART WITH 2000000"), 16 | ] 17 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0015_auto_20200213_1650.py: -------------------------------------------------------------------------------- 1 | # Generated by 
Django 2.2.5 on 2020-02-13 16:50 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0014_auto_20200210_1900'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterModelOptions( 14 | name='document', 15 | options={'ordering': ('created_at',), 'permissions': (('share_document', 'Can share edit access to the document through a project'), ('process_document', 'Document processor - can set `page_count`, `page_spec`, and `status` through the API'))}, 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0016_merge_20200213_2145.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-13 21:45 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0015_auto_20200211_1651'), 10 | ('documents', '0015_auto_20200213_1650'), 11 | ] 12 | 13 | operations = [ 14 | ] 15 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0018_auto_20200311_1936.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-03-11 19:36 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('documents', '0017_auto_20200226_1902'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='note', 16 | name='document', 17 | field=models.ForeignKey(db_constraint=False, help_text='The document this note belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='notes', to='documents.Document', verbose_name='document'), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- 
/documentcloud/documents/migrations/0018_auto_20200405_1736.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-04-05 17:36 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0017_auto_20200226_1902'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RenameField( 14 | model_name='document', 15 | old_name='remote_url', 16 | new_name='published_url', 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0019_auto_20200405_1736.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-04-05 17:36 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0018_auto_20200405_1736'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='document', 15 | name='published_url', 16 | field=models.URLField(blank=True, help_text='URL where this article is embedded', max_length=1024, verbose_name='published url'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0020_merge_20200407_1320.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-04-07 13:20 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0019_auto_20200405_1736'), 10 | ('documents', '0018_auto_20200311_1936'), 11 | ] 12 | 13 | operations = [ 14 | ] 15 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0021_auto_20200429_0121.py: 
def fix_data_keys(apps, schema_editor):
    """Rewrite every document's data keys to use only ``A-Za-z0-9_-``.

    Each key is transliterated with ``unidecode`` and every remaining
    character outside the allowed set is replaced by ``-``. Keys that
    normalise to the same string collapse into one entry, with the value
    of the later key kept.
    """
    document_model = apps.get_model("documents", "Document")
    # Documents whose data is empty have no keys to rewrite.
    for document in document_model.objects.exclude(data={}):
        cleaned = {}
        for key, value in document.data.items():
            cleaned[re.sub(r"[^A-Za-z0-9_-]", "-", unidecode(key))] = value
        document.data = cleaned
        document.save()
to='documents.Document', verbose_name='document'), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0026_auto_20200805_2051.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-08-05 20:51 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('documents', '0025_auto_20200805_2031'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='entitydate', 16 | name='document', 17 | field=models.ForeignKey(db_constraint=False, help_text='The document this entity belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='dates', to='documents.Document', verbose_name='document'), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0027_auto_20200807_1659.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-08-07 16:59 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('documents', '0026_auto_20200805_2051'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='section', 16 | name='document', 17 | field=models.ForeignKey(db_constraint=False, help_text='The document this section belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='sections', to='documents.Document', verbose_name='document'), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0028_deleteddocument.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 
on 2020-09-30 19:02 2 | 3 | from django.db import migrations, models 4 | import django.utils.timezone 5 | import documentcloud.core.fields 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('documents', '0027_auto_20200807_1659'), 12 | ] 13 | 14 | operations = [ 15 | migrations.CreateModel( 16 | name='DeletedDocument', 17 | fields=[ 18 | ('id', models.IntegerField(help_text='The ID of the document that was deleted', primary_key=True, serialize=False, verbose_name='id')), 19 | ('created_at', documentcloud.core.fields.AutoCreatedField(db_index=True, default=django.utils.timezone.now, editable=False, help_text='Timestamp of when the document was deleted', verbose_name='created at')), 20 | ], 21 | options={ 22 | 'ordering': ('created_at',), 23 | }, 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0029_merge_20201001_1908.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-10-01 19:08 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0028_deleteddocument'), 10 | ('documents', '0028_auto_20200925_2001'), 11 | ] 12 | 13 | operations = [ 14 | ] 15 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0030_auto_20201211_1452.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-11 14:52 2 | 3 | # Django 4 | import django.db.models.deletion 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [("documents", "0029_merge_20201001_1908")] 11 | 12 | operations = [migrations.RenameModel("Entity", "LegacyEntity")] 13 | -------------------------------------------------------------------------------- 
/documentcloud/documents/migrations/0032_auto_20201222_1942.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-22 19:42 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0031_auto_20201215_1859'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='entity', 15 | name='mid', 16 | field=models.CharField(blank=True, help_text='The Google Knowledge Graph ID for this entity', max_length=13, verbose_name='knowledge graph id'), 17 | ), 18 | migrations.AddField( 19 | model_name='entity', 20 | name='wikipedia_url', 21 | field=models.URLField(blank=True, help_text='The URL to the Wikipedia entry for this entity', verbose_name='wikipedia url'), 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0033_auto_20201223_0115.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-23 01:15 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0032_auto_20201222_1942'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RenameModel( 14 | old_name='EntityOccurence', 15 | new_name='EntityOccurrence', 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0034_auto_20201223_0116.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-23 01:16 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0033_auto_20201223_0115'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RenameField( 14 | model_name='entityoccurrence', 15 | 
old_name='occurences', 16 | new_name='occurrences', 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0035_auto_20201223_0116.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-23 01:16 2 | 3 | import django.contrib.postgres.fields.jsonb 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('documents', '0034_auto_20201223_0116'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='entityoccurrence', 16 | name='occurrences', 17 | field=django.contrib.postgres.fields.jsonb.JSONField(default=dict, help_text='Extra data asociated with this entity', verbose_name='occurrences'), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0036_auto_20201223_1830.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-23 18:30 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0035_auto_20201223_0116'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddConstraint( 14 | model_name='entity', 15 | constraint=models.UniqueConstraint(condition=models.Q(_negated=True, mid=''), fields=('mid',), name='unique_mid'), 16 | ), 17 | migrations.AddConstraint( 18 | model_name='entity', 19 | constraint=models.UniqueConstraint(condition=models.Q(mid=''), fields=('name', 'kind'), name='unique_name_kind'), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0037_entity_description.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-23 19:50 2 | 
3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0036_auto_20201223_1830'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='entity', 15 | name='description', 16 | field=models.TextField(blank=True, help_text='Detailed description from Google Knowledge Graph', verbose_name='description'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0038_auto_20201223_2111.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-12-23 21:11 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0037_entity_description'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterUniqueTogether( 14 | name='entityoccurrence', 15 | unique_together={('document', 'entity')}, 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0039_auto_20210113_1550.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2021-01-13 15:50 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0038_auto_20201223_2111'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='document', 15 | name='publish_at', 16 | field=models.DateTimeField(blank=True, db_index=True, help_text='Scheduled time to make document public', null=True, verbose_name='publish at'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0040_auto_20210216_2032.py: -------------------------------------------------------------------------------- 1 | # 
Generated by Django 2.2.5 on 2021-02-16 20:32 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('documents', '0039_auto_20210113_1550'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='note', 16 | name='document', 17 | field=models.ForeignKey(help_text='The document this note belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='notes', to='documents.Document', verbose_name='document'), 18 | ), 19 | migrations.AlterField( 20 | model_name='section', 21 | name='document', 22 | field=models.ForeignKey(help_text='The document this section belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='sections', to='documents.Document', verbose_name='document'), 23 | ), 24 | ] 25 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0041_auto_20210316_1424.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2021-03-16 14:24 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0040_auto_20210216_2032'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='document', 15 | name='solr_dirty', 16 | field=models.BooleanField(default=True, help_text='Tracks if the Solr Index is out of date with the SQL model', verbose_name='solr dirty'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0043_document_cache_dirty.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2021-03-17 19:22 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | 
('documents', '0042_auto_20210317_0147'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='document', 15 | name='cache_dirty', 16 | field=models.BooleanField(default=False, help_text='A destructive operation is taking place and the CDN cache for this document should be invalidated when it is done processing', verbose_name='cache dirty'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0044_auto_20210422_2056.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2021-04-22 20:56 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0043_document_cache_dirty'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterModelOptions( 14 | name='document', 15 | options={'ordering': ('created_at',), 'permissions': (('share_document', 'Can share edit access to the document through a project'), ('process_document', 'Document processor - can set `page_count`, `page_spec`, and `status` through the API'), ('change_ownership_document', 'Can change the user or organization which owns the document'))}, 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0045_auto_20211102_1709.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2021-11-02 17:09 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0044_auto_20210422_2056'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='document', 15 | name='data', 16 | field=models.JSONField(default=dict), 17 | ), 18 | migrations.AlterField( 19 | model_name='entity', 20 | name='metadata', 21 | 
field=models.JSONField(default=dict, help_text='Extra data asociated with this entity', verbose_name='metadata'), 22 | ), 23 | migrations.AlterField( 24 | model_name='entityoccurrence', 25 | name='occurrences', 26 | field=models.JSONField(default=dict, help_text='Extra data asociated with this entity', verbose_name='occurrences'), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0048_note_solr_dirty.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-05-20 17:45 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0047_alter_document_original_extension'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='note', 15 | name='solr_dirty', 16 | field=models.BooleanField(default=True, help_text='Tracks if the Solr Index is out of date with the SQL model', verbose_name='solr dirty'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0049_document_delayed_index.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-09-01 19:47 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0048_note_solr_dirty'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='document', 15 | name='delayed_index', 16 | field=models.BooleanField(default=False, help_text='Do not index the document in Solr immediately - Wait for it to be batched indexed by the dirty indexer. 
Useful when uploading in bulk to not overwhelm the Celery queue.', verbose_name='delayed index'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0050_document_noindex.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2022-10-13 21:16 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("documents", "0049_document_delayed_index"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="document", 15 | name="noindex", 16 | field=models.BooleanField( 17 | default=False, 18 | help_text="Ask search engines and DocumentCloud search to not index this document", 19 | verbose_name="noindex", 20 | ), 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0051_auto_20230214_1451.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-14 14:51 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('documents', '0050_document_noindex'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='entityoccurrence', 16 | name='document', 17 | field=models.ForeignKey(help_text='The document this entity belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='legacy_entities_2', to='documents.document', verbose_name='document'), 18 | ), 19 | migrations.AlterField( 20 | model_name='page', 21 | name='document', 22 | field=models.ForeignKey(help_text='The document this page belongs to', on_delete=django.db.models.deletion.CASCADE, related_name='+', to='documents.document', verbose_name='document'), 23 | ), 24 | ] 25 | 
-------------------------------------------------------------------------------- /documentcloud/documents/migrations/0052_merge_0051_auto_20230214_1451_0051_auto_20230303_1923.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-05-16 14:53 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0051_auto_20230214_1451'), 10 | ('documents', '0051_auto_20230303_1923'), 11 | ] 12 | 13 | operations = [ 14 | ] 15 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/0053_auto_20230622_1623.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-06-22 16:23 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0052_merge_0051_auto_20230214_1451_0051_auto_20230303_1923'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterModelOptions( 14 | name='revision', 15 | options={'ordering': ('version',)}, 16 | ), 17 | migrations.AddField( 18 | model_name='document', 19 | name='admin_noindex', 20 | field=models.BooleanField(default=False, help_text='Ask search engines and DocumentCloud search to not index this document (Admin override)', verbose_name='admin noindex'), 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /documentcloud/documents/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/migrations/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | # DocumentCloud 2 | from documentcloud.documents.models.document import * 3 | from documentcloud.documents.models.entity import * 4 | from documentcloud.documents.models.note import * 5 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/processing/document_conversion/cloud-requirements.txt: -------------------------------------------------------------------------------- 1 | gcsfs==0.2.2 2 | google-cloud-pubsub==0.42.1 3 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/document_conversion/common: -------------------------------------------------------------------------------- 1 | ../../../common/ -------------------------------------------------------------------------------- /documentcloud/documents/processing/document_conversion/libreoffice/README.md: -------------------------------------------------------------------------------- 1 | `lo.tar.gz` is generated by following the build script at ./install.sh, adapted from https://github.com/vladgolubev/serverless-libreoffice/blob/master/compile.sh 2 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/document_conversion/libreoffice/lo.tar.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a75f59bbb593eab0907af0a34a5e8e68c2e111a513fbe4557a47e4bd69cffc1c 3 | size 119627562 4 | 
-------------------------------------------------------------------------------- /documentcloud/documents/processing/document_conversion/requirements.txt: -------------------------------------------------------------------------------- 1 | -r cloud-requirements.txt 2 | 3 | django-environ==0.4.5 4 | furl==2.1.0 5 | pebble==4.5.0 6 | redis==3.4.1 7 | requests==2.22.0 8 | sentry-sdk==0.14.0 9 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/info_and_image/README.md: -------------------------------------------------------------------------------- 1 | # Updating functions 2 | 3 | ## process_pdf 4 | 5 | A pipeline that kicks off when a PDF file is first uploaded 6 | 7 | ```bash 8 | gcloud functions deploy process_pdf --runtime python37 --trigger-resource documentcloud-upload --trigger-event google.storage.object.finalize --memory=2048MB --timeout 540 --retry 9 | ``` 10 | 11 | ## extract_image 12 | 13 | A pipeline to go from a PDF file and page number to a page image (.gif) 14 | 15 | ```bash 16 | # gcloud functions deploy extract_image --runtime python37 --trigger-http 17 | gcloud functions deploy extract_image --runtime python37 --trigger-topic page-image-ready-for-extraction --memory=2048MB --timeout 540 --retry 18 | ``` 19 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/info_and_image/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/info_and_image/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/processing/info_and_image/cloud-requirements.txt: -------------------------------------------------------------------------------- 1 | gcsfs==0.2.2 2 | google-cloud-pubsub==0.42.1 3 | 
-------------------------------------------------------------------------------- /documentcloud/documents/processing/info_and_image/common: -------------------------------------------------------------------------------- 1 | ../../../common/ -------------------------------------------------------------------------------- /documentcloud/documents/processing/info_and_image/libpdfium.so2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/info_and_image/libpdfium.so2 -------------------------------------------------------------------------------- /documentcloud/documents/processing/info_and_image/requirements.txt: -------------------------------------------------------------------------------- 1 | -r cloud-requirements.txt 2 | 3 | Pillow==9.2.0 4 | aioboto3==9.6.0 5 | django-environ==0.4.5 6 | furl==2.1.0 7 | listcrunch==0.1.0 8 | pebble==4.5.0 9 | redis==3.4.1 10 | requests==2.22.0 11 | sentry-sdk==0.14.0 12 | pymupdf==1.25.3 13 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/ocr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/ocr/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/processing/ocr/cloud-requirements.txt: -------------------------------------------------------------------------------- 1 | gcsfs==0.2.2 2 | google-cloud-pubsub==0.42.1 3 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/ocr/common: -------------------------------------------------------------------------------- 1 | ../../../common/ 
-------------------------------------------------------------------------------- /documentcloud/documents/processing/ocr/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.26.115 2 | smart-open==1.8.4 3 | 4 | Pillow==9.2.0 5 | cpuprofile==1.0.1 6 | django-environ==0.4.5 7 | furl==2.1.0 8 | pebble==4.5.0 9 | redis==3.4.1 10 | requests==2.22.0 11 | sentry-sdk==0.14.0 12 | pymupdf==1.25.3 13 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/ocr/tesseract/liblept.so.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/ocr/tesseract/liblept.so.5 -------------------------------------------------------------------------------- /documentcloud/documents/processing/ocr/tesseract/libtesseract.so.5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/ocr/tesseract/libtesseract.so.5 -------------------------------------------------------------------------------- /documentcloud/documents/processing/ocr/tesseract/tessdata/pdf.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/ocr/tesseract/tessdata/pdf.ttf -------------------------------------------------------------------------------- /documentcloud/documents/processing/sidekick/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/sidekick/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/processing/sidekick/common: -------------------------------------------------------------------------------- 1 | ../../../common -------------------------------------------------------------------------------- /documentcloud/documents/processing/sidekick/requirements.txt: -------------------------------------------------------------------------------- 1 | -r cloud-requirements.txt 2 | 3 | aioboto3==6.5.0 4 | django-environ==0.4.5 5 | furl==2.1.0 6 | pebble==4.5.0 7 | redis==3.4.1 8 | requests==2.22.0 9 | sentry-sdk==0.14.0 10 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/imagediff_alteration_orange_square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/imagediff_alteration_orange_square.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/imagediff_alteration_red_scratch.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/imagediff_alteration_red_scratch.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/imagediff_alteration_small_redaction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/imagediff_alteration_small_redaction.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/imagediff_illustrator_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/imagediff_illustrator_page.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/imagediff_pdfium_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/imagediff_pdfium_page.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/imagediff_preview_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/imagediff_preview_page.png -------------------------------------------------------------------------------- 
/documentcloud/documents/processing/tests/images/redaction_0_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/redaction_0_0.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/redaction_diagonal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/redaction_diagonal.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/redaction_sentence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/redaction_sentence.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/images/redaction_unredacted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/images/redaction_unredacted.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3.pdf -------------------------------------------------------------------------------- 
/documentcloud/documents/processing/tests/pdfs/doc_3_modified.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_modified.pdf -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_overlaid.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_overlaid.pdf -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_pg0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg0.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_pg0_CC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg0_CC.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_pg1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg1.png -------------------------------------------------------------------------------- 
/documentcloud/documents/processing/tests/pdfs/doc_3_pg1_180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg1_180.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_pg1_CW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg1_CW.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_pg1_redacted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg1_redacted.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_pg2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg2.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/doc_3_pg2_CW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/doc_3_pg2_CW.png -------------------------------------------------------------------------------- 
/documentcloud/documents/processing/tests/pdfs/output_test2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/output_test2.pdf -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/output_test3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/output_test3.pdf -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/output_test4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/output_test4.pdf -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/output_test5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/output_test5.pdf -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/output_test6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/output_test6.pdf -------------------------------------------------------------------------------- 
/documentcloud/documents/processing/tests/pdfs/shakespeare.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/shakespeare.pdf -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pdfs/shakespeare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pdfs/shakespeare.png -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pipeline_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/tests/pipeline_tests/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pipeline_tests/fake_pdf.py: -------------------------------------------------------------------------------- 1 | class FakePdf: 2 | def __init__(self, pages): 3 | """Initializes the fake PDF with a string containing its pages. 4 | 5 | The pages are specified in a format indicating which pages need "OCR". 6 | For example: 7 | 8 | `..o.o` 9 | 10 | indicates a 5 page PDF document where the 3rd and 5th pages need OCR. 
11 | 12 | """ 13 | self.pages = pages 14 | self.page_count = len(pages) 15 | 16 | def needs_ocr(self, page_number): 17 | """Returns whether the 0-based page number requires OCR or not.""" 18 | return self.pages[page_number] == "o" 19 | 20 | def redact(self, pages): 21 | """Redact pages by making them need OCR.""" 22 | for page in pages: 23 | self.pages = self.pages[:page] + "o" + self.pages[page + 1 :] 24 | 25 | 26 | class FakePage: 27 | def __init__(self, has_text): 28 | self.has_text = has_text 29 | 30 | @property 31 | def text(self): 32 | if self.has_text: 33 | return "text" 34 | return "" 35 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/pipeline_tests/test_fakepdf.py: -------------------------------------------------------------------------------- 1 | # Local 2 | from .fake_pdf import FakePdf 3 | 4 | 5 | class FakePdfTest: 6 | def test_page_count(self): 7 | pdf = FakePdf("...") 8 | assert pdf.page_count == 3 9 | 10 | pdf = FakePdf("") 11 | assert pdf.page_count == 0 12 | 13 | pdf = FakePdf("..o.o") 14 | assert pdf.page_count == 5 15 | 16 | def test_needs_ocr(self): 17 | pdf = FakePdf("...") 18 | assert not pdf.needs_ocr(0) 19 | assert not pdf.needs_ocr(1) 20 | assert not pdf.needs_ocr(2) 21 | 22 | pdf = FakePdf("..o.o") 23 | assert not pdf.needs_ocr(0) 24 | assert not pdf.needs_ocr(1) 25 | assert pdf.needs_ocr(2) 26 | assert not pdf.needs_ocr(3) 27 | assert pdf.needs_ocr(4) 28 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/tests/report_test_case.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.test import TestCase 3 | 4 | # Standard Library 5 | import os 6 | import re 7 | 8 | # Local 9 | from .report_generator import ReportGenerator 10 | 11 | 12 | def convert(name: str) -> str: 13 | string = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) 14 | result = 
re.sub("([a-z0-9])([A-Z])", r"\1_\2", string).lower() 15 | return re.sub("_test$", "", result) 16 | 17 | 18 | def normalize(name: str) -> str: 19 | return " ".join([n.capitalize() for n in name.split("_")]) 20 | 21 | 22 | base_dir = os.path.dirname(os.path.abspath(__file__)) 23 | reports = os.path.join(base_dir, "reports") 24 | 25 | 26 | class ReportTestCase(TestCase): 27 | report_generator: ReportGenerator 28 | 29 | @classmethod 30 | def setUpClass(cls) -> None: 31 | name = convert(cls.__name__) 32 | super().setUpClass() 33 | cls.report_generator = ReportGenerator(os.path.join(reports, f"{name}.html")) 34 | cls.report_generator.add_heading(f"{normalize(name)} Tests") 35 | 36 | @classmethod 37 | def tearDownClass(cls) -> None: 38 | cls.report_generator.close() 39 | super().tearDownClass() 40 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/documents/processing/utils/__init__.py -------------------------------------------------------------------------------- /documentcloud/documents/processing/utils/cloud-requirements.txt: -------------------------------------------------------------------------------- 1 | gcsfs==0.2.2 2 | google-cloud-pubsub==0.42.1 3 | -------------------------------------------------------------------------------- /documentcloud/documents/processing/utils/common: -------------------------------------------------------------------------------- 1 | ../../../common/ -------------------------------------------------------------------------------- /documentcloud/documents/processing/utils/requirements.txt: -------------------------------------------------------------------------------- 1 | -r cloud-requirements.txt 2 | 3 | django-environ==0.4.5 4 | furl==2.1.0 5 | 
@predicate
@skip_if_not_obj
def can_change_document(user, section):
    """Return whether `user` may change the document that owns `section`.

    Section permissions are delegated to the parent document's `can_change`
    rule rather than being decided on the section itself.
    """
    parent_document = section.document
    return documents.can_change(user, parent_document)
class BulkRouterMixin:
    """Router mixin that maps PUT, PATCH and DELETE on the first route to the
    `bulk_update`, `bulk_partial_update` and `bulk_destroy` viewset methods.
    """

    # Deep-copy so the extra verbs are added to this mixin's own route table
    # and DefaultRouter.routes itself is never mutated.
    routes = copy.deepcopy(DefaultRouter.routes)
    # routes[0] is presumably the collection (list) route -- TODO confirm
    # against the installed rest_framework version.
    routes[0].mapping.update(
        put="bulk_update",
        patch="bulk_partial_update",
        delete="bulk_destroy",
    )
class EntityAccess(DjangoChoices):
    # Access levels for an entity.  The integer values are stored in the
    # database (the migrations list choices 0 and 2), so they must not be
    # renumbered.
    # `api` specifies if this attribute should be accessible via the API
    # Free and public to all.
    public = ChoiceItem(0, _("Public"), api=True)
    # Visible only to the user who owns the entity.
    # NOTE(review): EntityQuerySet.get_viewable exposes non-public entities
    # only to their `user`, not to the user's organization -- confirm no
    # organization-level sharing is intended.
    private = ChoiceItem(2, _("Private"), api=True)
-------------------------------------------------------------------------------- /documentcloud/entities/migrations/0004_alter_entity_access.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-01 19:44 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('entities', '0003_auto_20230201_1814'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='entity', 15 | name='access', 16 | field=models.IntegerField(choices=[(0, 'Public'), (2, 'Private')], help_text='Designates who may access this entity.', verbose_name='access'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/entities/migrations/0005_auto_20230206_1943.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-06 19:43 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | migrations.swappable_dependency(settings.AUTH_USER_MODEL), 12 | ('entities', '0004_alter_entity_access'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name='entity', 18 | name='owner', 19 | field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.PROTECT, related_name='entities', to=settings.AUTH_USER_MODEL), 20 | ), 21 | migrations.AlterField( 22 | model_name='entity', 23 | name='wikidata_id', 24 | field=models.CharField(max_length=16, unique=True), 25 | ), 26 | ] 27 | -------------------------------------------------------------------------------- /documentcloud/entities/migrations/0006_entity_metadata.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-09 17:50 2 | 3 
| from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('entities', '0005_auto_20230206_1943'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='entity', 15 | name='metadata', 16 | field=models.JSONField(null=True), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/entities/migrations/0008_alter_entity_metadata.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-14 14:57 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('entities', '0007_entityoccurrence'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='entity', 15 | name='metadata', 16 | field=models.JSONField(default=dict, help_text='Extra data about this entity'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/entities/migrations/0009_auto_20230214_1515.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-14 15:15 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('entities', '0008_alter_entity_metadata'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RenameField( 14 | model_name='entity', 15 | old_name='owner', 16 | new_name='user', 17 | ), 18 | migrations.AlterField( 19 | model_name='entity', 20 | name='access', 21 | field=models.IntegerField(choices=[(0, 'Public'), (2, 'Private')], default=0, help_text='Designates who may access this entity.', verbose_name='access'), 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /documentcloud/entities/migrations/0010_auto_20230214_1531.py: 
-------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-14 15:31 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('entities', '0009_auto_20230214_1515'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='entity', 15 | name='description', 16 | field=models.JSONField(default=dict), 17 | ), 18 | migrations.AlterField( 19 | model_name='entity', 20 | name='localized_names', 21 | field=models.JSONField(default=dict), 22 | ), 23 | migrations.AlterField( 24 | model_name='entity', 25 | name='wikipedia_url', 26 | field=models.JSONField(default=dict), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /documentcloud/entities/migrations/0011_alter_entity_name.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-14 17:11 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('entities', '0010_auto_20230214_1531'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='entity', 15 | name='name', 16 | field=models.CharField(blank=True, max_length=500), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/entities/migrations/0012_alter_entity_wikidata_id.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.9 on 2023-02-14 18:22 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('entities', '0011_alter_entity_name'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='entity', 15 | name='wikidata_id', 16 | field=models.CharField(blank=True, max_length=16, null=True, 
class EntityQuerySet(TranslatableQuerySet):
    """Entity queryset with access-aware filtering"""

    def get_viewable(self, user):
        """Restrict the queryset to the entities `user` is allowed to see.

        Public entities are always included; an authenticated user
        additionally sees the entities whose `user` field is them.
        """
        is_public = Q(access=EntityAccess.public)
        if not user.is_authenticated:
            return self.filter(is_public)
        return self.filter(Q(user=user) | is_public)
# Read-only (retrieve + list) API for flatpages, looked up by their URL.
# A `#` comment is used instead of a class docstring because DRF surfaces view
# docstrings as the endpoint description.
class FlatPageViewSet(
    mixins.RetrieveModelMixin, mixins.ListModelMixin, viewsets.GenericViewSet
):
    serializer_class = FlatPageSerializer
    queryset = FlatPage.objects.all()
    lookup_field = "url"
    # Accept any non-empty value, including slashes, so nested flatpage URLs
    # such as "help/faq" can be captured.
    lookup_value_regex = ".+"

    def get_object(self):
        """Return the flatpage whose `url` matches the captured path.

        The captured value is wrapped in a leading and trailing slash before
        the lookup.  Slashes already present at either end are stripped first,
        so the rewrite is idempotent: calling `get_object` more than once on
        the same view, or capturing a value that already carries an edge
        slash, no longer yields a doubled-slash URL that can never match.
        """
        path = self.kwargs["url"].strip("/")
        self.kwargs["url"] = f"/{path}/" if path else "/"
        return super().get_object()
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/oembed/admin.py -------------------------------------------------------------------------------- /documentcloud/oembed/apps.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.apps import AppConfig 3 | 4 | 5 | class OembedConfig(AppConfig): 6 | name = "documentcloud.oembed" 7 | 8 | def ready(self): 9 | # Django 10 | from django.utils.module_loading import autodiscover_modules 11 | 12 | autodiscover_modules("oembed") 13 | -------------------------------------------------------------------------------- /documentcloud/oembed/decorators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/oembed/decorators.py -------------------------------------------------------------------------------- /documentcloud/oembed/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/oembed/migrations/__init__.py -------------------------------------------------------------------------------- /documentcloud/oembed/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/oembed/models.py -------------------------------------------------------------------------------- /documentcloud/oembed/registry.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.core.exceptions import ImproperlyConfigured 3 | 4 | # DocumentCloud 5 | from 
class Query:
    """Class to handle query string parameters for OEmbed requests."""

    def __init__(self, qs):
        """Initialize with a query string.

        Args:
            qs: The raw query string (no leading "?").  May be empty or None,
                in which case no parameters are parsed.
        """
        # Kept exactly as given (possibly None) for callers that read it.
        self.query_string = qs
        self.params = {}

        if qs:
            # parse_qs maps every key to a list of values; unwrap keys that
            # occur once so they read as plain strings, and keep the list for
            # repeated keys.
            self.params = {
                key: values[0] if len(values) == 1 else values
                for key, values in parse_qs(qs).items()
            }

    def __bool__(self):
        """Return True if there are parameters."""
        return bool(self.params)

    def __str__(self):
        """Convert back to query string for URL construction.

        Returns an empty string when no query string was supplied;
        `__str__` must return a `str`, and returning None raises TypeError.
        """
        return self.query_string or ""
2 | 3 | # Django 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("organizations", "0002_auto_20200128_1418")] 10 | 11 | operations = [ 12 | migrations.RunSQL( 13 | "ALTER SEQUENCE organizations_organization_id_seq RESTART WITH 10000" 14 | ) 15 | ] 16 | -------------------------------------------------------------------------------- /documentcloud/organizations/migrations/0008_auto_20200526_1940.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-05-26 19:40 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('organizations', '0007_auto_20200526_1338'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name='organization', 15 | name='plan_old', 16 | ), 17 | migrations.DeleteModel( 18 | name='Membership', 19 | ), 20 | migrations.DeleteModel( 21 | name='Plan', 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /documentcloud/organizations/migrations/0009_organization_entitlement.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-05-27 20:10 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('squarelet_auth_organizations', '0005_auto_20200527_0921'), 11 | ('organizations', '0008_auto_20200526_1940'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='organization', 17 | name='entitlement', 18 | field=models.ForeignKey(help_text='The subscription type for this organization', null=True, on_delete=django.db.models.deletion.PROTECT, to='squarelet_auth_organizations.Entitlement', verbose_name='entitlement'), 19 | ), 20 | ] 21 | 
def set_entitlements(apps, schema_editor):
    """Forward data migration: create one Entitlement per existing Plan.

    Each new Entitlement copies the plan's name, slug and resources, and every
    organization currently on that plan is pointed at the new entitlement.
    """
    # Look the models up through the migration app registry rather than
    # importing them directly.
    Organization = apps.get_model("organizations", "Organization")
    Plan = apps.get_model("squarelet_auth_organizations", "Plan")
    Entitlement = apps.get_model("squarelet_auth_organizations", "Entitlement")

    for plan in Plan.objects.all():
        entitlement = Entitlement.objects.create(
            name=plan.name, slug=plan.slug, resources=plan.resources
        )
        # Bulk update: one UPDATE per plan instead of saving each organization
        Organization.objects.filter(plan=plan).update(entitlement=entitlement)
/documentcloud/organizations/migrations/0016_alter_aicreditlog_options.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.2 on 2023-11-14 19:11 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('organizations', '0015_aicreditlog'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterModelOptions( 14 | name='aicreditlog', 15 | options={'verbose_name': 'AI Credit Log'}, 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/organizations/migrations/0017_organization_merged.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2.2 on 2025-05-01 19:12 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("organizations", "0016_alter_aicreditlog_options"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name="organization", 17 | name="merged", 18 | field=models.ForeignKey( 19 | blank=True, 20 | help_text="The agency this agency was merged in to", 21 | null=True, 22 | on_delete=django.db.models.deletion.PROTECT, 23 | related_name="+", 24 | to=settings.SQUARELET_ORGANIZATION_MODEL, 25 | ), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /documentcloud/organizations/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/organizations/migrations/__init__.py -------------------------------------------------------------------------------- /documentcloud/organizations/querysets.py: 
class OrganizationQuerySet(models.QuerySet):
    """Organization queryset with visibility filtering"""

    def get_viewable(self, user):
        """Restrict the queryset to the organizations `user` may see.

        Anonymous users get only non-private organizations; authenticated
        users also get the organizations they belong to.
        """
        if not user.is_authenticated:
            return self.filter(private=False)
        visible = Q(users=user) | Q(private=False)
        # The filter spans the `users` relation, so de-duplicate the rows.
        return self.filter(visible).distinct()
@admin.register(Project)
class ProjectAdmin(admin.ModelAdmin):
    """Project Admin"""

    list_display = ("title", "user", "private")
    list_filter = ("private",)
    search_fields = ("title", "user__username")
    date_hierarchy = "created_at"
    fields = (
        "title",
        "slug",
        "user",
        "description",
        "private",
        "created_at",
        "updated_at",
    )
    # Slug, owner and timestamps are shown but cannot be edited in the admin
    readonly_fields = ("slug", "user", "created_at", "updated_at")
name='collaboration', 15 | options={'ordering': ('id',)}, 16 | ), 17 | migrations.AlterModelOptions( 18 | name='project', 19 | options={'ordering': ('slug',)}, 20 | ), 21 | migrations.AlterModelOptions( 22 | name='projectmembership', 23 | options={'ordering': ('id',)}, 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0003_auto_20200210_1548.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-10 15:48 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('documents', '0012_auto_20200205_1535'), 10 | ('projects', '0002_auto_20200128_1418'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterUniqueTogether( 15 | name='projectmembership', 16 | unique_together={('project', 'document')}, 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0004_auto_20200210_2050.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-10 20:50 2 | 3 | from django.conf import settings 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | migrations.swappable_dependency(settings.AUTH_USER_MODEL), 11 | ('projects', '0003_auto_20200210_1548'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterUniqueTogether( 16 | name='collaboration', 17 | unique_together={('project', 'user')}, 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0005_collaboration_access.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-14 15:05 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class 
Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('projects', '0004_auto_20200210_2050'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name='collaboration', 15 | name='access', 16 | field=models.IntegerField(choices=[(0, 'View'), (1, 'Edit'), (2, 'Admin')], default=0, help_text='The level of access granted to this collaborator', verbose_name='access'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0006_auto_20200214_1641.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-14 16:41 2 | 3 | # Django 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("projects", "0005_collaboration_access")] 10 | 11 | operations = [ 12 | migrations.RunSQL("ALTER SEQUENCE projects_project_id_seq RESTART WITH 200000") 13 | ] 14 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0007_auto_20200311_1936.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-03-11 19:36 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('projects', '0006_auto_20200214_1641'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='collaboration', 17 | name='creator', 18 | field=models.ForeignKey(blank=True, db_constraint=False, help_text='The user who created this collaboration', null=True, on_delete=django.db.models.deletion.PROTECT, related_name='+', to=settings.AUTH_USER_MODEL, verbose_name='creator'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- 
/documentcloud/projects/migrations/0007_auto_20200406_0048.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-04-06 00:48 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('projects', '0006_auto_20200214_1641'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='projectmembership', 15 | name='edit_access', 16 | field=models.BooleanField(default=True, help_text='Whether collaborators on this project have edit access to this document', verbose_name='edit access'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0008_merge_20200407_1320.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-04-07 13:20 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('projects', '0007_auto_20200406_0048'), 10 | ('projects', '0007_auto_20200311_1936'), 11 | ] 12 | 13 | operations = [ 14 | ] 15 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0009_auto_20200407_1320.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-04-07 13:20 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('projects', '0008_merge_20200407_1320'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='project', 17 | name='user', 18 | field=models.ForeignKey(db_constraint=False, help_text='The user who created this project', on_delete=django.db.models.deletion.PROTECT, related_name='+', 
to=settings.AUTH_USER_MODEL, verbose_name='user'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0010_auto_20200429_0121.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-04-29 01:21 2 | 3 | from django.db import migrations 4 | import django_extensions.db.fields 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('projects', '0009_auto_20200407_1320'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='project', 16 | name='slug', 17 | field=django_extensions.db.fields.AutoSlugField(allow_duplicates=True, blank=True, editable=False, help_text='A slug for the project which may be used in a URL', max_length=255, populate_from='title', verbose_name='slug'), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0011_auto_20210216_2032.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2021-02-16 20:32 2 | 3 | from django.conf import settings 4 | from django.db import migrations, models 5 | import django.db.models.deletion 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('projects', '0010_auto_20200429_0121'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='collaboration', 17 | name='creator', 18 | field=models.ForeignKey(blank=True, help_text='The user who created this collaboration', null=True, on_delete=django.db.models.deletion.PROTECT, related_name='+', to=settings.AUTH_USER_MODEL, verbose_name='creator'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/0012_auto_20210407_1801.py: 
-------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2021-04-07 18:01 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('projects', '0011_auto_20210216_2032'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterModelOptions( 14 | name='project', 15 | options={'ordering': ('slug',), 'permissions': (('add_remove_project', 'Can add & remove documents from a project'),)}, 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/projects/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/projects/migrations/__init__.py -------------------------------------------------------------------------------- /documentcloud/projects/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/projects/tests/__init__.py -------------------------------------------------------------------------------- /documentcloud/sidekick/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/sidekick/__init__.py -------------------------------------------------------------------------------- /documentcloud/sidekick/apps.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.apps import AppConfig 3 | 4 | 5 | class SidekickConfig(AppConfig): 6 | name = "documentcloud.sidekick" 7 | 8 | def ready(self): 9 | # pylint: disable=unused-import 10 | # load signals 11 | # DocumentCloud 12 | import 
documentcloud.sidekick.signals 13 | -------------------------------------------------------------------------------- /documentcloud/sidekick/choices.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.utils.translation import gettext_lazy as _ 3 | 4 | # Third Party 5 | from djchoices import ChoiceItem, DjangoChoices 6 | 7 | 8 | class Status(DjangoChoices): 9 | success = ChoiceItem(0, _("Success"), api=True) 10 | pending = ChoiceItem(1, _("Pending"), api=True) 11 | error = ChoiceItem(2, _("Error"), api=True) 12 | -------------------------------------------------------------------------------- /documentcloud/sidekick/local_tasks.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from celery import shared_task 3 | 4 | # DocumentCloud 5 | from documentcloud.documents.processing.sidekick.main import preprocess 6 | 7 | 8 | @shared_task 9 | def sidekick_preprocess(data): 10 | preprocess(data) 11 | -------------------------------------------------------------------------------- /documentcloud/sidekick/migrations/0002_auto_20210723_2029.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2021-07-23 20:29 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('sidekick', '0001_initial'), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name='sidekick', 15 | name='tag_name', 16 | ), 17 | migrations.AlterField( 18 | model_name='sidekick', 19 | name='status', 20 | field=models.IntegerField(choices=[(0, 'Success'), (1, 'Pending'), (2, 'Error')], default=1, help_text='The status of this sidekick', verbose_name='status'), 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /documentcloud/sidekick/migrations/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/sidekick/migrations/__init__.py -------------------------------------------------------------------------------- /documentcloud/sidekick/routers.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from rest_framework.routers import DynamicRoute, Route 3 | 4 | # Third Party 5 | from rest_framework_nested.routers import NestedDefaultRouter 6 | 7 | 8 | class SidekickRouter(NestedDefaultRouter): 9 | """Route list URL to detail views""" 10 | 11 | routes = [ 12 | # List route. 13 | Route( 14 | url=r"^{prefix}{trailing_slash}$", 15 | mapping={ 16 | "get": "retrieve", 17 | "put": "update", 18 | "patch": "partial_update", 19 | "post": "create", 20 | "delete": "destroy", 21 | }, 22 | name="{basename}-detail", 23 | detail=True, 24 | initkwargs={"suffix": "Instance"}, 25 | ), 26 | # Dynamically generated list routes. Generated using 27 | # @action(detail=False) decorator on methods of the viewset. 
28 | DynamicRoute( 29 | url=r"^{prefix}/{url_path}{trailing_slash}$", 30 | name="{basename}-{url_name}", 31 | detail=True, 32 | initkwargs={}, 33 | ), 34 | ] 35 | -------------------------------------------------------------------------------- /documentcloud/sidekick/rules.py: -------------------------------------------------------------------------------- 1 | # Third Party 2 | from rules import add_perm, is_authenticated, predicate 3 | 4 | # DocumentCloud 5 | from documentcloud.core.rules import skip_if_not_obj 6 | from documentcloud.projects import rules as projects_rules 7 | 8 | 9 | @predicate 10 | @skip_if_not_obj 11 | def can_view(user, sidekick): 12 | return projects_rules.can_view(user, sidekick.project) 13 | 14 | 15 | @predicate 16 | @skip_if_not_obj 17 | def can_change(user, sidekick): 18 | return projects_rules.can_change(user, sidekick.project) 19 | 20 | 21 | add_perm("sidekick.view_sidekick", can_view) 22 | add_perm("sidekick.add_sidekick", is_authenticated) 23 | add_perm("sidekick.change_sidekick", is_authenticated & can_change) 24 | add_perm("sidekick.delete_sidekick", is_authenticated & can_change) 25 | -------------------------------------------------------------------------------- /documentcloud/sidekick/serializers.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from rest_framework import serializers 3 | 4 | # DocumentCloud 5 | from documentcloud.documents.fields import ChoiceField 6 | from documentcloud.sidekick.choices import Status 7 | from documentcloud.sidekick.models import Sidekick 8 | 9 | 10 | class SidekickSerializer(serializers.ModelSerializer): 11 | status = ChoiceField( 12 | Status, read_only=True, help_text=Sidekick._meta.get_field("status").help_text 13 | ) 14 | 15 | class Meta: 16 | model = Sidekick 17 | fields = ["status"] 18 | 19 | def __init__(self, *args, **kwargs): 20 | super().__init__(*args, **kwargs) 21 | 22 | # Allow writing to status from processing lambda 23 | context 
= kwargs.get("context", {}) 24 | request = context.get("request") 25 | has_request_auth = ( 26 | request and hasattr(request, "auth") and request.auth is not None 27 | ) 28 | if has_request_auth and "processing" in request.auth.get("permissions", []): 29 | self.fields["status"].read_only = False 30 | -------------------------------------------------------------------------------- /documentcloud/sidekick/sidekick.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/sidekick/sidekick.py -------------------------------------------------------------------------------- /documentcloud/sidekick/signals.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.db.models.signals import post_delete 3 | from django.dispatch import receiver 4 | 5 | # DocumentCloud 6 | from documentcloud.common import path 7 | from documentcloud.common.environment import storage 8 | from documentcloud.sidekick.models import Sidekick 9 | 10 | 11 | @receiver( 12 | post_delete, 13 | sender=Sidekick, 14 | dispatch_uid="documentcloud.core.signals.delete_vectors", 15 | ) 16 | def delete_vectors(instance, **kwargs): 17 | """Delete vector files when deleting a sidekick instance""" 18 | storage.delete(path.sidekick_document_vectors_path(instance.project_id)) 19 | -------------------------------------------------------------------------------- /documentcloud/sidekick/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/sidekick/tests/__init__.py -------------------------------------------------------------------------------- /documentcloud/static/css/project.css: -------------------------------------------------------------------------------- 1 | /* 
These styles are generated from project.scss. */ 2 | 3 | .alert-debug { 4 | color: black; 5 | background-color: white; 6 | border-color: #d6e9c6; 7 | } 8 | 9 | .alert-error { 10 | color: #b94a48; 11 | background-color: #f2dede; 12 | border-color: #eed3d7; 13 | } 14 | -------------------------------------------------------------------------------- /documentcloud/static/fonts/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/static/fonts/.gitkeep -------------------------------------------------------------------------------- /documentcloud/static/images/favicons/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/static/images/favicons/favicon.ico -------------------------------------------------------------------------------- /documentcloud/static/js/project.js: -------------------------------------------------------------------------------- 1 | /* Project specific Javascript goes here. 
*/ 2 | -------------------------------------------------------------------------------- /documentcloud/static/sass/custom_bootstrap_vars.scss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/static/sass/custom_bootstrap_vars.scss -------------------------------------------------------------------------------- /documentcloud/static/sass/project.scss: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | // project specific CSS goes here 6 | 7 | //////////////////////////////// 8 | //Variables// 9 | //////////////////////////////// 10 | 11 | // Alert colors 12 | 13 | $white: #fff; 14 | $mint-green: #d6e9c6; 15 | $black: #000; 16 | $pink: #f2dede; 17 | $dark-pink: #eed3d7; 18 | $red: #b94a48; 19 | 20 | //////////////////////////////// 21 | //Alerts// 22 | //////////////////////////////// 23 | 24 | // bootstrap alert CSS, translated to the django-standard levels of 25 | // debug, info, success, warning, error 26 | 27 | .alert-debug { 28 | background-color: $white; 29 | border-color: $mint-green; 30 | color: $black; 31 | } 32 | 33 | .alert-error { 34 | background-color: $pink; 35 | border-color: $dark-pink; 36 | color: $red; 37 | } 38 | -------------------------------------------------------------------------------- /documentcloud/statistics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/statistics/__init__.py -------------------------------------------------------------------------------- /documentcloud/statistics/apps.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.apps import AppConfig 3 | 4 | 5 | class StatisticsConfig(AppConfig): 6 | name = 
"documentcloud.statistics" 7 | -------------------------------------------------------------------------------- /documentcloud/statistics/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/statistics/migrations/__init__.py -------------------------------------------------------------------------------- /documentcloud/statistics/rules.py: -------------------------------------------------------------------------------- 1 | # Third Party 2 | from rules import add_perm, always_deny, is_staff 3 | 4 | add_perm("statistics.view_statistics", is_staff) 5 | add_perm("statistics.add_statistics", always_deny) 6 | add_perm("statistics.change_statistics", always_deny) 7 | add_perm("statistics.delete_statistics", always_deny) 8 | -------------------------------------------------------------------------------- /documentcloud/statistics/serializers.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from rest_framework import serializers 3 | 4 | # DocumentCloud 5 | from documentcloud.statistics.models import Statistics 6 | 7 | 8 | class StatisticsSerializer(serializers.ModelSerializer): 9 | """Serializer for DocumentCloud Statistics""" 10 | 11 | class Meta: 12 | model = Statistics 13 | fields = "__all__" 14 | -------------------------------------------------------------------------------- /documentcloud/statistics/views.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from rest_framework import mixins, permissions, viewsets 3 | 4 | # DocumentCloud 5 | from documentcloud.statistics.models import Statistics 6 | from documentcloud.statistics.serializers import StatisticsSerializer 7 | 8 | 9 | class StatisticsViewSet( 10 | mixins.RetrieveModelMixin, 11 | mixins.ListModelMixin, 12 | viewsets.GenericViewSet, 13 | ): 14 | 15 
| serializer_class = StatisticsSerializer 16 | queryset = Statistics.objects.all() 17 | filterset_fields = ("date",) 18 | permission_classes = [permissions.IsAdminUser] 19 | -------------------------------------------------------------------------------- /documentcloud/templates/addons/email/base_disabled.html: -------------------------------------------------------------------------------- 1 | {% extends "core/email/base.html" %} 2 | 3 | {% block body %} 4 | 5 |

6 | Your scheduled run of the Add-On {{ run.addon.name }} has not run 7 | successfully the last five times it was run. We have therefore disabled the 8 | scheduled run. View the scheduled run details here. 9 |

10 |

11 | {{ footer_content|urlize }} 12 |

13 | 14 | {% endblock body %} -------------------------------------------------------------------------------- /documentcloud/templates/addons/email/disabled.html: -------------------------------------------------------------------------------- 1 | {% extends "core/email/base.html" %} 2 | 3 | {% block body %} 4 | 5 |

6 | Your scheduled run of the Add-On {{ run.addon.name }} has not run 7 | successfully the last five times it was run. We have therefore disabled the 8 | scheduled run. View the scheduled run details here. 9 |

10 | 11 |

12 | You may debug the issue 13 | 14 | using the GitHub logs. 15 | Once fixed, you may re-enable it from the Add-On menu on 16 | DocumentCloud. If you need assistance debugging, please ask on 17 | our Slack channel. 18 |

19 | 20 | {% endblock body %} 21 | -------------------------------------------------------------------------------- /documentcloud/templates/addons/scraper.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Scraper Dashboard 8 | 9 | 10 | 11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | {% for host, values in hosts.items %} 22 | 23 | 24 | 25 | 26 | 27 | {% endfor %} 28 | 29 |
HostSuccess CountFailure Count
{{ host }}{{ values.success }}{{ values.failure }}
30 |
31 | 32 | 33 | -------------------------------------------------------------------------------- /documentcloud/templates/admin/addons/addon/change_form.html: -------------------------------------------------------------------------------- 1 | {% extends "admin/change_form.html" %} 2 | {% block object-tools-items %} 3 | {% if original %} 4 |
  • 5 | 6 | Update config from GitHub 7 | 8 |
  • 9 | {% endif %} 10 | {{ block.super }} 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /documentcloud/templates/admin/users/user/change_list.html: -------------------------------------------------------------------------------- 1 | {% extends "admin/change_list.html" %} 2 | 3 | {% block content %} 4 | 9 | {{ block.super }} 10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /documentcloud/templates/core/email/mailkey.html: -------------------------------------------------------------------------------- 1 | {% extends "core/email/base.html" %} 2 | 3 | {% block body %} 4 |

    5 | A new upload email address has just been created for your DocumentCloud 6 | account. This allows you to email documents and have them directly uploaded 7 | into your account. To keep this address private, it was only shown to you 8 | once, when you generated it. If you have forgotten it, you may create a new 9 | one under your account menu towards the top right at 10 | https://www.documentcloud.org. You may also use that menu area to disable 11 | this feature entirely. If you did not generate this email address, please 12 | log in to DocumentCloud, change your password and let us know by replying 13 | to this email. 14 |

    15 | {% endblock body %} 16 | -------------------------------------------------------------------------------- /documentcloud/templates/core/email/mailkey_delete.html: -------------------------------------------------------------------------------- 1 | {% extends "core/email/base.html" %} 2 | 3 | {% block body %} 4 |

    5 | An upload email address has just been deleted from your account. This 6 | means you will no longer be able to email documents to a private email 7 | address to upload them directly. You may create a new upload email 8 | address at https://www.documentcloud.org if you would like to re-enable 9 | this feature. If you did not delete the email address, please log in to 10 | your DocumentCloud account, change your password, and let us know by 11 | replying to this email. 12 |

    13 | {% endblock body %} 14 | -------------------------------------------------------------------------------- /documentcloud/templates/flatpages/default.html: -------------------------------------------------------------------------------- 1 | {% load cache %} 2 | {% load markdown %} 3 | {% cache None flatpage flatpage.pk %} 4 | {{ flatpage.content|markdown }} 5 | {% endcache %} 6 | -------------------------------------------------------------------------------- /documentcloud/templates/oembed/document.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /documentcloud/templates/oembed/note.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /documentcloud/templates/oembed/page.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /documentcloud/templates/oembed/project.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /documentcloud/users/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MuckRock/documentcloud/50edadf1d07e9ca96ed2423d099285948cc79111/documentcloud/users/__init__.py -------------------------------------------------------------------------------- /documentcloud/users/apps.py: -------------------------------------------------------------------------------- 1 | # Django 2 | from django.apps import AppConfig 3 | 4 | 5 | class UsersConfig(AppConfig): 6 | name = "documentcloud.users" 7 | 8 | def ready(self): 9 | # pylint: disable=unused-import 10 | # load signals 11 | # DocumentCloud 12 
| import documentcloud.users.signals 13 | -------------------------------------------------------------------------------- /documentcloud/users/managers.py: -------------------------------------------------------------------------------- 1 | """Custom querysets for account app""" 2 | 3 | # Django 4 | from django.contrib.auth.models import UserManager as AuthUserManager 5 | 6 | # Standard Library 7 | import logging 8 | 9 | # DocumentCloud 10 | from documentcloud.users.querysets import UserQuerySet 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class UserManager(AuthUserManager.from_queryset(UserQuerySet)): 16 | pass 17 | -------------------------------------------------------------------------------- /documentcloud/users/migrations/0002_auto_20200128_1418.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-01-28 14:18 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('users', '0001_initial'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterModelOptions( 14 | name='user', 15 | options={'ordering': ('username',)}, 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /documentcloud/users/migrations/0003_auto_20200214_1640.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.5 on 2020-02-14 16:40 2 | 3 | # Django 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [("users", "0002_auto_20200128_1418")] 10 | 11 | operations = [ 12 | migrations.RunSQL("ALTER SEQUENCE users_user_id_seq RESTART WITH 100000") 13 | ] 14 | -------------------------------------------------------------------------------- /documentcloud/users/migrations/0005_auto_20200523_1534.py: -------------------------------------------------------------------------------- 1 | # 
class Migration(migrations.Migration):
    """Add the ``active_addons`` many-to-many field to the ``user`` model."""

    # Migrations that must already be applied: one from the ``addons`` app
    # (the target of the new relation) and the previous ``users`` migration.
    dependencies = [
        ('addons', '0005_auto_20220330_1908'),
        ('users', '0007_auto_20211102_1707'),
    ]

    operations = [
        # Many-to-many from ``user`` to ``addons.AddOn``; the reverse accessor
        # on the add-on side is ``users`` (see ``related_name``).
        migrations.AddField(
            model_name='user',
            name='active_addons',
            field=models.ManyToManyField(help_text='Add-Ons shown for this user', related_name='users', to='addons.AddOn', verbose_name='active add-ons'),
        ),
    ]
@receiver(user_logged_in, dispatch_uid="documentcloud.user.signals.default_addons")
def default_addons(sender, user, request, **kwargs):
    """Give a user the default add-ons at login when they have none active.

    Registered as a receiver of the ``user_logged_in`` signal. A user who
    already has at least one active add-on is left untouched; otherwise the
    user's active add-ons are set to every ``AddOn`` with ``default=True``.
    """
    # pylint: disable=unused-argument

    has_active_addons = user.active_addons.exists()
    if has_active_addons:
        return

    default_set = AddOn.objects.filter(default=True)
    user.active_addons.set(default_set)
#!/usr/bin/env python
"""Command-line entry point for Django management commands."""
# Standard Library
import os
import sys


def main():
    """Configure the environment and hand off to Django's command runner.

    Defaults ``DJANGO_SETTINGS_MODULE`` to ``config.settings.local`` (an
    already-set value wins), adds the inner ``documentcloud`` directory to
    ``sys.path`` and then executes the command given in ``sys.argv``.

    Raises:
        ImportError: with an installation hint when Django itself cannot be
            imported; otherwise the original import failure is re-raised
            unchanged so that unrelated import problems are not masked.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")

    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # The above import may fail for some other reason. Ensure that the
        # issue is really that Django is missing, so that a different import
        # error is not hidden behind the generic hint below.
        try:
            import django  # noqa
        except ImportError as exc:
            # Chain explicitly so the traceback shows the failed import as the
            # direct cause of the hint.
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            ) from exc

        # Django is importable, so the first failure was something else.
        raise

    # This allows easy placement of apps within the interior
    # documentcloud directory.
    current_path = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(os.path.join(current_path, "documentcloud"))

    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()
2 | 3 | -r requirements/production.txt 4 | -------------------------------------------------------------------------------- /requirements/local.in: -------------------------------------------------------------------------------- 1 | -r ./base.txt 2 | 3 | Werkzeug 4 | ipdb 5 | Sphinx 6 | psycopg2 --no-binary psycopg2 7 | pip-tools 8 | 9 | # Testing 10 | # ------------------------------------------------------------------------------ 11 | pytest 12 | pytest-sugar 13 | pytest-mock 14 | -e git+https://github.com/elritsch/python-sharedmock#egg=sharedmock 15 | pytest-watch 16 | 17 | # Code quality 18 | # ------------------------------------------------------------------------------ 19 | flake8 20 | coverage 21 | black 22 | pylint-django 23 | pylint-celery 24 | 25 | # Django 26 | # ------------------------------------------------------------------------------ 27 | factory-boy 28 | 29 | django-coverage-plugin 30 | pytest-django 31 | fakeredis 32 | 33 | # Processing 34 | # ------------------------------------------------------------------------------ 35 | python-Levenshtein 36 | Unidecode 37 | opencv-python 38 | requests-mock 39 | fasttext==0.9.3 40 | scikit-learn 41 | 42 | # Documentation 43 | # ------------------------------------------------------------------------------ 44 | drf-spectacular -------------------------------------------------------------------------------- /requirements/production.in: -------------------------------------------------------------------------------- 1 | -r ./base.txt 2 | 3 | gunicorn 4 | psycopg2 --no-binary psycopg2 5 | Collectfast 6 | sentry-sdk 7 | scout-apm 8 | 9 | # Django 10 | # ------------------------------------------------------------------------------ 11 | django-storages[boto3] 12 | django-anymail[mailgun] 13 | django-celery-email 14 | 15 | drf-spectacular -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 
| [flake8] 2 | max-line-length = 120 3 | exclude = .tox,.git,*/migrations/*,*/static/CACHE/*,docs,node_modules 4 | 5 | [pycodestyle] 6 | max-line-length = 120 7 | exclude = .tox,.git,*/migrations/*,*/static/CACHE/*,docs,node_modules 8 | 9 | [mypy] 10 | python_version = 3.6 11 | check_untyped_defs = True 12 | ignore_errors = False 13 | ignore_missing_imports = True 14 | strict_optional = True 15 | warn_unused_ignores = True 16 | warn_redundant_casts = True 17 | warn_unused_configs = True 18 | 19 | [mypy-*.migrations.*] 20 | # Django migrations should not produce any errors: 21 | ignore_errors = True 22 | --------------------------------------------------------------------------------