├── .github └── workflows │ ├── bump-and-publish.yml │ ├── lint.yml │ ├── pull-request.yml │ ├── push-master.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── poetry.lock ├── pyproject.toml ├── refextract ├── __init__.py ├── authors │ ├── __init__.py │ └── regexs.py ├── config.cfg ├── documents │ ├── __init__.py │ ├── pdf.py │ └── text.py ├── extract.py └── references │ ├── __init__.py │ ├── api.py │ ├── config.py │ ├── engine.py │ ├── errors.py │ ├── find.py │ ├── kbs.py │ ├── kbs │ ├── authors.kb │ ├── books.kb │ ├── collaborations.kb │ ├── journal-titles-re.kb │ ├── journal-titles.kb │ ├── publishers.kb │ ├── report-numbers.kb │ └── special-journals.kb │ ├── pdf.py │ ├── record.py │ ├── regexs.py │ ├── tag.py │ └── text.py ├── ruff.toml ├── setup.cfg └── tests ├── conftest.py ├── data ├── 1503.07589v1.pdf ├── 1508.05632v2.pdf ├── 1706.09498v1.pdf ├── 1707.04066v1.pdf ├── 1805.05865.pdf ├── 2110.02751.pdf ├── 2301.05883.pdf ├── 2303.03819.pdf ├── 2304.10117.pdf ├── 2406.06875.pdf ├── 2502.18907.pdf ├── 2502.21088.pdf ├── 2503.05372.pdf ├── 2503.05621.pdf ├── DIS_SHEILA_final.pdf ├── file_resolving.csv ├── packed_pdf.pdf └── wepml008.pdf ├── integration ├── cassettes │ └── test_extract_extract_references_from_url.yaml └── conftest.py ├── test_api.py ├── test_engine.py ├── test_extract.py ├── test_find.py ├── test_kbs.py ├── test_pdf.py ├── test_regexs.py ├── test_tag.py └── test_text.py /.github/workflows/bump-and-publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/.github/workflows/bump-and-publish.yml -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/.github/workflows/lint.yml -------------------------------------------------------------------------------- /.github/workflows/pull-request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/.github/workflows/pull-request.yml -------------------------------------------------------------------------------- /.github/workflows/push-master.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/.github/workflows/push-master.yml -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/.github/workflows/test.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/README.md -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/pyproject.toml -------------------------------------------------------------------------------- /refextract/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/__init__.py -------------------------------------------------------------------------------- /refextract/authors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/authors/__init__.py -------------------------------------------------------------------------------- /refextract/authors/regexs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/authors/regexs.py -------------------------------------------------------------------------------- /refextract/config.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/config.cfg -------------------------------------------------------------------------------- /refextract/documents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/documents/__init__.py -------------------------------------------------------------------------------- /refextract/documents/pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/documents/pdf.py -------------------------------------------------------------------------------- /refextract/documents/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/documents/text.py -------------------------------------------------------------------------------- /refextract/extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/extract.py -------------------------------------------------------------------------------- /refextract/references/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/__init__.py -------------------------------------------------------------------------------- /refextract/references/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/api.py -------------------------------------------------------------------------------- /refextract/references/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/config.py -------------------------------------------------------------------------------- /refextract/references/engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/engine.py -------------------------------------------------------------------------------- /refextract/references/errors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/errors.py -------------------------------------------------------------------------------- /refextract/references/find.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/find.py -------------------------------------------------------------------------------- /refextract/references/kbs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/kbs.py -------------------------------------------------------------------------------- /refextract/references/kbs/authors.kb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/kbs/authors.kb -------------------------------------------------------------------------------- /refextract/references/kbs/books.kb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/kbs/books.kb -------------------------------------------------------------------------------- /refextract/references/kbs/collaborations.kb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/kbs/collaborations.kb -------------------------------------------------------------------------------- /refextract/references/kbs/journal-titles-re.kb: -------------------------------------------------------------------------------- 1 | DAN---Dokl.Akad.Nauk Ser.Fiz. 2 | -------------------------------------------------------------------------------- /refextract/references/kbs/journal-titles.kb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/kbs/journal-titles.kb -------------------------------------------------------------------------------- /refextract/references/kbs/publishers.kb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/kbs/publishers.kb -------------------------------------------------------------------------------- /refextract/references/kbs/report-numbers.kb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/kbs/report-numbers.kb -------------------------------------------------------------------------------- /refextract/references/kbs/special-journals.kb: -------------------------------------------------------------------------------- 1 | JHEP 2 | JCAP 3 | -------------------------------------------------------------------------------- /refextract/references/pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/pdf.py -------------------------------------------------------------------------------- /refextract/references/record.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/record.py -------------------------------------------------------------------------------- /refextract/references/regexs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/regexs.py -------------------------------------------------------------------------------- /refextract/references/tag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/tag.py -------------------------------------------------------------------------------- /refextract/references/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/refextract/references/text.py -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/ruff.toml -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/setup.cfg -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/data/1503.07589v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/1503.07589v1.pdf -------------------------------------------------------------------------------- /tests/data/1508.05632v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/1508.05632v2.pdf -------------------------------------------------------------------------------- /tests/data/1706.09498v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/1706.09498v1.pdf -------------------------------------------------------------------------------- /tests/data/1707.04066v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/1707.04066v1.pdf -------------------------------------------------------------------------------- /tests/data/1805.05865.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/1805.05865.pdf -------------------------------------------------------------------------------- /tests/data/2110.02751.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2110.02751.pdf -------------------------------------------------------------------------------- /tests/data/2301.05883.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2301.05883.pdf -------------------------------------------------------------------------------- /tests/data/2303.03819.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2303.03819.pdf -------------------------------------------------------------------------------- /tests/data/2304.10117.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2304.10117.pdf -------------------------------------------------------------------------------- /tests/data/2406.06875.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2406.06875.pdf -------------------------------------------------------------------------------- /tests/data/2502.18907.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2502.18907.pdf -------------------------------------------------------------------------------- /tests/data/2502.21088.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2502.21088.pdf -------------------------------------------------------------------------------- /tests/data/2503.05372.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2503.05372.pdf -------------------------------------------------------------------------------- /tests/data/2503.05621.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/2503.05621.pdf -------------------------------------------------------------------------------- /tests/data/DIS_SHEILA_final.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/DIS_SHEILA_final.pdf -------------------------------------------------------------------------------- /tests/data/file_resolving.csv: -------------------------------------------------------------------------------- 1 | 1|2|3 2 | 4|5|6 3 | -------------------------------------------------------------------------------- /tests/data/packed_pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/packed_pdf.pdf -------------------------------------------------------------------------------- /tests/data/wepml008.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/data/wepml008.pdf -------------------------------------------------------------------------------- /tests/integration/cassettes/test_extract_extract_references_from_url.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/integration/cassettes/test_extract_extract_references_from_url.yaml -------------------------------------------------------------------------------- /tests/integration/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/integration/conftest.py -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_api.py -------------------------------------------------------------------------------- /tests/test_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_engine.py -------------------------------------------------------------------------------- /tests/test_extract.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_extract.py -------------------------------------------------------------------------------- /tests/test_find.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_find.py -------------------------------------------------------------------------------- /tests/test_kbs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_kbs.py -------------------------------------------------------------------------------- /tests/test_pdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_pdf.py -------------------------------------------------------------------------------- /tests/test_regexs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_regexs.py -------------------------------------------------------------------------------- /tests/test_tag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_tag.py -------------------------------------------------------------------------------- /tests/test_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/inspirehep/refextract/HEAD/tests/test_text.py --------------------------------------------------------------------------------