├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── codeql-analysis.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE.md ├── Makefile ├── README.md ├── exploration-notebooks ├── exploration-10k-risks.ipynb ├── exploration-10q-amended.ipynb ├── exploration-TOC-action.ipynb └── exploration-s1-risks.ipynb ├── img └── unstructured_logo.png ├── logger_config.yaml ├── pipeline-notebooks └── pipeline-section.ipynb ├── prepline_sec_filings ├── __init__.py ├── api │ ├── __init__.py │ ├── app.py │ └── section.py ├── fetch.py ├── sec_document.py └── sections.py ├── preprocessing-pipeline-family.yaml ├── requirements ├── base.in ├── base.txt ├── dev.in ├── dev.txt ├── test.in └── test.txt ├── sample-docs └── sample-sec-docs.sha256 ├── scripts ├── check-and-format-notebooks.py ├── docker-build.sh ├── shellcheck.sh ├── test-doc-pipeline-apis-consistent.sh └── version-sync.sh ├── setup.cfg ├── test_real_docs ├── fixtures │ ├── list-item-content.json │ ├── list-item-counts.json │ ├── risk-samples.json │ └── sample-first-last.json ├── generate_first_last.py └── test_real_examples.py ├── test_sec_filings ├── sec_filings │ └── test_section_api.py ├── test_fetch.py └── test_sec_document.py ├── test_sec_filings_integration └── test_notebooks.py └── test_utils ├── README-generating-validation-csvs.md ├── create_validation_csv_files.py ├── examples.json ├── get_sec_docs_from_edgar.py └── symbols-for-validation-csvs.txt /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/.github/workflows/codeql-analysis.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/.gitignore -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/LICENSE.md -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/README.md -------------------------------------------------------------------------------- /exploration-notebooks/exploration-10k-risks.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/exploration-notebooks/exploration-10k-risks.ipynb -------------------------------------------------------------------------------- /exploration-notebooks/exploration-10q-amended.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/exploration-notebooks/exploration-10q-amended.ipynb -------------------------------------------------------------------------------- /exploration-notebooks/exploration-TOC-action.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/exploration-notebooks/exploration-TOC-action.ipynb -------------------------------------------------------------------------------- /exploration-notebooks/exploration-s1-risks.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/exploration-notebooks/exploration-s1-risks.ipynb -------------------------------------------------------------------------------- /img/unstructured_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/img/unstructured_logo.png -------------------------------------------------------------------------------- /logger_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/logger_config.yaml -------------------------------------------------------------------------------- /pipeline-notebooks/pipeline-section.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/pipeline-notebooks/pipeline-section.ipynb -------------------------------------------------------------------------------- /prepline_sec_filings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prepline_sec_filings/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prepline_sec_filings/api/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/prepline_sec_filings/api/app.py -------------------------------------------------------------------------------- /prepline_sec_filings/api/section.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/prepline_sec_filings/api/section.py -------------------------------------------------------------------------------- /prepline_sec_filings/fetch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/prepline_sec_filings/fetch.py -------------------------------------------------------------------------------- /prepline_sec_filings/sec_document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/prepline_sec_filings/sec_document.py -------------------------------------------------------------------------------- /prepline_sec_filings/sections.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/prepline_sec_filings/sections.py -------------------------------------------------------------------------------- /preprocessing-pipeline-family.yaml: -------------------------------------------------------------------------------- 1 | name: sec-filings 2 | version: 0.2.1 3 | -------------------------------------------------------------------------------- /requirements/base.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/requirements/base.in -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/requirements/base.txt -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/requirements/dev.in -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/requirements/dev.txt -------------------------------------------------------------------------------- /requirements/test.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/requirements/test.in -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/requirements/test.txt -------------------------------------------------------------------------------- /sample-docs/sample-sec-docs.sha256: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/sample-docs/sample-sec-docs.sha256 -------------------------------------------------------------------------------- /scripts/check-and-format-notebooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/scripts/check-and-format-notebooks.py -------------------------------------------------------------------------------- /scripts/docker-build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/scripts/docker-build.sh -------------------------------------------------------------------------------- /scripts/shellcheck.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/scripts/shellcheck.sh -------------------------------------------------------------------------------- /scripts/test-doc-pipeline-apis-consistent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/scripts/test-doc-pipeline-apis-consistent.sh -------------------------------------------------------------------------------- /scripts/version-sync.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/scripts/version-sync.sh -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/setup.cfg -------------------------------------------------------------------------------- /test_real_docs/fixtures/list-item-content.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_real_docs/fixtures/list-item-content.json -------------------------------------------------------------------------------- /test_real_docs/fixtures/list-item-counts.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_real_docs/fixtures/list-item-counts.json -------------------------------------------------------------------------------- /test_real_docs/fixtures/risk-samples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_real_docs/fixtures/risk-samples.json -------------------------------------------------------------------------------- /test_real_docs/fixtures/sample-first-last.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_real_docs/fixtures/sample-first-last.json -------------------------------------------------------------------------------- /test_real_docs/generate_first_last.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_real_docs/generate_first_last.py -------------------------------------------------------------------------------- /test_real_docs/test_real_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_real_docs/test_real_examples.py -------------------------------------------------------------------------------- /test_sec_filings/sec_filings/test_section_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_sec_filings/sec_filings/test_section_api.py -------------------------------------------------------------------------------- /test_sec_filings/test_fetch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_sec_filings/test_fetch.py -------------------------------------------------------------------------------- /test_sec_filings/test_sec_document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_sec_filings/test_sec_document.py -------------------------------------------------------------------------------- /test_sec_filings_integration/test_notebooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_sec_filings_integration/test_notebooks.py -------------------------------------------------------------------------------- /test_utils/README-generating-validation-csvs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_utils/README-generating-validation-csvs.md -------------------------------------------------------------------------------- /test_utils/create_validation_csv_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_utils/create_validation_csv_files.py -------------------------------------------------------------------------------- /test_utils/examples.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_utils/examples.json -------------------------------------------------------------------------------- /test_utils/get_sec_docs_from_edgar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_utils/get_sec_docs_from_edgar.py -------------------------------------------------------------------------------- /test_utils/symbols-for-validation-csvs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unstructured-IO/pipeline-sec-filings/HEAD/test_utils/symbols-for-validation-csvs.txt --------------------------------------------------------------------------------