├── .coveragerc ├── .dockerignore ├── .github └── workflows │ ├── cd.yml │ ├── ci.yml │ └── daily.yml ├── .gitignore ├── .noserc ├── Dockerfile ├── LICENSE.txt ├── README.rst ├── gutenberg ├── __init__.py ├── _domain_model │ ├── __init__.py │ ├── exceptions.py │ ├── persistence.py │ ├── text.py │ ├── types.py │ └── vocabulary.py ├── _util │ ├── __init__.py │ ├── abc.py │ ├── logging.py │ ├── objects.py │ └── os.py ├── acquire │ ├── __init__.py │ ├── metadata.py │ └── text.py ├── cleanup │ ├── __init__.py │ └── strip_headers.py └── query │ ├── __init__.py │ ├── api.py │ └── extractors.py ├── requirements-dev.pip ├── setup.py └── tests ├── __init__.py ├── _sample_metadata.py ├── _sample_text.py ├── _util.py ├── data ├── clean-texts │ ├── 100 │ ├── 1013 │ ├── 2701 │ ├── 10026 │ ├── 10028 │ ├── 10076 │ ├── 10089 │ ├── 10509 │ ├── 33956 │ ├── 35190 │ ├── 35589 │ ├── 36261 │ ├── 38003 │ ├── 38507 │ ├── 39397 │ └── 40815 ├── raw-texts │ ├── 100 │ ├── 1013 │ ├── 2701 │ ├── 10026 │ ├── 10028 │ ├── 10076 │ ├── 10089 │ ├── 10509 │ ├── 33956 │ ├── 35190 │ ├── 35589 │ ├── 36261 │ ├── 38003 │ ├── 38507 │ ├── 39397 │ └── 40815 ├── sample-metadata │ ├── 0 │ ├── 5 │ ├── 2701 │ ├── 14287 │ ├── 23962 │ ├── 30929 │ ├── 111111 │ ├── 111112 │ └── 111113 └── sample-rdf-files.tar.bz2 ├── test_acquire.py ├── test_domain_model.py ├── test_metadata_cache.py ├── test_query.py ├── test_strip_headers.py └── test_util.py /.coveragerc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/.coveragerc -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/.github/workflows/cd.yml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/daily.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/.github/workflows/daily.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/.gitignore -------------------------------------------------------------------------------- /.noserc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/.noserc -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/README.rst -------------------------------------------------------------------------------- /gutenberg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/__init__.py -------------------------------------------------------------------------------- /gutenberg/_domain_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_domain_model/__init__.py -------------------------------------------------------------------------------- /gutenberg/_domain_model/exceptions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_domain_model/exceptions.py -------------------------------------------------------------------------------- /gutenberg/_domain_model/persistence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_domain_model/persistence.py -------------------------------------------------------------------------------- /gutenberg/_domain_model/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_domain_model/text.py -------------------------------------------------------------------------------- /gutenberg/_domain_model/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_domain_model/types.py -------------------------------------------------------------------------------- /gutenberg/_domain_model/vocabulary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_domain_model/vocabulary.py -------------------------------------------------------------------------------- /gutenberg/_util/__init__.py: -------------------------------------------------------------------------------- 1 | """Package to hold shared/common utility functions.""" 2 | -------------------------------------------------------------------------------- /gutenberg/_util/abc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_util/abc.py -------------------------------------------------------------------------------- /gutenberg/_util/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_util/logging.py -------------------------------------------------------------------------------- /gutenberg/_util/objects.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_util/objects.py -------------------------------------------------------------------------------- /gutenberg/_util/os.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/_util/os.py -------------------------------------------------------------------------------- /gutenberg/acquire/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/acquire/__init__.py -------------------------------------------------------------------------------- /gutenberg/acquire/metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/acquire/metadata.py -------------------------------------------------------------------------------- /gutenberg/acquire/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/acquire/text.py -------------------------------------------------------------------------------- /gutenberg/cleanup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/cleanup/__init__.py -------------------------------------------------------------------------------- /gutenberg/cleanup/strip_headers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/cleanup/strip_headers.py -------------------------------------------------------------------------------- /gutenberg/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/query/__init__.py -------------------------------------------------------------------------------- /gutenberg/query/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/query/api.py -------------------------------------------------------------------------------- /gutenberg/query/extractors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/gutenberg/query/extractors.py -------------------------------------------------------------------------------- /requirements-dev.pip: -------------------------------------------------------------------------------- 1 | coverage 2 | flake8 3 | nose2 4 | responses 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/setup.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests for Gutenberg package.""" 2 | -------------------------------------------------------------------------------- /tests/_sample_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/_sample_metadata.py -------------------------------------------------------------------------------- /tests/_sample_text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/_sample_text.py -------------------------------------------------------------------------------- /tests/_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/_util.py -------------------------------------------------------------------------------- /tests/data/clean-texts/100: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/100 -------------------------------------------------------------------------------- /tests/data/clean-texts/10026: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/10026 -------------------------------------------------------------------------------- /tests/data/clean-texts/10028: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/10028 -------------------------------------------------------------------------------- /tests/data/clean-texts/10076: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/10076 -------------------------------------------------------------------------------- /tests/data/clean-texts/10089: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/10089 -------------------------------------------------------------------------------- /tests/data/clean-texts/1013: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/1013 -------------------------------------------------------------------------------- /tests/data/clean-texts/10509: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/10509 -------------------------------------------------------------------------------- /tests/data/clean-texts/2701: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/2701 -------------------------------------------------------------------------------- /tests/data/clean-texts/33956: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/33956 -------------------------------------------------------------------------------- /tests/data/clean-texts/35190: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/35190 -------------------------------------------------------------------------------- /tests/data/clean-texts/35589: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/35589 -------------------------------------------------------------------------------- /tests/data/clean-texts/36261: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/36261 -------------------------------------------------------------------------------- /tests/data/clean-texts/38003: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/38003 -------------------------------------------------------------------------------- /tests/data/clean-texts/38507: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/38507 -------------------------------------------------------------------------------- /tests/data/clean-texts/39397: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/39397 -------------------------------------------------------------------------------- /tests/data/clean-texts/40815: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/clean-texts/40815 -------------------------------------------------------------------------------- /tests/data/raw-texts/100: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/100 -------------------------------------------------------------------------------- /tests/data/raw-texts/10026: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/10026 -------------------------------------------------------------------------------- /tests/data/raw-texts/10028: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/10028 -------------------------------------------------------------------------------- /tests/data/raw-texts/10076: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/10076 -------------------------------------------------------------------------------- /tests/data/raw-texts/10089: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/10089 -------------------------------------------------------------------------------- /tests/data/raw-texts/1013: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/1013 -------------------------------------------------------------------------------- /tests/data/raw-texts/10509: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/10509 -------------------------------------------------------------------------------- /tests/data/raw-texts/2701: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/2701 -------------------------------------------------------------------------------- /tests/data/raw-texts/33956: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/33956 -------------------------------------------------------------------------------- /tests/data/raw-texts/35190: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/35190 -------------------------------------------------------------------------------- /tests/data/raw-texts/35589: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/35589 -------------------------------------------------------------------------------- /tests/data/raw-texts/36261: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/36261 -------------------------------------------------------------------------------- /tests/data/raw-texts/38003: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/38003 -------------------------------------------------------------------------------- /tests/data/raw-texts/38507: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/38507 -------------------------------------------------------------------------------- /tests/data/raw-texts/39397: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/39397 -------------------------------------------------------------------------------- /tests/data/raw-texts/40815: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/raw-texts/40815 -------------------------------------------------------------------------------- /tests/data/sample-metadata/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/0 -------------------------------------------------------------------------------- /tests/data/sample-metadata/111111: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/111111 -------------------------------------------------------------------------------- /tests/data/sample-metadata/111112: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/111112 -------------------------------------------------------------------------------- /tests/data/sample-metadata/111113: -------------------------------------------------------------------------------- 1 | { 2 | } 3 | -------------------------------------------------------------------------------- /tests/data/sample-metadata/14287: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/14287 -------------------------------------------------------------------------------- /tests/data/sample-metadata/23962: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/23962 -------------------------------------------------------------------------------- /tests/data/sample-metadata/2701: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/2701 -------------------------------------------------------------------------------- /tests/data/sample-metadata/30929: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/30929 -------------------------------------------------------------------------------- /tests/data/sample-metadata/5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-metadata/5 -------------------------------------------------------------------------------- /tests/data/sample-rdf-files.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/data/sample-rdf-files.tar.bz2 -------------------------------------------------------------------------------- /tests/test_acquire.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/test_acquire.py -------------------------------------------------------------------------------- /tests/test_domain_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/test_domain_model.py -------------------------------------------------------------------------------- /tests/test_metadata_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/test_metadata_cache.py -------------------------------------------------------------------------------- /tests/test_query.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/test_query.py -------------------------------------------------------------------------------- /tests/test_strip_headers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/test_strip_headers.py -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/c-w/gutenberg/HEAD/tests/test_util.py --------------------------------------------------------------------------------