├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── HISTORY.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── appveyor.yml ├── docs ├── HISTORY.md ├── MANUAL.md ├── Makefile ├── README.md ├── assets │ └── catalog.xlsx ├── backup.rst ├── conf.py ├── core.rst ├── federation.rst ├── index.rst ├── indicators.rst ├── make.bat ├── modules.rst ├── pydatajson.response_formatters.rst ├── pydatajson.rst ├── pydatajson.validators.rst ├── quick_reference.rst ├── readers.rst ├── reference.rst ├── reporting.rst ├── search.rst ├── validation.rst └── writers.rst ├── fix_github_links.sh ├── pydatajson ├── __init__.py ├── __main__.py ├── backup.py ├── catalog_readme.py ├── ckan_reader.py ├── ckan_utils.py ├── constants.py ├── core.py ├── custom_exceptions.py ├── custom_remote_ckan.py ├── documentation.py ├── download.py ├── federation.py ├── federation_indicators_generator.py ├── fields │ └── fields.json ├── helpers.py ├── indicators.py ├── readers.py ├── reporting.py ├── response_formatters │ ├── __init__.py │ ├── dict_formatter.py │ ├── list_formatter.py │ ├── tables_formatter.py │ └── validation_response_formatter.py ├── schemas │ ├── accrualPeriodicity.json │ ├── catalog.json │ ├── dataset.json │ ├── distribution.json │ ├── field.json │ ├── mixed-types.json │ ├── required_fields_schema.json │ ├── superThemeTaxonomy.json │ └── theme.json ├── search.py ├── status_indicators_generator.py ├── templates │ └── catalog_readme.txt ├── threading_helper.py ├── time_series.py ├── transformation.py ├── validation.py ├── validators │ ├── __init__.py │ ├── consistent_distribution_fields_validator.py │ ├── distribution_download_urls_validator.py │ ├── distribution_urls_validator.py │ ├── landing_pages_validator.py │ ├── simple_validator.py │ ├── theme_ids_not_repeated_validator.py │ └── url_validator.py └── writers.py ├── requirements.txt ├── requirements_2.7.txt ├── requirements_dev.txt ├── samples ├── archivos-tests │ ├── excel-no-validos │ │ └── catalogo-justicia-con-error-datasets.xlsx │ └── excel-validos │ │ ├── catalogo-justicia-06022017.xlsx │ │ ├── catalogo-justicia-56-distribuciones.xlsx │ │ └── catalogo-justicia.xlsx ├── caso-uso-1-pydatajson-xlsx-justicia-valido.ipynb ├── caso-uso-2-pydatajson-xlsx-justicia-no-valido.ipynb ├── caso-uso-3-pydatajson-xlsx-justicia-valido.ipynb ├── caso-uso-pydatajson-indicadores.ipynb ├── caso-uso-pydatajson-xlsx-validando-dev.ipynb ├── catalogo-justicia.json ├── catalogo-series-tiempo.json └── series-tiempo │ └── odg-total-millones-pesos-1960-trimestral.csv ├── setup.cfg ├── setup.py ├── tests ├── TEST_CASES.md ├── __init__.py ├── cassetes │ ├── backup │ │ ├── test_make_catalog_backup_data.yaml │ │ └── test_make_catalog_backup_data_without_file_name.yaml │ ├── ckan_integration │ │ ├── push_dataset │ │ │ ├── tearDown.yaml │ │ │ ├── test_dataset_is_created_correctly.yaml │ │ │ ├── test_dataset_is_updated_correctly.yaml │ │ │ └── test_resources_swapped_correctly.yaml │ │ └── remove_dataset │ │ │ ├── setUp.yaml │ │ │ ├── tearDown.yaml │ │ │ ├── test_empty_query_result.yaml │ │ │ ├── test_remove_dataset_by_filter_out.yaml │ │ │ ├── test_remove_dataset_by_filter_out_and_organization.yaml │ │ │ ├── test_remove_dataset_by_id.yaml │ │ │ ├── test_remove_dataset_by_organization.yaml │ │ │ ├── test_remove_dataset_by_publisher_and_organization.yaml │ │ │ ├── test_remove_dataset_by_title.yaml │ │ │ └── test_with_no_parametres.yaml │ ├── indicators │ │ ├── test_bad_date_indicators.yaml │ │ ├── test_bad_summary.yaml │ │ 
├── test_date_indicators.yaml │ │ ├── test_date_network_indicators_empty_catalog.yaml │ │ ├── test_field_indicators_on_full_catalog.yaml │ │ ├── test_field_indicators_on_min_catalog.yaml │ │ ├── test_format_indicators.yaml │ │ ├── test_generate_catalog_indicators.yaml │ │ ├── test_indicators_invalid_periodicity.yaml │ │ ├── test_indicators_missing_dataset.yaml │ │ ├── test_indicators_missing_periodicity.yaml │ │ ├── test_last_updated_indicator_missing_issued_field.yaml │ │ ├── test_license_indicators.yaml │ │ ├── test_network_indicators.yaml │ │ ├── test_network_license_indicators.yaml │ │ ├── test_network_type_indicators.yaml │ │ ├── test_no_licenses_indicators.yaml │ │ ├── test_no_title_nor_identifier_catalog.yaml │ │ ├── test_types_indicators.yaml │ │ └── test_valid_and_unreachable_catalogs.yaml │ ├── profiling │ │ └── main.yaml │ ├── readers_and_writers │ │ └── test_read_remote_xlsx_catalog.yaml │ ├── test_generate_catalog_readme.yaml │ ├── test_generate_datasets_report.yaml │ ├── test_generate_readme.yaml │ ├── test_generate_readme_with_null_indicators.yaml │ ├── test_readme_file_write.yaml │ ├── test_validate_bad_remote_datajson.yaml │ └── test_validate_bad_remote_datajson2.yaml ├── context.py ├── profiling.py ├── results │ ├── catalog_readme.md │ ├── catalogo_justicia.json │ ├── datasets.json │ ├── datasets_filter_in.json │ ├── datasets_filter_out.json │ ├── datasets_meta_field.json │ ├── distributions.json │ ├── distributions_filter_in.json │ ├── distributions_filter_out.json │ ├── distributions_meta_field.json │ ├── distributions_only_time_series.json │ ├── empty_optional_string.json │ ├── expected_datasets_report.csv │ ├── expected_harvester_config.csv │ ├── fields.json │ ├── fields_filter_in.json │ ├── fields_filter_out.json │ ├── fields_meta_field.json │ ├── full_data.json │ ├── get_dataset.json │ ├── get_distribution.json │ ├── get_distribution_of_dataset.json │ ├── get_field.json │ ├── get_theme.json │ ├── invalid_dataset_theme_type.json │ ├── invalid_field_description_type.json │ ├── invalid_multiple_fields_type.json │ ├── minimum_data.json │ ├── mismatched_downloadURL_and_format.json │ ├── mismatched_fileName_and_format.json │ ├── multiple_missing_descriptions.json │ ├── null_dataset_theme.json │ ├── null_field_description.json │ ├── null_indicators_readme.md │ ├── several_assorted_errors.json │ ├── time_series.json │ ├── write_table.csv │ └── write_table.xlsx ├── samples │ ├── border_cases_ditribution_filenames.json │ ├── catalogo-justicia-missing-distribution-identifier.xlsx │ ├── catalogo_justicia.json │ ├── catalogo_justicia.xlsx │ ├── catalogo_justicia_extra_columns.xlsx │ ├── catalogo_justicia_no_xlsx_suffix │ ├── catalogo_justicia_removed.json │ ├── catalogo_justicia_removed_publisher.json │ ├── catalogo_justicia_with_defaults.json │ ├── catalogo_justicia_with_defaults.xlsx │ ├── central.json │ ├── empty_mandatory_string.json │ ├── empty_optional_string.json │ ├── empty_super_theme_list.json │ ├── example_time_series.json │ ├── federated_1.json │ ├── federated_2.json │ ├── full_data.json │ ├── full_data_no_json_suffix │ ├── integration_full_sample_data.json │ ├── invalid_catalog_empty.json │ ├── invalid_catalog_publisher_type.json │ ├── invalid_dataset_theme_type.json │ ├── invalid_dataset_type.json │ ├── invalid_ditribution_filenames.json │ ├── invalid_field_description_type.json │ ├── invalid_keywords.json │ ├── invalid_multiple_emails.json │ ├── invalid_multiple_fields_type.json │ ├── invalid_publisher_mbox_format.json │ ├── invalid_themeTaxonomy.json │ ├── 
lists_extra_commas.xlsx │ ├── malformed_accrualperiodicity.json │ ├── malformed_date.json │ ├── malformed_datetime.json │ ├── malformed_datetime2.json │ ├── malformed_email.json │ ├── malformed_temporal.json │ ├── malformed_temporal2.json │ ├── malformed_uri.json │ ├── minimum_data.json │ ├── mismatched_downloadURL_and_format.json │ ├── mismatched_fileName_and_format.json │ ├── missing_catalog_dataset.json │ ├── missing_catalog_description.json │ ├── missing_catalog_title.json │ ├── missing_dataset.json │ ├── missing_dataset_description.json │ ├── missing_dataset_title.json │ ├── missing_distribution_title.json │ ├── missing_periodicity.json │ ├── multiple_missing_descriptions.json │ ├── null_catalog_publisher.json │ ├── null_dataset_theme.json │ ├── null_field_description.json │ ├── numeric_distribution_identifier.json │ ├── organization_tree.json │ ├── processed_datasets_report.csv │ ├── prueba_sheet_to_table.xlsx │ ├── read_table.csv │ ├── read_table.xlsx │ ├── repeated_downloadURL.json │ ├── resource_sample.csv │ ├── several_assorted_errors.json │ ├── several_datasets.json │ ├── several_datasets_for_harvest.json │ ├── several_datasets_with_licenses.json │ ├── several_datasets_with_types.json │ ├── time_series_data.json │ ├── too_long_field_title.json │ └── valid_whitespace_emails.json ├── support │ ├── __init__.py │ ├── constants.py │ ├── decorators.py │ ├── factories │ │ ├── __init__.py │ │ ├── catalog_errors.py │ │ ├── core_files.py │ │ ├── dataset_errors.py │ │ ├── distribution_errors.py │ │ ├── other_errors.py │ │ └── xlsx.py │ └── utils.py ├── test_backup.py ├── test_catalog_readme.py ├── test_ckan_integration.py ├── test_ckan_reader.py ├── test_ckan_utils.py ├── test_core.py ├── test_documentation.py ├── test_federation.py ├── test_helpers.py ├── test_indicators.py ├── test_readers_and_writers.py ├── test_search.py ├── test_status_indicators_generator.py ├── test_threading.py ├── test_time_series.py ├── test_urls_validation.py ├── test_validation.py ├── test_validators.py └── xl_methods.py ├── tox.ini └── travis_pypi_setup.py /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Código de conducta 2 | 3 | ## Introducción 4 | **Gracias por ayudar**. Este código de conducta está basado en el **respeto a los miembros de la comunidad de Datos Argentina.** 5 | 6 | ## Somos 7 | **Cuidadosos con las palabras que elegimos.** Somos adultos y profesionales. Evitamos el lenguaje vulgar y no aceptamos de ninguna manera: 8 | 9 | * Trato violento hacia alguna persona o institución. 10 | * Posteos violentos o con matices sexuales. 11 | * Posteos sobre información personal. 12 | * Chistes o insultos personales, especialmente racistas o sexistas. 13 | * Acoso. De ningún tipo. 14 | 15 | **Respetuosos.** Valoramos el tiempo, el esfuerzo y las opiniones de los demás. Cuando no coinciden con las nuestras, intentamos entender su punto de vista. 16 | 17 | **Pacientes.** Como parte de la Administración Pública Nacional, los tiempos de respuesta de Datos Argentina están, en muchas ocasiones, atados a la enorme cantidad de compromisos que involucra el trabajo para toda la Nación. Por eso, somos pacientes con las respuestas y los tiempos de aplicación de las sugerencias o contribuciones. 18 | 19 | **Pedimos ayuda.** Si no entendemos o no sabemos algo, lo preguntamos. 20 | 21 | **Argumentamos con sustento.** Valoramos los argumentos y las decisiones basados en evidencia concreta. 
22 | 23 | ## Participación 24 | Todo lo que se hace para la Nación es de todos. Alentamos la participación para construir políticas públicas de calidad. 25 | 26 | ## Contacto 27 | Envianos tus comentarios o consultas a datosargentina@jefatura.gob.ar. 28 | 29 | ## Créditos 30 | Para escribir nuestro código de conducta, nos basamos en: 31 | 32 | * [Twitter](https://github.com/twitter/code-of-conduct/blob/master/code-of-conduct.md) 33 | * [Django](https://www.djangoproject.com/conduct/) 34 | * [Python](https://www.python.org/psf/codeofconduct/) 35 | * [Ubuntu](https://www.ubuntu.com/about/about-ubuntu/conduct) 36 | * [TODO group](http://todogroup.org/opencodeofconduct/) 37 | * [Hackathon Hackers](https://github.com/HackathonHackers/code-of-conduct) 38 | * [Movimiento NCoC](https://github.com/domgetter/NCoC) 39 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribuciones 2 | Te invitamos a contribuir tanto ideas como código. Para hacerlo, tené en cuenta: 3 | 4 | * Nuestro [código de conducta](https://github.com/datosgobar/estandares/blob/master/github/CODE_OF_CONDUCT.md). 5 | * Nuestros [estándares de código (en desarrollo)](https://github.com/datosgobar/estandares/blob/master/codigo). 6 | 7 | ## Restricciones del Estado Nacional 8 | El equipo de Datos Argentina forma parte de la Administración Pública Nacional. Por eso, todas sus interfaces respetan: 9 | * El español como idioma oficial. 10 | * Los estándares definidos por el [Proyecto Poncho](https://argob.github.io/poncho/) para diseño. 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ¡Hola! Gracias por colaborar con este proyecto. 2 | 3 | Antes de crear el issue, por favor, asegurate de que nadie haya creado el issue que estás reportando. 4 | 5 | Si este issue es para pedir nuevas o mejores funcionalidades, contanos: 6 | 7 | * ¿Por qué querés esta funcionalidad? 8 | * ¿Cómo esperás que funcione? 9 | 10 | Si querés reportar un bug, especificanos: 11 | 12 | * ¿Cuál es el comportamiento que esperabas y cuál fue el comportamiento recibido? 13 | * ¿Cuáles son los pasos para reproducir el bug? 14 | * Detalles del contexto: ¿qué sistema operativo, qué versión de navegador estás usando, por ejemplo? 15 | * ¿Tenés algún stack trace o captura de pantalla? Adjuntalo. 16 | 17 | Si tenés otra consulta, dejanos una descripción bien específica, para que podamos ayudarte. 18 | 19 | Para preguntas de carácter privado, podés escribirnos a datosargentina@jefatura.gob.ar. 20 | 21 | ¡Muchas gracias por colaborar! 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | *.egg-info* 25 | .installed.cfg 26 | *.egg 27 | *.pyc 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | docs/_static/ 60 | docs/_templates/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | # pyenv python configuration file 66 | .python-version 67 | 68 | # Vim swap files 69 | *.swp 70 | 71 | # Archivos de desarrollo 72 | allresults.py 73 | TODO.md 74 | 75 | # Archivos temporales de prueba 76 | tests/temp/* 77 | tests/results/catalog/* 78 | .ipynb_checkpoints 79 | samples/archivos-generados/* 80 | .DS_Store 81 | .idea/ 82 | profiling_test.png 83 | backup 84 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file was autogenerated and will overwrite each time you run travis_pypi_setup.py 2 | 3 | after_success: 4 | - coveralls 5 | 6 | install: 7 | - pip install -r requirements.txt 8 | - pip install -r requirements_dev.txt 9 | - pip install python-coveralls 10 | - pip install coveralls 11 | - mkdir tests/temp 12 | language: python 13 | notifications: 14 | slack: 15 | on_failure: always 16 | on_success: change 17 | secure: GwU9+hCiBlk1FL3YDeKjpnTVxx7jcdrS+MCEbj61jrVRdPfkWs/a6v3qNBXJNU2/qN2lJvrKio0X9CdjbQksv5KtUUr08r/8tk4r8S/kB+R2xyoMjvHtJd6frYxj+d0xwhmNtNbo8jacYDrWXrQV4GQPtKAegh8+OdEvmuMsX5vFMT83mDVre/pD8dz8jFHzwE8RjBn9QG513/EyaqTHq/uDSPCO+rtAb+FLfDCa5adPHl36ZQB2DgK/1qT3lFLkJW7gLQm5bsYB3vjAO5tOR2B6OSz/Y4Kzo9tmB4Y6i4kfAZpGJCvKJj9wSf02hkB6mKJuheveHTI4m/lYgyb5pIoZaPQVQQ4zdidjPdYMB1P+6QYTdKdiiJEQ30spbmEHCEcD0YQwHlKZ+VJMFdME+gGlxz+uaEJygWL4nM2W6UDthALRgKFkuvdMJVk0qZh08q+sK6CGH9AOHQXlvLn4slRFbgONjJJQ/b8affpnnirtPjkCK0EOHZYg7q1HHu8O/nPUEGtyFVGbGSOKSsQyCDb49UOCFQOeBBk4arItjAlZGjgvao2Zdm+CGvBARVWYkw4IELOeAsKaHc3AbYh32QoPZNlnV/xykqNTDdw0ef7DrdS1GXDrLb1G3hkkzzphkSNe7ajAJDOvTAN3hgl6MQwSKDQXj40HkA0JUWWHw7E= 18 | os: 19 | - linux 20 | 21 | sudo: false 22 | 23 | stages: 24 | - name: test 25 | - name: deploy 26 | if: tag IS present 27 | 28 | script: 29 | - nosetests --with-coverage --cover-package=pydatajson --cover-min-percentage=80 30 | - pycodestyle pydatajson tests 31 | 32 | jobs: 33 | include: 34 | - python: '2.7' 35 | - python: '3.6' 36 | - stage: deploy 37 | install: pip install coveralls 38 | script: skip 39 | deploy: 40 | repo: datosgobar/pydatajson 41 | distributions: sdist bdist_wheel 42 | password: 43 | secure: cWWqxX4mC/PJ0WqNCuCdnJcgfDzMjJXyi5HGWqJTSkohMfIljChXDBS/GlFUKSAXf8aeyFntQS3F1EJQRPYIVoD64JKG1IUMiTOfleHgRPqrvY1vMquGsnYj8ZK/bqTC6JJvVD0aXEzKe6TK9g83G24FY8qdVecb9VlMsZc2cQ+kdA0lz9aoQtYETYFp6UdbbvO8Zk473oGfgqzjTb1GqkZMha2gn5F1GrI23wrnL66fMZHqXji5S8ryhzpVJW4PuM36M7onxZ/xcZLb/aptXYa33B947Bxf9VvmbdxkBpL+As5CbeSVSpMz66c/wHrspoWnC6GEPdOVwJbYmHxjjjC8Me/qwbUYHsPPCM1hr5qRJiEcVMltIu/YrDJAy0VbmQkcR1xuZMkzTtVf7c9fEhYDni6oyflLEpYHiKvnyMJF5oscdv/Splf6WYvaixwPR43JCjZZIc/0KO/u//dhyjDwEfgPRcXl0V1aeGu/jBqTsRS6qextNjmx5Bb9u/oP6zszvTFE9PVsVeg3GryxF1Db5FuzM2jkDGHgR5MF2PPcG7ZIzSdOTTl1LZbAnSkWXdzMx5pwxqYBBw6sn9lQSGHRequmKHZerUU/L0zaGrF3IT66/w1q5IXOpLmituivtog86TFRWPfGsQJZtptpABTGz0rQ9jchcHmSJ1sGdMI= 44 | provider: pypi 45 | user: datosgobar 46 | on: 47 | tags: true 48 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) 2016, Datos Argentina 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include HISTORY.md 2 | include LICENSE 3 | include README.md 4 | include requirements.txt 5 | include requirements_dev.txt 6 | include requirements_2.7.txt 7 | 8 | recursive-include tests * 9 | recursive-include pydatajson * 10 | recursive-exclude * __pycache__ 11 | recursive-exclude * *.py[co] 12 | 13 | recursive-include docs *.rst *.md conf.py Makefile make.bat *.jpg *.png *.gif 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help dist 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . 
-name '__pycache__' -exec rm -fr {} + 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | rm -fr .tox/ 47 | rm -f .coverage 48 | rm -fr htmlcov/ 49 | 50 | lint: ## check style with pylint 51 | pylint pydatajson 52 | 53 | test: ## run tests quickly with nose 54 | nosetests 55 | 56 | test-all: ## run tests on every Python version with tox 57 | tox 58 | 59 | coverage: ## check code coverage quickly with the default Python 60 | 61 | coverage run --source pydatajson setup.py test 62 | 63 | coverage report -m 64 | coverage html 65 | $(BROWSER) htmlcov/index.html 66 | 67 | # TEST 68 | profiling_test: 69 | python -m tests.profiling 70 | 71 | # DOCUMENTACIÓN Y RELEASES 72 | docs: ## generate Sphinx HTML documentation, including API docs 73 | cp README.md docs/README.md 74 | cp HISTORY.md docs/HISTORY.md 75 | rm -f docs/pydatajson.rst 76 | rm -f docs/modules.rst 77 | sphinx-apidoc -o docs/ pydatajson 78 | $(MAKE) -C docs clean 79 | $(MAKE) -C docs html 80 | $(BROWSER) docs/_build/html/index.html 81 | 82 | servedocs: docs ## compile the docs watching for changes 83 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 84 | 85 | release: dist ## package and upload a release 86 | twine upload dist/* 87 | 88 | dist: clean ## builds source and wheel package 89 | python setup.py sdist 90 | python setup.py bdist_wheel 91 | ls -l dist 92 | 93 | install: clean ## install the package to the active Python's site-packages 94 | python setup.py install 95 | 96 | pypi: dist ## register the package to PyPi get travis ready to deploy to pip 97 | twine upload dist/* 98 | python travis_pypi_setup.py 99 | 100 | doctoc: ## generate table of contents, doctoc command line tool required 101 | ## https://github.com/thlorenz/doctoc 102 | doctoc --github --title " " README.md 103 | bash fix_github_links.sh README.md 104 | doctoc --github --title " " docs/MANUAL.md 105 | bash fix_github_links.sh docs/MANUAL.md 106 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | build: false 2 | 3 | environment: 4 | matrix: 5 | - PYTHON: "C:\\Python27" 6 | PYTHON_VERSION: "2.7.8" 7 | PYTHON_ARCH: "64" 8 | 9 | init: 10 | - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%" 11 | 12 | install: 13 | - "%PYTHON%/Scripts/pip.exe install -r requirements.txt" 14 | - "%PYTHON%/Scripts/pip.exe install -r requirements_dev.txt" 15 | - "%PYTHON%/Scripts/pip.exe install python-coveralls" 16 | - "%PYTHON%/Scripts/pip.exe install coveralls" 17 | - "mkdir tests\\temp" 18 | 19 | test_script: 20 | - "%PYTHON%/Scripts/nosetests" 21 | -------------------------------------------------------------------------------- /docs/assets/catalog.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/docs/assets/catalog.xlsx -------------------------------------------------------------------------------- /docs/backup.rst: -------------------------------------------------------------------------------- 1 | Backup 2 | ====== 3 | 4 | .. automodule:: pydatajson.backup 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/core.rst: -------------------------------------------------------------------------------- 1 | DataJson 2 | ======== 3 | 4 | .. 
automodule:: pydatajson.core 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/federation.rst: -------------------------------------------------------------------------------- 1 | Federación 2 | ========== 3 | 4 | .. automodule:: pydatajson.federation 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | pydatajson 2 | ========== 3 | 4 | Documentación de pydatajson: librería con funcionalidades para gestionar los metadatos de catálogos de datos abiertos que cumplan con el Perfil Nacional de Metadatos. Pydatajson es parte del `Paquete de Apertura de Datos `_. 5 | 6 | Podés colaborar `cargando un nuevo issue `_, o `respondiendo a un issue ya existente `_. Lo mismo te invitamos a hacer en el `Paquete de Apertura de Datos `_. 7 | 8 | 9 | Indice 10 | ------ 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | README.md 16 | MANUAL.md 17 | quick_reference.rst 18 | 19 | Referencia 20 | ---------- 21 | 22 | .. toctree:: 23 | :maxdepth: 1 24 | 25 | reference.rst 26 | 27 | * :ref:`modindex` 28 | * :ref:`genindex` 29 | 30 | Versiones 31 | --------- 32 | 33 | .. toctree:: 34 | :maxdepth: 1 35 | 36 | HISTORY.md 37 | -------------------------------------------------------------------------------- /docs/indicators.rst: -------------------------------------------------------------------------------- 1 | Indicadores 2 | =========== 3 | 4 | .. automodule:: pydatajson.indicators 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | pydatajson 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | pydatajson 8 | -------------------------------------------------------------------------------- /docs/pydatajson.response_formatters.rst: -------------------------------------------------------------------------------- 1 | pydatajson.response\_formatters package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | pydatajson.response\_formatters.dict\_formatter module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: pydatajson.response_formatters.dict_formatter 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | pydatajson.response\_formatters.list\_formatter module 16 | ------------------------------------------------------ 17 | 18 | .. automodule:: pydatajson.response_formatters.list_formatter 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pydatajson.response\_formatters.tables\_formatter module 24 | -------------------------------------------------------- 25 | 26 | .. automodule:: pydatajson.response_formatters.tables_formatter 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pydatajson.response\_formatters.validation\_response\_formatter module 32 | ---------------------------------------------------------------------- 33 | 34 | .. automodule:: pydatajson.response_formatters.validation_response_formatter 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. 
automodule:: pydatajson.response_formatters 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/pydatajson.rst: -------------------------------------------------------------------------------- 1 | pydatajson package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | pydatajson.response_formatters 10 | pydatajson.validators 11 | 12 | Submodules 13 | ---------- 14 | 15 | pydatajson.backup module 16 | ------------------------ 17 | 18 | .. automodule:: pydatajson.backup 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pydatajson.catalog\_readme module 24 | --------------------------------- 25 | 26 | .. automodule:: pydatajson.catalog_readme 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pydatajson.ckan\_reader module 32 | ------------------------------ 33 | 34 | .. automodule:: pydatajson.ckan_reader 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | pydatajson.ckan\_utils module 40 | ----------------------------- 41 | 42 | .. automodule:: pydatajson.ckan_utils 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | pydatajson.constants module 48 | --------------------------- 49 | 50 | .. automodule:: pydatajson.constants 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | pydatajson.core module 56 | ---------------------- 57 | 58 | .. automodule:: pydatajson.core 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | pydatajson.custom\_exceptions module 64 | ------------------------------------ 65 | 66 | .. automodule:: pydatajson.custom_exceptions 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | pydatajson.custom\_remote\_ckan module 72 | -------------------------------------- 73 | 74 | .. automodule:: pydatajson.custom_remote_ckan 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | pydatajson.documentation module 80 | ------------------------------- 81 | 82 | .. automodule:: pydatajson.documentation 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | pydatajson.download module 88 | -------------------------- 89 | 90 | .. automodule:: pydatajson.download 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | pydatajson.federation module 96 | ---------------------------- 97 | 98 | .. automodule:: pydatajson.federation 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | pydatajson.federation\_indicators\_generator module 104 | --------------------------------------------------- 105 | 106 | .. automodule:: pydatajson.federation_indicators_generator 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | pydatajson.helpers module 112 | ------------------------- 113 | 114 | .. automodule:: pydatajson.helpers 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | pydatajson.indicators module 120 | ---------------------------- 121 | 122 | .. automodule:: pydatajson.indicators 123 | :members: 124 | :undoc-members: 125 | :show-inheritance: 126 | 127 | pydatajson.readers module 128 | ------------------------- 129 | 130 | .. automodule:: pydatajson.readers 131 | :members: 132 | :undoc-members: 133 | :show-inheritance: 134 | 135 | pydatajson.reporting module 136 | --------------------------- 137 | 138 | .. 
automodule:: pydatajson.reporting 139 | :members: 140 | :undoc-members: 141 | :show-inheritance: 142 | 143 | pydatajson.search module 144 | ------------------------ 145 | 146 | .. automodule:: pydatajson.search 147 | :members: 148 | :undoc-members: 149 | :show-inheritance: 150 | 151 | pydatajson.status\_indicators\_generator module 152 | ----------------------------------------------- 153 | 154 | .. automodule:: pydatajson.status_indicators_generator 155 | :members: 156 | :undoc-members: 157 | :show-inheritance: 158 | 159 | pydatajson.threading\_helper module 160 | ----------------------------------- 161 | 162 | .. automodule:: pydatajson.threading_helper 163 | :members: 164 | :undoc-members: 165 | :show-inheritance: 166 | 167 | pydatajson.time\_series module 168 | ------------------------------ 169 | 170 | .. automodule:: pydatajson.time_series 171 | :members: 172 | :undoc-members: 173 | :show-inheritance: 174 | 175 | pydatajson.transformation module 176 | -------------------------------- 177 | 178 | .. automodule:: pydatajson.transformation 179 | :members: 180 | :undoc-members: 181 | :show-inheritance: 182 | 183 | pydatajson.validation module 184 | ---------------------------- 185 | 186 | .. automodule:: pydatajson.validation 187 | :members: 188 | :undoc-members: 189 | :show-inheritance: 190 | 191 | pydatajson.writers module 192 | ------------------------- 193 | 194 | .. automodule:: pydatajson.writers 195 | :members: 196 | :undoc-members: 197 | :show-inheritance: 198 | 199 | 200 | Module contents 201 | --------------- 202 | 203 | .. automodule:: pydatajson 204 | :members: 205 | :undoc-members: 206 | :show-inheritance: 207 | -------------------------------------------------------------------------------- /docs/pydatajson.validators.rst: -------------------------------------------------------------------------------- 1 | pydatajson.validators package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | pydatajson.validators.consistent\_distribution\_fields\_validator module 8 | ------------------------------------------------------------------------ 9 | 10 | .. automodule:: pydatajson.validators.consistent_distribution_fields_validator 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | pydatajson.validators.distribution\_download\_urls\_validator module 16 | -------------------------------------------------------------------- 17 | 18 | .. automodule:: pydatajson.validators.distribution_download_urls_validator 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pydatajson.validators.distribution\_urls\_validator module 24 | ---------------------------------------------------------- 25 | 26 | .. automodule:: pydatajson.validators.distribution_urls_validator 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pydatajson.validators.landing\_pages\_validator module 32 | ------------------------------------------------------ 33 | 34 | .. automodule:: pydatajson.validators.landing_pages_validator 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | pydatajson.validators.simple\_validator module 40 | ---------------------------------------------- 41 | 42 | .. automodule:: pydatajson.validators.simple_validator 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | pydatajson.validators.theme\_ids\_not\_repeated\_validator module 48 | ----------------------------------------------------------------- 49 | 50 | .. 
automodule:: pydatajson.validators.theme_ids_not_repeated_validator 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | pydatajson.validators.url\_validator module 56 | ------------------------------------------- 57 | 58 | .. automodule:: pydatajson.validators.url_validator 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: pydatajson.validators 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /docs/quick_reference.rst: -------------------------------------------------------------------------------- 1 | Referencia rápida 2 | ================= 3 | 4 | Lectura 5 | ------- 6 | .. autoclass:: pydatajson.core.DataJson 7 | :members: __init__ 8 | 9 | Escritura 10 | --------- 11 | .. autoclass:: pydatajson.core.DataJson 12 | :members: to_json, to_xlsx 13 | 14 | Validación 15 | ---------- 16 | .. autoclass:: pydatajson.core.DataJson 17 | :members: is_valid_catalog, validate_catalog 18 | 19 | Búsqueda 20 | -------- 21 | .. autoclass:: pydatajson.core.DataJson 22 | :members: get_datasets, get_dataset, get_fields, get_field 23 | 24 | Indicadores 25 | ----------- 26 | .. autoclass:: pydatajson.core.DataJson 27 | :members: generate_indicators 28 | 29 | Reportes 30 | -------- 31 | .. autoclass:: pydatajson.core.DataJson 32 | :members: generate_datasets_summary, generate_catalog_readme 33 | 34 | Federación 35 | ---------- 36 | .. autoclass:: pydatajson.core.DataJson 37 | :members: harvest_dataset_to_ckan, restore_dataset_to_ckan, harvest_catalog_to_ckan, restore_catalog_to_ckan, push_theme_to_ckan, push_new_themes 38 | 39 | .. autofunction:: pydatajson.federation.remove_dataset_from_ckan 40 | -------------------------------------------------------------------------------- /docs/readers.rst: -------------------------------------------------------------------------------- 1 | Lectura 2 | ======= 3 | 4 | .. automodule:: pydatajson.readers 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | Referencia completa 2 | =================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | core.rst 8 | readers.rst 9 | writers.rst 10 | validation.rst 11 | search.rst 12 | indicators.rst 13 | reporting.rst 14 | federation.rst 15 | backup.rst 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/reporting.rst: -------------------------------------------------------------------------------- 1 | Reportes 2 | ======== 3 | 4 | .. automodule:: pydatajson.reporting 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/search.rst: -------------------------------------------------------------------------------- 1 | Búsqueda 2 | ======== 3 | 4 | .. automodule:: pydatajson.search 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/validation.rst: -------------------------------------------------------------------------------- 1 | Validación 2 | ========== 3 | 4 | ..
automodule:: pydatajson.validation 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/writers.rst: -------------------------------------------------------------------------------- 1 | Escritura 2 | ========= 3 | 4 | .. automodule:: pydatajson.writers 5 | :members: 6 | -------------------------------------------------------------------------------- /fix_github_links.sh: -------------------------------------------------------------------------------- 1 | sed -i.bu 's/%C3%A1/a/' $1 2 | sed -i.bu 's/%C3%A9/e/' $1 3 | sed -i.bu 's/%C3%AD/i/' $1 4 | sed -i.bu 's/%C3%B3/o/' $1 5 | sed -i.bu 's/%C3%BA/u/' $1 6 | sed -i.bu 's/%C2%BF//' $1 7 | sed -i.bu 's/---/-/' $1 8 | rm $1.bu 9 | -------------------------------------------------------------------------------- /pydatajson/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Módulo pydatajson 4 | Conjunto de herramientas para validar y manipular la información presente en 5 | el archivo `data.json` de un Portal de Datos 6 | """ 7 | 8 | from __future__ import absolute_import 9 | 10 | import logging 11 | 12 | from . import helpers 13 | from .core import DataJson 14 | from .helpers import parse_repeating_time_interval 15 | 16 | __author__ = """Datos Argentina""" 17 | __email__ = 'datosargentina@jefatura.gob.ar' 18 | __version__ = '0.4.67' 19 | 20 | """ 21 | Logger base para librería pydatajson 22 | https://docs.python.org/2/howto/logging.html#configuring-logging-for-a-library 23 | """ 24 | logger = logging.getLogger('pydatajson') 25 | logger.addHandler(logging.NullHandler()) 26 | -------------------------------------------------------------------------------- /pydatajson/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo de entrada para la interfaz de línea de comandos 5 | 6 | Todos los módulos de pydatajson se pueden llamar por línea de comandos siempre 7 | que tengan un método main() definido en el módulo, que recibe argumentos y 8 | realiza acciones relacionadas con el core de su funcionalidad. 9 | 10 | Example: 11 | pydatajson backup http://infra.datos.gob.ar/catalog/modernizacion/data.json 12 | """ 13 | 14 | from __future__ import unicode_literals 15 | from __future__ import print_function 16 | from __future__ import with_statement 17 | import os 18 | import sys 19 | import importlib 20 | 21 | 22 | def main(): 23 | module_name = sys.argv[1] 24 | module = importlib.import_module("." 
+ module_name, "pydatajson") 25 | args = sys.argv[2:] if len(sys.argv) > 2 else [] 26 | module.main(*args) 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /pydatajson/catalog_readme.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | from __future__ import with_statement 7 | 8 | import io 9 | import logging 10 | import os 11 | 12 | from six import string_types 13 | 14 | from pydatajson.helpers import traverse_dict 15 | from pydatajson.indicators import generate_catalogs_indicators 16 | from pydatajson.readers import read_catalog 17 | from pydatajson.validation import validate_catalog 18 | 19 | logger = logging.getLogger('pydatajson') 20 | 21 | CENTRAL_CATALOG = "http://datos.gob.ar/data.json" 22 | ABSOLUTE_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) 23 | TEMPLATES_PATH = os.path.join(ABSOLUTE_PROJECT_DIR, "templates") 24 | 25 | 26 | def generate_catalog_readme(_datajson, catalog, 27 | export_path=None, verify_ssl=True): 28 | """Este método está para mantener retrocompatibilidad con versiones 29 | anteriores. Se ignora el argumento _datajson.""" 30 | return generate_readme(catalog, export_path, verify_ssl=verify_ssl) 31 | 32 | 33 | def generate_readme(catalog, export_path=None, verify_ssl=True): 34 | """Genera una descripción textual en formato Markdown sobre los 35 | metadatos generales de un catálogo (título, editor, fecha de 36 | publicación, etcétera), junto con: 37 | - estado de los metadatos a nivel catálogo, 38 | - estado global de los metadatos, 39 | - cantidad de datasets federados y no federados, 40 | - detalles de los datasets no federados, 41 | - cantidad de datasets y distribuciones incluidas 42 | 43 | Es utilizada por la rutina diaria de `libreria-catalogos` para generar 44 | un README con información básica sobre los catálogos mantenidos. 45 | 46 | Args: 47 | catalog (str o dict): Path a un catálogo en cualquier formato, 48 | JSON, XLSX, o diccionario de python. 49 | export_path (str): Path donde exportar el texto generado (en 50 | formato Markdown). Si se especifica, el método no devolverá 51 | nada. 52 | verify_ssl (bool): Verificar certificados SSL (default: True). 53 | Returns: 54 | str: Texto de la descripción generada.
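Example: Esbozo ilustrativo de uso (la ruta "data.json" es hipotética): >>> from pydatajson.catalog_readme import generate_readme >>> readme = generate_readme("data.json") # doctest: +SKIP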
55 | """ 56 | # Si se paso una ruta, guardarla 57 | if isinstance(catalog, string_types): 58 | catalog_path_or_url = catalog 59 | else: 60 | catalog_path_or_url = None 61 | 62 | catalog = read_catalog(catalog) 63 | validation = validate_catalog(catalog, verify_ssl=verify_ssl) 64 | # Solo necesito indicadores para un catalogo 65 | indicators = generate_catalogs_indicators( 66 | catalog, CENTRAL_CATALOG)[0][0] 67 | 68 | with io.open(os.path.join(TEMPLATES_PATH, 'catalog_readme.txt'), 'r', 69 | encoding='utf-8') as template_file: 70 | readme_template = template_file.read() 71 | 72 | not_federated_datasets_list = "\n".join([ 73 | "- [{}]({})".format(dataset[0], dataset[1]) 74 | for dataset in indicators["datasets_no_federados"] 75 | ]) 76 | federated_removed_datasets_list = "\n".join([ 77 | "- [{}]({})".format(dataset[0], dataset[1]) 78 | for dataset in indicators["datasets_federados_eliminados"] 79 | ]) 80 | federated_datasets_list = "\n".join([ 81 | "- [{}]({})".format(dataset[0], dataset[1]) 82 | for dataset in indicators["datasets_federados"] 83 | ]) 84 | non_federated_pct = 1.0 - indicators["datasets_federados_pct"] if \ 85 | indicators["datasets_federados_pct"] is not None else \ 86 | indicators["datasets_federados_pct"] 87 | content = { 88 | "title": catalog.get("title"), 89 | "publisher_name": traverse_dict( 90 | catalog, ["publisher", "name"]), 91 | "publisher_mbox": traverse_dict( 92 | catalog, ["publisher", "mbox"]), 93 | "catalog_path_or_url": catalog_path_or_url, 94 | "description": catalog.get("description"), 95 | "global_status": validation["status"], 96 | "catalog_status": validation["error"]["catalog"]["status"], 97 | "no_of_datasets": len(catalog["dataset"]), 98 | "no_of_distributions": sum([len(dataset["distribution"]) for 99 | dataset in catalog["dataset"]]), 100 | "federated_datasets": indicators["datasets_federados_cant"], 101 | "not_federated_datasets": indicators["datasets_no_federados_cant"], 102 | "not_federated_datasets_pct": non_federated_pct, 103 | "not_federated_datasets_list": not_federated_datasets_list, 104 | "federated_removed_datasets_list": federated_removed_datasets_list, 105 | "federated_datasets_list": federated_datasets_list, 106 | } 107 | 108 | catalog_readme = readme_template.format(**content) 109 | 110 | if export_path: 111 | with io.open(export_path, 'w+', encoding='utf-8') as target: 112 | target.write(catalog_readme) 113 | else: 114 | return catalog_readme 115 | -------------------------------------------------------------------------------- /pydatajson/constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | REQUESTS_TIMEOUT = 30 4 | DEFAULT_TIMEZONE = "America/Buenos_Aires" 5 | 6 | INVALID_STATUS_CODES_REGEX = ["^4[0-9]+$", "^5[0-9]+$"] 7 | EXCEPTION_STATUS_CODES = [429] 8 | 9 | DEFAULT_CHECK_TIMEOUT = 1 10 | -------------------------------------------------------------------------------- /pydatajson/custom_remote_ckan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ckanapi import RemoteCKAN 4 | 5 | from pydatajson.constants import REQUESTS_TIMEOUT 6 | 7 | 8 | class CustomRemoteCKAN(RemoteCKAN): 9 | 10 | def __init__(self, address, apikey=None, user_agent=None, get_only=False, 11 | verify_ssl=False, requests_timeout=REQUESTS_TIMEOUT): 12 | self.verify_ssl = verify_ssl 13 | self.requests_timeout = requests_timeout 14 | super(CustomRemoteCKAN, self).__init__(address, apikey, 15 | user_agent, 
get_only) 16 | 17 | def call_action(self, action, data_dict=None, context=None, apikey=None, 18 | files=None, requests_kwargs=None): 19 | requests_kwargs = requests_kwargs or {} 20 | requests_kwargs.setdefault('verify', self.verify_ssl) 21 | requests_kwargs.setdefault('timeout', self.requests_timeout) 22 | return super(CustomRemoteCKAN, self).call_action( 23 | action, data_dict, context, apikey, files, requests_kwargs) 24 | -------------------------------------------------------------------------------- /pydatajson/documentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo 'documentation' de Pydatajson 5 | 6 | Contiene métodos para generar documentación en markdown de distintos 7 | componentes de un catálogo. 8 | """ 9 | 10 | from __future__ import print_function, unicode_literals, with_statement 11 | 12 | from six.moves import map 13 | 14 | 15 | def dataset_to_markdown(dataset): 16 | """Genera texto en markdown a partir de los metadatos de una `dataset`. 17 | 18 | Args: 19 | dataset (dict): Diccionario con metadatos de una `dataset`. 20 | 21 | Returns: 22 | str: Texto que describe una `dataset`. 23 | """ 24 | text_template = """ 25 | # {title} 26 | 27 | {description} 28 | 29 | ## Recursos del dataset 30 | 31 | {distributions} 32 | """ 33 | 34 | if "distribution" in dataset: 35 | distributions = "".join( 36 | map(distribution_to_markdown, dataset["distribution"])) 37 | else: 38 | distributions = "" 39 | 40 | text = text_template.format( 41 | title=dataset["title"], 42 | description=dataset.get("description", ""), 43 | distributions=distributions 44 | ) 45 | 46 | return text 47 | 48 | 49 | def distribution_to_markdown(distribution): 50 | """Genera texto en markdown a partir de los metadatos de una 51 | `distribution`. 52 | 53 | Args: 54 | distribution (dict): Diccionario con metadatos de una 55 | `distribution`. 56 | 57 | Returns: 58 | str: Texto que describe una `distribution`. 59 | """ 60 | text_template = """ 61 | ### {title} 62 | 63 | {description} 64 | 65 | #### Campos del recurso 66 | 67 | {fields} 68 | """ 69 | 70 | if "field" in distribution: 71 | fields = "- " + \ 72 | "\n- ".join(map(field_to_markdown, distribution["field"])) 73 | else: 74 | fields = "" 75 | 76 | text = text_template.format( 77 | title=distribution["title"], 78 | description=distribution.get("description", ""), 79 | fields=fields 80 | ) 81 | 82 | return text 83 | 84 | 85 | def field_to_markdown(field): 86 | """Genera texto en markdown a partir de los metadatos de un `field`. 87 | 88 | Args: 89 | field (dict): Diccionario con metadatos de un `field`. 90 | 91 | Returns: 92 | str: Texto que describe un `field`. 
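Example: Esbozo con un `field` mínimo de ejemplo: >>> field_to_markdown({"title": "fecha", "type": "date", "description": "Fecha del registro"}) '**fecha** (date): Fecha del registro'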
93 | """ 94 | if "title" in field: 95 | field_title = "**{}**".format(field["title"]) 96 | else: 97 | raise Exception("Es necesario un `title` para describir un campo.") 98 | 99 | field_type = " ({})".format(field["type"]) if "type" in field else "" 100 | field_desc = ": {}".format( 101 | field["description"]) if "description" in field else "" 102 | 103 | text_template = "{title}{type}{description}" 104 | text = text_template.format(title=field_title, type=field_type, 105 | description=field_desc) 106 | 107 | return text 108 | -------------------------------------------------------------------------------- /pydatajson/download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Módulo 'download' de pydatajson 4 | 5 | Contiene métodos para descargar archivos a través del protocolo HTTP. 6 | """ 7 | 8 | from __future__ import unicode_literals, print_function, with_statement 9 | from __future__ import absolute_import 10 | 11 | import requests 12 | import time 13 | import sys 14 | 15 | DEFAULT_TRIES = 3 16 | RETRY_DELAY = 1 17 | 18 | 19 | def download(url, file_path, tries=DEFAULT_TRIES, retry_delay=RETRY_DELAY): 20 | """ 21 | Descarga un archivo a través del protocolo HTTP, en uno o más intentos. 22 | 23 | Args: 24 | url (str): URL (schema HTTP) del archivo a descargar. 25 | tries (int): Intentos a realizar (default: 3). 26 | retry_delay (int o float): Tiempo a esperar, en segundos, entre cada 27 | intento. 28 | try_timeout (int o float): Tiempo máximo a esperar por intento. 29 | proxies (dict): Proxies a utilizar. El diccionario debe contener los 30 | valores 'http' y 'https', cada uno asociados a la URL del proxy 31 | correspondiente. 32 | 33 | Returns: 34 | bytes: Contenido del archivo 35 | """ 36 | timeout = 10 37 | for i in range(1, tries + 1): 38 | try: 39 | with requests.get(url, timeout=timeout ** i, stream=True, 40 | verify=False) as r: 41 | r.raise_for_status() 42 | with open(file_path, 'wb') as f: 43 | for chunk in r.iter_content(chunk_size=8192): 44 | if chunk: # filter out keep-alive new chunks 45 | f.write(chunk) 46 | 47 | except requests.TooManyRedirects as e: 48 | raise e 49 | except Exception as e: 50 | download_exception = e 51 | raise download_exception 52 | 53 | 54 | def download_to_file(url, file_path, **kwargs): 55 | """ 56 | Descarga un archivo a través del protocolo HTTP, en uno o más intentos, y 57 | escribe el contenido descargado el el path especificado. 58 | 59 | Args: 60 | url (str): URL (schema HTTP) del archivo a descargar. 61 | file_path (str): Path del archivo a escribir. Si un archivo ya existe 62 | en el path especificado, se sobrescribirá con nuevos contenidos. 63 | kwargs: Parámetros para download(). 
64 | """ 65 | content = download(url, file_path, **kwargs) 66 | 67 | 68 | if __name__ == '__main__': 69 | download_to_file(sys.argv[1], sys.argv[2]) 70 | -------------------------------------------------------------------------------- /pydatajson/fields/fields.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "requerido", 3 | "description": "requerido", 4 | "publisher": { 5 | "name": "requerido", 6 | "mbox": "requerido" 7 | }, 8 | "issued": "recomendado", 9 | "modified": "recomendado", 10 | "language": "recomendado", 11 | "superThemeTaxonomy": "requerido", 12 | "themeTaxonomy": "recomendado", 13 | "license": "recomendado", 14 | "homepage": "recomendado", 15 | "rights": "optativo", 16 | "spatial": "optativo", 17 | "dataset": { 18 | "title": "requerido", 19 | "description": "requerido", 20 | "publisher": { 21 | "name": "requerido", 22 | "mbox": "recomendado" 23 | }, 24 | "contactPoint": { 25 | "fn": "recomendado", 26 | "hasEmail": "recomendado" 27 | }, 28 | "superTheme": "requerido", 29 | "theme": "recomendado", 30 | "keyword": "recomendado", 31 | "accrualPeriodicity": "requerido", 32 | "issued": "requerido", 33 | "modified": "recomendado", 34 | "identifier": "requerido", 35 | "language": "optativo", 36 | "spatial": "optativo", 37 | "temporal": "recomendado", 38 | "landingPage": "optativo", 39 | "license": "recomendado", 40 | "distribution": { 41 | "accessURL": "requerido", 42 | "description": "recomendado", 43 | "format": "recomendado", 44 | "mediaType": "optativo", 45 | "downloadURL": "requerido", 46 | "title": "requerido", 47 | "license": "recomendado", 48 | "byteSize": "optativo", 49 | "issued": "requerido", 50 | "modified": "recomendado", 51 | "rights": "optativo", 52 | "field": { 53 | "title": "recomendado", 54 | "type": "recomendado", 55 | "description": "recomendado" 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /pydatajson/reporting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo 'reporting' de Pydatajson 5 | 6 | Contiene los métodos para generar reportes sobre un catálogo. 7 | """ 8 | 9 | from __future__ import unicode_literals, print_function, \ 10 | with_statement, absolute_import 11 | 12 | from collections import OrderedDict 13 | 14 | from pydatajson import writers 15 | from . import helpers 16 | from . import readers 17 | from .validation import validate_catalog 18 | 19 | 20 | def generate_datasets_summary(catalog, export_path=None, validator=None, 21 | verify_ssl=True, url_check_timeout=1): 22 | """Genera un informe sobre los datasets presentes en un catálogo, 23 | indicando para cada uno: 24 | - Índice en la lista catalog["dataset"] 25 | - Título 26 | - Identificador 27 | - Cantidad de distribuciones 28 | - Estado de sus metadatos ["OK"|"ERROR"] 29 | 30 | Es utilizada por la rutina diaria de `libreria-catalogos` para reportar 31 | sobre los datasets de los catálogos mantenidos. 32 | 33 | Args: 34 | catalog (str o dict): Path a un catálogo en cualquier formato, 35 | JSON, XLSX, o diccionario de python. 36 | export_path (str): Path donde exportar el informe generado (en 37 | formato XLSX o CSV). Si se especifica, el método no devolverá 38 | nada. 39 | 40 | Returns: 41 | list: Contiene tantos dicts como datasets estén presentes en 42 | `catalogs`, con los datos antes mencionados. 
43 | """ 44 | catalog = readers.read_catalog(catalog) 45 | 46 | # Trato de leer todos los datasets bien formados de la lista 47 | # catalog["dataset"], si existe. 48 | if "dataset" in catalog and isinstance(catalog["dataset"], list): 49 | datasets = [d if isinstance(d, dict) else {} for d in 50 | catalog["dataset"]] 51 | else: 52 | # Si no, considero que no hay datasets presentes 53 | datasets = [] 54 | 55 | validation = validate_catalog( 56 | catalog, validator=validator, verify_ssl=verify_ssl, 57 | url_check_timeout=url_check_timeout)["error"]["dataset"] 58 | 59 | def info_dataset(index, dataset): 60 | """Recolecta información básica de un dataset.""" 61 | info = OrderedDict() 62 | info["indice"] = index 63 | info["titulo"] = dataset.get("title") 64 | info["identificador"] = dataset.get("identifier") 65 | info["estado_metadatos"] = validation[index]["status"] 66 | info["cant_errores"] = len(validation[index]["errors"]) 67 | info["cant_distribuciones"] = len(dataset["distribution"]) 68 | if helpers.dataset_has_data_distributions(dataset): 69 | info["tiene_datos"] = "SI" 70 | else: 71 | info["tiene_datos"] = "NO" 72 | 73 | return info 74 | 75 | summary = [info_dataset(i, ds) for i, ds in enumerate(datasets)] 76 | if export_path: 77 | writers.write_table(summary, export_path) 78 | else: 79 | return summary 80 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from pydatajson import custom_exceptions 6 | 7 | from pydatajson.response_formatters.dict_formatter import DictFormatter 8 | from pydatajson.response_formatters.list_formatter import ListFormatter 9 | from pydatajson.response_formatters.tables_formatter import TablesFormatter 10 | 11 | 12 | def format_response(validation, export_path, response_format): 13 | formats = { 14 | 'table': TablesFormatter(validation, export_path), 15 | 'dict': DictFormatter(validation), 16 | 'list': ListFormatter(validation), 17 | } 18 | try: 19 | return formats[response_format].format() 20 | except KeyError: 21 | msg = "No se reconoce el formato {}".format(response_format) 22 | raise custom_exceptions.FormatNameError(msg) 23 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/dict_formatter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from pydatajson.response_formatters.validation_response_formatter import \ 6 | ValidationResponseFormatter 7 | 8 | 9 | class DictFormatter(ValidationResponseFormatter): 10 | 11 | def format(self): 12 | return self.response 13 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/list_formatter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from pydatajson.response_formatters.validation_response_formatter import \ 6 | ValidationResponseFormatter 7 | 8 | 9 | class ListFormatter(ValidationResponseFormatter): 10 | 11 | def format(self): 12 | rows_catalog = [] 13 | validation_result = { 14 | "catalog_title": self.response["error"]["catalog"]["title"], 15 | "catalog_status": self.response["error"]["catalog"]["status"], 16 | } 17 | 
for error in self.response["error"]["catalog"]["errors"]: 18 | catalog_result = dict(validation_result) 19 | catalog_result.update({ 20 | "catalog_error_message": error["message"], 21 | "catalog_error_location": ", ".join(error["path"]), 22 | }) 23 | rows_catalog.append(catalog_result) 24 | 25 | if len(self.response["error"]["catalog"]["errors"]) == 0: 26 | catalog_result = dict(validation_result) 27 | catalog_result.update({ 28 | "catalog_error_message": None, 29 | "catalog_error_location": None 30 | }) 31 | rows_catalog.append(catalog_result) 32 | 33 | # crea una lista de dicts para volcarse en una tabla (dataset) 34 | rows_dataset = [] 35 | for dataset in self.response["error"]["dataset"]: 36 | validation_result = { 37 | "dataset_title": dataset["title"], 38 | "dataset_identifier": dataset["identifier"], 39 | "dataset_list_index": dataset["list_index"], 40 | "dataset_status": dataset["status"] 41 | } 42 | for error in dataset["errors"]: 43 | dataset_result = dict(validation_result) 44 | dataset_result.update({ 45 | "dataset_error_message": error["message"], 46 | "dataset_error_location": error["path"][-1] 47 | }) 48 | rows_dataset.append(dataset_result) 49 | 50 | if len(dataset["errors"]) == 0: 51 | dataset_result = dict(validation_result) 52 | dataset_result.update({ 53 | "dataset_error_message": None, 54 | "dataset_error_location": None 55 | }) 56 | rows_dataset.append(dataset_result) 57 | 58 | return {"catalog": rows_catalog, "dataset": rows_dataset} 59 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/tables_formatter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from openpyxl.styles import Alignment, Font 6 | 7 | from pydatajson import writers 8 | from pydatajson.response_formatters.list_formatter import ListFormatter 9 | from pydatajson.response_formatters.validation_response_formatter import\ 10 | ValidationResponseFormatter 11 | 12 | 13 | class TablesFormatter(ValidationResponseFormatter): 14 | 15 | def __init__(self, response, export_path): 16 | super(TablesFormatter, self).__init__(response) 17 | self.export_path = export_path 18 | 19 | def format(self): 20 | validation_lists = ListFormatter(self.response).format() 21 | 22 | column_styles = { 23 | "catalog": { 24 | "catalog_status": {"width": 20}, 25 | "catalog_error_location": {"width": 40}, 26 | "catalog_error_message": {"width": 40}, 27 | "catalog_title": {"width": 20}, 28 | }, 29 | "dataset": { 30 | "dataset_error_location": {"width": 20}, 31 | "dataset_identifier": {"width": 40}, 32 | "dataset_status": {"width": 20}, 33 | "dataset_title": {"width": 40}, 34 | "dataset_list_index": {"width": 20}, 35 | "dataset_error_message": {"width": 40}, 36 | } 37 | } 38 | cell_styles = { 39 | "catalog": [ 40 | {"alignment": Alignment(vertical="center")}, 41 | {"row": 1, "font": Font(bold=True)}, 42 | ], 43 | "dataset": [ 44 | {"alignment": Alignment(vertical="center")}, 45 | {"row": 1, "font": Font(bold=True)}, 46 | ] 47 | } 48 | 49 | # crea tablas en un sólo excel o varios CSVs 50 | writers.write_tables( 51 | tables=validation_lists, path=self.export_path, 52 | column_styles=column_styles, cell_styles=cell_styles 53 | ) 54 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/validation_response_formatter.py: -------------------------------------------------------------------------------- 1 
| # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | import abc 5 | 6 | 7 | class ValidationResponseFormatter(object): 8 | 9 | def __init__(self, response): 10 | self.response = response 11 | 12 | @abc.abstractmethod 13 | def format(self): 14 | raise NotImplementedError 15 | -------------------------------------------------------------------------------- /pydatajson/schemas/accrualPeriodicity.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "R/P10Y", 4 | "description": "Cada diez años" 5 | }, 6 | { 7 | "id": "R/P4Y", 8 | "description": "Cada cuatro años" 9 | }, 10 | { 11 | "id": "R/P3Y", 12 | "description": "Cada tres años" 13 | }, 14 | { 15 | "id": "R/P2Y", 16 | "description": "Cada dos años" 17 | }, 18 | { 19 | "id": "R/P1Y", 20 | "description": "Anualmente" 21 | }, 22 | { 23 | "id": "R/P6M", 24 | "description": "Cada medio año" 25 | }, 26 | { 27 | "id": "R/P4M", 28 | "description": "Cuatrimestralmente" 29 | }, 30 | { 31 | "id": "R/P3M", 32 | "description": "Trimestralmente" 33 | }, 34 | { 35 | "id": "R/P2M", 36 | "description": "Bimestralmente" 37 | }, 38 | { 39 | "id": "R/P1M", 40 | "description": "Mensualmente" 41 | }, 42 | { 43 | "id": "R/P0.5M", 44 | "description": "Cada 15 días" 45 | }, 46 | { 47 | "id": "R/P0.33M", 48 | "description": "Tres veces por mes" 49 | }, 50 | { 51 | "id": "R/P1W", 52 | "description": "Semanalmente" 53 | }, 54 | { 55 | "id": "R/P0.5W", 56 | "description": "Dos veces a la semana" 57 | }, 58 | { 59 | "id": "R/P0.33W", 60 | "description": "Tres veces a la semana" 61 | }, 62 | { 63 | "id": "R/P1D", 64 | "description": "Diariamente" 65 | }, 66 | { 67 | "id": "R/PT1H", 68 | "description": "Cada hora" 69 | }, 70 | { 71 | "id": "R/PT1S", 72 | "description": "Continuamente actualizado" 73 | }, 74 | { 75 | "id": "eventual", 76 | "description": "Eventual" 77 | } 78 | ] 79 | -------------------------------------------------------------------------------- /pydatajson/schemas/catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "required": [ 5 | "dataset", 6 | "title", 7 | "description", 8 | "publisher", 9 | "superThemeTaxonomy" 10 | ], 11 | "properties": { 12 | "identifier": {"$ref": "mixed-types.json#stringOrNull"}, 13 | "publisher": { 14 | "type": "object", 15 | "required": ["name"], 16 | "properties": { 17 | "name": { "$ref": "mixed-types.json#nonEmptyString" }, 18 | "mbox": { 19 | "anyOf": [ 20 | { "type": "string", "format": "email" }, 21 | { "type": "string", "maxLength": 0 } 22 | ]} 23 | } 24 | }, 25 | "dataset": { 26 | "type": "array", 27 | "items": { "$ref": "dataset.json" }, 28 | "uniqueItems": true 29 | }, 30 | "title": { "$ref": "mixed-types.json#nonEmptyString" }, 31 | "description": { "$ref": "mixed-types.json#nonEmptyString" }, 32 | "superThemeTaxonomy": { "type": "string", "format": "uri" }, 33 | "issued": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 34 | "modified": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 35 | "language": { "$ref": "mixed-types.json#arrayOrNull" }, 36 | "themeTaxonomy": { 37 | "type": "array", 38 | "items": { "$ref": "theme.json" } 39 | }, 40 | "license": { "$ref": "mixed-types.json#stringOrNull" }, 41 | "homepage": { 42 | "anyOf": [ 43 | { "type": "string", "format": "uri" }, 44 | { "$ref": "mixed-types.json#emptyValue" } 45 | ] 46 | }, 47 | "rights": { "$ref": 
"mixed-types.json#stringOrNull" }, 48 | "spatial": { 49 | "anyOf": [ 50 | { "type": "string" }, 51 | { "type": "array" }, 52 | { "type": "null" } 53 | ] 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /pydatajson/schemas/dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "required": [ 5 | "title", 6 | "description", 7 | "publisher", 8 | "superTheme", 9 | "distribution", 10 | "accrualPeriodicity", 11 | "issued", 12 | "identifier" 13 | ], 14 | "properties": { 15 | "publisher": { 16 | "type": "object", 17 | "required": ["name"], 18 | "properties": { 19 | "name": { "$ref": "mixed-types.json#nonEmptyString" }, 20 | "mbox": { 21 | "anyOf": [ 22 | { "type": "string", "pattern": "^[ ]*[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*[ ]*$" }, 23 | { "$ref": "mixed-types.json#emptyValue" } 24 | ] 25 | } 26 | } 27 | }, 28 | "distribution": { 29 | "type": "array", 30 | "items": { "$ref": "distribution.json" } 31 | }, 32 | "title": { 33 | "allOf": [ 34 | { "$ref": "mixed-types.json#nonEmptyString"}, 35 | { "maxLength" : 100} 36 | ] 37 | }, 38 | "description": { "$ref": "mixed-types.json#nonEmptyString" }, 39 | "issued": { "$ref": "mixed-types.json#dateOrDatetimeString" }, 40 | "superTheme": { 41 | "type": "array", 42 | "minItems": 1, 43 | "items": { "$ref": "mixed-types.json#superTheme" } 44 | }, 45 | "accrualPeriodicity": { 46 | "anyOf" : [ 47 | {"type": "string", "pattern": "^R/P\\d+(\\.\\d+)?[Y|M|W|D]$"}, 48 | {"type": "string", "pattern": "^R/PT\\d+(\\.\\d+)?[H|M|S]$"}, 49 | {"type": "string", "pattern": "^eventual$"}, 50 | {"type": "string", "pattern": "^EVENTUAL$"} 51 | ] 52 | }, 53 | "contactPoint": { 54 | "type": "object", 55 | "properties": { 56 | "fn": { "$ref": "mixed-types.json#stringOrNull" }, 57 | "hasEmail": { 58 | "anyOf": [ 59 | { "type": "string", "pattern": "^[ ]*[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*[ ]*$" }, 60 | { "$ref": "mixed-types.json#emptyValue" } 61 | ] 62 | } 63 | } 64 | }, 65 | "theme": { "$ref": "mixed-types.json#arrayOrNull" }, 66 | "keyword": { 67 | "allOf": [ 68 | { "$ref": "mixed-types.json#arrayOrNull" }, 69 | { "anyOf": [ 70 | {"not": { "type": "array" }}, 71 | {"items": { "pattern": "^[ 0-9a-zá-źÁ-ŹA-ZñÑ._-]+$" }} 72 | ]} 73 | ] 74 | }, 75 | "modified": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 76 | "identifier": { "$ref": "mixed-types.json#nonEmptyString" }, 77 | "language": { "$ref": "mixed-types.json#arrayOrNull" }, 78 | "spatial": { 79 | "anyOf": [ 80 | { "type": "string" }, 81 | { "type": "array" }, 82 | { "type": "null" } 83 | ] 84 | }, 85 | "temporal": { "$ref": "mixed-types.json#temporalOrNull" }, 86 | "landingPage": { 87 | "anyOf": [ 88 | { "type": "string", "format": "uri" }, 89 | { "$ref": "mixed-types.json#emptyValue" } 90 | ] 91 | }, 92 | "license": { "$ref": "mixed-types.json#stringOrNull" }, 93 | "source": { "$ref": "mixed-types.json#stringOrNull" } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /pydatajson/schemas/distribution.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "required": [ 5 | 
"accessURL", 6 | "downloadURL", 7 | "title", 8 | "issued", 9 | "identifier" 10 | ], 11 | "properties": { 12 | "identifier": { "$ref": "mixed-types.json#nonEmptyString" }, 13 | "accessURL": { "type": "string", "format": "uri" }, 14 | "downloadURL": { "type": "string", "format": "uri" }, 15 | "title": { "$ref": "mixed-types.json#nonEmptyString" }, 16 | "issued": { "$ref": "mixed-types.json#dateOrDatetimeString" }, 17 | "description": { "$ref": "mixed-types.json#stringOrNull" }, 18 | "format": { "$ref": "mixed-types.json#stringOrNull" }, 19 | "mediaType": { "$ref": "mixed-types.json#stringOrNull" }, 20 | "license": { "$ref": "mixed-types.json#stringOrNull" }, 21 | "byteSize": { 22 | "anyOf": [ 23 | { "type": "integer" }, 24 | { "$ref": "mixed-types.json#emptyValue" } 25 | ] 26 | }, 27 | "modified": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 28 | "rights": { "$ref": "mixed-types.json#stringOrNull" }, 29 | "fileName": { "$ref": "mixed-types.json#stringOrNull" }, 30 | "field": { 31 | "type": "array", 32 | "items": { "$ref": "field.json" } 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /pydatajson/schemas/field.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "properties": { 5 | "title": { "$ref": "mixed-types.json#fieldTitle"}, 6 | "type": { "$ref": "mixed-types.json#stringOrNull" }, 7 | "description": { "$ref": "mixed-types.json#stringOrNull" } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /pydatajson/schemas/mixed-types.json: -------------------------------------------------------------------------------- 1 | { 2 | "superTheme": { 3 | "anyOf": [ 4 | {"type": "string", "pattern": "^AGRI$"}, 5 | {"type": "string", "pattern": "^ECON$"}, 6 | {"type": "string", "pattern": "^EDUC$"}, 7 | {"type": "string", "pattern": "^ENER$"}, 8 | {"type": "string", "pattern": "^ENVI$"}, 9 | {"type": "string", "pattern": "^GOVE$"}, 10 | {"type": "string", "pattern": "^HEAL$"}, 11 | {"type": "string", "pattern": "^INTR$"}, 12 | {"type": "string", "pattern": "^JUST$"}, 13 | {"type": "string", "pattern": "^REGI$"}, 14 | {"type": "string", "pattern": "^SOCI$"}, 15 | {"type": "string", "pattern": "^TECH$"}, 16 | {"type": "string", "pattern": "^TRAN$"}, 17 | {"type": "string", "pattern": "^agri$"}, 18 | {"type": "string", "pattern": "^econ$"}, 19 | {"type": "string", "pattern": "^educ$"}, 20 | {"type": "string", "pattern": "^ener$"}, 21 | {"type": "string", "pattern": "^envi$"}, 22 | {"type": "string", "pattern": "^gove$"}, 23 | {"type": "string", "pattern": "^heal$"}, 24 | {"type": "string", "pattern": "^intr$"}, 25 | {"type": "string", "pattern": "^just$"}, 26 | {"type": "string", "pattern": "^regi$"}, 27 | {"type": "string", "pattern": "^soci$"}, 28 | {"type": "string", "pattern": "^tech$"}, 29 | {"type": "string", "pattern": "^tran$"} 30 | ] 31 | }, 32 | "nonEmptyStringOrNull": { 33 | "anyOf": [ 34 | { "type": "string", "minLength": 1 }, 35 | { "type": "null" } 36 | ] 37 | }, 38 | "nonEmptyString": { "type": "string", "minLength": 1}, 39 | "arrayOrNull": { 40 | "anyOf": [ 41 | { 42 | "type": "array", 43 | "items": {"$ref": "#/nonEmptyString"} 44 | }, 45 | { "type": "null" } 46 | ] 47 | }, 48 | "dateOrDatetimeString": { 49 | "anyOf": [ 50 | { "type": "string", "format": "date" }, 51 | { "type": "string", "format": "date-time" } 52 | ] 53 | }, 54 | 
"dateOrDatetimeStringOrNull": { 55 | "anyOf": [ 56 | { "type": "string", "format": "date" }, 57 | { "type": "string", "format": "date-time" }, 58 | { "type": "null" }, 59 | { "type": "string", "maxLength": 0 } 60 | ] 61 | }, 62 | "stringOrNull": { 63 | "anyOf": [ 64 | { "type": "string" }, 65 | { "type": "null" } 66 | ] 67 | }, 68 | "fieldTitle": { 69 | "anyOf": [ 70 | { "type": "string", "maxLength": 60 }, 71 | { "type": "null" } 72 | ] 73 | }, 74 | "temporalOrNull": { 75 | "anyOf": [ 76 | { "type": "string", "pattern": "^(\\d{4}-\\d\\d-\\d\\d(T\\d\\d:\\d\\d:\\d\\d(\\.\\d+)?)?(([+-]\\d\\d:\\d\\d)|Z)?)\\/(\\d{4}-\\d\\d-\\d\\d(T\\d\\d:\\d\\d:\\d\\d(\\.\\d+)?)?(([+-]\\d\\d:\\d\\d)|Z)?)$" }, 77 | { "type": "string", "pattern": "^(\\d{4}-\\d\\d-\\d\\d(T\\d\\d:\\d\\d:\\d\\d(\\.\\d+)?)?(([+-]\\d\\d:\\d\\d)|Z)?)$" }, 78 | { "type": "null" }, 79 | { "type": "string", "maxLength": 0 } 80 | ] 81 | }, 82 | "emptyValue": { 83 | "anyOf": [ 84 | { "type": "string", "maxLength": 0 }, 85 | { "type": "null" } 86 | ] 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /pydatajson/schemas/required_fields_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "required": [ 4 | "dataset", 5 | "title", 6 | "description", 7 | "publisher", 8 | "superThemeTaxonomy" 9 | ], 10 | "properties": { 11 | "publisher": { 12 | "type": "object", 13 | "required": ["name", "mbox"] 14 | }, 15 | "dataset": { 16 | "type": "array", 17 | "items": { 18 | "type": "object", 19 | "required": [ 20 | "title", 21 | "description", 22 | "publisher", 23 | "superTheme", 24 | "distribution", 25 | "accrualPeriodicity", 26 | "issued", 27 | "identifier" 28 | ], 29 | "properties": { 30 | "publisher": { 31 | "type": "object", 32 | "required": ["name"] 33 | }, 34 | "distribution": { 35 | "type": "array", 36 | "items": { 37 | "type": "object", 38 | "required": [ 39 | "accessURL", 40 | "downloadURL", 41 | "title", 42 | "issued", 43 | "identifier" 44 | ] 45 | } 46 | } 47 | } 48 | } 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /pydatajson/schemas/superThemeTaxonomy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "AGRI", 4 | "label": "Agroganadería, pesca y forestación", 5 | "description": "Por ejemplo: 'Lechería: precio pagado al productor' o 'Superficie forestada'." 6 | }, 7 | { 8 | "id": "ECON", 9 | "label": "Economía y finanzas", 10 | "description": "Por ejemplo: 'Deuda pública'." 11 | }, 12 | { 13 | "id": "EDUC", 14 | "label": "Educación, cultura y deportes", 15 | "description": "Por ejemplo: 'Registro de Establecimientos Educativos'." 16 | }, 17 | { 18 | "id": "ENER", 19 | "label": "Energía", 20 | "description": "Por ejemplo: 'Productos mineros exportados' o 'Precios del GNC'." 21 | }, 22 | { 23 | "id": "ENVI", 24 | "label": "Medio ambiente", 25 | "description": "Por ejemplo: 'Operadores de residuos peligrosos'." 26 | }, 27 | { 28 | "id": "GOVE", 29 | "label": "Gobierno y sector público", 30 | "description": "Por ejemplo: 'Inmuebles del estado Nacional'." 31 | }, 32 | { 33 | "id": "HEAL", 34 | "label": "Salud", 35 | "description": "Por ejemplo: 'Estadísticas nacionales de VIH/SIDA'." 36 | }, 37 | { 38 | "id": "INTR", 39 | "label": "Asuntos internacionales", 40 | "description": "Por ejemplo: 'Representaciones argentinas en el exterior'." 
41 | }, 42 | { 43 | "id": "JUST", 44 | "label": "Justicia, seguridad y legales", 45 | "description": "Por ejemplo:'Censo penitenciario'." 46 | }, 47 | { 48 | "id": "REGI", 49 | "label": "Regiones y ciudades", 50 | "description": "Por ejemplo: 'Departamentos de la provincia de Río Negro'." 51 | }, 52 | { 53 | "id": "SOCI", 54 | "label": "Población y sociedad", 55 | "description": "Por ejemplo: 'Turistas residentes que viajan por Argentina'." 56 | }, 57 | { 58 | "id": "TECH", 59 | "label": "Ciencia y tecnología", 60 | "description": "Por ejemplo: 'Recursos humanos en ciencia y tecnología'." 61 | }, 62 | { 63 | "id": "TRAN", 64 | "label": "Transporte", 65 | "description": "Por ejemplo: 'Estadísticas viales'." 66 | } 67 | ] 68 | -------------------------------------------------------------------------------- /pydatajson/schemas/theme.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "properties": { 5 | "id": { "$ref": "mixed-types.json#stringOrNull" }, 6 | "label": { "$ref": "mixed-types.json#stringOrNull" }, 7 | "description": { "$ref": "mixed-types.json#stringOrNull" } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /pydatajson/status_indicators_generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pydatajson.readers import read_catalog 4 | from pydatajson.reporting import generate_datasets_summary 5 | from pydatajson.validators\ 6 | .distribution_download_urls_validator \ 7 | import DistributionDownloadUrlsValidator 8 | 9 | 10 | class StatusIndicatorsGenerator(object): 11 | 12 | def __init__(self, catalog, validator=None, verify_ssl=True, 13 | url_check_timeout=1, threads_count=1): 14 | self.download_url_ok = None 15 | self.catalog = read_catalog(catalog) 16 | self.summary = generate_datasets_summary(self.catalog, 17 | validator=validator, 18 | verify_ssl=verify_ssl) 19 | self.verify_url = verify_ssl 20 | self.url_check_timeout = url_check_timeout 21 | self.threads_count = threads_count 22 | 23 | def datasets_cant(self): 24 | return len(self.summary) 25 | 26 | def distribuciones_cant(self): 27 | return sum(ds['cant_distribuciones'] for ds in self.summary) 28 | 29 | def datasets_meta_ok_cant(self): 30 | return sum(ds['estado_metadatos'] == 'OK' for ds in self.summary) 31 | 32 | def datasets_meta_error_cant(self): 33 | return sum(ds['estado_metadatos'] == 'ERROR' for ds in self.summary) 34 | 35 | def datasets_meta_ok_pct(self): 36 | return self._get_dataset_percentage(self.datasets_meta_ok_cant) 37 | 38 | def datasets_con_datos_cant(self): 39 | return sum(ds['tiene_datos'] == 'SI' for ds in self.summary) 40 | 41 | def datasets_sin_datos_cant(self): 42 | return sum(ds['tiene_datos'] == 'NO' for ds in self.summary) 43 | 44 | def datasets_con_datos_pct(self): 45 | return self._get_dataset_percentage(self.datasets_con_datos_cant) 46 | 47 | def distribuciones_download_url_ok_cant(self): 48 | if self.download_url_ok: 49 | return self.download_url_ok 50 | validator = DistributionDownloadUrlsValidator( 51 | self.catalog, self.verify_url, self.url_check_timeout, 52 | self.threads_count) 53 | self.download_url_ok = validator.validate() 54 | return self.download_url_ok 55 | 56 | def distribuciones_download_url_error_cant(self): 57 | return self.distribuciones_cant() - \ 58 | self.distribuciones_download_url_ok_cant() 59 | 60 | def 
distribuciones_download_url_ok_pct(self): 61 | total = self.distribuciones_cant() 62 | if not total: 63 | return None 64 | return \ 65 | round(float(self.distribuciones_download_url_ok_cant()) / total, 4) 66 | 67 | def _get_dataset_percentage(self, indicator): 68 | total = self.datasets_cant() 69 | if not total: 70 | return None 71 | return round(float(indicator()) / total, 4) 72 | -------------------------------------------------------------------------------- /pydatajson/templates/catalog_readme.txt: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: {title} 3 | 4 | ## Información General 5 | 6 | - **Autor**: {publisher_name} 7 | - **Correo Electrónico**: {publisher_mbox} 8 | - **Ruta del catálogo**: {catalog_path_or_url} 9 | - **Nombre del catálogo**: {title} 10 | - **Descripción**: 11 | 12 | > {description} 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: {global_status} 17 | - **Estado metadatos catálogo**: {catalog_status} 18 | - **Cantidad Total de Datasets**: {no_of_datasets} 19 | - **Cantidad Total de Distribuciones**: {no_of_distributions} 20 | 21 | - **Cantidad de Datasets Federados**: {federated_datasets} 22 | - **Cantidad de Datasets NO Federados**: {not_federated_datasets} 23 | - **Porcentaje de Datasets NO Federados**: {not_federated_datasets_pct} 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | {federated_removed_datasets_list} 28 | 29 | ## Datasets no federados 30 | 31 | {not_federated_datasets_list} 32 | 33 | ## Datasets federados 34 | 35 | {federated_datasets_list} 36 | 37 | ## Reporte 38 | 39 | Por favor, consulte el informe [`datasets.csv`](datasets.csv). 40 | -------------------------------------------------------------------------------- /pydatajson/threading_helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from multiprocessing.pool import ThreadPool 4 | 5 | 6 | def apply_threading(l, function, cant_threads, **kwargs): 7 | if cant_threads == 1: 8 | return [function(x, **kwargs) for x in l] 9 | pool = ThreadPool(processes=cant_threads) 10 | # pool.map no acepta kwargs: se propagan a través de un closure 11 | results = pool.map(lambda x: function(x, **kwargs), l) 12 | pool.close() 13 | pool.join() 14 | return results 15 | -------------------------------------------------------------------------------- /pydatajson/time_series.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo `time_series` de pydatajson 5 | 6 | Contiene funciones auxiliares para analizar catálogos con series de tiempo, 7 | definidas según la extensión del perfil de metadatos para series de tiempo. 8 | """ 9 | 10 | from __future__ import unicode_literals 11 | from __future__ import print_function 12 | from __future__ import with_statement 13 | 14 | from . 
import custom_exceptions as ce 15 | 16 | 17 | def field_is_time_series(field, distribution=None): 18 | field_may_be_ts = ( 19 | not field.get("specialType") and 20 | not field.get("specialTypeDetail") and 21 | ( 22 | field.get("type", "").lower() == "number" or 23 | field.get("type", "").lower() == "integer" 24 | ) and 25 | field.get("id") 26 | ) 27 | distribution_may_has_ts = ( 28 | not distribution or distribution_has_time_index(distribution) 29 | ) 30 | return field_may_be_ts and distribution_may_has_ts 31 | 32 | 33 | def get_distribution_time_index(distribution): 34 | for field in distribution.get('field', []): 35 | if field.get('specialType') == 'time_index': 36 | return field.get('title') 37 | 38 | raise ce.DistributionTimeIndexNonExistentError( 39 | distribution.get("title"), 40 | distribution.get("dataset_identifier"), 41 | "no tiene índice de tiempo." 42 | ) 43 | 44 | 45 | def get_distribution_time_index_frequency(distribution): 46 | for field in distribution.get('field', []): 47 | if field.get('specialType') == 'time_index': 48 | return field.get('specialTypeDetail') 49 | 50 | raise ce.DistributionTimeIndexNonExistentError( 51 | distribution.get("title"), 52 | distribution.get("dataset_identifier"), 53 | "no tiene índice de tiempo." 54 | ) 55 | 56 | 57 | def distribution_has_time_index(distribution): 58 | try: 59 | return any([field.get('specialType') == 60 | 'time_index' for field in distribution.get('field', [])]) 61 | except AttributeError: 62 | return False 63 | 64 | 65 | def dataset_has_time_series(dataset): 66 | for distribution in dataset.get('distribution', []): 67 | if distribution_has_time_index(distribution): 68 | return True 69 | return False 70 | -------------------------------------------------------------------------------- /pydatajson/transformation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Funciones auxiliares para realizar transformaciones de metadatos""" 5 | 6 | from __future__ import unicode_literals 7 | from __future__ import print_function 8 | from __future__ import with_statement 9 | import os 10 | 11 | 12 | def generate_distribution_ids(catalog): 13 | """Genera identificadores para las distribuciones que no los tienen. 14 | 15 | Los identificadores de distribuciones se generan concatenando el id del 16 | dataset al que pertenecen con el índice posicional de la distribución en el 17 | dataset: distribution_identifier = "{dataset_identifier}_{index}". 
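    Por ejemplo, la segunda distribución (índice 1) de un dataset cuyo
    identifier es "99db6631" recibe el identifier "99db6631_1", si no
    tenía uno.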
18 | """ 19 | 20 | for dataset in catalog.get("dataset", []): 21 | for distribution_index, distribution in enumerate( 22 | dataset.get("distribution", [])): 23 | if "identifier" not in distribution: 24 | distribution["identifier"] = "{}_{}".format( 25 | dataset["identifier"], distribution_index) 26 | -------------------------------------------------------------------------------- /pydatajson/validators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /pydatajson/validators/consistent_distribution_fields_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import mimetypes 4 | import os 5 | 6 | try: 7 | from urlparse import urlparse 8 | except ImportError: 9 | from urllib.parse import urlparse 10 | 11 | import pydatajson.custom_exceptions as ce 12 | from pydatajson.validators.simple_validator import SimpleValidator 13 | 14 | EXTENSIONS_EXCEPTIONS = ["zip", "php", "asp", "aspx"] 15 | 16 | 17 | class ConsistentDistributionFieldsValidator(SimpleValidator): 18 | 19 | def validate(self): 20 | for dataset_idx, dataset in enumerate(self.catalog["dataset"]): 21 | for distribution_idx, distribution in enumerate( 22 | dataset["distribution"]): 23 | for attribute in ['downloadURL', 'fileName']: 24 | if not self._format_matches_extension(distribution, 25 | attribute): 26 | yield ce.ExtensionError(dataset_idx, distribution_idx, 27 | distribution, attribute) 28 | 29 | @staticmethod 30 | def _format_matches_extension(distribution, attribute): 31 | """Chequea si una extensión podría corresponder a un formato dado.""" 32 | 33 | if attribute in distribution and "format" in distribution: 34 | if "/" in distribution['format']: 35 | possible_format_extensions = mimetypes.guess_all_extensions( 36 | distribution['format']) 37 | else: 38 | possible_format_extensions = [ 39 | '.' 
+ distribution['format'].lower() 40 | ] 41 | 42 | file_name = urlparse(distribution[attribute]).path 43 | extension = os.path.splitext(file_name)[-1].lower() 44 | 45 | if not extension: 46 | return True 47 | 48 | # hay extensiones exceptuadas porque enmascaran otros formatos 49 | if extension.lower().replace(".", "") in EXTENSIONS_EXCEPTIONS: 50 | return True 51 | 52 | if extension not in possible_format_extensions: 53 | # si no hay extensiones posibles conocidas, no se valida 54 | if not possible_format_extensions: 55 | return True 56 | return False 57 | 58 | return True 59 | -------------------------------------------------------------------------------- /pydatajson/validators/distribution_download_urls_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from pydatajson import threading_helper 4 | from pydatajson.validators.url_validator import UrlValidator 5 | 6 | 7 | class DistributionDownloadUrlsValidator(UrlValidator): 8 | 9 | def validate(self): 10 | async_results = [] 11 | for dataset in self.catalog.get('dataset', []): 12 | distribution_urls = \ 13 | [distribution.get('downloadURL', '') 14 | for distribution in dataset.get('distribution', [])] 15 | async_results += threading_helper \ 16 | .apply_threading(distribution_urls, 17 | self.is_working_url, 18 | self.threads_count) 19 | 20 | result = 0 21 | for res, _ in async_results: 22 | result += res 23 | 24 | return result 25 | -------------------------------------------------------------------------------- /pydatajson/validators/distribution_urls_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import pydatajson.custom_exceptions as ce 4 | from pydatajson import threading_helper 5 | from pydatajson.validators.url_validator import UrlValidator 6 | 7 | 8 | class DistributionUrlsValidator(UrlValidator): 9 | 10 | def validate(self): 11 | datasets = self.catalog.get('dataset') 12 | 13 | metadata = [] 14 | urls = [] 15 | for dataset_idx, dataset in enumerate(datasets): 16 | distributions = dataset.get('distribution') 17 | 18 | for distribution_idx, distribution in enumerate(distributions): 19 | distribution_title = distribution.get('title') 20 | access_url = distribution.get('accessURL') 21 | download_url = distribution.get('downloadURL') 22 | 23 | metadata.append({ 24 | "dataset_idx": dataset_idx, 25 | "dist_idx": distribution_idx, 26 | "dist_title": distribution_title 27 | }) 28 | urls += [access_url, download_url] 29 | 30 | sync_res = threading_helper \ 31 | .apply_threading(urls, 32 | self.is_working_url, 33 | self.threads_count) 34 | 35 | for i in range(len(metadata)): 36 | actual_metadata = metadata[i] 37 | dataset_idx = actual_metadata["dataset_idx"] 38 | distribution_idx = actual_metadata["dist_idx"] 39 | distribution_title = actual_metadata["dist_title"] 40 | 41 | k = i * 2 42 | access_url = urls[k] 43 | download_url = urls[k + 1] 44 | 45 | access_url_is_valid, access_url_status_code = sync_res[k] 46 | download_url_is_valid, download_url_status_code = sync_res[k + 1] 47 | 48 | if not access_url_is_valid: 49 | yield ce.BrokenAccessUrlError(dataset_idx, 50 | distribution_idx, 51 | distribution_title, 52 | access_url, 53 | access_url_status_code) 54 | if not download_url_is_valid: 55 | yield ce.BrokenDownloadUrlError(dataset_idx, 56 | distribution_idx, 57 | distribution_title, 58 | download_url, 59 | download_url_status_code) 60 | 
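# Ejemplo de uso (bosquejo ilustrativo, asumiendo un catálogo ya leído como
# diccionario): el validador chequea en paralelo el accessURL y el downloadURL
# de cada distribución, y genera un error por cada URL que no responde.
#
#     validator = DistributionUrlsValidator(
#         catalog, verify_ssl=True, url_check_timeout=1, threads_count=4)
#     errores = list(validator.validate())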
-------------------------------------------------------------------------------- /pydatajson/validators/landing_pages_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import pydatajson.custom_exceptions as ce 5 | from pydatajson import threading_helper 6 | from pydatajson.validators.url_validator import UrlValidator 7 | 8 | 9 | class LandingPagesValidator(UrlValidator): 10 | 11 | def validate(self): 12 | datasets = self.catalog.get('dataset') 13 | datasets = filter(lambda x: x.get('landingPage'), datasets) 14 | 15 | metadata = [] 16 | urls = [] 17 | 18 | for dataset_idx, dataset in enumerate(datasets): 19 | metadata.append({ 20 | "dataset_idx": dataset_idx, 21 | "dataset_title": dataset.get('title'), 22 | "landing_page": dataset.get('landingPage'), 23 | }) 24 | urls.append(dataset.get('landingPage')) 25 | 26 | sync_res = threading_helper \ 27 | .apply_threading(urls, 28 | self.is_working_url, 29 | self.threads_count) 30 | 31 | for i in range(len(sync_res)): 32 | valid, status_code = sync_res[i] 33 | act_metadata = metadata[i] 34 | dataset_idx = act_metadata["dataset_idx"] 35 | dataset_title = act_metadata["dataset_title"] 36 | landing_page = act_metadata["landing_page"] 37 | 38 | if not valid: 39 | yield ce.BrokenLandingPageError(dataset_idx, dataset_title, 40 | landing_page, status_code) 41 | -------------------------------------------------------------------------------- /pydatajson/validators/simple_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class SimpleValidator(object): 6 | 7 | def __init__(self, catalog): 8 | self.catalog = catalog 9 | 10 | def validate(self): 11 | raise NotImplementedError 12 | -------------------------------------------------------------------------------- /pydatajson/validators/theme_ids_not_repeated_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from collections import Counter 4 | 5 | import pydatajson.custom_exceptions as ce 6 | from pydatajson.validators.simple_validator import SimpleValidator 7 | 8 | 9 | class ThemeIdsNotRepeatedValidator(SimpleValidator): 10 | 11 | def validate(self): 12 | if "themeTaxonomy" in self.catalog: 13 | theme_ids = [theme["id"] 14 | for theme in self.catalog["themeTaxonomy"]] 15 | dups = self._find_dups(theme_ids) 16 | if len(dups) > 0: 17 | yield ce.ThemeIdRepeated(dups) 18 | 19 | @staticmethod 20 | def _find_dups(elements): 21 | return [item for item, count in Counter(elements).items() 22 | if count > 1] 23 | -------------------------------------------------------------------------------- /pydatajson/validators/url_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | 5 | import requests 6 | from requests import RequestException, Timeout 7 | 8 | from pydatajson.constants import EXCEPTION_STATUS_CODES, \ 9 | INVALID_STATUS_CODES_REGEX 10 | from pydatajson.validators.simple_validator import SimpleValidator 11 | 12 | 13 | class UrlValidator(SimpleValidator): 14 | 15 | def __init__(self, catalog, verify_ssl, url_check_timeout, threads_count): 16 | super(UrlValidator, self).__init__(catalog) 17 | self.verify_ssl = verify_ssl 18 | self.url_check_timeout = url_check_timeout 19 | self.threads_count = 
threads_count 20 | 21 | def validate(self): 22 | raise NotImplementedError 23 | 24 | def is_working_url(self, url): 25 | try: 26 | response = requests.head(url, 27 | timeout=self.url_check_timeout, 28 | verify=self.verify_ssl) 29 | matches = [] 30 | if response.status_code not in EXCEPTION_STATUS_CODES: 31 | matches = \ 32 | [re.match(pattern, str(response.status_code)) is not None 33 | for pattern in INVALID_STATUS_CODES_REGEX] 34 | return True not in matches, response.status_code 35 | except Timeout: 36 | return False, 408 37 | except (RequestException, Exception): 38 | return False, None 39 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytz 2 | jsonschema==2.6.0 3 | requests 4 | # Para validar fechas y horas acorde a ISO 8601 5 | isodate==0.6.0 6 | # Para validar URIs 7 | rfc3987==1.3.7 8 | # Para exportar CSVs en unicode 9 | unicodecsv==0.14.1 10 | # Para leer y escribir XLSXs 11 | openpyxl>=2.4 12 | # Para consultar programáticamente la API de CKAN 13 | ckanapi==4.0 14 | urllib3 15 | Unidecode==0.4.21 16 | six 17 | python-dateutil==2.8.0 18 | requests-mock 19 | -------------------------------------------------------------------------------- /requirements_2.7.txt: -------------------------------------------------------------------------------- 1 | functools32==3.2.3.post2 -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | bumpversion==0.5.3 2 | watchdog==0.8.3 3 | flake8==2.6.0 4 | coverage==4.1 5 | sphinx==1.7.5 6 | cryptography==2.1.4 7 | PyYAML 8 | nose>=1.3 9 | recommonmark==0.4.0 10 | twine>=1.11 11 | sphinx-rtd-theme==0.4.0 12 | sphinxcontrib-napoleon==0.6.1 13 | mock==2.0.0;python_version<"3.4" 14 | pycallgraph 15 | setuptools>=38.6 16 | wheel>=0.31 17 | vcrpy 18 | -------------------------------------------------------------------------------- /samples/archivos-tests/excel-no-validos/catalogo-justicia-con-error-datasets.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-no-validos/catalogo-justicia-con-error-datasets.xlsx -------------------------------------------------------------------------------- /samples/archivos-tests/excel-validos/catalogo-justicia-06022017.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-validos/catalogo-justicia-06022017.xlsx -------------------------------------------------------------------------------- /samples/archivos-tests/excel-validos/catalogo-justicia-56-distribuciones.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-validos/catalogo-justicia-56-distribuciones.xlsx -------------------------------------------------------------------------------- /samples/archivos-tests/excel-validos/catalogo-justicia.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-validos/catalogo-justicia.xlsx -------------------------------------------------------------------------------- /samples/series-tiempo/odg-total-millones-pesos-1960-trimestral.csv: -------------------------------------------------------------------------------- 1 | indice_tiempo,oferta_total_60,pib_total_pm_60,importaciones_60,demanda_total_60,consumo_60,inversion_bruta_interna_60,exportaciones_60 2 | 1970-01-01,15729,14354,1375,15729,10455,3530,1744 3 | 1970-04-01,17250,15743,1507,17250,11450,3664,2136 4 | 1970-07-01,16999,15486,1513,16999,11435,3746,1818 5 | 1970-10-01,17048,15555,1493,17048,11699,3894,1455 6 | 1971-01-01,16341,14864,1478,16341,11133,3761,1448 7 | 1971-04-01,18156,16453,1703,18156,12044,4324,1787 8 | 1971-07-01,18240,16546,1694,18240,12541,4162,1538 9 | 1971-10-01,18182,16867,1315,18182,12416,4228,1539 10 | 1972-01-01,17054,15655,1399,17054,11941,3541,1572 11 | 1972-04-01,18339,16893,1446,18339,12815,3981,1543 12 | 1972-07-01,18622,17154,1468,18622,13004,4229,1389 13 | 1972-10-01,18938,17492,1446,18938,12879,4632,1427 14 | 1973-01-01,17839,16538,1301,17839,12349,3797,1693 15 | 1973-04-01,19012,17798,1214,19012,13529,4008,1476 16 | 1973-07-01,18831,17605,1227,18831,13420,3808,1604 17 | 1973-10-01,19988,18417,1571,19988,14089,4384,1516 18 | 1974-01-01,18438,17230,1208,18438,13387,3655,1396 19 | 1974-04-01,20408,19030,1378,20408,14661,4158,1588 20 | 1974-07-01,20312,18803,1510,20312,14785,4096,1432 21 | 1974-10-01,21603,19597,2006,21603,14947,4803,1854 22 | 1975-01-01,19665,17877,1787,19665,14884,3714,1067 23 | 1975-04-01,21002,19275,1727,21002,15526,4121,1355 24 | 1975-07-01,19803,18267,1536,19803,14463,3966,1374 25 | 1975-10-01,19948,18593,1356,19948,14011,4554,1384 26 | 1976-01-01,18426,17284,1143,18426,13465,3659,1302 27 | 1976-04-01,19662,18574,1088,19662,13545,4342,1775 28 | 1976-07-01,19730,18360,1370,19730,13200,4536,1994 29 | 1976-10-01,19754,18531,1223,19754,12882,4661,2212 30 | 1977-01-01,18898,17525,1373,18898,12010,4349,2539 31 | 1977-04-01,21191,19643,1549,21191,12732,5266,3193 32 | 1977-07-01,21528,19900,1628,21528,13128,5506,2894 33 | 1977-10-01,20842,19274,1569,20842,13630,4864,2349 34 | 1978-01-01,18172,16888,1284,18172,11915,3761,2496 35 | 1978-04-01,20070,18785,1285,20070,12027,4790,3253 36 | 1978-07-01,20325,18924,1401,20325,12288,4577,3461 37 | 1978-10-01,20495,19121,1374,20495,13352,4776,2368 38 | 1979-01-01,20500,18855,1645,20500,13262,4570,2668 39 | 1979-04-01,22142,20429,1713,22142,13185,5246,3711 40 | 1979-07-01,22421,20392,2029,22421,14049,5084,3288 41 | 1979-10-01,22455,20215,2240,22455,14469,5415,2570 42 | 1980-01-01,21863,19440,2423,21863,13798,5147,2918 43 | 1980-04-01,22052,19831,2221,22052,13515,5719,2818 44 | 1980-07-01,22980,20400,2580,22980,14799,5275,2905 45 | 1980-10-01,22884,20093,2791,22884,15002,5145,2738 46 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.4.25 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:pydatajson/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | 
[aliases] 21 | test=nosetests 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | 6 | from setuptools import setup 7 | 8 | with open(os.path.abspath('README.md')) as readme_file: 9 | readme = readme_file.read() 10 | 11 | with open(os.path.abspath('HISTORY.md')) as history_file: 12 | history = history_file.read() 13 | 14 | with open(os.path.abspath("requirements.txt")) as f: 15 | requirements = [req.strip() for req in f.readlines()] 16 | 17 | with open(os.path.abspath("requirements_dev.txt")) as f: 18 | test_requirements = [req.strip() for req in f.readlines()] 19 | 20 | with open(os.path.abspath("requirements_2.7.txt")) as f: 21 | backport_requirements = [req.strip() for req in f.readlines()] 22 | 23 | setup( 24 | name='pydatajson', 25 | version='0.4.67', 26 | description="Paquete en python con herramientas para generar y validar metadatos de catálogos de datos en formato data.json.", 27 | long_description=readme + '\n\n' + history, 28 | long_description_content_type='text/markdown', 29 | author="Datos Argentina", 30 | author_email='datosargentina@jefatura.gob.ar', 31 | url='https://github.com/datosgobar/pydatajson', 32 | packages=[ 33 | 'pydatajson', 34 | ], 35 | package_dir={'pydatajson': 36 | 'pydatajson'}, 37 | include_package_data=True, 38 | install_requires=requirements, 39 | license="MIT license", 40 | zip_safe=False, 41 | keywords='pydatajson', 42 | classifiers=[ 43 | 'Development Status :: 2 - Pre-Alpha', 44 | 'Intended Audience :: Developers', 45 | 'License :: OSI Approved :: MIT License', 46 | 'Natural Language :: English', 47 | "Programming Language :: Python :: 2", 48 | 'Programming Language :: Python :: 2.7', 49 | "Programming Language :: Python :: 3", 50 | 'Programming Language :: Python :: 3.6', 51 | ], 52 | test_suite='tests', 53 | tests_require=test_requirements, 54 | extras_require={ 55 | ':python_version=="2.7"': backport_requirements 56 | }, 57 | entry_points={ 58 | 'console_scripts': [ 59 | 'pydatajson = pydatajson.__main__:main' 60 | ] 61 | } 62 | ) 63 | -------------------------------------------------------------------------------- /tests/TEST_CASES.md: -------------------------------------------------------------------------------- 1 | # Estrategia de testeo para `pydatajson` 2 | 3 | ## Tests de `is_valid_catalog` y `validate_catalog` locales 4 | 5 | Estas dos funciones son las principales herramientas de validación de archivos `data.json` (de ahora en más, "datajsons"). Para testearlas, se utilizan datajsons de prueba guardados en [`samples/`](samples/). 6 | 7 | El archivo de prueba fundamental se llama [`full_data.json`](samples/full_data.json), que contiene todas las claves (requeridas y opcionales) descritas en el [Perfil de Metadatos](https://docs.google.com/spreadsheets/d/1PqlkhB1o0u2xKDYuex3UC-UIPubSjxKCSBxfG9QhQaA/edit#gid=1493891225), con valores de ejemplo de tipo y formato correcto. Este archivo *siempre* debe pasar la validación sin errores. 8 | 9 | A partir de este archivo base, se crearon otros 26 datajsons, cada uno con una o (rara vez) unas pocas modificaciones que cubren distintas funcionalidades del validador. Por ejemplo, el archivo [`missing_dataset_title.json`](samples/missing_dataset_title.json) es idéntico a `full_data.json`, salvo que la clave `catalog["dataset"]["title"]` fue eliminada.
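A modo ilustrativo, los tests sobre estos archivos siguen un esquema como el siguiente (boceto hipotético: los nombres de los fixtures, como `self.dj` y `self.SAMPLES_DIR`, pueden variar respecto del archivo real de tests):

```python
def test_validity_of_missing_dataset_title(self):
    """Un catálogo con un dataset sin title debe ser inválido."""
    catalog = os.path.join(self.SAMPLES_DIR, "missing_dataset_title.json")
    self.assertFalse(self.dj.is_valid_catalog(catalog))
```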
10 | 11 | Cada uno de ellos se utiliza en una función de testeo cuyo nombre tiene el formato `test_validity_of_[DATAJSON_FILENAME]` en el archivo [`test_pydatajson.py`](test_pydatajson.py). En caso de que el nombre del datajson no sea lo suficientemente esclarecedor respecto a su intención, la función de testeo tendrá un docstring brevísimo explicándola en cierto detalle. 12 | 13 | ### Casos de testeo **válidos**: 14 | 15 | - `full_data.json`: Ejemplo completo según las especificaciones de `paquete-apertura-datos`. 16 | - `minimum_data.json`: Idéntico a `full_data.json`, pero incluye únicamente los campos obligatorios. 17 | - `null_dataset_theme.json`, `null_field_description.json`: Idénticos a `full_data.json`, con un campo opcional faltante. 18 | 19 | ### Casos de testeo **inválidos**: 20 | 21 | Todos los demás datajsons (23) son casos de testeo inválidos, y los errores que contienen caen en una de tres categorías: 22 | - una clave requerida está ausente del catálogo, 23 | - una clave requerida u opcional está presente, pero el tipo del valor que toma no es el esperado, o 24 | - una clave requerida u opcional está presente y su valor es del tipo esperado, pero el formato no es el correcto. 25 | 26 | ## Tests de `is_valid_catalog` y `validate_catalog` remotos 27 | 28 | Como ambas funciones tienen la capacidad de validar un datajson en una ubicación remota en caso de que se les pase una URL bien formada, también se incluyen tests que ejercitan esta modalidad, como la función `test_correctness_of_accrualPeriodicity_regex`. 29 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /tests/cassetes/ckan_integration/remove_dataset/test_with_no_parametres.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: '{}' 4 | headers: 5 | Accept: ['*/*'] 6 | Accept-Encoding: ['gzip, deflate'] 7 | Connection: [keep-alive] 8 | Content-Length: ['2'] 9 | Content-Type: [application/json] 10 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 11 | method: POST 12 | uri: http://localhost:8080/api/action/package_list 13 | response: 14 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 15 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 16 | "data3_3"]}'} 17 | headers: 18 | cache-control: [no-cache] 19 | connection: [Keep-Alive] 20 | content-length: ['169'] 21 | content-type: [application/json;charset=utf-8] 22 | date: ['Tue, 06 Mar 2018 17:22:51 GMT'] 23 | keep-alive: ['timeout=5, max=100'] 24 | pragma: [no-cache] 25 | server: [Apache/2.4.7 (Ubuntu)] 26 | status: {code: 200, message: OK} 27 | - request: 28 | body: '{}' 29 | headers: 30 | Accept: ['*/*'] 31 | Accept-Encoding: ['gzip, deflate'] 32 | Connection: [keep-alive] 33 | Content-Length: ['2'] 34 | Content-Type: [application/json] 35 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 36 | method: POST 37 | uri: http://localhost:8080/api/action/package_list 38 | response: 39 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 40 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 41 | "data3_3"]}'} 42 | headers: 43 | cache-control: [no-cache] 44 | connection: [Keep-Alive] 45 | content-length: ['169'] 
46 | content-type: [application/json;charset=utf-8] 47 | date: ['Tue, 06 Mar 2018 17:22:51 GMT'] 48 | keep-alive: ['timeout=5, max=99'] 49 | pragma: [no-cache] 50 | server: [Apache/2.4.7 (Ubuntu)] 51 | status: {code: 200, message: OK} 52 | - request: 53 | body: '{}' 54 | headers: 55 | Accept: ['*/*'] 56 | Accept-Encoding: ['gzip, deflate'] 57 | Connection: [keep-alive] 58 | Content-Length: ['2'] 59 | Content-Type: [application/json] 60 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 61 | method: POST 62 | uri: http://localhost:8080/api/action/package_list 63 | response: 64 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 65 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 66 | "data3_3"]}'} 67 | headers: 68 | Cache-Control: [no-cache] 69 | Connection: [Keep-Alive] 70 | Content-Length: ['169'] 71 | Content-Type: [application/json;charset=utf-8] 72 | Date: ['Tue, 06 Mar 2018 17:24:01 GMT'] 73 | Keep-Alive: ['timeout=5, max=100'] 74 | Pragma: [no-cache] 75 | Server: [Apache/2.4.7 (Ubuntu)] 76 | status: {code: 200, message: OK} 77 | - request: 78 | body: '{}' 79 | headers: 80 | Accept: ['*/*'] 81 | Accept-Encoding: ['gzip, deflate'] 82 | Connection: [keep-alive] 83 | Content-Length: ['2'] 84 | Content-Type: [application/json] 85 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 86 | method: POST 87 | uri: http://localhost:8080/api/action/package_list 88 | response: 89 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 90 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 91 | "data3_3"]}'} 92 | headers: 93 | Cache-Control: [no-cache] 94 | Connection: [Keep-Alive] 95 | Content-Length: ['169'] 96 | Content-Type: [application/json;charset=utf-8] 97 | Date: ['Tue, 06 Mar 2018 17:24:01 GMT'] 98 | Keep-Alive: ['timeout=5, max=99'] 99 | Pragma: [no-cache] 100 | Server: [Apache/2.4.7 (Ubuntu)] 101 | status: {code: 200, message: OK} 102 | version: 1 103 | -------------------------------------------------------------------------------- /tests/cassetes/test_generate_datasets_report.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Accept: ['*/*'] 6 | Accept-Encoding: ['gzip, deflate'] 7 | Connection: [keep-alive] 8 | User-Agent: [python-requests/2.11.1] 9 | method: GET 10 | uri: http://181.209.63.71/data.json 11 | response: 12 | body: {string: !!python/unicode "{\n \"title\": \"Andino\", \n \"description\": 13 | \"Portal Andino Demo\", \n \"superThemeTaxonomy\": \"http://datos.gob.ar/superThemeTaxonomy.json\", 14 | \n \"publisher\": {\n \"mbox\": \"\", \n \"name\": \"\"\n }, \n \"themeTaxonomy\": 15 | [\n {\n \"label\": \"Tema.demo\", \n \"id\": \"tema-demo\", \n 16 | \ \"description\": \"Ejemplo de un tema\"\n }\n ], \n \"dataset\": 17 | [\n {\n \"@type\": \"dcat:Dataset\", \n \"title\": \"Dataset 18 | Demo\", \n \"description\": \"Este es un dataset de ejemplo, se incluye 19 | como material DEMO y no contiene ningun valor estadistico.\", \n \"modified\": 20 | \"2016-11-30T22:25:33.503104\", \n \"accessLevel\": \"public\", \n \"identifier\": 21 | \"6897d435-8084-4685-b8ce-304b190755e4\", \n \"issued\": \"2016-11-30T22:22:48.635757\", 22 | \n \"landingPage\": \"https://github.com/datosgobar/portal-andino\", 23 | \n \"license\": \"Creative Commons Attribution\", \n \"publisher\": 24 | {\n \"mbox\": 
\"datosargentina@jefatura.gob.ar\", \n \"name\": \"Andino\"\n 25 | \ }, \n \"contactPoint\": {\n \"@type\": \"vcard:Contact\", 26 | \n \"fn\": \"Andino\", \n \"hasEmail\": \"mailto:datosargentina@jefatura.gob.ar\"\n 27 | \ }, \n \"distribution\": [\n {\n \"@type\": \"dcat:Distribution\", 28 | \n \"format\": \"CSV\", \n \"title\": \"Recurso de Ejemplo\", 29 | \n \"description\": \"Este es un recurso DEMO, se incluye como material 30 | de demostracion y no posee ningun valor estadistico.\", \n \"issued\": 31 | \"2016-11-30T22:24:01.259909\", \n \"modified\": \"2016-11-30T22:24:01.225394\", 32 | \n \"license\": \"cc-by\", \n \"accessURL\": \"http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/archivo/6145bf1c-a2fb-4bb5-b090-bb25f8419198\", 33 | \n \"downloadURL\": \"http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/resource/6145bf1c-a2fb-4bb5-b090-bb25f8419198/download/estructura-organica-3.csv\"\n 34 | \ }\n ], \n \"keyword\": [\n \"andino\", \n \"demo\", 35 | \n \"plan de datos\"\n ], \n \"superTheme\": [\n \"TECH\"\n 36 | \ ], \n \"accrualPeriodicity\": \"eventual\", \n \"language\": 37 | [\n \"spa\"\n ], \n \"theme\": [\n \"Tema.demo\"\n 38 | \ ]\n }\n ]\n}"} 39 | headers: 40 | connection: [keep-alive] 41 | content-length: ['2120'] 42 | content-type: [application/json; charset=UTF-8] 43 | date: ['Wed, 21 Dec 2016 18:39:34 GMT'] 44 | server: [nginx/1.4.6 (Ubuntu)] 45 | status: {code: 200, message: OK} 46 | version: 1 47 | -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import sys 5 | 6 | import pydatajson 7 | import pydatajson.search 8 | import pydatajson.backup 9 | 10 | sys.path.insert(0, os.path.abspath(".")) 11 | sys.path.insert(0, os.path.abspath("..")) 12 | -------------------------------------------------------------------------------- /tests/profiling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Decorador auxiliar 5 | 6 | Debe instalarse 'graphviz' en el sistema para que funcione. 
7 | 8 | Ubuntu: sudo apt-get install graphviz 9 | Mac: brew install graphviz 10 | """ 11 | 12 | from __future__ import unicode_literals 13 | from __future__ import print_function 14 | from __future__ import with_statement 15 | import os 16 | import sys 17 | import vcr 18 | 19 | from functools import wraps 20 | from pycallgraph import PyCallGraph 21 | from pycallgraph import Config 22 | from pycallgraph import GlobbingFilter 23 | from pycallgraph.output import GraphvizOutput 24 | 25 | # módulo de ejemplo que se quiere analizar 26 | import pydatajson 27 | 28 | SAMPLES_DIR = os.path.join("tests", "samples") 29 | TEMP_DIR = os.path.join("tests", "temp") 30 | PROFILING_DIR = os.path.join("tests", "profiling") 31 | if not os.path.exists(PROFILING_DIR): os.makedirs(PROFILING_DIR) 32 | 33 | VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'), 34 | cassette_library_dir=os.path.join( 35 | "tests", "cassetes", "profiling"), 36 | record_mode='once') 37 | 38 | 39 | def profile(profiling_result_path): 40 | """Decorador de una función para que se corra haciendo profiling.""" 41 | 42 | def fn_decorator(fn): 43 | """Decora una función con el análisis de profiling.""" 44 | 45 | @wraps(fn) 46 | def fn_decorated(*args, **kwargs): 47 | """Crea la función decorada.""" 48 | 49 | graphviz = GraphvizOutput() 50 | graphviz.output_file = profiling_result_path 51 | 52 | with PyCallGraph(output=graphviz, config=None): 53 | fn(*args, **kwargs) 54 | 55 | return fn_decorated 56 | return fn_decorator 57 | 58 | 59 | @VCR.use_cassette() 60 | @profile("tests/profiling/profiling_test.png") 61 | def main(): 62 | """Hace un profiling de la función para guardar un catálogo en Excel""" 63 | 64 | # ejemplo liviano 65 | # original_catalog = pydatajson.DataJson( 66 | # os.path.join(SAMPLES_DIR, "catalogo_justicia.json")) 67 | 68 | # ejemplo grande 69 | datasets_cant = 200 70 | original_catalog = pydatajson.DataJson( 71 | "http://infra.datos.gob.ar/catalog/sspm/data.json") 72 | original_catalog["dataset"] = original_catalog["dataset"][:datasets_cant] 73 | 74 | tmp_xlsx = os.path.join(TEMP_DIR, "xlsx_catalog.xlsx") 75 | original_catalog.to_xlsx(tmp_xlsx) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /tests/results/catalog_readme.md: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: Cosechando Datos Argentina 3 | 4 | ## Información General 5 | 6 | - **Autor**: Ministerio de Modernización 7 | - **Correo Electrónico**: datosargentina@jefatura.gob.ar 8 | - **Ruta del catálogo**: tests/samples/several_datasets_for_harvest.json 9 | - **Nombre del catálogo**: Cosechando Datos Argentina 10 | - **Descripción**: 11 | 12 | > Datasets para reporte pre cosecha 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: ERROR 17 | - **Estado metadatos catálogo**: OK 18 | - **Cantidad Total de Datasets**: 3 19 | - **Cantidad Total de Distribuciones**: 6 20 | 21 | - **Cantidad de Datasets Federados**: 0 22 | - **Cantidad de Datasets NO Federados**: 3 23 | - **Porcentaje de Datasets NO Federados**: 1.0 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | 28 | 29 | ## Datasets no federados 30 | 31 | - [Sistema de contrataciones electrónicas UNO](None) 32 | - [Sistema de contrataciones electrónicas DOS](None) 33 | - [Sistema de contrataciones electrónicas TRES](None) 34 | 35 | ## Datasets federados 36 | 37 | 38 | 39 | 
-------------------------------------------------------------------------------- /tests/results/catalog_readme.md: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: Cosechando Datos Argentina 3 | 4 | ## Información General 5 | 6 | - **Autor**: Ministerio de Modernización 7 | - **Correo Electrónico**: datosargentina@jefatura.gob.ar 8 | - **Ruta del catálogo**: tests/samples/several_datasets_for_harvest.json 9 | - **Nombre del catálogo**: Cosechando Datos Argentina 10 | - **Descripción**: 11 | 12 | > Datasets para reporte pre cosecha 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: ERROR 17 | - **Estado metadatos catálogo**: OK 18 | - **Cantidad Total de Datasets**: 3 19 | - **Cantidad Total de Distribuciones**: 6 20 | 21 | - **Cantidad de Datasets Federados**: 0 22 | - **Cantidad de Datasets NO Federados**: 3 23 | - **Porcentaje de Datasets NO Federados**: 1.0 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | 28 | 29 | ## Datasets no federados 30 | 31 | - [Sistema de contrataciones electrónicas UNO](None) 32 | - [Sistema de contrataciones electrónicas DOS](None) 33 | - [Sistema de contrataciones electrónicas TRES](None) 34 | 35 | ## Datasets federados 36 | 37 | 38 | 39 | ## Reporte 40 | 41 | Por favor, consulte el informe [`datasets.csv`](datasets.csv). 42 | -------------------------------------------------------------------------------- /tests/results/datasets_filter_out.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "publisher": { 4 | "mbox": "onc@modernizacion.gob.ar", 5 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones" 6 | }, 7 | "landingPage": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra", 8 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra) (sin datos)", 9 | "superTheme": [ 10 | "ECON" 11 | ], 12 | "title": "Sistema de contrataciones electrónicas (sin datos)", 13 | "issued": "2016-04-14T19:48:05.433640-03:00", 14 | "temporal": "2015-01-01/2015-12-31", 15 | "modified": "2016-04-19T19:48:05.433640-03:00", 16 | "language": [ 17 | "spa" 18 | ], 19 | "theme": [ 20 | "contrataciones", 21 | "compras", 22 | "convocatorias" 23 | ], 24 | "keyword": [ 25 | "bienes", 26 | "compras", 27 | "contrataciones", 28 | "bienes y compras" 29 | ], 30 | "accrualPeriodicity": "R/P1Y", 31 | "source": "Ministerio de modernizacion", 32 | "spatial": "ARG", 33 | "license": "Open Data Commons Open Database License 1.0", 34 | "contactPoint": { 35 | "hasEmail": "onc-compraselectronicas@modernizacion.gob.ar", 36 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones. Dirección de Compras Electrónicas." 37 | }, 38 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 39 | "distribution": [ 40 | { 41 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 42 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 43 | "rights": "Derechos especificados en la licencia.", 44 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 45 | "license": "Open Data Commons Open Database License 1.0", 46 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 47 | "title": "Convocatorias abiertas durante el año 2015", 48 | "byteSize": 5120, 49 | "fileName": "convocatoriasabiertasduranteelao.pdf", 50 | "format": "PDF", 51 | "type": "documentation", 52 | "mediaType": "application/pdf", 53 | "modified": "2016-04-19T19:48:05.433640-03:00", 54 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 55 | "issued": "2016-04-14T19:48:05.433640-03:00" 56 | } 57 | ] 58 | } 59 | ] 60 | -------------------------------------------------------------------------------- /tests/results/datasets_meta_field.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Sistema de contrataciones electrónicas", 3 | "Sistema de contrataciones electrónicas (sin datos)" 4 | ] 5 | -------------------------------------------------------------------------------- /tests/results/distributions.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la
licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "title": "Convocatorias abiertas durante el año 2015", 8 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 9 | "byteSize": 5120, 10 | "type": "file", 11 | "format": "CSV", 12 | "mediaType": "text/csv", 13 | "modified": "2016-04-19T19:48:05.433640-03:00", 14 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 15 | "field": [ 16 | { 17 | "description": "Identificador único del procedimiento de contratación", 18 | "type": "integer", 19 | "id": "proc12", 20 | "title": "procedimiento_id" 21 | }, 22 | { 23 | "type": "integer", 24 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 25 | "title": "organismo_unidad_operativa_contrataciones_id" 26 | }, 27 | { 28 | "type": "integer", 29 | "description": "Identificador único de la unidad operativa de contrataciones", 30 | "title": "unidad_operativa_contrataciones_id" 31 | }, 32 | { 33 | "type": "string", 34 | "description": "Organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 35 | "title": "organismo_unidad_operativa_contrataciones_desc" 36 | }, 37 | { 38 | "type": "string", 39 | "description": "Unidad operativa de contrataciones.", 40 | "title": "unidad_operativa_contrataciones_desc" 41 | }, 42 | { 43 | "type": "string", 44 | "description": "Tipo de procedimiento al que se adecua la contratación.", 45 | "title": "tipo_procedimiento_contratacion" 46 | }, 47 | { 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "type": "date", 54 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 55 | "title": "fecha_publicacion_convocatoria" 56 | }, 57 | { 58 | "type": "string", 59 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 60 | "title": "modalidad_convocatoria" 61 | }, 62 | { 63 | "type": "string", 64 | "description": "Clase de la convocatoria.", 65 | "title": "clase_convocatoria" 66 | }, 67 | { 68 | "type": "string", 69 | "description": "Objeto/objetivo de la convocatoria", 70 | "title": "objeto_convocatoria" 71 | } 72 | ], 73 | "issued": "2016-04-14T19:48:05.433640-03:00", 74 | "fileName": "convocatoriasabiertasduranteelao.csv", 75 | "identifier": "1.1" 76 | }, 77 | { 78 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 79 | "rights": "Derechos especificados en la licencia.", 80 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 81 | "license": "Open Data Commons Open Database License 1.0", 82 | "title": "Convocatorias abiertas durante el año 2015", 83 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 84 | "byteSize": 5120, 85 | "type": "documentation", 86 | "format": "PDF", 87 | "mediaType": "application/pdf", 88 | "modified": "2016-04-19T19:48:05.433640-03:00", 89 | "downloadURL": 
"http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 90 | "issued": "2016-04-14T19:48:05.433640-03:00", 91 | "fileName": "convocatoriasabiertasduranteelao.pdf", 92 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 93 | } 94 | ] 95 | -------------------------------------------------------------------------------- /tests/results/distributions_filter_in.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "title": "Convocatorias abiertas durante el año 2015", 8 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 9 | "byteSize": 5120, 10 | "type": "file", 11 | "format": "CSV", 12 | "mediaType": "text/csv", 13 | "modified": "2016-04-19T19:48:05.433640-03:00", 14 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 15 | "field": [ 16 | { 17 | "description": "Identificador único del procedimiento de contratación", 18 | "type": "integer", 19 | "id": "proc12", 20 | "title": "procedimiento_id" 21 | }, 22 | { 23 | "type": "integer", 24 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 25 | "title": "organismo_unidad_operativa_contrataciones_id" 26 | }, 27 | { 28 | "type": "integer", 29 | "description": "Identificador único de la unidad operativa de contrataciones", 30 | "title": "unidad_operativa_contrataciones_id" 31 | }, 32 | { 33 | "type": "string", 34 | "description": "Organismo que realiza la convocatoria. 
Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 35 | "title": "organismo_unidad_operativa_contrataciones_desc" 36 | }, 37 | { 38 | "type": "string", 39 | "description": "Unidad operativa de contrataciones.", 40 | "title": "unidad_operativa_contrataciones_desc" 41 | }, 42 | { 43 | "type": "string", 44 | "description": "Tipo de procedimiento al que se adecua la contratación.", 45 | "title": "tipo_procedimiento_contratacion" 46 | }, 47 | { 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "type": "date", 54 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 55 | "title": "fecha_publicacion_convocatoria" 56 | }, 57 | { 58 | "type": "string", 59 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 60 | "title": "modalidad_convocatoria" 61 | }, 62 | { 63 | "type": "string", 64 | "description": "Clase de la convocatoria.", 65 | "title": "clase_convocatoria" 66 | }, 67 | { 68 | "type": "string", 69 | "description": "Objeto/objetivo de la convocatoria", 70 | "title": "objeto_convocatoria" 71 | } 72 | ], 73 | "issued": "2016-04-14T19:48:05.433640-03:00", 74 | "fileName": "convocatoriasabiertasduranteelao.csv", 75 | "identifier": "1.1" 76 | }, 77 | { 78 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 79 | "rights": "Derechos especificados en la licencia.", 80 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 81 | "license": "Open Data Commons Open Database License 1.0", 82 | "title": "Convocatorias abiertas durante el año 2015", 83 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 84 | "byteSize": 5120, 85 | "type": "documentation", 86 | "format": "PDF", 87 | "mediaType": "application/pdf", 88 | "modified": "2016-04-19T19:48:05.433640-03:00", 89 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 90 | "issued": "2016-04-14T19:48:05.433640-03:00", 91 | "fileName": "convocatoriasabiertasduranteelao.pdf", 92 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 93 | } 94 | ] 95 | -------------------------------------------------------------------------------- /tests/results/distributions_filter_out.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "fileName": "convocatoriasabiertasduranteelao.pdf", 8 | "title": "Convocatorias abiertas durante el año 2015", 9 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 10 | "byteSize": 5120, 11 | "type": "documentation", 12 | "format": "PDF", 13 | "mediaType": "application/pdf", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "downloadURL": 
"http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 16 | "issued": "2016-04-14T19:48:05.433640-03:00", 17 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420" 18 | } 19 | ] 20 | -------------------------------------------------------------------------------- /tests/results/distributions_meta_field.json: -------------------------------------------------------------------------------- 1 | [ 2 | "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 3 | "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a" 4 | ] 5 | -------------------------------------------------------------------------------- /tests/results/distributions_only_time_series.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "https://www.minhacienda.gob.ar/secretarias/politica-economica/programacion-macroeconomica/", 4 | "scrapingFileSheet": "1.2 OyD real s.e.", 5 | "description": "Oferta y Demanda Globales por componente, a precios de comprador, en millones de pesos de 1993 y valores anuales desestacionalizados.", 6 | "format": "CSV", 7 | "dataset_identifier": "1", 8 | "issued": "2017-09-28T00:00:00", 9 | "title": "Oferta y Demanda Global. Precios constantes desestacionalizados. Base 1993. Valores trimestrales.", 10 | "modified": "2017-09-28T00:00:00", 11 | "fileName": "oferta-demanda-globales-datos-desestacionalizados-valores-trimestrales-base-1993.csv", 12 | "field": [ 13 | { 14 | "title": "indice_tiempo", 15 | "scrapingIdentifierCell": "A46", 16 | "specialTypeDetail": "R/P3M", 17 | "specialType": "time_index", 18 | "type": "date", 19 | "id": "1.2_IT_D_1993_T_13", 20 | "scrapingDataStartCell": "A47" 21 | }, 22 | { 23 | "description": "PIB a precios de comprador, en millones de pesos de 1993 y valores anuales.", 24 | "title": "oferta_global_pib", 25 | "scrapingIdentifierCell": "B46", 26 | "units": "Millones de pesos a precios de 1993", 27 | "type": "number", 28 | "id": "1.2_OGP_D_1993_T_17", 29 | "scrapingDataStartCell": "B47" 30 | }, 31 | { 32 | "description": "Importación a precios de comprador, en millones de pesos de 1993 y valores anuales.", 33 | "title": "oferta_global_importacion", 34 | "scrapingIdentifierCell": "C46", 35 | "units": "Millones de pesos a precios de 1993", 36 | "type": "number", 37 | "id": "1.2_OGI_D_1993_T_25", 38 | "scrapingDataStartCell": "C47" 39 | }, 40 | { 41 | "description": "Oferta global total a precios de comprador, en millones de pesos de 1993 y valores anuales.", 42 | "title": "demanda_global_exportacion", 43 | "scrapingIdentifierCell": "D46", 44 | "units": "Millones de pesos a precios de 1993", 45 | "type": "number", 46 | "id": "1.2_DGE_D_1993_T_26", 47 | "scrapingDataStartCell": "D47" 48 | }, 49 | { 50 | "description": "Consumo privado, en millones de pesos de 1993 y valores anuales.", 51 | "title": "demanda_global_ibif", 52 | "scrapingIdentifierCell": "E46", 53 | "units": "Millones de pesos a precios de 1993", 54 | "type": "number", 55 | "id": "1.2_DGI_D_1993_T_19", 56 | "scrapingDataStartCell": "E47" 57 | }, 58 | { 59 | "description": "Consumo publico, en millones de pesos de 1993 y valores anuales.", 60 | "title": "demanda_global_consumo_priv", 61 | "scrapingIdentifierCell": "F46", 62 | "units": "Millones de pesos a precios de 1993", 63 | "type": 
"number", 64 | "scrapingDataStartCell": "F47" 65 | }, 66 | { 67 | "description": "Inversion bruta interna fija, en millones de pesos de 1993 y valores anuales.", 68 | "title": "demanda_global_consumo_publico", 69 | "scrapingIdentifierCell": "G46", 70 | "units": "Millones de pesos a precios de 1993", 71 | "type": "string", 72 | "id": "1.2_DGCP_D_1993_T_30", 73 | "scrapingDataStartCell": "G47" 74 | } 75 | ], 76 | "draft": false, 77 | "units": "Millones de pesos, a precios de 1993", 78 | "identifier": "1.2", 79 | "scrapingFileURL": "https://www.economia.gob.ar/download/infoeco/actividad_ied.xlsx" 80 | } 81 | ] 82 | -------------------------------------------------------------------------------- /tests/results/empty_optional_string.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/expected_datasets_report.csv: -------------------------------------------------------------------------------- 1 | catalog_metadata_url,catalog_title,catalog_description,valid_catalog_metadata,dataset_index,dataset_title,dataset_accrualPeriodicity,valid_dataset_metadata,harvest,dataset_description,dataset_publisher_name,dataset_superTheme,dataset_theme,dataset_landingPage,distributions_list 2 | tests/samples/full_data.json,Datos Argentina,Portal de Datos Abiertos del Gobierno de la República Argentina,1,0,Sistema de contrataciones electrónicas,R/P1Y,1,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,"contrataciones, compras, convocatorias",http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 3 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,0,Sistema de contrataciones electrónicas,R/P1Y,0,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),,"ECON, HEAL",,,"""Convocatorias 2015"": None 4 | ""Convocatorias 2016"": [u'http://186.33.211.253/dataset2.csv'] 5 | ""Convocatorias 2017"": 444444 6 | ""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset3.csv" 7 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,1,Sistema de contrataciones electrónicas,R/P1Y,1,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 8 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,2,Sistema de contrataciones electrónicas,R/P1Y,1,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 9 | http://181.209.63.71/data.json,Andino,Portal Andino Demo,0,0,Dataset Demo,eventual,1,0,"Este es un dataset de ejemplo, se incluye como material DEMO y no contiene ningun valor estadistico.",Andino,TECH,Tema.demo,https://github.com/datosgobar/portal-andino,"""Recurso de Ejemplo"": http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/resource/6145bf1c-a2fb-4bb5-b090-bb25f8419198/download/estructura-organica-3.csv" 10 | -------------------------------------------------------------------------------- /tests/results/expected_harvester_config.csv: -------------------------------------------------------------------------------- 1 | catalog_metadata_url,dataset_title,dataset_accrualPeriodicity,job_name 2 | tests/samples/full_data.json,Sistema de contrataciones electrónicas,R/P1Y,modernizacion 3 | tests/samples/several_datasets_for_harvest.json,Sistema de contrataciones electrónicas,R/P1Y,modernizacion 4 | tests/samples/several_datasets_for_harvest.json,Sistema de contrataciones electrónicas,R/P1Y,modernizacion 5 | -------------------------------------------------------------------------------- /tests/results/fields.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "distribution_identifier": "1.1", 4 | "description": "Identificador único del procedimiento de contratación", 5 | "title": "procedimiento_id", 6 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 7 | "type": "integer", 8 | "id": "proc12" 9 | }, 10 | { 11 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 12 | "distribution_identifier": "1.1", 13 | "type": "integer", 14 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 15 | "title": "organismo_unidad_operativa_contrataciones_id" 16 | }, 17 | { 18 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 19 | "distribution_identifier": "1.1", 20 | "type": "integer", 21 | "description": "Identificador único de la unidad operativa de contrataciones", 22 | "title": "unidad_operativa_contrataciones_id" 23 | }, 24 | { 25 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 26 | "distribution_identifier": "1.1", 27 | "type": "string", 28 | "description": "Organismo que realiza la convocatoria. 
Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 29 | "title": "organismo_unidad_operativa_contrataciones_desc" 30 | }, 31 | { 32 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 33 | "distribution_identifier": "1.1", 34 | "type": "string", 35 | "description": "Unidad operativa de contrataciones.", 36 | "title": "unidad_operativa_contrataciones_desc" 37 | }, 38 | { 39 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 40 | "distribution_identifier": "1.1", 41 | "type": "string", 42 | "description": "Tipo de procedimiento al que se adecua la contratación.", 43 | "title": "tipo_procedimiento_contratacion" 44 | }, 45 | { 46 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 47 | "distribution_identifier": "1.1", 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 54 | "distribution_identifier": "1.1", 55 | "type": "date", 56 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 57 | "title": "fecha_publicacion_convocatoria" 58 | }, 59 | { 60 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 61 | "distribution_identifier": "1.1", 62 | "type": "string", 63 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 64 | "title": "modalidad_convocatoria" 65 | }, 66 | { 67 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 68 | "distribution_identifier": "1.1", 69 | "type": "string", 70 | "description": "Clase de la convocatoria.", 71 | "title": "clase_convocatoria" 72 | }, 73 | { 74 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 75 | "distribution_identifier": "1.1", 76 | "type": "string", 77 | "description": "Objeto/objetivo de la convocatoria", 78 | "title": "objeto_convocatoria" 79 | } 80 | ] 81 | -------------------------------------------------------------------------------- /tests/results/fields_filter_in.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "proc12", 4 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 5 | "distribution_identifier": "1.1", 6 | "type": "integer", 7 | "description": "Identificador único del procedimiento de contratación", 8 | "title": "procedimiento_id" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /tests/results/fields_filter_out.json: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /tests/results/fields_meta_field.json: -------------------------------------------------------------------------------- 1 | [ 2 | "integer", 3 | "integer", 4 | "integer", 5 | "string", 6 | "string", 7 | "string", 8 | "date", 9 | "date", 10 | "string", 11 | "string", 12 | "string" 13 | ] 14 | -------------------------------------------------------------------------------- /tests/results/full_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de 
contrataciones electrónicas" 16 | }, 17 | { 18 | "status": "OK", 19 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 20 | "list_index": 1, 21 | "errors": [], 22 | "title": "Sistema de contrataciones electrónicas (sin datos)" 23 | } 24 | ] 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /tests/results/get_distribution.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "title": "Convocatorias abiertas durante el año 2015", 8 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 9 | "byteSize": 5120, 10 | "type": "file", 11 | "format": "CSV", 12 | "mediaType": "text/csv", 13 | "modified": "2016-04-19T19:48:05.433640-03:00", 14 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 15 | "field": [ 16 | { 17 | "description": "Identificador único del procedimiento de contratación", 18 | "type": "integer", 19 | "id": "proc12", 20 | "title": "procedimiento_id" 21 | }, 22 | { 23 | "type": "integer", 24 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 25 | "title": "organismo_unidad_operativa_contrataciones_id" 26 | }, 27 | { 28 | "type": "integer", 29 | "description": "Identificador único de la unidad operativa de contrataciones", 30 | "title": "unidad_operativa_contrataciones_id" 31 | }, 32 | { 33 | "type": "string", 34 | "description": "Organismo que realiza la convocatoria. 
Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 35 | "title": "organismo_unidad_operativa_contrataciones_desc" 36 | }, 37 | { 38 | "type": "string", 39 | "description": "Unidad operativa de contrataciones.", 40 | "title": "unidad_operativa_contrataciones_desc" 41 | }, 42 | { 43 | "type": "string", 44 | "description": "Tipo de procedimiento al que se adecua la contratación.", 45 | "title": "tipo_procedimiento_contratacion" 46 | }, 47 | { 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "type": "date", 54 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 55 | "title": "fecha_publicacion_convocatoria" 56 | }, 57 | { 58 | "type": "string", 59 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 60 | "title": "modalidad_convocatoria" 61 | }, 62 | { 63 | "type": "string", 64 | "description": "Clase de la convocatoria.", 65 | "title": "clase_convocatoria" 66 | }, 67 | { 68 | "type": "string", 69 | "description": "Objeto/objetivo de la convocatoria", 70 | "title": "objeto_convocatoria" 71 | } 72 | ], 73 | "issued": "2016-04-14T19:48:05.433640-03:00", 74 | "fileName": "convocatoriasabiertasduranteelao.csv", 75 | "identifier": "1.1" 76 | }, 77 | { 78 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 79 | "rights": "Derechos especificados en la licencia.", 80 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 81 | "license": "Open Data Commons Open Database License 1.0", 82 | "title": "Convocatorias abiertas durante el año 2015", 83 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 84 | "byteSize": 5120, 85 | "type": "documentation", 86 | "format": "PDF", 87 | "mediaType": "application/pdf", 88 | "modified": "2016-04-19T19:48:05.433640-03:00", 89 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 90 | "issued": "2016-04-14T19:48:05.433640-03:00", 91 | "fileName": "convocatoriasabiertasduranteelao.pdf", 92 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 93 | } 94 | ] 95 | -------------------------------------------------------------------------------- /tests/results/get_distribution_of_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 3 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "fileName": "convocatoriasabiertasduranteelao.pdf", 8 | "title": "Convocatorias abiertas durante el año 2015", 9 | "byteSize": 5120, 10 | "type": "documentation", 11 | "fileName": "convocatoriasabiertasduranteelao.pdf", 12 | "format": "PDF", 13 | "mediaType": "application/pdf", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "downloadURL": 
"http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 16 | "issued": "2016-04-14T19:48:05.433640-03:00", 17 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420" 18 | } 19 | -------------------------------------------------------------------------------- /tests/results/get_field.json: -------------------------------------------------------------------------------- 1 | { 2 | "distribution_identifier": "1.1", 3 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 4 | "id": "proc12", 5 | "type": "integer", 6 | "description": "Identificador único del procedimiento de contratación", 7 | "title": "procedimiento_id" 8 | } 9 | -------------------------------------------------------------------------------- /tests/results/get_theme.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "adjudicaciones", 3 | "label": "Adjudicaciones", 4 | "description": "Datasets sobre licitaciones adjudicadas." 5 | } 6 | -------------------------------------------------------------------------------- /tests/results/invalid_dataset_theme_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "ERROR", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [ 15 | { 16 | "instance": "contrataciones", 17 | "validator": "anyOf", 18 | "path": [ 19 | "dataset", 20 | 0, 21 | "theme" 22 | ], 23 | "message": "u'contrataciones' is not valid under any of the given schemas", 24 | "error_code": 2, 25 | "validator_value": [ 26 | { 27 | "type": "array" 28 | }, 29 | { 30 | "type": "null" 31 | } 32 | ] 33 | } 34 | ], 35 | "title": "Sistema de contrataciones electrónicas" 36 | } 37 | ] 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /tests/results/invalid_field_description_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "ERROR", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [ 15 | { 16 | "instance": 123, 17 | "validator": "anyOf", 18 | "path": [ 19 | "dataset", 20 | 0, 21 | "distribution", 22 | 0, 23 | "field", 24 | 0, 25 | "description" 26 | ], 27 | "message": "123 is not valid under any of the given schemas", 28 | "error_code": 2, 29 | "validator_value": [ 30 | { 31 | "type": "string" 32 | }, 33 | { 34 | "type": "null" 35 | } 36 | ] 37 | } 38 | ], 39 | "title": "Sistema de contrataciones electrónicas" 40 | } 41 | ] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /tests/results/invalid_multiple_fields_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "ERROR", 6 | "errors": [ 7 | { 8 | "instance": [ 9 | "Ministerio de Modernización", 10 | "datosargentina@jefatura.gob.ar" 11 | ], 12 | "validator": "type", 13 | "path": [ 14 | "publisher" 15 | ], 16 | "message": "[u'Ministerio de Modernizaci\\xf3n', u'datosargentina@jefatura.gob.ar'] 
is not of type u'object'", 17 | "error_code": 2, 18 | "validator_value": "object" 19 | } 20 | ], 21 | "title": "Datos Argentina" 22 | }, 23 | "dataset": [ 24 | { 25 | "status": "ERROR", 26 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 27 | "list_index": 0, 28 | "errors": [ 29 | { 30 | "instance": "5120", 31 | "validator": "anyOf", 32 | "path": [ 33 | "dataset", 34 | 0, 35 | "distribution", 36 | 0, 37 | "byteSize" 38 | ], 39 | "message": "u'5120' is not valid under any of the given schemas", 40 | "error_code": 2, 41 | "validator_value": [ 42 | { 43 | "type": "integer" 44 | }, 45 | { 46 | "type": "null" 47 | }, 48 | { 49 | "type": "string", 50 | "maxLength": 0 51 | } 52 | ] 53 | } 54 | ], 55 | "title": "Sistema de contrataciones electrónicas" 56 | } 57 | ] 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /tests/results/minimum_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "1", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/mismatched_downloadURL_and_format.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electr\u00f3nicas" 16 | }, 17 | { 18 | "status": "OK", 19 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 20 | "list_index": 1, 21 | "errors": [], 22 | "title": "Sistema de contrataciones electr\u00f3nicas (sin datos)" 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /tests/results/mismatched_fileName_and_format.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electr\u00f3nicas" 16 | }, 17 | { 18 | "status": "OK", 19 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 20 | "list_index": 1, 21 | "errors": [], 22 | "title": "Sistema de contrataciones electr\u00f3nicas (sin datos)" 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /tests/results/multiple_missing_descriptions.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "ERROR", 6 | "errors": [ 7 | { 8 | "instance": null, 9 | "validator": "required", 10 | "path": [], 11 | "message": "'description' is a required property", 12 | "error_code": 1, 13 | "validator_value": [ 14 | "dataset", 15 | "title", 16 | "description", 17 | "publisher", 18 | "superThemeTaxonomy" 19 | ] 20 | } 21 | ], 22 | "title": "Datos Argentina" 23 | }, 24 | 
"dataset": [ 25 | { 26 | "status": "ERROR", 27 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 28 | "list_index": 0, 29 | "errors": [ 30 | { 31 | "instance": null, 32 | "validator": "required", 33 | "path": [ 34 | "dataset", 35 | 0 36 | ], 37 | "message": "'description' is a required property", 38 | "error_code": 1, 39 | "validator_value": [ 40 | "title", 41 | "description", 42 | "publisher", 43 | "superTheme", 44 | "distribution", 45 | "accrualPeriodicity", 46 | "issued", 47 | "identifier" 48 | ] 49 | } 50 | ], 51 | "title": "Sistema de contrataciones electrónicas" 52 | } 53 | ] 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /tests/results/null_dataset_theme.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/null_field_description.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/null_indicators_readme.md: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: Cosechando Datos Argentina 3 | 4 | ## Información General 5 | 6 | - **Autor**: Ministerio de Modernización 7 | - **Correo Electrónico**: datosargentina@jefatura.gob.ar 8 | - **Ruta del catálogo**: tests/samples/several_datasets_for_harvest.json 9 | - **Nombre del catálogo**: Cosechando Datos Argentina 10 | - **Descripción**: 11 | 12 | > Datasets para reporte pre cosecha 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: ERROR 17 | - **Estado metadatos catálogo**: OK 18 | - **Cantidad Total de Datasets**: 3 19 | - **Cantidad Total de Distribuciones**: 6 20 | 21 | - **Cantidad de Datasets Federados**: None 22 | - **Cantidad de Datasets NO Federados**: None 23 | - **Porcentaje de Datasets NO Federados**: None 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | 28 | 29 | ## Datasets no federados 30 | 31 | 32 | 33 | ## Datasets federados 34 | 35 | 36 | 37 | ## Reporte 38 | 39 | Por favor, consulte el informe [`datasets.csv`](datasets.csv). 
40 | -------------------------------------------------------------------------------- /tests/results/time_series.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "distribution_identifier": "1.2", 4 | "description": "PIB a precios de comprador, en millones de pesos de 1993 y valores anuales.", 5 | "title": "oferta_global_pib", 6 | "dataset_identifier": "1", 7 | "scrapingIdentifierCell": "B46", 8 | "units": "Millones de pesos a precios de 1993", 9 | "type": "number", 10 | "id": "1.2_OGP_D_1993_T_17", 11 | "scrapingDataStartCell": "B47" 12 | }, 13 | { 14 | "distribution_identifier": "1.2", 15 | "description": "Importación a precios de comprador, en millones de pesos de 1993 y valores anuales.", 16 | "title": "oferta_global_importacion", 17 | "dataset_identifier": "1", 18 | "scrapingIdentifierCell": "C46", 19 | "units": "Millones de pesos a precios de 1993", 20 | "type": "number", 21 | "id": "1.2_OGI_D_1993_T_25", 22 | "scrapingDataStartCell": "C47" 23 | }, 24 | { 25 | "distribution_identifier": "1.2", 26 | "description": "Oferta global total a precios de comprador, en millones de pesos de 1993 y valores anuales.", 27 | "title": "demanda_global_exportacion", 28 | "dataset_identifier": "1", 29 | "scrapingIdentifierCell": "D46", 30 | "units": "Millones de pesos a precios de 1993", 31 | "type": "number", 32 | "id": "1.2_DGE_D_1993_T_26", 33 | "scrapingDataStartCell": "D47" 34 | }, 35 | { 36 | "distribution_identifier": "1.2", 37 | "description": "Consumo privado, en millones de pesos de 1993 y valores anuales.", 38 | "title": "demanda_global_ibif", 39 | "dataset_identifier": "1", 40 | "scrapingIdentifierCell": "E46", 41 | "units": "Millones de pesos a precios de 1993", 42 | "type": "number", 43 | "id": "1.2_DGI_D_1993_T_19", 44 | "scrapingDataStartCell": "E47" 45 | } 46 | ] 47 | -------------------------------------------------------------------------------- /tests/results/write_table.csv: -------------------------------------------------------------------------------- 1 | Plato,Precio,Sabor 2 | Milanesa,Bajo,666 3 | "Thoné, Vitel",Alto,8000 4 | Aceitunas,,15 5 | -------------------------------------------------------------------------------- /tests/results/write_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/results/write_table.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo-justicia-missing-distribution-identifier.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo-justicia-missing-distribution-identifier.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia_extra_columns.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia_extra_columns.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia_no_xlsx_suffix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia_no_xlsx_suffix -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia_with_defaults.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia_with_defaults.xlsx -------------------------------------------------------------------------------- /tests/samples/invalid_catalog_empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "", 4 | "name": "" 5 | }, 6 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 7 | "description": "Describí el portal. Explicá de qué se trata tu catálogo de datos. Por favor, hacelo en no más de tres líneas.", 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "title": "Ministerio de Desarrollos Social", 10 | "dataset": [], 11 | "version": "1.1", 12 | "themeTaxonomy": [] 13 | } -------------------------------------------------------------------------------- /tests/samples/invalid_multiple_emails.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Datos Argentina", 3 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 4 | "publisher": { 5 | "name": "Ministerio de Modernización", 6 | "mbox": "datosargentina@jefatura.gob.ar" 7 | }, 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "dataset": [ 10 | { 11 | "title": "publisher mail roto", 12 | "identifier": "1", 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "publisher": { 15 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 16 | "mbox": "first@mail.com; second@mail.com" 17 | }, 18 | "contactPoint": { 19 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 20 | "hasEmail": "valid@mail.com" 21 | }, 22 | "superTheme": [ 23 | "ECON" 24 | ], 25 | "accrualPeriodicity": "R/P1Y", 26 | "issued": "2016-04-14T19:48:05.433640-03:00", 27 | "distribution": [ 28 | { 29 | "identifier": "dist_1", 30 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 31 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 32 | "title": "Convocatorias abiertas durante el año 2015", 33 | "issued": "2016-04-14T19:48:05.433640-03:00" 34 | } 35 | ] 36 | }, 37 | { 38 | "title": "contact point falluto", 39 | "identifier": "2", 40 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 41 | "publisher": { 42 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones", 43 | "mbox": "another_valid.mail@address.com.tv" 44 | }, 45 | "contactPoint": { 46 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 47 | "hasEmail": "one@mail.com;two@mail.com;three@mail.com" 48 | }, 49 | "superTheme": [ 50 | "ECON" 51 | ], 52 | "accrualPeriodicity": "R/P1Y", 53 | "issued": "2016-04-14T19:48:05.433640-03:00", 54 | "distribution": [ 55 | { 56 | "identifier": "dist_2", 57 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 58 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 59 | "title": "Convocatorias abiertas durante el año 2015", 60 | "issued": "2016-04-14T19:48:05.433640-03:00" 61 | } 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/samples/lists_extra_commas.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/lists_extra_commas.xlsx -------------------------------------------------------------------------------- /tests/samples/minimum_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Datos Argentina", 3 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 4 | "publisher": { 5 | "name": "Ministerio de Modernización", 6 | "mbox": "datosargentina@jefatura.gob.ar" 7 | }, 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "dataset": [ 10 | { 11 | "title": "Sistema de contrataciones electrónicas", 12 | "identifier": "1", 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "publisher": { 15 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones" 16 | }, 17 | "superTheme": [ 18 | "ECON" 19 | ], 20 | "accrualPeriodicity": "R/P1Y", 21 | "issued": "2016-04-14T19:48:05.433640-03:00", 22 | "distribution": [ 23 | { 24 | "identifier": "dist_1", 25 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 26 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 27 | "title": "Convocatorias abiertas durante el año 2015", 28 | "issued": "2016-04-14T19:48:05.433640-03:00" 29 | } 30 | ] 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /tests/samples/missing_catalog_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 8 | "language": [ 9 | "spa" 10 | ], 11 | "title": "Datos Argentina", 12 | "issued": "2016-04-14T19:48:05.433640-03:00", 13 | "rights": "Derechos especificados en la licencia.", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "themeTaxonomy": [ 16 | { 17 | "label": "Convocatorias", 18 | "description": "Datasets sobre licitaciones en estado de convocatoria.", 19 | "id": "convocatorias" 20 | }, 21 | { 22 | "label": "Compras", 23 | "description": "Datasets sobre compras realizadas.", 24 | "id": "compras" 25 | }, 26 | { 27 | "label": "Contrataciones", 28 | "description": "Datasets sobre contrataciones.", 29 | "id": "contrataciones" 30 | }, 31 | { 32 | "label": "Adjudicaciones", 33 | "description": "Datasets sobre licitaciones adjudicadas.", 34 | "id": "adjudicaciones" 35 | }, 36 | { 37 | "label": "Normativa", 38 | "description": "Datasets sobre normativa para compras y contrataciones.", 39 | "id": "normativa" 40 | }, 41 | { 42 | "label": "Proveedores", 43 | "description": "Datasets sobre proveedores del Estado.", 44 | "id": "proveedores" 45 | } 46 | ], 47 | "version": "1.1", 48 | "spatial": "ARG", 49 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 50 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 51 | "homepage": "http://datos.gob.ar" 52 | } -------------------------------------------------------------------------------- /tests/samples/missing_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 8 | "language": [ 9 | "spa" 10 | ], 11 | "title": "Datos Argentina", 12 | "issued": "2016-04-14T19:48:05.433640-03:00", 13 | "rights": "Derechos especificados en la licencia.", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "themeTaxonomy": [ 16 | { 17 | "label": "Convocatorias", 18 | "description": "Datasets sobre licitaciones en estado de convocatoria.", 19 | "id": "convocatorias" 20 | }, 21 | { 22 | "label": "Compras", 23 | "description": "Datasets sobre compras realizadas.", 24 | "id": "compras" 25 | }, 26 | { 27 | "label": "Contrataciones", 28 | "description": "Datasets sobre 
contrataciones.", 29 | "id": "contrataciones" 30 | }, 31 | { 32 | "label": "Adjudicaciones", 33 | "description": "Datasets sobre licitaciones adjudicadas.", 34 | "id": "adjudicaciones" 35 | }, 36 | { 37 | "label": "Normativa", 38 | "description": "Datasets sobre normativa para compras y contrataciones.", 39 | "id": "normativa" 40 | }, 41 | { 42 | "label": "Proveedores", 43 | "description": "Datasets sobre proveedores del Estado.", 44 | "id": "proveedores" 45 | } 46 | ], 47 | "version": "1.1", 48 | "spatial": "ARG", 49 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 50 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 51 | "homepage": "http://datos.gob.ar" 52 | } -------------------------------------------------------------------------------- /tests/samples/missing_periodicity.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 7 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 8 | "title": "Datos Argentina", 9 | "dataset": [ 10 | { 11 | "identifier": "1", 12 | "publisher": { 13 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones" 14 | }, 15 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 16 | "superTheme": [ 17 | "ECON" 18 | ], 19 | "title": "Sistema de contrataciones electrónicas", 20 | "issued": "2016-04-14T19:48:05.433640-03:00", 21 | "source": "Ministerio de modernizacion", 22 | "distribution": [ 23 | { 24 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 25 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 26 | "title": "Convocatorias abiertas durante el año 2015", 27 | "issued": "2016-04-14T19:48:05.433640-03:00" 28 | } 29 | ] 30 | } 31 | ], 32 | "version": "1.1", 33 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 34 | } -------------------------------------------------------------------------------- /tests/samples/processed_datasets_report.csv: -------------------------------------------------------------------------------- 1 | catalog_metadata_url,catalog_title,catalog_description,valid_catalog_metadata,dataset_index,dataset_title,dataset_accrualPeriodicity,valid_dataset_metadata,harvest,dataset_description,dataset_publisher_name,dataset_superTheme,dataset_theme,dataset_landingPage,distributions_list 2 | tests/samples/full_data.json,Datos Argentina,Portal de Datos Abiertos del Gobierno de la República Argentina,1,0,Sistema de contrataciones electrónicas,R/P1Y,1,1,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones,ECON,"contrataciones, compras, convocatorias",http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 3 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,0,Sistema de contrataciones electrónicas,R/P1Y,0,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),,"ECON, HEAL",,,"""Convocatorias 2015"": None 4 | ""Convocatorias 2016"": [u'http://186.33.211.253/dataset2.csv'] 5 | ""Convocatorias 2017"": 444444 6 | ""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset3.csv" 7 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,1,Sistema de contrataciones electrónicas,R/P1Y,1,1,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 8 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,2,Sistema de contrataciones electrónicas,R/P1Y,1,1,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 9 | http://181.209.63.71/data.json,Andino,Portal Andino Demo,0,0,Dataset Demo,eventual,1,0,"Este es un dataset de ejemplo, se incluye como material DEMO y no contiene ningun valor estadistico.",Andino,TECH,Tema.demo,https://github.com/datosgobar/portal-andino,"""Recurso de Ejemplo"": http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/resource/6145bf1c-a2fb-4bb5-b090-bb25f8419198/download/estructura-organica-3.csv" 10 | -------------------------------------------------------------------------------- /tests/samples/prueba_sheet_to_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/prueba_sheet_to_table.xlsx -------------------------------------------------------------------------------- /tests/samples/read_table.csv: -------------------------------------------------------------------------------- 1 | Plato,Precio,Sabor 2 | Milanesa,Bajo,666 3 | "Thoné, Vitel",Alto,8000 4 | Aceitunas,,15 5 | -------------------------------------------------------------------------------- /tests/samples/read_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/read_table.xlsx -------------------------------------------------------------------------------- /tests/samples/resource_sample.csv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/resource_sample.csv -------------------------------------------------------------------------------- /tests/samples/several_datasets_for_harvest.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "description": "Datasets para reporte pre cosecha", 7 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 8 | "title": "Cosechando Datos Argentina", 9 | "dataset": [ 10 | { 11 | "identifier": "1", 12 | "publisher": {}, 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "superTheme": [ 15 | "ECON", 16 | "HEAL" 17 | ], 18 | "title": "Sistema de contrataciones electrónicas UNO", 19 | "issued": "2016-04-14T19:48:05.433640-03:00", 20 | "source": "Ministerio de modernizacion", 21 | "accrualPeriodicity": "R/P1Y", 22 | "distribution": [ 23 | { 24 | "identifier": "dist_1", 25 | "accessURL": "http://datos.gob.ar/dataset1.csv", 26 | "issued": "2016-04-14T19:48:05.433640-03:00", 27 | "title": "Convocatorias 2015" 28 | }, 29 | { 30 | "identifier": "dist_2", 31 | "accessURL": "http://datos.gob.ar", 32 | "downloadURL": [ 33 | "http://186.33.211.253/dataset2.csv" 34 | ], 35 | "title": "Convocatorias 2016", 36 | "issued": "2016-04-14T19:48:05.433640-03:00" 37 | }, 38 | { 39 | "identifier": "dist_3", 40 | "accessURL": "http://datos.gob.ar", 41 | "downloadURL": 444444, 42 | "title": "Convocatorias 2017", 43 | "issued": "2016-04-14T19:48:05.433640-03:00" 44 | }, 45 | { 46 | "identifier": "dist_4", 47 | "accessURL": "http://datos.gob.ar", 48 | "downloadURL": "http://186.33.211.253/dataset3.csv", 49 | "title": "Convocatorias abiertas durante el año 2015", 50 | "issued": "2016-04-14T19:48:05.433640-03:00" 51 | } 52 | ] 53 | }, 54 | { 55 | "identifier": "2", 56 | "publisher": { 57 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones" 58 | }, 59 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 60 | "superTheme": [ 61 | "ECON" 62 | ], 63 | "title": "Sistema de contrataciones electrónicas DOS", 64 | "issued": "2016-04-14T19:48:05.433640-03:00", 65 | "source": "Ministerio de modernizacion", 66 | "accrualPeriodicity": "R/P1Y", 67 | "distribution": [ 68 | { 69 | "identifier": "dist_5", 70 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 71 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 72 | "title": "Convocatorias abiertas durante el año 2015", 73 | "issued": "2016-04-14T19:48:05.433640-03:00" 74 | } 75 | ] 76 | }, 77 | { 78 | "identifier": "3", 79 | "publisher": { 80 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones" 81 | }, 82 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 83 | "superTheme": [ 84 | "ECON" 85 | ], 86 | "title": "Sistema de contrataciones electrónicas TRES", 87 | "issued": "2016-04-14T19:48:05.433640-03:00", 88 | "source": "Ministerio de modernizacion", 89 | "accrualPeriodicity": "R/P1Y", 90 | "distribution": [ 91 | { 92 | "identifier": "dist_6", 93 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 94 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 95 | "title": "Convocatorias abiertas durante el año 2015", 96 | "issued": "2016-04-14T19:48:05.433640-03:00" 97 | } 98 | ] 99 | } 100 | ], 101 | "version": "1.1", 102 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 103 | } -------------------------------------------------------------------------------- /tests/samples/valid_whitespace_emails.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Datos Argentina", 3 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 4 | "publisher": { 5 | "name": "Ministerio de Modernización", 6 | "mbox": "datosargentina@jefatura.gob.ar" 7 | }, 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "dataset": [ 10 | { 11 | "title": "publisher mail roto", 12 | "identifier": "1", 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "publisher": { 15 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 16 | "mbox": " whitespace@mail.com" 17 | }, 18 | "contactPoint": { 19 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 20 | "hasEmail": "valid@mail.com" 21 | }, 22 | "superTheme": [ 23 | "ECON" 24 | ], 25 | "accrualPeriodicity": "R/P1Y", 26 | "issued": "2016-04-14T19:48:05.433640-03:00", 27 | "distribution": [ 28 | { 29 | "identifier": "dist_1", 30 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 31 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 32 | "title": "Convocatorias abiertas durante el año 2015", 33 | "issued": "2016-04-14T19:48:05.433640-03:00" 34 | } 35 | ] 36 | }, 37 | { 38 | "title": "contact point falluto", 39 | "identifier": "2", 40 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 41 | "publisher": { 42 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 43 | "mbox": "another_valid.mail@address.com.tv" 44 | }, 45 | "contactPoint": { 46 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones", 47 | "hasEmail": "trailspace@mail.com " 48 | }, 49 | "superTheme": [ 50 | "ECON" 51 | ], 52 | "accrualPeriodicity": "R/P1Y", 53 | "issued": "2016-04-14T19:48:05.433640-03:00", 54 | "distribution": [ 55 | { 56 | "identifier": "dist_2", 57 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 58 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 59 | "title": "Convocatorias abiertas durante el año 2015", 60 | "issued": "2016-04-14T19:48:05.433640-03:00" 61 | } 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/support/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/support/__init__.py -------------------------------------------------------------------------------- /tests/support/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | BAD_DATAJSON_URL = "http://104.131.35.253/data.json" 5 | BAD_DATAJSON_URL2 = "http://181.209.63.71/data.json" 6 | -------------------------------------------------------------------------------- /tests/support/decorators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import io 5 | import json 6 | import os 7 | 8 | from six import wraps 9 | 10 | RESULTS_DIR = os.path.join("tests", "results") 11 | 12 | 13 | def load_expected_result(): 14 | def case_decorator(test): 15 | case_filename = test.__name__.split("test_")[-1] 16 | 17 | @wraps(test) 18 | def decorated_test(*args, **kwargs): 19 | result_path = os.path.join(RESULTS_DIR, case_filename + ".json") 20 | 21 | with io.open(result_path, encoding='utf8') as result_file: 22 | expected_result = json.load(result_file) 23 | 24 | kwargs["expected_result"] = expected_result 25 | test(*args, **kwargs) 26 | 27 | return decorated_test 28 | 29 | return case_decorator 30 | -------------------------------------------------------------------------------- /tests/support/factories/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/support/factories/__init__.py -------------------------------------------------------------------------------- /tests/support/factories/core_files.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import unicode_literals, absolute_import 5 | 6 | from .catalog_errors import missing_catalog_title, \ 7 | missing_catalog_description, \ 8 | missing_catalog_dataset, invalid_catalog_publisher_type, \ 9 | invalid_publisher_mbox_format, null_catalog_publisher, \ 10 | empty_mandatory_string, malformed_date, malformed_datetime, \ 11 | malformed_datetime2, malformed_uri, \ 12 | invalid_theme_taxonomy, missing_dataset 13 | from .dataset_errors import missing_dataset_title, \ 14 | missing_dataset_description, \ 15 | malformed_accrualperiodicity, malformed_temporal, \ 16 | malformed_temporal2, too_long_field_title 17 | 
from .distribution_errors import missing_distribution_title 18 | from .other_errors import multiple_missing_descriptions, \ 19 | invalid_multiple_fields_type 20 | 21 | FULL_DATA_RESPONSE = { 22 | "status": "OK", 23 | "error": { 24 | "catalog": { 25 | "status": "OK", 26 | "errors": [], 27 | "title": "Datos Argentina" 28 | }, 29 | "dataset": [ 30 | { 31 | "status": "OK", 32 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 33 | "list_index": 0, 34 | "errors": [], 35 | "title": "Sistema de contrataciones electrónicas" 36 | }, 37 | { 38 | "status": "OK", 39 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 40 | "list_index": 1, 41 | "errors": [], 42 | "title": "Sistema de contrataciones electrónicas (sin datos)" 43 | } 44 | ] 45 | } 46 | } 47 | 48 | TEST_FROM_RESULT_FILE = { 49 | # Tests for REQUIRED FIELDS 50 | # Tests for valid inputs 51 | 'full_data': FULL_DATA_RESPONSE, 52 | # A data.json with correct values only for the required keys 53 | 'minimum_data': None, 54 | 55 | # Tests for FIELD TYPES 56 | # Tests for valid inputs 57 | 'null_dataset_theme': None, 58 | 'null_field_description': None, 59 | # Tests for invalid inputs 60 | 'invalid_catalog_publisher_type': None, 61 | 'invalid_publisher_mbox_format': None, 62 | # Catalog_publisher and distribution_bytesize fail 63 | 'invalid_field_description_type': None, 64 | # The required key catalog["description"] must NOT be an empty string 65 | 'empty_optional_string': None, 66 | # The format and the fileName extension of a distribution must 67 | # match when both fields are present 68 | 'mismatched_fileName_and_format': None, 69 | # The format and the downloadURL extension of a distribution must 70 | # match when both fields are present 71 | 'mismatched_downloadURL_and_format': None, 72 | } 73 | 74 | TEST_FROM_GENERATED_RESULT = { 75 | 76 | 'multiple_missing_descriptions': multiple_missing_descriptions(), 77 | 'invalid_multiple_fields_type': invalid_multiple_fields_type(), 78 | 79 | 'missing_catalog_title': missing_catalog_title(), 80 | 'missing_catalog_description': missing_catalog_description(), 81 | 'missing_catalog_dataset': missing_catalog_dataset(), 82 | 'null_catalog_publisher': null_catalog_publisher(), 83 | 'empty_mandatory_string': empty_mandatory_string(), 84 | 'malformed_datetime': malformed_datetime(), 85 | 'malformed_datetime2': malformed_datetime2(), 86 | 'malformed_uri': malformed_uri(), 87 | 'invalid_themeTaxonomy': invalid_theme_taxonomy(), 88 | 'missing_dataset': missing_dataset(), 89 | 90 | 'missing_dataset_title': missing_dataset_title(), 91 | 'missing_dataset_description': missing_dataset_description(), 92 | 'malformed_accrualperiodicity': malformed_accrualperiodicity(), 93 | 'malformed_date': malformed_date(), 94 | 'malformed_temporal': malformed_temporal(), 95 | 'malformed_temporal2': malformed_temporal2(), 96 | 'too_long_field_title': too_long_field_title(), 97 | 98 | 'missing_distribution_title': missing_distribution_title(), 99 | 100 | 'invalid_catalog_publisher_type': invalid_catalog_publisher_type(), 101 | 'invalid_publisher_mbox_format': invalid_publisher_mbox_format(), 102 | 103 | # 'repeated_downloadURL': repeated_downloadURL(), 104 | } 105 | 106 | TEST_FILE_RESPONSES = {} 107 | TEST_FILE_RESPONSES.update(TEST_FROM_RESULT_FILE) 108 | TEST_FILE_RESPONSES.update(TEST_FROM_GENERATED_RESULT) 109 | -------------------------------------------------------------------------------- /tests/support/factories/distribution_errors.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import unicode_literals 5 | 6 | from tests.support.utils import jsonschema_str 7 | 8 | 9 | def distribution_error(): 10 | return { 11 | "status": "ERROR", 12 | "error": { 13 | "catalog": { 14 | "status": "OK", 15 | "errors": [], 16 | "title": "Datos Argentina" 17 | }, 18 | "dataset": [ 19 | { 20 | "status": "ERROR", 21 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 22 | "list_index": 0, 23 | "errors": [ 24 | { 25 | "instance": None, 26 | "validator": "required", 27 | "path": [ 28 | "dataset", 29 | 0, 30 | "distribution", 31 | 0 32 | ], 33 | "message": "%s is a required property" 34 | % jsonschema_str('title'), 35 | "error_code": 1, 36 | "validator_value": [ 37 | "accessURL", 38 | "downloadURL", 39 | "title", 40 | "issued", 41 | "identifier" 42 | ] 43 | } 44 | ], 45 | "title": "Sistema de contrataciones electrónicas" 46 | } 47 | ] 48 | } 49 | } 50 | 51 | 52 | def missing_distribution_title(): 53 | return distribution_error() 54 | -------------------------------------------------------------------------------- /tests/support/factories/other_errors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import unicode_literals 5 | 6 | from tests.support.utils import jsonschema_str 7 | 8 | 9 | def gen_error(catalog_error, dataset_error): 10 | return { 11 | "status": "ERROR", 12 | "error": { 13 | "catalog": { 14 | "status": "ERROR", 15 | "errors": [ 16 | catalog_error 17 | ], 18 | "title": "Datos Argentina" 19 | }, 20 | "dataset": [ 21 | { 22 | "status": "ERROR", 23 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 24 | "list_index": 0, 25 | "errors": [ 26 | dataset_error, 27 | ], 28 | "title": "Sistema de contrataciones electrónicas" 29 | } 30 | ] 31 | } 32 | } 33 | 34 | 35 | def multiple_missing_descriptions(): 36 | return gen_error({ 37 | "instance": None, 38 | "validator": "required", 39 | "path": [], 40 | "message": "%s is a required property" % jsonschema_str('description'), 41 | "error_code": 1, 42 | "validator_value": [ 43 | "dataset", 44 | "title", 45 | "description", 46 | "publisher", 47 | "superThemeTaxonomy" 48 | ] 49 | }, { 50 | "instance": None, 51 | "validator": "required", 52 | "path": [ 53 | "dataset", 54 | 0 55 | ], 56 | "message": "%s is a required property" % jsonschema_str('description'), 57 | "error_code": 1, 58 | "validator_value": [ 59 | "title", 60 | "description", 61 | "publisher", 62 | "superTheme", 63 | "distribution", 64 | "accrualPeriodicity", 65 | "issued", 66 | "identifier" 67 | ] 68 | }) 69 | 70 | 71 | def invalid_multiple_fields_type(): 72 | return gen_error({ 73 | "instance": [ 74 | "Ministerio de Modernización", 75 | "datosargentina@jefatura.gob.ar" 76 | ], 77 | "validator": "type", 78 | "path": [ 79 | "publisher" 80 | ], 81 | "message": "[%s, %s] is not of type %s" % ( 82 | jsonschema_str('Ministerio de Modernización'), 83 | jsonschema_str('datosargentina@jefatura.gob.ar'), 84 | jsonschema_str('object'), 85 | ), 86 | "error_code": 2, 87 | "validator_value": "object" 88 | }, { 89 | "instance": "5120", 90 | "validator": "anyOf", 91 | "path": [ 92 | "dataset", 93 | 0, 94 | "distribution", 95 | 0, 96 | "byteSize" 97 | ], 98 | "message": "%s is not valid under any of the given schemas" 99 | % jsonschema_str('5120'), 100 | "error_code": 2, 101 | "validator_value": [ 102 | { 103 | "type": "integer" 104 | }, 105 | { 106 | '$ref': 
'mixed-types.json#emptyValue' 107 | } 108 | ] 109 | }) 110 | -------------------------------------------------------------------------------- /tests/support/factories/xlsx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import json 4 | from collections import OrderedDict 5 | 6 | import six 7 | 8 | 9 | def to_native_dict(ordered_dict): 10 | return json.loads(json.dumps(ordered_dict)) 11 | 12 | 13 | def to_dict(table_list): 14 | ordered_dict = OrderedDict(table_list) 15 | if six.PY3: 16 | return to_native_dict(ordered_dict) 17 | else: 18 | return ordered_dict 19 | 20 | 21 | CSV_TABLE = [ 22 | to_dict([(u'Plato', u'Milanesa'), 23 | (u'Precio', u'Bajo'), 24 | (u'Sabor', u'666')]), 25 | to_dict([(u'Plato', u'Thoné, Vitel'), 26 | (u'Precio', u'Alto'), 27 | (u'Sabor', u'8000')]), 28 | to_dict([(u'Plato', u'Aceitunas'), 29 | (u'Precio', u''), 30 | (u'Sabor', u'15')]) 31 | ] 32 | 33 | WRITE_XLSX_TABLE = [ 34 | to_dict([(u'Plato', u'Milanesa'), 35 | (u'Precio', u'Bajo'), 36 | (u'Sabor', 666)]), 37 | to_dict([(u'Plato', u'Thoné, Vitel'), 38 | (u'Precio', u'Alto'), 39 | (u'Sabor', 8000)]), 40 | to_dict([(u'Plato', u'Aceitunas'), 41 | (u'Precio', None), 42 | (u'Sabor', 15)]) 43 | ] 44 | 45 | READ_XLSX_TABLE = [ 46 | to_dict([(u'Plato', u'Milanesa'), 47 | (u'Precio', u'Bajo'), 48 | (u'Sabor', 666)]), 49 | to_dict([(u'Plato', u'Thoné, Vitel'), 50 | (u'Precio', u'Alto'), 51 | (u'Sabor', 8000)]), 52 | to_dict([(u'Plato', u'Aceitunas'), 53 | (u'Sabor', 15)]) 54 | ] 55 | -------------------------------------------------------------------------------- /tests/support/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def jsonschema_str(string): 5 | return repr(string) 6 | -------------------------------------------------------------------------------- /tests/test_backup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Tests for the functionality of the 'backup' module 5 | """ 6 | 7 | from __future__ import unicode_literals 8 | from __future__ import print_function 9 | from __future__ import with_statement 10 | 11 | from contextlib import contextmanager 12 | 13 | import unittest 14 | import nose 15 | import os 16 | import vcr 17 | import tempfile 18 | import shutil 19 | 20 | 21 | from .context import pydatajson 22 | 23 | SAMPLES_DIR = os.path.join("tests", "samples") 24 | RESULTS_DIR = os.path.join("tests", "results") 25 | 26 | VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'), 27 | cassette_library_dir=os.path.join("tests", "cassetes", "backup"), 28 | record_mode='once') 29 | 30 | 31 | @contextmanager 32 | def tempdir(cleanup=True): 33 | tmp = tempfile.mkdtemp(dir='tests/temp') 34 | try: 35 | yield tmp 36 | finally: 37 | cleanup and shutil.rmtree(tmp, ignore_errors=True) 38 | 39 | 40 | class BackupTestCase(unittest.TestCase): 41 | """Tests for backup methods.""" 42 | 43 | @classmethod 44 | def get_sample(cls, sample_filename): 45 | return os.path.join(SAMPLES_DIR, sample_filename) 46 | 47 | @classmethod 48 | def setUp(cls): 49 | cls.catalog_meta = pydatajson.DataJson( 50 | cls.get_sample("full_data.json")) 51 | cls.catalog_data = pydatajson.DataJson( 52 | cls.get_sample("example_time_series.json")) 53 | cls.maxDiff = None 54 | cls.longMessage = True 55 | 56 | @classmethod 57 | def tearDown(cls): 58 | del (cls.catalog_meta) 59
| del (cls.catalog_data) 60 | 61 | def test_make_catalog_backup_metadata(self): 62 | with tempdir() as temp_dir: 63 | json_path = os.path.join( 64 | temp_dir, "catalog", "example", "data.json") 65 | xlsx_path = os.path.join( 66 | temp_dir, "catalog", "example", "catalog.xlsx") 67 | 68 | pydatajson.backup.make_catalog_backup( 69 | self.catalog_meta, 70 | catalog_id="example", local_catalogs_dir=temp_dir, 71 | include_metadata=True, include_data=False) 72 | 73 | self.assertTrue(os.path.exists(json_path)) 74 | self.assertTrue(os.path.exists(xlsx_path)) 75 | 76 | @VCR.use_cassette() 77 | def test_make_catalog_backup_data(self): 78 | with tempdir() as temp_dir: 79 | distribution_path = os.path.abspath( 80 | os.path.join( 81 | temp_dir, 82 | "catalog", 83 | "example_ts", 84 | "dataset", 85 | "1", 86 | "distribution", 87 | "1.2", 88 | "download", 89 | "oferta-demanda-globales-datos-desestacionalizados" 90 | "-valores-trimestrales-base-1993.csv")) 91 | 92 | pydatajson.backup.make_catalog_backup( 93 | self.catalog_data, 94 | catalog_id="example_ts", local_catalogs_dir=temp_dir, 95 | include_metadata=True, include_data=True) 96 | 97 | self.assertTrue(os.path.exists(distribution_path)) 98 | 99 | @VCR.use_cassette() 100 | def test_make_catalog_backup_data_without_file_name(self): 101 | with tempdir() as temp_dir: 102 | distribution_path = os.path.abspath(os.path.join( 103 | temp_dir, "catalog", "example_ts", "dataset", "1", 104 | "distribution", "1.2.b", "download", 105 | "odg-total-millones-pesos-1960-trimestral.csv" 106 | )) 107 | 108 | pydatajson.backup.make_catalog_backup( 109 | self.catalog_data, 110 | catalog_id="example_ts", local_catalogs_dir=temp_dir, 111 | include_metadata=True, include_data=True) 112 | 113 | self.assertTrue(os.path.exists(distribution_path)) 114 | 115 | 116 | if __name__ == '__main__': 117 | nose.run(defaultTest=__name__) 118 | -------------------------------------------------------------------------------- /tests/test_catalog_readme.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Tests for the catalog_readme module.""" 4 | 5 | from __future__ import print_function, unicode_literals, with_statement 6 | 7 | import io 8 | import os.path 9 | 10 | import requests_mock 11 | import vcr 12 | from nose.tools import assert_true, assert_equal 13 | 14 | try: 15 | import mock 16 | except ImportError: 17 | from unittest import mock 18 | import filecmp 19 | 20 | from pydatajson.catalog_readme import generate_readme 21 | from tests.support.decorators import RESULTS_DIR 22 | 23 | 24 | my_vcr = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'), 25 | cassette_library_dir=os.path.join("tests", "cassetes"), 26 | record_mode='once') 27 | 28 | 29 | class TestDataJsonTestCase(object): 30 | SAMPLES_DIR = os.path.join("tests", "samples") 31 | RESULTS_DIR = RESULTS_DIR 32 | TEMP_DIR = os.path.join("tests", "temp") 33 | 34 | @classmethod 35 | def get_sample(cls, sample_filename): 36 | return os.path.join(cls.SAMPLES_DIR, sample_filename) 37 | 38 | @classmethod 39 | def setUp(cls): 40 | cls.catalog = cls.get_sample("several_datasets_for_harvest.json") 41 | cls.requests_mock = requests_mock.Mocker() 42 | cls.requests_mock.start() 43 | cls.requests_mock.get(requests_mock.ANY, real_http=True) 44 | cls.requests_mock.head(requests_mock.ANY, status_code=200) 45 | 46 | @classmethod 47 | def tearDown(cls): 48 | cls.requests_mock.stop() 49 | 50 | @my_vcr.use_cassette() 51 | def test_generate_readme(self): 52 | with
io.open(os.path.join(self.RESULTS_DIR, "catalog_readme.md"), 'r', 53 | encoding='utf-8') as expected_readme_file: 54 | expected_readme = expected_readme_file.read() 55 | readme = generate_readme(self.catalog) 56 | assert_equal(expected_readme, readme) 57 | 58 | @my_vcr.use_cassette() 59 | def test_readme_file_write(self): 60 | actual_filename = os.path.join(self.TEMP_DIR, "catalog_readme.md") 61 | expected_filename = os.path.join(self.RESULTS_DIR, "catalog_readme.md") 62 | generate_readme(self.catalog, export_path=actual_filename) 63 | comparison = filecmp.cmp(actual_filename, expected_filename) 64 | if comparison: 65 | os.remove(actual_filename) 66 | else: 67 | raise AssertionError( 68 | "{} was written, but it is not identical to the expected file. " 69 | "Please review it manually.".format(actual_filename)) 70 | 71 | assert_true(comparison) 72 | 73 | @my_vcr.use_cassette() 74 | @mock.patch('pydatajson.indicators._federation_indicators') 75 | def test_readme_null_indicators(self, mock_indicators): 76 | mock_indicators.return_value = { 77 | 'datasets_federados_cant': None, 78 | 'datasets_federados_pct': None, 79 | 'datasets_no_federados_cant': None, 80 | 'datasets_federados_eliminados_cant': None, 81 | 'distribuciones_federadas_cant': None, 82 | 'datasets_federados_eliminados': [], 83 | 'datasets_no_federados': [], 84 | 'datasets_federados': [], 85 | } 86 | results_path = os.path.join( 87 | self.RESULTS_DIR, "null_indicators_readme.md") 88 | 89 | with io.open(results_path, 'r', encoding='utf-8') \ 90 | as expected_readme_file: 91 | expected_readme = expected_readme_file.read() 92 | readme = generate_readme(self.catalog) 93 | assert_equal(expected_readme, readme) 94 | -------------------------------------------------------------------------------- /tests/test_threading.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from unittest import TestCase 3 | 4 | from pydatajson.threading_helper import apply_threading 5 | 6 | 7 | class ThreadingTests(TestCase): 8 | 9 | def test_threading(self): 10 | elements = [1, 2, 3, 4] 11 | 12 | def function(x): 13 | return x ** 2 14 | 15 | result = apply_threading(elements, function, 3) 16 | 17 | self.assertEqual(result, [1, 4, 9, 16]) 18 | 19 | def test_broken_function(self): 20 | elements = [1, 2, 3, 0] 21 | 22 | def divide(x): 23 | return 6 / x 24 | 25 | with self.assertRaises(ZeroDivisionError): # It's "synchronous"!
26 | apply_threading(elements, divide, 3) 27 | -------------------------------------------------------------------------------- /tests/test_time_series.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | from __future__ import with_statement 6 | 7 | import os.path 8 | import unittest 9 | 10 | from pydatajson.core import DataJson 11 | from pydatajson.custom_exceptions import DistributionTimeIndexNonExistentError 12 | from pydatajson.time_series import get_distribution_time_index, \ 13 | distribution_has_time_index, dataset_has_time_series 14 | 15 | SAMPLES_DIR = os.path.join("tests", "samples") 16 | 17 | 18 | class TimeSeriesTestCase(unittest.TestCase): 19 | 20 | @classmethod 21 | def get_sample(cls, sample_filename): 22 | return os.path.join(SAMPLES_DIR, sample_filename) 23 | 24 | def setUp(self): 25 | ts_catalog = DataJson(self.get_sample('time_series_data.json')) 26 | full_catalog = DataJson(self.get_sample('full_data.json')) 27 | self.ts_dataset = ts_catalog.datasets[0] 28 | self.non_ts_datasets = full_catalog.datasets[0] 29 | self.ts_distribution = ts_catalog.distributions[1] 30 | self.non_ts_distribution = full_catalog.distributions[0] 31 | 32 | def test_get_distribution_time_index(self): 33 | self.assertEqual( 34 | 'indice_tiempo', 35 | get_distribution_time_index( 36 | self.ts_distribution)) 37 | with self.assertRaises(DistributionTimeIndexNonExistentError): 38 | get_distribution_time_index(self.non_ts_distribution) 39 | 40 | def test_distribution_has_time_index(self): 41 | self.assertTrue(distribution_has_time_index(self.ts_distribution)) 42 | self.assertFalse(distribution_has_time_index(self.non_ts_distribution)) 43 | self.ts_distribution['field'] = ['p', 'r', 'o', 'b', 'l', 'e', 'm'] 44 | self.assertFalse(distribution_has_time_index(self.ts_distribution)) 45 | 46 | def test_dataset_has_time_series(self): 47 | self.assertTrue(dataset_has_time_series(self.ts_dataset)) 48 | self.assertFalse(dataset_has_time_series(self.non_ts_datasets)) 49 | -------------------------------------------------------------------------------- /tests/test_urls_validation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | 5 | import requests_mock 6 | from nose.tools import assert_true, assert_false 7 | from requests import Timeout 8 | 9 | import pydatajson 10 | from .support.decorators import RESULTS_DIR 11 | 12 | 13 | class TestDataJsonTestCase(object): 14 | SAMPLES_DIR = os.path.join("tests", "samples") 15 | RESULTS_DIR = RESULTS_DIR 16 | TEMP_DIR = os.path.join("tests", "temp") 17 | 18 | @classmethod 19 | def get_sample(cls, sample_filename): 20 | return os.path.join(cls.SAMPLES_DIR, sample_filename) 21 | 22 | def setUp(self): 23 | self.dj = pydatajson.DataJson(self.get_sample("full_data.json")) 24 | self.catalog = pydatajson.readers.read_catalog( 25 | self.get_sample("full_data.json")) 26 | self.maxDiff = None 27 | self.longMessage = True 28 | self.requests_mock = requests_mock.Mocker() 29 | self.requests_mock.start() 30 | self.requests_mock.get(requests_mock.ANY, real_http=True) 31 | self.requests_mock.head(requests_mock.ANY, status_code=200) 32 | 33 | def tearDown(self): 34 | del self.dj 35 | self.requests_mock.stop() 36 | 37 | def test_urls_with_status_code_200_is_valid(self): 38 | assert_true(self.dj.is_valid_catalog(broken_links=True)) 39 | 
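    # Note: the cases below exercise the broken-links check with mocked HEAD
    # responses: 200, 203 and 302 are expected to be accepted as working
    # links, while a 404 or a Timeout should make the catalog invalid, and
    # the check only runs when broken_links=True is passed.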
40 | def test_urls_with_status_code_203_is_valid(self): 41 | self.requests_mock.head(requests_mock.ANY, status_code=203) 42 | assert_true(self.dj.is_valid_catalog(broken_links=True)) 43 | 44 | def test_urls_with_status_code_302_is_valid(self): 45 | self.requests_mock.head(requests_mock.ANY, status_code=302) 46 | assert_true(self.dj.is_valid_catalog(broken_links=True)) 47 | 48 | def test_urls_with_invalid_status_codes_are_not_valid(self): 49 | self.requests_mock.head(requests_mock.ANY, status_code=404) 50 | assert_false(self.dj.is_valid_catalog(broken_links=True)) 51 | 52 | def test_throws_exception(self): 53 | self.requests_mock.head(requests_mock.ANY, exc=Timeout) 54 | assert_false(self.dj.is_valid_catalog(broken_links=True)) 55 | 56 | def test_validation_without_flag_does_not_validate_urls(self): 57 | assert_true(self.dj.is_valid_catalog()) 58 | 59 | def test_validation_with_flag_does_validate_urls(self): 60 | self.requests_mock.head(requests_mock.ANY, status_code=404) 61 | assert_false(self.dj.is_valid_catalog(broken_links=True)) 62 | -------------------------------------------------------------------------------- /tests/xl_methods.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | xl_methods 6 | 7 | Methods slightly modified from abenassi/xlseries to manipulate files in 8 | XLSX format (https://github.com/abenassi/xlseries). 9 | """ 10 | from six import string_types, text_type 11 | 12 | 13 | def compare_cells(wb1, wb2): 14 | """Compare two excels based on row iteration.""" 15 | 16 | # compare each cell of each worksheet 17 | for ws1, ws2 in zip(wb1.worksheets, wb2.worksheets): 18 | compare_cells_ws(ws1, ws2) 19 | return True 20 | 21 | 22 | def compare_cells_ws(ws1, ws2): 23 | """Compare two worksheets based on row iteration.""" 24 | 25 | # compare each cell of each worksheet 26 | for row1, row2 in zip(ws1.rows, ws2.rows): 27 | for cell1, cell2 in zip(row1, row2): 28 | 29 | msg = "".join([_safe_str(cell1.value), " != ", 30 | _safe_str(cell2.value), "\nrow: ", 31 | _safe_str(cell1.row), 32 | " column: ", _safe_str(cell1.column)]) 33 | 34 | value1 = normalize_value(cell1.value) 35 | value2 = normalize_value(cell2.value) 36 | 37 | assert value1 == value2, msg 38 | 39 | return True 40 | 41 | 42 | def normalize_value(value): 43 | """Strip spaces if the value is a string, convert None to empty string or 44 | let it pass otherwise.""" 45 | 46 | if isinstance(value, string_types): 47 | return value.strip() 48 | elif value is None: 49 | return "" 50 | else: 51 | return value 52 | 53 | 54 | def _safe_str(value): 55 | return text_type(value) 56 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py36 3 | 4 | [testenv] 5 | deps= 6 | -rrequirements.txt 7 | -rrequirements_dev.txt 8 | commands = nosetests 9 | pycodestyle pydatajson tests 10 | 11 | [testenv:flake8] 12 | commands = flake8 pydatajson 13 | 14 | 15 | ; If you want to make tox run the tests with the same versions, create a 16 | ; requirements.txt with the pinned versions and uncomment the following lines: 17 | ; deps = 18 | ; -r{toxinidir}/requirements.txt 19 | -------------------------------------------------------------------------------- /travis_pypi_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 |
# -*- coding: utf-8 -*- 3 | """Update encrypted deploy password in Travis config file 4 | """ 5 | 6 | 7 | from __future__ import print_function 8 | import base64 9 | import json 10 | import os 11 | from getpass import getpass 12 | import yaml 13 | from cryptography.hazmat.primitives.serialization import load_pem_public_key 14 | from cryptography.hazmat.backends import default_backend 15 | from cryptography.hazmat.primitives.asymmetric.padding import PKCS1v15 16 | 17 | 18 | try: 19 | from urllib import urlopen 20 | except ImportError: # Python 3 21 | from urllib.request import urlopen 22 | 23 | 24 | GITHUB_REPO = 'datosgobar/pydatajson' 25 | TRAVIS_CONFIG_FILE = os.path.join( 26 | os.path.dirname(os.path.abspath(__file__)), '.travis.yml') 27 | 28 | 29 | def load_key(pubkey): 30 | """Load public RSA key, with work-around for keys using 31 | incorrect header/footer format. 32 | 33 | Read more about RSA encryption with cryptography: 34 | https://cryptography.io/latest/hazmat/primitives/asymmetric/rsa/ 35 | """ 36 | try: 37 | return load_pem_public_key(pubkey.encode(), default_backend()) 38 | except ValueError: 39 | # workaround for https://github.com/travis-ci/travis-api/issues/196 40 | pubkey = pubkey.replace('BEGIN RSA', 'BEGIN').replace('END RSA', 'END') 41 | return load_pem_public_key(pubkey.encode(), default_backend()) 42 | 43 | 44 | def encrypt(pubkey, password): 45 | """Encrypt password using given RSA public key and encode it with base64. 46 | 47 | The encrypted password can only be decrypted by someone with the 48 | private key (in this case, only Travis). 49 | """ 50 | key = load_key(pubkey) 51 | encrypted_password = key.encrypt(password, PKCS1v15()) 52 | return base64.b64encode(encrypted_password) 53 | 54 | 55 | def fetch_public_key(repo): 56 | """Download RSA public key Travis will use for this repo. 57 | 58 | Travis API docs: http://docs.travis-ci.com/api/#repository-keys 59 | """ 60 | keyurl = 'https://api.travis-ci.org/repos/{0}/key'.format(repo) 61 | data = json.loads(urlopen(keyurl).read().decode()) 62 | if 'key' not in data: 63 | errmsg = "Could not find public key for repo: {}.\n".format(repo) 64 | errmsg += "Have you already added your GitHub repo to Travis?" 65 | raise ValueError(errmsg) 66 | return data['key'] 67 | 68 | 69 | def prepend_line(filepath, line): 70 | """Rewrite a file adding a line to its beginning. 71 | """ 72 | with open(filepath) as f: 73 | lines = f.readlines() 74 | 75 | lines.insert(0, line) 76 | 77 | with open(filepath, 'w') as f: 78 | f.writelines(lines) 79 | 80 | 81 | def load_yaml_config(filepath): 82 | with open(filepath) as f: 83 | return yaml.safe_load(f) 84 | 85 | 86 | def save_yaml_config(filepath, config): 87 | with open(filepath, 'w') as f: 88 | yaml.dump(config, f, default_flow_style=False) 89 | 90 | 91 | def update_travis_deploy_password(encrypted_password): 92 | """Update the deploy section of the .travis.yml file 93 | to use the given encrypted password.
94 | """ 95 | config = load_yaml_config(TRAVIS_CONFIG_FILE) 96 | 97 | config['deploy']['password'] = dict(secure=encrypted_password) 98 | 99 | save_yaml_config(TRAVIS_CONFIG_FILE, config) 100 | 101 | line = ('# This file was autogenerated and will overwrite' 102 | ' each time you run travis_pypi_setup.py\n') 103 | prepend_line(TRAVIS_CONFIG_FILE, line) 104 | 105 | 106 | def main(args): 107 | public_key = fetch_public_key(args.repo) 108 | password = args.password or getpass('PyPI password: ') 109 | update_travis_deploy_password(encrypt(public_key, password.encode())) 110 | print("Wrote encrypted password to .travis.yml -- you're ready to deploy") 111 | 112 | 113 | if '__main__' == __name__: 114 | import argparse 115 | parser = argparse.ArgumentParser(description=__doc__) 116 | parser.add_argument('--repo', default=GITHUB_REPO, 117 | help='GitHub repo (default: %s)' % GITHUB_REPO) 118 | parser.add_argument('--password', 119 | help='PyPI password (will prompt if not provided)') 120 | 121 | args = parser.parse_args() 122 | main(args) 123 | --------------------------------------------------------------------------------