├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── HISTORY.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── appveyor.yml ├── docs ├── HISTORY.md ├── MANUAL.md ├── Makefile ├── README.md ├── assets │ └── catalog.xlsx ├── backup.rst ├── conf.py ├── core.rst ├── federation.rst ├── index.rst ├── indicators.rst ├── make.bat ├── modules.rst ├── pydatajson.response_formatters.rst ├── pydatajson.rst ├── pydatajson.validators.rst ├── quick_reference.rst ├── readers.rst ├── reference.rst ├── reporting.rst ├── search.rst ├── validation.rst └── writers.rst ├── fix_github_links.sh ├── pydatajson ├── __init__.py ├── __main__.py ├── backup.py ├── catalog_readme.py ├── ckan_reader.py ├── ckan_utils.py ├── constants.py ├── core.py ├── custom_exceptions.py ├── custom_remote_ckan.py ├── documentation.py ├── download.py ├── federation.py ├── federation_indicators_generator.py ├── fields │ └── fields.json ├── helpers.py ├── indicators.py ├── readers.py ├── reporting.py ├── response_formatters │ ├── __init__.py │ ├── dict_formatter.py │ ├── list_formatter.py │ ├── tables_formatter.py │ └── validation_response_formatter.py ├── schemas │ ├── accrualPeriodicity.json │ ├── catalog.json │ ├── dataset.json │ ├── distribution.json │ ├── field.json │ ├── mixed-types.json │ ├── required_fields_schema.json │ ├── superThemeTaxonomy.json │ └── theme.json ├── search.py ├── status_indicators_generator.py ├── templates │ └── catalog_readme.txt ├── threading_helper.py ├── time_series.py ├── transformation.py ├── validation.py ├── validators │ ├── __init__.py │ ├── consistent_distribution_fields_validator.py │ ├── distribution_download_urls_validator.py │ ├── distribution_urls_validator.py │ ├── landing_pages_validator.py │ ├── simple_validator.py │ ├── theme_ids_not_repeated_validator.py │ └── url_validator.py └── writers.py ├── requirements.txt ├── requirements_2.7.txt ├── requirements_dev.txt ├── samples ├── archivos-tests │ ├── excel-no-validos │ │ └── catalogo-justicia-con-error-datasets.xlsx │ └── excel-validos │ │ ├── catalogo-justicia-06022017.xlsx │ │ ├── catalogo-justicia-56-distribuciones.xlsx │ │ └── catalogo-justicia.xlsx ├── caso-uso-1-pydatajson-xlsx-justicia-valido.ipynb ├── caso-uso-2-pydatajson-xlsx-justicia-no-valido.ipynb ├── caso-uso-3-pydatajson-xlsx-justicia-valido.ipynb ├── caso-uso-pydatajson-indicadores.ipynb ├── caso-uso-pydatajson-xlsx-validando-dev.ipynb ├── catalogo-justicia.json ├── catalogo-series-tiempo.json └── series-tiempo │ └── odg-total-millones-pesos-1960-trimestral.csv ├── setup.cfg ├── setup.py ├── tests ├── TEST_CASES.md ├── __init__.py ├── cassetes │ ├── backup │ │ ├── test_make_catalog_backup_data.yaml │ │ └── test_make_catalog_backup_data_without_file_name.yaml │ ├── ckan_integration │ │ ├── push_dataset │ │ │ ├── tearDown.yaml │ │ │ ├── test_dataset_is_created_correctly.yaml │ │ │ ├── test_dataset_is_updated_correctly.yaml │ │ │ └── test_resources_swapped_correctly.yaml │ │ └── remove_dataset │ │ │ ├── setUp.yaml │ │ │ ├── tearDown.yaml │ │ │ ├── test_empty_query_result.yaml │ │ │ ├── test_remove_dataset_by_filter_out.yaml │ │ │ ├── test_remove_dataset_by_filter_out_and_organization.yaml │ │ │ ├── test_remove_dataset_by_id.yaml │ │ │ ├── test_remove_dataset_by_organization.yaml │ │ │ ├── test_remove_dataset_by_publisher_and_organization.yaml │ │ │ ├── test_remove_dataset_by_title.yaml │ │ │ └── test_with_no_parametres.yaml │ ├── indicators │ │ ├── test_bad_date_indicators.yaml │ │ ├── test_bad_summary.yaml │ │ 
├── test_date_indicators.yaml │ │ ├── test_date_network_indicators_empty_catalog.yaml │ │ ├── test_field_indicators_on_full_catalog.yaml │ │ ├── test_field_indicators_on_min_catalog.yaml │ │ ├── test_format_indicators.yaml │ │ ├── test_generate_catalog_indicators.yaml │ │ ├── test_indicators_invalid_periodicity.yaml │ │ ├── test_indicators_missing_dataset.yaml │ │ ├── test_indicators_missing_periodicity.yaml │ │ ├── test_last_updated_indicator_missing_issued_field.yaml │ │ ├── test_license_indicators.yaml │ │ ├── test_network_indicators.yaml │ │ ├── test_network_license_indicators.yaml │ │ ├── test_network_type_indicators.yaml │ │ ├── test_no_licenses_indicators.yaml │ │ ├── test_no_title_nor_identifier_catalog.yaml │ │ ├── test_types_indicators.yaml │ │ └── test_valid_and_unreachable_catalogs.yaml │ ├── profiling │ │ └── main.yaml │ ├── readers_and_writers │ │ └── test_read_remote_xlsx_catalog.yaml │ ├── test_generate_catalog_readme.yaml │ ├── test_generate_datasets_report.yaml │ ├── test_generate_readme.yaml │ ├── test_generate_readme_with_null_indicators.yaml │ ├── test_readme_file_write.yaml │ ├── test_validate_bad_remote_datajson.yaml │ └── test_validate_bad_remote_datajson2.yaml ├── context.py ├── profiling.py ├── results │ ├── catalog_readme.md │ ├── catalogo_justicia.json │ ├── datasets.json │ ├── datasets_filter_in.json │ ├── datasets_filter_out.json │ ├── datasets_meta_field.json │ ├── distributions.json │ ├── distributions_filter_in.json │ ├── distributions_filter_out.json │ ├── distributions_meta_field.json │ ├── distributions_only_time_series.json │ ├── empty_optional_string.json │ ├── expected_datasets_report.csv │ ├── expected_harvester_config.csv │ ├── fields.json │ ├── fields_filter_in.json │ ├── fields_filter_out.json │ ├── fields_meta_field.json │ ├── full_data.json │ ├── get_dataset.json │ ├── get_distribution.json │ ├── get_distribution_of_dataset.json │ ├── get_field.json │ ├── get_theme.json │ ├── invalid_dataset_theme_type.json │ ├── invalid_field_description_type.json │ ├── invalid_multiple_fields_type.json │ ├── minimum_data.json │ ├── mismatched_downloadURL_and_format.json │ ├── mismatched_fileName_and_format.json │ ├── multiple_missing_descriptions.json │ ├── null_dataset_theme.json │ ├── null_field_description.json │ ├── null_indicators_readme.md │ ├── several_assorted_errors.json │ ├── time_series.json │ ├── write_table.csv │ └── write_table.xlsx ├── samples │ ├── border_cases_ditribution_filenames.json │ ├── catalogo-justicia-missing-distribution-identifier.xlsx │ ├── catalogo_justicia.json │ ├── catalogo_justicia.xlsx │ ├── catalogo_justicia_extra_columns.xlsx │ ├── catalogo_justicia_no_xlsx_suffix │ ├── catalogo_justicia_removed.json │ ├── catalogo_justicia_removed_publisher.json │ ├── catalogo_justicia_with_defaults.json │ ├── catalogo_justicia_with_defaults.xlsx │ ├── central.json │ ├── empty_mandatory_string.json │ ├── empty_optional_string.json │ ├── empty_super_theme_list.json │ ├── example_time_series.json │ ├── federated_1.json │ ├── federated_2.json │ ├── full_data.json │ ├── full_data_no_json_suffix │ ├── integration_full_sample_data.json │ ├── invalid_catalog_empty.json │ ├── invalid_catalog_publisher_type.json │ ├── invalid_dataset_theme_type.json │ ├── invalid_dataset_type.json │ ├── invalid_ditribution_filenames.json │ ├── invalid_field_description_type.json │ ├── invalid_keywords.json │ ├── invalid_multiple_emails.json │ ├── invalid_multiple_fields_type.json │ ├── invalid_publisher_mbox_format.json │ ├── invalid_themeTaxonomy.json │ ├── 
lists_extra_commas.xlsx │ ├── malformed_accrualperiodicity.json │ ├── malformed_date.json │ ├── malformed_datetime.json │ ├── malformed_datetime2.json │ ├── malformed_email.json │ ├── malformed_temporal.json │ ├── malformed_temporal2.json │ ├── malformed_uri.json │ ├── minimum_data.json │ ├── mismatched_downloadURL_and_format.json │ ├── mismatched_fileName_and_format.json │ ├── missing_catalog_dataset.json │ ├── missing_catalog_description.json │ ├── missing_catalog_title.json │ ├── missing_dataset.json │ ├── missing_dataset_description.json │ ├── missing_dataset_title.json │ ├── missing_distribution_title.json │ ├── missing_periodicity.json │ ├── multiple_missing_descriptions.json │ ├── null_catalog_publisher.json │ ├── null_dataset_theme.json │ ├── null_field_description.json │ ├── numeric_distribution_identifier.json │ ├── organization_tree.json │ ├── processed_datasets_report.csv │ ├── prueba_sheet_to_table.xlsx │ ├── read_table.csv │ ├── read_table.xlsx │ ├── repeated_downloadURL.json │ ├── resource_sample.csv │ ├── several_assorted_errors.json │ ├── several_datasets.json │ ├── several_datasets_for_harvest.json │ ├── several_datasets_with_licenses.json │ ├── several_datasets_with_types.json │ ├── time_series_data.json │ ├── too_long_field_title.json │ └── valid_whitespace_emails.json ├── support │ ├── __init__.py │ ├── constants.py │ ├── decorators.py │ ├── factories │ │ ├── __init__.py │ │ ├── catalog_errors.py │ │ ├── core_files.py │ │ ├── dataset_errors.py │ │ ├── distribution_errors.py │ │ ├── other_errors.py │ │ └── xlsx.py │ └── utils.py ├── test_backup.py ├── test_catalog_readme.py ├── test_ckan_integration.py ├── test_ckan_reader.py ├── test_ckan_utils.py ├── test_core.py ├── test_documentation.py ├── test_federation.py ├── test_helpers.py ├── test_indicators.py ├── test_readers_and_writers.py ├── test_search.py ├── test_status_indicators_generator.py ├── test_threading.py ├── test_time_series.py ├── test_urls_validation.py ├── test_validation.py ├── test_validators.py └── xl_methods.py ├── tox.ini └── travis_pypi_setup.py /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Código de conducta 2 | 3 | ## Introducción 4 | **Gracias por ayudar**. Este código de conducta está basado en el **respeto a los miembros de la comunidad de Datos Argentina.** 5 | 6 | ## Somos 7 | **Cuidadosos con las palabras que elegimos.** Somos adultos y profesionales. Evitamos el lenguaje vulgar y no aceptamos de ninguna manera: 8 | 9 | * Trato violento hacia alguna persona o institución. 10 | * Posteos violentos o con matices sexuales. 11 | * Posteos sobre información personal. 12 | * Chistes o insultos personales, especialmente racistas o sexistas. 13 | * Acoso. De ningún tipo. 14 | 15 | **Respetuosos.** Valoramos el tiempo, el esfuerzo y las opiniones de los demás. Cuando no coinciden con las nuestras, intentamos entender su punto de vista. 16 | 17 | **Pacientes.** Como parte de la Administración Pública Nacional, los tiempos de respuesta de Datos Argentina están, en muchas ocasiones, atados a la enorme cantidad de compromisos que involucra el trabajo para toda la Nación. Por eso, somos pacientes con las respuestas y los tiempos de aplicación de las sugerencias o contribuciones. 18 | 19 | **Pedimos ayuda.** Si no entendemos o no sabemos algo, lo preguntamos. 20 | 21 | **Argumentamos con sustento.** Valoramos los argumentos y las decisiones basados en evidencia concreta. 
22 | 23 | ## Participación 24 | Todo lo que se hace para la Nación es de todos. Alentamos la participación para construir políticas públicas de calidad. 25 | 26 | ## Contacto 27 | Envianos tus comentarios o consultas a datosargentina@jefatura.gob.ar. 28 | 29 | ## Créditos 30 | Para escribir nuestro código de conducta, nos basamos en: 31 | 32 | * [Twitter](https://github.com/twitter/code-of-conduct/blob/master/code-of-conduct.md) 33 | * [Django](https://www.djangoproject.com/conduct/) 34 | * [Python](https://www.python.org/psf/codeofconduct/) 35 | * [Ubuntu](https://www.ubuntu.com/about/about-ubuntu/conduct) 36 | * [TODO group](http://todogroup.org/opencodeofconduct/) 37 | * [Hackathon Hackers](https://github.com/HackathonHackers/code-of-conduct) 38 | * [Movimiento NCoC](https://github.com/domgetter/NCoC) 39 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribuciones 2 | Te invitamos a contribuir tanto ideas como código. Para hacerlo, tené en cuenta: 3 | 4 | * Nuestro [código de conducta](https://github.com/datosgobar/estandares/blob/master/github/CODE_OF_CONDUCT.md). 5 | * Nuestros [estándares de código (en desarrollo)](https://github.com/datosgobar/estandares/blob/master/codigo). 6 | 7 | ## Restricciones del Estado Nacional 8 | El equipo de Datos Argentina forma parte de la Administración Pública Nacional. Por eso, todas sus interfaces respetan: 9 | * El español como idioma oficial. 10 | * Los estándares definidos por el [Proyecto Poncho](https://argob.github.io/poncho/) para diseño. 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ¡Hola! Gracias por colaborar con este proyecto. 2 | 3 | Antes de crear el issue, por favor, asegurate de que nadie haya creado el issue que estás reportando. 4 | 5 | Si este issue es para pedir nuevas o mejores funcionalidades, contanos: 6 | 7 | * ¿Por qué querés esta funcionalidad? 8 | * ¿Cómo esperás que funcione? 9 | 10 | Si querés reportar un bug, especificanos: 11 | 12 | * ¿Cuál es el comportamiento que esperabas y cuál fue el comportamiento recibido? 13 | * ¿Cuáles son los pasos para reproducir el bug? 14 | * Detalles del contexto: ¿qué sistema operativo, qué versión de navegador estás usando, por ejemplo? 15 | * ¿Tenés algún stack trace o captura de pantalla? Adjuntalo. 16 | 17 | Si tenés otra consulta, dejanos una descripción bien específica, para que podamos ayudarte. 18 | 19 | Para preguntas de carácter privado, podés escribirnos a datosargentina@jefatura.gob.ar. 20 | 21 | ¡Muchas gracias por colaborar! 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | *.egg-info* 25 | .installed.cfg 26 | *.egg 27 | *.pyc 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | docs/_static/ 60 | docs/_templates/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | # pyenv python configuration file 66 | .python-version 67 | 68 | # Vim swap files 69 | *.swp 70 | 71 | # Archivos de desarrollo 72 | allresults.py 73 | TODO.md 74 | 75 | # Archivos temporales de prueba 76 | tests/temp/* 77 | tests/results/catalog/* 78 | .ipynb_checkpoints 79 | samples/archivos-generados/* 80 | .DS_Store 81 | .idea/ 82 | profiling_test.png 83 | backup 84 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file was autogenerated and will overwrite each time you run travis_pypi_setup.py 2 | 3 | after_success: 4 | - coveralls 5 | 6 | install: 7 | - pip install -r requirements.txt 8 | - pip install -r requirements_dev.txt 9 | - pip install python-coveralls 10 | - pip install coveralls 11 | - mkdir tests/temp 12 | language: python 13 | notifications: 14 | slack: 15 | on_failure: always 16 | on_success: change 17 | secure: GwU9+hCiBlk1FL3YDeKjpnTVxx7jcdrS+MCEbj61jrVRdPfkWs/a6v3qNBXJNU2/qN2lJvrKio0X9CdjbQksv5KtUUr08r/8tk4r8S/kB+R2xyoMjvHtJd6frYxj+d0xwhmNtNbo8jacYDrWXrQV4GQPtKAegh8+OdEvmuMsX5vFMT83mDVre/pD8dz8jFHzwE8RjBn9QG513/EyaqTHq/uDSPCO+rtAb+FLfDCa5adPHl36ZQB2DgK/1qT3lFLkJW7gLQm5bsYB3vjAO5tOR2B6OSz/Y4Kzo9tmB4Y6i4kfAZpGJCvKJj9wSf02hkB6mKJuheveHTI4m/lYgyb5pIoZaPQVQQ4zdidjPdYMB1P+6QYTdKdiiJEQ30spbmEHCEcD0YQwHlKZ+VJMFdME+gGlxz+uaEJygWL4nM2W6UDthALRgKFkuvdMJVk0qZh08q+sK6CGH9AOHQXlvLn4slRFbgONjJJQ/b8affpnnirtPjkCK0EOHZYg7q1HHu8O/nPUEGtyFVGbGSOKSsQyCDb49UOCFQOeBBk4arItjAlZGjgvao2Zdm+CGvBARVWYkw4IELOeAsKaHc3AbYh32QoPZNlnV/xykqNTDdw0ef7DrdS1GXDrLb1G3hkkzzphkSNe7ajAJDOvTAN3hgl6MQwSKDQXj40HkA0JUWWHw7E= 18 | os: 19 | - linux 20 | 21 | sudo: false 22 | 23 | stages: 24 | - name: test 25 | - name: deploy 26 | if: tag IS present 27 | 28 | script: 29 | - nosetests --with-coverage --cover-package=pydatajson --cover-min-percentage=80 30 | - pycodestyle pydatajson tests 31 | 32 | jobs: 33 | include: 34 | - python: '2.7' 35 | - python: '3.6' 36 | - stage: deploy 37 | install: pip install coveralls 38 | script: skip 39 | deploy: 40 | repo: datosgobar/pydatajson 41 | distributions: sdist bdist_wheel 42 | password: 43 | secure: cWWqxX4mC/PJ0WqNCuCdnJcgfDzMjJXyi5HGWqJTSkohMfIljChXDBS/GlFUKSAXf8aeyFntQS3F1EJQRPYIVoD64JKG1IUMiTOfleHgRPqrvY1vMquGsnYj8ZK/bqTC6JJvVD0aXEzKe6TK9g83G24FY8qdVecb9VlMsZc2cQ+kdA0lz9aoQtYETYFp6UdbbvO8Zk473oGfgqzjTb1GqkZMha2gn5F1GrI23wrnL66fMZHqXji5S8ryhzpVJW4PuM36M7onxZ/xcZLb/aptXYa33B947Bxf9VvmbdxkBpL+As5CbeSVSpMz66c/wHrspoWnC6GEPdOVwJbYmHxjjjC8Me/qwbUYHsPPCM1hr5qRJiEcVMltIu/YrDJAy0VbmQkcR1xuZMkzTtVf7c9fEhYDni6oyflLEpYHiKvnyMJF5oscdv/Splf6WYvaixwPR43JCjZZIc/0KO/u//dhyjDwEfgPRcXl0V1aeGu/jBqTsRS6qextNjmx5Bb9u/oP6zszvTFE9PVsVeg3GryxF1Db5FuzM2jkDGHgR5MF2PPcG7ZIzSdOTTl1LZbAnSkWXdzMx5pwxqYBBw6sn9lQSGHRequmKHZerUU/L0zaGrF3IT66/w1q5IXOpLmituivtog86TFRWPfGsQJZtptpABTGz0rQ9jchcHmSJ1sGdMI= 44 | provider: pypi 45 | user: datosgobar 46 | on: 47 | tags: true 48 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) 2016, Datos Argentina 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include HISTORY.md 2 | include LICENSE 3 | include README.md 4 | include requirements.txt 5 | include requirements_dev.txt 6 | include requirements_2.7.txt 7 | 8 | recursive-include tests * 9 | recursive-include pydatajson * 10 | recursive-exclude * __pycache__ 11 | recursive-exclude * *.py[co] 12 | 13 | recursive-include docs *.rst *.md conf.py Makefile make.bat *.jpg *.png *.gif 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help dist 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . 
-name '__pycache__' -exec rm -fr {} + 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | rm -fr .tox/ 47 | rm -f .coverage 48 | rm -fr htmlcov/ 49 | 50 | lint: ## check style with pylint 51 | pylint pydatajson 52 | 53 | test: ## run tests quickly with nose 54 | nosetests 55 | 56 | test-all: ## run tests on every Python version with tox 57 | tox 58 | 59 | coverage: ## check code coverage quickly with the default Python 60 | 61 | coverage run --source pydatajson setup.py test 62 | 63 | coverage report -m 64 | coverage html 65 | $(BROWSER) htmlcov/index.html 66 | 67 | # TEST 68 | profiling_test: 69 | python -m tests.profiling 70 | 71 | # DOCUMENTACIÓN Y RELEASES 72 | docs: ## generate Sphinx HTML documentation, including API docs 73 | cp README.md docs/README.md 74 | cp HISTORY.md docs/HISTORY.md 75 | rm -f docs/pydatajson.rst 76 | rm -f docs/modules.rst 77 | sphinx-apidoc -o docs/ pydatajson 78 | $(MAKE) -C docs clean 79 | $(MAKE) -C docs html 80 | $(BROWSER) docs/_build/html/index.html 81 | 82 | servedocs: docs ## compile the docs watching for changes 83 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 84 | 85 | release: dist ## package and upload a release 86 | twine upload dist/* 87 | 88 | dist: clean ## builds source and wheel package 89 | python setup.py sdist 90 | python setup.py bdist_wheel 91 | ls -l dist 92 | 93 | install: clean ## install the package to the active Python's site-packages 94 | python setup.py install 95 | 96 | pypi: dist ## register the package to PyPi get travis ready to deploy to pip 97 | twine upload dist/* 98 | python travis_pypi_setup.py 99 | 100 | doctoc: ## generate table of contents, doctoc command line tool required 101 | ## https://github.com/thlorenz/doctoc 102 | doctoc --github --title " " README.md 103 | bash fix_github_links.sh README.md 104 | doctoc --github --title " " docs/MANUAL.md 105 | bash fix_github_links.sh docs/MANUAL.md 106 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | build: false 2 | 3 | environment: 4 | matrix: 5 | - PYTHON: "C:\\Python27" 6 | PYTHON_VERSION: "2.7.8" 7 | PYTHON_ARCH: "64" 8 | 9 | init: 10 | - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%" 11 | 12 | install: 13 | - "%PYTHON%/Scripts/pip.exe install -r requirements.txt" 14 | - "%PYTHON%/Scripts/pip.exe install -r requirements_dev.txt" 15 | - "%PYTHON%/Scripts/pip.exe install python-coveralls" 16 | - "%PYTHON%/Scripts/pip.exe install coveralls" 17 | - "mkdir tests\\temp" 18 | 19 | test_script: 20 | - "%PYTHON%/Scripts/nosetests" 21 | -------------------------------------------------------------------------------- /docs/assets/catalog.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/docs/assets/catalog.xlsx -------------------------------------------------------------------------------- /docs/backup.rst: -------------------------------------------------------------------------------- 1 | Backup 2 | ====== 3 | 4 | .. automodule:: pydatajson.backup 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/core.rst: -------------------------------------------------------------------------------- 1 | DataJson 2 | ======== 3 | 4 | .. 
automodule:: pydatajson.core 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/federation.rst: -------------------------------------------------------------------------------- 1 | Federación 2 | ========== 3 | 4 | .. automodule:: pydatajson.federation 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | pydatajson 2 | ========== 3 | 4 | Documentación de pydatajson: librería con funcionalidades para gestionar los metadatos de catálogos de datos abiertos que cumplan con el Perfil Nacional de Metadatos. Pydatajson es parte del `Paquete de Apertura de Datos `_. 5 | 6 | Podés colaborar `cargando un nuevo issue `_, o `respondiendo a un issue ya existente `_. Lo mismo te invitamos a hacer en el `Paquete de Apertura de Datos `_. 7 | 8 | 9 | Indice 10 | ------ 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | README.md 16 | MANUAL.md 17 | quick_reference.rst 18 | 19 | Referencia 20 | ---------- 21 | 22 | .. toctree:: 23 | :maxdepth: 1 24 | 25 | reference.rst 26 | 27 | * :ref:`modindex` 28 | * :ref:`genindex` 29 | 30 | Versiones 31 | --------- 32 | 33 | .. toctree:: 34 | :maxdepth: 1 35 | 36 | HISTORY.md 37 | -------------------------------------------------------------------------------- /docs/indicators.rst: -------------------------------------------------------------------------------- 1 | Indicadores 2 | =========== 3 | 4 | .. automodule:: pydatajson.indicators 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | pydatajson 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | pydatajson 8 | -------------------------------------------------------------------------------- /docs/pydatajson.response_formatters.rst: -------------------------------------------------------------------------------- 1 | pydatajson.response\_formatters package 2 | ======================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | pydatajson.response\_formatters.dict\_formatter module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: pydatajson.response_formatters.dict_formatter 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | pydatajson.response\_formatters.list\_formatter module 16 | ------------------------------------------------------ 17 | 18 | .. automodule:: pydatajson.response_formatters.list_formatter 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pydatajson.response\_formatters.tables\_formatter module 24 | -------------------------------------------------------- 25 | 26 | .. automodule:: pydatajson.response_formatters.tables_formatter 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pydatajson.response\_formatters.validation\_response\_formatter module 32 | ---------------------------------------------------------------------- 33 | 34 | .. automodule:: pydatajson.response_formatters.validation_response_formatter 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. 
automodule:: pydatajson.response_formatters 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/pydatajson.rst: -------------------------------------------------------------------------------- 1 | pydatajson package 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | pydatajson.response_formatters 10 | pydatajson.validators 11 | 12 | Submodules 13 | ---------- 14 | 15 | pydatajson.backup module 16 | ------------------------ 17 | 18 | .. automodule:: pydatajson.backup 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pydatajson.catalog\_readme module 24 | --------------------------------- 25 | 26 | .. automodule:: pydatajson.catalog_readme 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pydatajson.ckan\_reader module 32 | ------------------------------ 33 | 34 | .. automodule:: pydatajson.ckan_reader 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | pydatajson.ckan\_utils module 40 | ----------------------------- 41 | 42 | .. automodule:: pydatajson.ckan_utils 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | pydatajson.constants module 48 | --------------------------- 49 | 50 | .. automodule:: pydatajson.constants 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | pydatajson.core module 56 | ---------------------- 57 | 58 | .. automodule:: pydatajson.core 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | pydatajson.custom\_exceptions module 64 | ------------------------------------ 65 | 66 | .. automodule:: pydatajson.custom_exceptions 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | pydatajson.custom\_remote\_ckan module 72 | -------------------------------------- 73 | 74 | .. automodule:: pydatajson.custom_remote_ckan 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | pydatajson.documentation module 80 | ------------------------------- 81 | 82 | .. automodule:: pydatajson.documentation 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | pydatajson.download module 88 | -------------------------- 89 | 90 | .. automodule:: pydatajson.download 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | pydatajson.federation module 96 | ---------------------------- 97 | 98 | .. automodule:: pydatajson.federation 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | pydatajson.federation\_indicators\_generator module 104 | --------------------------------------------------- 105 | 106 | .. automodule:: pydatajson.federation_indicators_generator 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | pydatajson.helpers module 112 | ------------------------- 113 | 114 | .. automodule:: pydatajson.helpers 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | pydatajson.indicators module 120 | ---------------------------- 121 | 122 | .. automodule:: pydatajson.indicators 123 | :members: 124 | :undoc-members: 125 | :show-inheritance: 126 | 127 | pydatajson.readers module 128 | ------------------------- 129 | 130 | .. automodule:: pydatajson.readers 131 | :members: 132 | :undoc-members: 133 | :show-inheritance: 134 | 135 | pydatajson.reporting module 136 | --------------------------- 137 | 138 | .. 
automodule:: pydatajson.reporting 139 | :members: 140 | :undoc-members: 141 | :show-inheritance: 142 | 143 | pydatajson.search module 144 | ------------------------ 145 | 146 | .. automodule:: pydatajson.search 147 | :members: 148 | :undoc-members: 149 | :show-inheritance: 150 | 151 | pydatajson.status\_indicators\_generator module 152 | ----------------------------------------------- 153 | 154 | .. automodule:: pydatajson.status_indicators_generator 155 | :members: 156 | :undoc-members: 157 | :show-inheritance: 158 | 159 | pydatajson.threading\_helper module 160 | ----------------------------------- 161 | 162 | .. automodule:: pydatajson.threading_helper 163 | :members: 164 | :undoc-members: 165 | :show-inheritance: 166 | 167 | pydatajson.time\_series module 168 | ------------------------------ 169 | 170 | .. automodule:: pydatajson.time_series 171 | :members: 172 | :undoc-members: 173 | :show-inheritance: 174 | 175 | pydatajson.transformation module 176 | -------------------------------- 177 | 178 | .. automodule:: pydatajson.transformation 179 | :members: 180 | :undoc-members: 181 | :show-inheritance: 182 | 183 | pydatajson.validation module 184 | ---------------------------- 185 | 186 | .. automodule:: pydatajson.validation 187 | :members: 188 | :undoc-members: 189 | :show-inheritance: 190 | 191 | pydatajson.writers module 192 | ------------------------- 193 | 194 | .. automodule:: pydatajson.writers 195 | :members: 196 | :undoc-members: 197 | :show-inheritance: 198 | 199 | 200 | Module contents 201 | --------------- 202 | 203 | .. automodule:: pydatajson 204 | :members: 205 | :undoc-members: 206 | :show-inheritance: 207 | -------------------------------------------------------------------------------- /docs/pydatajson.validators.rst: -------------------------------------------------------------------------------- 1 | pydatajson.validators package 2 | ============================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | pydatajson.validators.consistent\_distribution\_fields\_validator module 8 | ------------------------------------------------------------------------ 9 | 10 | .. automodule:: pydatajson.validators.consistent_distribution_fields_validator 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | pydatajson.validators.distribution\_download\_urls\_validator module 16 | -------------------------------------------------------------------- 17 | 18 | .. automodule:: pydatajson.validators.distribution_download_urls_validator 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pydatajson.validators.distribution\_urls\_validator module 24 | ---------------------------------------------------------- 25 | 26 | .. automodule:: pydatajson.validators.distribution_urls_validator 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pydatajson.validators.landing\_pages\_validator module 32 | ------------------------------------------------------ 33 | 34 | .. automodule:: pydatajson.validators.landing_pages_validator 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | pydatajson.validators.simple\_validator module 40 | ---------------------------------------------- 41 | 42 | .. automodule:: pydatajson.validators.simple_validator 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | pydatajson.validators.theme\_ids\_not\_repeated\_validator module 48 | ----------------------------------------------------------------- 49 | 50 | .. 
automodule:: pydatajson.validators.theme_ids_not_repeated_validator 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | pydatajson.validators.url\_validator module 56 | ------------------------------------------- 57 | 58 | .. automodule:: pydatajson.validators.url_validator 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: pydatajson.validators 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /docs/quick_reference.rst: -------------------------------------------------------------------------------- 1 | Referencia rápida 2 | ================= 3 | 4 | Lectura 5 | ------- 6 | .. autoclass:: pydatajson.core.DataJson 7 | :members: __init__ 8 | 9 | Escritura 10 | --------- 11 | .. autoclass:: pydatajson.core.DataJson 12 | :members: to_json, to_xlsx 13 | 14 | Validación 15 | ---------- 16 | .. autoclass:: pydatajson.core.DataJson 17 | :members: is_valid_catalog, validate_catalog 18 | 19 | Búsqueda 20 | -------- 21 | .. autoclass:: pydatajson.core.DataJson 22 | :members: get_datasets, get_dataset, get_fields, get_field 23 | 24 | Indicadores 25 | ----------- 26 | .. autoclass:: pydatajson.core.DataJson 27 | :members: generate_indicators 28 | 29 | Reportes 30 | -------- 31 | .. autoclass:: pydatajson.core.DataJson 32 | :members: generate_datasets_summary, generate_catalog_readme 33 | 34 | Federación 35 | ---------- 36 | .. autoclass:: pydatajson.core.DataJson 37 | :members: harvest_dataset_to_ckan, restore_dataset_to_ckan, harvest_catalog_to_ckan, restore_catalog_to_ckan, push_theme_to_ckan, push_new_themes 38 | 39 | .. autofunction:: pydatajson.federation.remove_dataset_from_ckan 40 | -------------------------------------------------------------------------------- /docs/readers.rst: -------------------------------------------------------------------------------- 1 | Lectura 2 | ======= 3 | 4 | .. automodule:: pydatajson.readers 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | Referencia completa 2 | =================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | core.rst 8 | readers.rst 9 | writers.rst 10 | validation.rst 11 | search.rst 12 | indicators.rst 13 | reporting.rst 14 | federation.rst 15 | backup.rst 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/reporting.rst: -------------------------------------------------------------------------------- 1 | Reportes 2 | ======== 3 | 4 | .. automodule:: pydatajson.reporting 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/search.rst: -------------------------------------------------------------------------------- 1 | Búsqueda 2 | ======== 3 | 4 | .. automodule:: pydatajson.search 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/validation.rst: -------------------------------------------------------------------------------- 1 | Validación 2 | ========== 3 | 4 | ..
automodule:: pydatajson.validation 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/writers.rst: -------------------------------------------------------------------------------- 1 | Escritura 2 | ========= 3 | 4 | .. automodule:: pydatajson.writers 5 | :members: 6 | -------------------------------------------------------------------------------- /fix_github_links.sh: -------------------------------------------------------------------------------- 1 | sed -i.bu 's/%C3%A1/a/' $1 2 | sed -i.bu 's/%C3%A9/e/' $1 3 | sed -i.bu 's/%C3%AD/i/' $1 4 | sed -i.bu 's/%C3%B3/o/' $1 5 | sed -i.bu 's/%C3%BA/u/' $1 6 | sed -i.bu 's/%C2%BF//' $1 7 | sed -i.bu 's/---/-/' $1 8 | rm $1.bu 9 | -------------------------------------------------------------------------------- /pydatajson/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Módulo pydatajson 4 | Conjunto de herramientas para validar y manipular la información presente en 5 | el archivo `data.json` de un Portal de Datos 6 | """ 7 | 8 | from __future__ import absolute_import 9 | 10 | import logging 11 | 12 | from . import helpers 13 | from .core import DataJson 14 | from .helpers import parse_repeating_time_interval 15 | 16 | __author__ = """Datos Argentina""" 17 | __email__ = 'datosargentina@jefatura.gob.ar' 18 | __version__ = '0.4.67' 19 | 20 | """ 21 | Logger base para librería pydatajson 22 | https://docs.python.org/2/howto/logging.html#configuring-logging-for-a-library 23 | """ 24 | logger = logging.getLogger('pydatajson') 25 | logger.addHandler(logging.NullHandler()) 26 | -------------------------------------------------------------------------------- /pydatajson/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo de entrada para la interfaz de línea de comandos 5 | 6 | Todos los módulos de pydatajson se pueden llamar por línea de comandos siempre 7 | que tengan un método main() definido en el módulo, que recibe argumentos y 8 | realiza acciones relacionadas con el core de su funcionalidad. 9 | 10 | Example: 11 | pydatajson backup http://infra.datos.gob.ar/catalog/modernizacion/data.json 12 | """ 13 | 14 | from __future__ import unicode_literals 15 | from __future__ import print_function 16 | from __future__ import with_statement 17 | import os 18 | import sys 19 | import importlib 20 | 21 | 22 | def main(): 23 | module_name = sys.argv[1] 24 | module = importlib.import_module("." 
+ module_name, "pydatajson") 25 | args = sys.argv[2:] if len(sys.argv) > 2 else [] 26 | module.main(*args) 27 | 28 | 29 | if __name__ == '__main__': 30 | main() 31 | -------------------------------------------------------------------------------- /pydatajson/catalog_readme.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | from __future__ import with_statement 7 | 8 | import io 9 | import logging 10 | import os 11 | 12 | from six import string_types 13 | 14 | from pydatajson.helpers import traverse_dict 15 | from pydatajson.indicators import generate_catalogs_indicators 16 | from pydatajson.readers import read_catalog 17 | from pydatajson.validation import validate_catalog 18 | 19 | logger = logging.getLogger('pydatajson') 20 | 21 | CENTRAL_CATALOG = "http://datos.gob.ar/data.json" 22 | ABSOLUTE_PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) 23 | TEMPLATES_PATH = os.path.join(ABSOLUTE_PROJECT_DIR, "templates") 24 | 25 | 26 | def generate_catalog_readme(_datajson, catalog, 27 | export_path=None, verify_ssl=True): 28 | """Este método está para mantener retrocompatibilidad con versiones 29 | anteriores. Se ignora el argumento _datajson.""" 30 | return generate_readme(catalog, export_path, verify_ssl=verify_ssl) 31 | 32 | 33 | def generate_readme(catalog, export_path=None, verify_ssl=True): 34 | """Genera una descripción textual en formato Markdown sobre los 35 | metadatos generales de un catálogo (título, editor, fecha de 36 | publicación, etcétera), junto con: 37 | - estado de los metadatos a nivel catálogo, 38 | - estado global de los metadatos, 39 | - cantidad de datasets federados y no federados, 40 | - detalles de los datasets no federados, 41 | - cantidad de datasets y distribuciones incluidas 42 | 43 | Es utilizada por la rutina diaria de `libreria-catalogos` para generar 44 | un README con información básica sobre los catálogos mantenidos. 45 | 46 | Args: 47 | catalog (str o dict): Path a un catálogo en cualquier formato, 48 | JSON, XLSX, o diccionario de python. 49 | export_path (str): Path donde exportar el texto generado (en 50 | formato Markdown). Si se especifica, el método no devolverá 51 | nada. 52 | verify_ssl (bool): Verificar certificados SSL (default: True). 53 | Returns: 54 | str: Texto de la descripción generada.
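Example: Esbozo ilustrativo de uso (la ruta "data.json" es hipotética): >>> from pydatajson.catalog_readme import generate_readme >>> readme = generate_readme("data.json") # doctest: +SKIP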
55 | """ 56 | # Si se paso una ruta, guardarla 57 | if isinstance(catalog, string_types): 58 | catalog_path_or_url = catalog 59 | else: 60 | catalog_path_or_url = None 61 | 62 | catalog = read_catalog(catalog) 63 | validation = validate_catalog(catalog, verify_ssl=verify_ssl) 64 | # Solo necesito indicadores para un catalogo 65 | indicators = generate_catalogs_indicators( 66 | catalog, CENTRAL_CATALOG)[0][0] 67 | 68 | with io.open(os.path.join(TEMPLATES_PATH, 'catalog_readme.txt'), 'r', 69 | encoding='utf-8') as template_file: 70 | readme_template = template_file.read() 71 | 72 | not_federated_datasets_list = "\n".join([ 73 | "- [{}]({})".format(dataset[0], dataset[1]) 74 | for dataset in indicators["datasets_no_federados"] 75 | ]) 76 | federated_removed_datasets_list = "\n".join([ 77 | "- [{}]({})".format(dataset[0], dataset[1]) 78 | for dataset in indicators["datasets_federados_eliminados"] 79 | ]) 80 | federated_datasets_list = "\n".join([ 81 | "- [{}]({})".format(dataset[0], dataset[1]) 82 | for dataset in indicators["datasets_federados"] 83 | ]) 84 | non_federated_pct = 1.0 - indicators["datasets_federados_pct"] if \ 85 | indicators["datasets_federados_pct"] is not None else \ 86 | indicators["datasets_federados_pct"] 87 | content = { 88 | "title": catalog.get("title"), 89 | "publisher_name": traverse_dict( 90 | catalog, ["publisher", "name"]), 91 | "publisher_mbox": traverse_dict( 92 | catalog, ["publisher", "mbox"]), 93 | "catalog_path_or_url": catalog_path_or_url, 94 | "description": catalog.get("description"), 95 | "global_status": validation["status"], 96 | "catalog_status": validation["error"]["catalog"]["status"], 97 | "no_of_datasets": len(catalog["dataset"]), 98 | "no_of_distributions": sum([len(dataset["distribution"]) for 99 | dataset in catalog["dataset"]]), 100 | "federated_datasets": indicators["datasets_federados_cant"], 101 | "not_federated_datasets": indicators["datasets_no_federados_cant"], 102 | "not_federated_datasets_pct": non_federated_pct, 103 | "not_federated_datasets_list": not_federated_datasets_list, 104 | "federated_removed_datasets_list": federated_removed_datasets_list, 105 | "federated_datasets_list": federated_datasets_list, 106 | } 107 | 108 | catalog_readme = readme_template.format(**content) 109 | 110 | if export_path: 111 | with io.open(export_path, 'w+', encoding='utf-8') as target: 112 | target.write(catalog_readme) 113 | else: 114 | return catalog_readme 115 | -------------------------------------------------------------------------------- /pydatajson/constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | REQUESTS_TIMEOUT = 30 4 | DEFAULT_TIMEZONE = "America/Buenos_Aires" 5 | 6 | INVALID_STATUS_CODES_REGEX = ["^4[0-9]+$", "^5[0-9]+$"] 7 | EXCEPTION_STATUS_CODES = [429] 8 | 9 | DEFAULT_CHECK_TIMEOUT = 1 10 | -------------------------------------------------------------------------------- /pydatajson/custom_remote_ckan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from ckanapi import RemoteCKAN 4 | 5 | from pydatajson.constants import REQUESTS_TIMEOUT 6 | 7 | 8 | class CustomRemoteCKAN(RemoteCKAN): 9 | 10 | def __init__(self, address, apikey=None, user_agent=None, get_only=False, 11 | verify_ssl=False, requests_timeout=REQUESTS_TIMEOUT): 12 | self.verify_ssl = verify_ssl 13 | self.requests_timeout = requests_timeout 14 | super(CustomRemoteCKAN, self).__init__(address, apikey, 15 | user_agent, 
get_only) 16 | 17 | def call_action(self, action, data_dict=None, context=None, apikey=None, 18 | files=None, requests_kwargs=None): 19 | requests_kwargs = requests_kwargs or {} 20 | requests_kwargs.setdefault('verify', self.verify_ssl) 21 | requests_kwargs.setdefault('timeout', self.requests_timeout) 22 | return super(CustomRemoteCKAN, self).call_action( 23 | action, data_dict, context, apikey, files, requests_kwargs) 24 | -------------------------------------------------------------------------------- /pydatajson/documentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo 'documentation' de Pydatajson 5 | 6 | Contiene métodos para generar documentación en markdown de distintos 7 | componentes de un catálogo. 8 | """ 9 | 10 | from __future__ import print_function, unicode_literals, with_statement 11 | 12 | from six.moves import map 13 | 14 | 15 | def dataset_to_markdown(dataset): 16 | """Genera texto en markdown a partir de los metadatos de una `dataset`. 17 | 18 | Args: 19 | dataset (dict): Diccionario con metadatos de una `dataset`. 20 | 21 | Returns: 22 | str: Texto que describe una `dataset`. 23 | """ 24 | text_template = """ 25 | # {title} 26 | 27 | {description} 28 | 29 | ## Recursos del dataset 30 | 31 | {distributions} 32 | """ 33 | 34 | if "distribution" in dataset: 35 | distributions = "".join( 36 | map(distribution_to_markdown, dataset["distribution"])) 37 | else: 38 | distributions = "" 39 | 40 | text = text_template.format( 41 | title=dataset["title"], 42 | description=dataset.get("description", ""), 43 | distributions=distributions 44 | ) 45 | 46 | return text 47 | 48 | 49 | def distribution_to_markdown(distribution): 50 | """Genera texto en markdown a partir de los metadatos de una 51 | `distribution`. 52 | 53 | Args: 54 | distribution (dict): Diccionario con metadatos de una 55 | `distribution`. 56 | 57 | Returns: 58 | str: Texto que describe una `distribution`. 59 | """ 60 | text_template = """ 61 | ### {title} 62 | 63 | {description} 64 | 65 | #### Campos del recurso 66 | 67 | {fields} 68 | """ 69 | 70 | if "field" in distribution: 71 | fields = "- " + \ 72 | "\n- ".join(map(field_to_markdown, distribution["field"])) 73 | else: 74 | fields = "" 75 | 76 | text = text_template.format( 77 | title=distribution["title"], 78 | description=distribution.get("description", ""), 79 | fields=fields 80 | ) 81 | 82 | return text 83 | 84 | 85 | def field_to_markdown(field): 86 | """Genera texto en markdown a partir de los metadatos de un `field`. 87 | 88 | Args: 89 | field (dict): Diccionario con metadatos de un `field`. 90 | 91 | Returns: 92 | str: Texto que describe un `field`. 
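Example: Esbozo con un `field` mínimo de ejemplo: >>> field_to_markdown({"title": "fecha", "type": "date", "description": "Fecha del registro"}) '**fecha** (date): Fecha del registro'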
93 | """ 94 | if "title" in field: 95 | field_title = "**{}**".format(field["title"]) 96 | else: 97 | raise Exception("Es necesario un `title` para describir un campo.") 98 | 99 | field_type = " ({})".format(field["type"]) if "type" in field else "" 100 | field_desc = ": {}".format( 101 | field["description"]) if "description" in field else "" 102 | 103 | text_template = "{title}{type}{description}" 104 | text = text_template.format(title=field_title, type=field_type, 105 | description=field_desc) 106 | 107 | return text 108 | -------------------------------------------------------------------------------- /pydatajson/download.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Módulo 'download' de pydatajson 4 | 5 | Contiene métodos para descargar archivos a través del protocolo HTTP. 6 | """ 7 | 8 | from __future__ import unicode_literals, print_function, with_statement 9 | from __future__ import absolute_import 10 | 11 | import requests 12 | import time 13 | import sys 14 | 15 | DEFAULT_TRIES = 3 16 | RETRY_DELAY = 1 17 | 18 | 19 | def download(url, file_path, tries=DEFAULT_TRIES, retry_delay=RETRY_DELAY): 20 | """ 21 | Descarga un archivo a través del protocolo HTTP, en uno o más intentos. 22 | 23 | Args: 24 | url (str): URL (schema HTTP) del archivo a descargar. 25 | tries (int): Intentos a realizar (default: 3). 26 | retry_delay (int o float): Tiempo a esperar, en segundos, entre cada 27 | intento. 28 | try_timeout (int o float): Tiempo máximo a esperar por intento. 29 | proxies (dict): Proxies a utilizar. El diccionario debe contener los 30 | valores 'http' y 'https', cada uno asociados a la URL del proxy 31 | correspondiente. 32 | 33 | Returns: 34 | bytes: Contenido del archivo 35 | """ 36 | timeout = 10 37 | for i in range(1, tries + 1): 38 | try: 39 | with requests.get(url, timeout=timeout ** i, stream=True, 40 | verify=False) as r: 41 | r.raise_for_status() 42 | with open(file_path, 'wb') as f: 43 | for chunk in r.iter_content(chunk_size=8192): 44 | if chunk: # filter out keep-alive new chunks 45 | f.write(chunk) 46 | 47 | except requests.TooManyRedirects as e: 48 | raise e 49 | except Exception as e: 50 | download_exception = e 51 | raise download_exception 52 | 53 | 54 | def download_to_file(url, file_path, **kwargs): 55 | """ 56 | Descarga un archivo a través del protocolo HTTP, en uno o más intentos, y 57 | escribe el contenido descargado el el path especificado. 58 | 59 | Args: 60 | url (str): URL (schema HTTP) del archivo a descargar. 61 | file_path (str): Path del archivo a escribir. Si un archivo ya existe 62 | en el path especificado, se sobrescribirá con nuevos contenidos. 63 | kwargs: Parámetros para download(). 
64 | """ 65 | content = download(url, file_path, **kwargs) 66 | 67 | 68 | if __name__ == '__main__': 69 | download_to_file(sys.argv[1], sys.argv[2]) 70 | -------------------------------------------------------------------------------- /pydatajson/fields/fields.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "requerido", 3 | "description": "requerido", 4 | "publisher": { 5 | "name": "requerido", 6 | "mbox": "requerido" 7 | }, 8 | "issued": "recomendado", 9 | "modified": "recomendado", 10 | "language": "recomendado", 11 | "superThemeTaxonomy": "requerido", 12 | "themeTaxonomy": "recomendado", 13 | "license": "recomendado", 14 | "homepage": "recomendado", 15 | "rights": "optativo", 16 | "spatial": "optativo", 17 | "dataset": { 18 | "title": "requerido", 19 | "description": "requerido", 20 | "publisher": { 21 | "name": "requerido", 22 | "mbox": "recomendado" 23 | }, 24 | "contactPoint": { 25 | "fn": "recomendado", 26 | "hasEmail": "recomendado" 27 | }, 28 | "superTheme": "requerido", 29 | "theme": "recomendado", 30 | "keyword": "recomendado", 31 | "accrualPeriodicity": "requerido", 32 | "issued": "requerido", 33 | "modified": "recomendado", 34 | "identifier": "requerido", 35 | "language": "optativo", 36 | "spatial": "optativo", 37 | "temporal": "recomendado", 38 | "landingPage": "optativo", 39 | "license": "recomendado", 40 | "distribution": { 41 | "accessURL": "requerido", 42 | "description": "recomendado", 43 | "format": "recomendado", 44 | "mediaType": "optativo", 45 | "downloadURL": "requerido", 46 | "title": "requerido", 47 | "license": "recomendado", 48 | "byteSize": "optativo", 49 | "issued": "requerido", 50 | "modified": "recomendado", 51 | "rights": "optativo", 52 | "field": { 53 | "title": "recomendado", 54 | "type": "recomendado", 55 | "description": "recomendado" 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /pydatajson/reporting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo 'reporting' de Pydatajson 5 | 6 | Contiene los métodos para generar reportes sobre un catálogo. 7 | """ 8 | 9 | from __future__ import unicode_literals, print_function, \ 10 | with_statement, absolute_import 11 | 12 | from collections import OrderedDict 13 | 14 | from pydatajson import writers 15 | from . import helpers 16 | from . import readers 17 | from .validation import validate_catalog 18 | 19 | 20 | def generate_datasets_summary(catalog, export_path=None, validator=None, 21 | verify_ssl=True, url_check_timeout=1): 22 | """Genera un informe sobre los datasets presentes en un catálogo, 23 | indicando para cada uno: 24 | - Índice en la lista catalog["dataset"] 25 | - Título 26 | - Identificador 27 | - Cantidad de distribuciones 28 | - Estado de sus metadatos ["OK"|"ERROR"] 29 | 30 | Es utilizada por la rutina diaria de `libreria-catalogos` para reportar 31 | sobre los datasets de los catálogos mantenidos. 32 | 33 | Args: 34 | catalog (str o dict): Path a un catálogo en cualquier formato, 35 | JSON, XLSX, o diccionario de python. 36 | export_path (str): Path donde exportar el informe generado (en 37 | formato XLSX o CSV). Si se especifica, el método no devolverá 38 | nada. 39 | 40 | Returns: 41 | list: Contiene tantos dicts como datasets estén presentes en 42 | `catalogs`, con los datos antes mencionados. 
43 | """ 44 | catalog = readers.read_catalog(catalog) 45 | 46 | # Trato de leer todos los datasets bien formados de la lista 47 | # catalog["dataset"], si existe. 48 | if "dataset" in catalog and isinstance(catalog["dataset"], list): 49 | datasets = [d if isinstance(d, dict) else {} for d in 50 | catalog["dataset"]] 51 | else: 52 | # Si no, considero que no hay datasets presentes 53 | datasets = [] 54 | 55 | validation = validate_catalog( 56 | catalog, validator=validator, verify_ssl=verify_ssl, 57 | url_check_timeout=url_check_timeout)["error"]["dataset"] 58 | 59 | def info_dataset(index, dataset): 60 | """Recolecta información básica de un dataset.""" 61 | info = OrderedDict() 62 | info["indice"] = index 63 | info["titulo"] = dataset.get("title") 64 | info["identificador"] = dataset.get("identifier") 65 | info["estado_metadatos"] = validation[index]["status"] 66 | info["cant_errores"] = len(validation[index]["errors"]) 67 | info["cant_distribuciones"] = len(dataset["distribution"]) 68 | if helpers.dataset_has_data_distributions(dataset): 69 | info["tiene_datos"] = "SI" 70 | else: 71 | info["tiene_datos"] = "NO" 72 | 73 | return info 74 | 75 | summary = [info_dataset(i, ds) for i, ds in enumerate(datasets)] 76 | if export_path: 77 | writers.write_table(summary, export_path) 78 | else: 79 | return summary 80 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from pydatajson import custom_exceptions 6 | 7 | from pydatajson.response_formatters.dict_formatter import DictFormatter 8 | from pydatajson.response_formatters.list_formatter import ListFormatter 9 | from pydatajson.response_formatters.tables_formatter import TablesFormatter 10 | 11 | 12 | def format_response(validation, export_path, response_format): 13 | formats = { 14 | 'table': TablesFormatter(validation, export_path), 15 | 'dict': DictFormatter(validation), 16 | 'list': ListFormatter(validation), 17 | } 18 | try: 19 | return formats[response_format].format() 20 | except KeyError: 21 | msg = "No se reconoce el formato {}".format(response_format) 22 | raise custom_exceptions.FormatNameError(msg) 23 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/dict_formatter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from pydatajson.response_formatters.validation_response_formatter import \ 6 | ValidationResponseFormatter 7 | 8 | 9 | class DictFormatter(ValidationResponseFormatter): 10 | 11 | def format(self): 12 | return self.response 13 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/list_formatter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from pydatajson.response_formatters.validation_response_formatter import \ 6 | ValidationResponseFormatter 7 | 8 | 9 | class ListFormatter(ValidationResponseFormatter): 10 | 11 | def format(self): 12 | rows_catalog = [] 13 | validation_result = { 14 | "catalog_title": self.response["error"]["catalog"]["title"], 15 | "catalog_status": self.response["error"]["catalog"]["status"], 16 | } 17 | 
for error in self.response["error"]["catalog"]["errors"]: 18 | catalog_result = dict(validation_result) 19 | catalog_result.update({ 20 | "catalog_error_message": error["message"], 21 | "catalog_error_location": ", ".join(error["path"]), 22 | }) 23 | rows_catalog.append(catalog_result) 24 | 25 | if len(self.response["error"]["catalog"]["errors"]) == 0: 26 | catalog_result = dict(validation_result) 27 | catalog_result.update({ 28 | "catalog_error_message": None, 29 | "catalog_error_location": None 30 | }) 31 | rows_catalog.append(catalog_result) 32 | 33 | # crea una lista de dicts para volcarse en una tabla (dataset) 34 | rows_dataset = [] 35 | for dataset in self.response["error"]["dataset"]: 36 | validation_result = { 37 | "dataset_title": dataset["title"], 38 | "dataset_identifier": dataset["identifier"], 39 | "dataset_list_index": dataset["list_index"], 40 | "dataset_status": dataset["status"] 41 | } 42 | for error in dataset["errors"]: 43 | dataset_result = dict(validation_result) 44 | dataset_result.update({ 45 | "dataset_error_message": error["message"], 46 | "dataset_error_location": error["path"][-1] 47 | }) 48 | rows_dataset.append(dataset_result) 49 | 50 | if len(dataset["errors"]) == 0: 51 | dataset_result = dict(validation_result) 52 | dataset_result.update({ 53 | "dataset_error_message": None, 54 | "dataset_error_location": None 55 | }) 56 | rows_dataset.append(dataset_result) 57 | 58 | return {"catalog": rows_catalog, "dataset": rows_dataset} 59 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/tables_formatter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | 5 | from openpyxl.styles import Alignment, Font 6 | 7 | from pydatajson import writers 8 | from pydatajson.response_formatters.list_formatter import ListFormatter 9 | from pydatajson.response_formatters.validation_response_formatter import\ 10 | ValidationResponseFormatter 11 | 12 | 13 | class TablesFormatter(ValidationResponseFormatter): 14 | 15 | def __init__(self, response, export_path): 16 | super(TablesFormatter, self).__init__(response) 17 | self.export_path = export_path 18 | 19 | def format(self): 20 | validation_lists = ListFormatter(self.response).format() 21 | 22 | column_styles = { 23 | "catalog": { 24 | "catalog_status": {"width": 20}, 25 | "catalog_error_location": {"width": 40}, 26 | "catalog_error_message": {"width": 40}, 27 | "catalog_title": {"width": 20}, 28 | }, 29 | "dataset": { 30 | "dataset_error_location": {"width": 20}, 31 | "dataset_identifier": {"width": 40}, 32 | "dataset_status": {"width": 20}, 33 | "dataset_title": {"width": 40}, 34 | "dataset_list_index": {"width": 20}, 35 | "dataset_error_message": {"width": 40}, 36 | } 37 | } 38 | cell_styles = { 39 | "catalog": [ 40 | {"alignment": Alignment(vertical="center")}, 41 | {"row": 1, "font": Font(bold=True)}, 42 | ], 43 | "dataset": [ 44 | {"alignment": Alignment(vertical="center")}, 45 | {"row": 1, "font": Font(bold=True)}, 46 | ] 47 | } 48 | 49 | # crea tablas en un sólo excel o varios CSVs 50 | writers.write_tables( 51 | tables=validation_lists, path=self.export_path, 52 | column_styles=column_styles, cell_styles=cell_styles 53 | ) 54 | -------------------------------------------------------------------------------- /pydatajson/response_formatters/validation_response_formatter.py: -------------------------------------------------------------------------------- 1 
| # -*- coding: utf-8 -*- 2 | 3 | from __future__ import unicode_literals 4 | import abc 5 | 6 | 7 | class ValidationResponseFormatter(object): 8 | 9 | def __init__(self, response): 10 | self.response = response 11 | 12 | @abc.abstractmethod 13 | def format(self): 14 | raise NotImplementedError 15 | -------------------------------------------------------------------------------- /pydatajson/schemas/accrualPeriodicity.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "R/P10Y", 4 | "description": "Cada diez años" 5 | }, 6 | { 7 | "id": "R/P4Y", 8 | "description": "Cada cuatro años" 9 | }, 10 | { 11 | "id": "R/P3Y", 12 | "description": "Cada tres años" 13 | }, 14 | { 15 | "id": "R/P2Y", 16 | "description": "Cada dos años" 17 | }, 18 | { 19 | "id": "R/P1Y", 20 | "description": "Anualmente" 21 | }, 22 | { 23 | "id": "R/P6M", 24 | "description": "Cada medio año" 25 | }, 26 | { 27 | "id": "R/P4M", 28 | "description": "Cuatrimestralmente" 29 | }, 30 | { 31 | "id": "R/P3M", 32 | "description": "Trimestralmente" 33 | }, 34 | { 35 | "id": "R/P2M", 36 | "description": "Bimestralmente" 37 | }, 38 | { 39 | "id": "R/P1M", 40 | "description": "Mensualmente" 41 | }, 42 | { 43 | "id": "R/P0.5M", 44 | "description": "Cada 15 días" 45 | }, 46 | { 47 | "id": "R/P0.33M", 48 | "description": "Tres veces por mes" 49 | }, 50 | { 51 | "id": "R/P1W", 52 | "description": "Semanalmente" 53 | }, 54 | { 55 | "id": "R/P0.5W", 56 | "description": "Dos veces a la semana" 57 | }, 58 | { 59 | "id": "R/P0.33W", 60 | "description": "Tres veces a la semana" 61 | }, 62 | { 63 | "id": "R/P1D", 64 | "description": "Diariamente" 65 | }, 66 | { 67 | "id": "R/PT1H", 68 | "description": "Cada hora" 69 | }, 70 | { 71 | "id": "R/PT1S", 72 | "description": "Continuamente actualizado" 73 | }, 74 | { 75 | "id": "eventual", 76 | "description": "Eventual" 77 | } 78 | ] 79 | -------------------------------------------------------------------------------- /pydatajson/schemas/catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "required": [ 5 | "dataset", 6 | "title", 7 | "description", 8 | "publisher", 9 | "superThemeTaxonomy" 10 | ], 11 | "properties": { 12 | "identifier": {"$ref": "mixed-types.json#stringOrNull"}, 13 | "publisher": { 14 | "type": "object", 15 | "required": ["name"], 16 | "properties": { 17 | "name": { "$ref": "mixed-types.json#nonEmptyString" }, 18 | "mbox": { 19 | "anyOf": [ 20 | { "type": "string", "format": "email" }, 21 | { "type": "string", "maxLength": 0 } 22 | ]} 23 | } 24 | }, 25 | "dataset": { 26 | "type": "array", 27 | "items": { "$ref": "dataset.json" }, 28 | "uniqueItems": true 29 | }, 30 | "title": { "$ref": "mixed-types.json#nonEmptyString" }, 31 | "description": { "$ref": "mixed-types.json#nonEmptyString" }, 32 | "superThemeTaxonomy": { "type": "string", "format": "uri" }, 33 | "issued": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 34 | "modified": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 35 | "language": { "$ref": "mixed-types.json#arrayOrNull" }, 36 | "themeTaxonomy": { 37 | "type": "array", 38 | "items": { "$ref": "theme.json" } 39 | }, 40 | "license": { "$ref": "mixed-types.json#stringOrNull" }, 41 | "homepage": { 42 | "anyOf": [ 43 | { "type": "string", "format": "uri" }, 44 | { "$ref": "mixed-types.json#emptyValue" } 45 | ] 46 | }, 47 | "rights": { "$ref": 
"mixed-types.json#stringOrNull" }, 48 | "spatial": { 49 | "anyOf": [ 50 | { "type": "string" }, 51 | { "type": "array" }, 52 | { "type": "null" } 53 | ] 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /pydatajson/schemas/dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "required": [ 5 | "title", 6 | "description", 7 | "publisher", 8 | "superTheme", 9 | "distribution", 10 | "accrualPeriodicity", 11 | "issued", 12 | "identifier" 13 | ], 14 | "properties": { 15 | "publisher": { 16 | "type": "object", 17 | "required": ["name"], 18 | "properties": { 19 | "name": { "$ref": "mixed-types.json#nonEmptyString" }, 20 | "mbox": { 21 | "anyOf": [ 22 | { "type": "string", "pattern": "^[ ]*[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*[ ]*$" }, 23 | { "$ref": "mixed-types.json#emptyValue" } 24 | ] 25 | } 26 | } 27 | }, 28 | "distribution": { 29 | "type": "array", 30 | "items": { "$ref": "distribution.json" } 31 | }, 32 | "title": { 33 | "allOf": [ 34 | { "$ref": "mixed-types.json#nonEmptyString"}, 35 | { "maxLength" : 100} 36 | ] 37 | }, 38 | "description": { "$ref": "mixed-types.json#nonEmptyString" }, 39 | "issued": { "$ref": "mixed-types.json#dateOrDatetimeString" }, 40 | "superTheme": { 41 | "type": "array", 42 | "minItems": 1, 43 | "items": { "$ref": "mixed-types.json#superTheme" } 44 | }, 45 | "accrualPeriodicity": { 46 | "anyOf" : [ 47 | {"type": "string", "pattern": "^R/P\\d+(\\.\\d+)?[Y|M|W|D]$"}, 48 | {"type": "string", "pattern": "^R/PT\\d+(\\.\\d+)?[H|M|S]$"}, 49 | {"type": "string", "pattern": "^eventual$"}, 50 | {"type": "string", "pattern": "^EVENTUAL$"} 51 | ] 52 | }, 53 | "contactPoint": { 54 | "type": "object", 55 | "properties": { 56 | "fn": { "$ref": "mixed-types.json#stringOrNull" }, 57 | "hasEmail": { 58 | "anyOf": [ 59 | { "type": "string", "pattern": "^[ ]*[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*[ ]*$" }, 60 | { "$ref": "mixed-types.json#emptyValue" } 61 | ] 62 | } 63 | } 64 | }, 65 | "theme": { "$ref": "mixed-types.json#arrayOrNull" }, 66 | "keyword": { 67 | "allOf": [ 68 | { "$ref": "mixed-types.json#arrayOrNull" }, 69 | { "anyOf": [ 70 | {"not": { "type": "array" }}, 71 | {"items": { "pattern": "^[ 0-9a-zá-źÁ-ŹA-ZñÑ._-]+$" }} 72 | ]} 73 | ] 74 | }, 75 | "modified": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 76 | "identifier": { "$ref": "mixed-types.json#nonEmptyString" }, 77 | "language": { "$ref": "mixed-types.json#arrayOrNull" }, 78 | "spatial": { 79 | "anyOf": [ 80 | { "type": "string" }, 81 | { "type": "array" }, 82 | { "type": "null" } 83 | ] 84 | }, 85 | "temporal": { "$ref": "mixed-types.json#temporalOrNull" }, 86 | "landingPage": { 87 | "anyOf": [ 88 | { "type": "string", "format": "uri" }, 89 | { "$ref": "mixed-types.json#emptyValue" } 90 | ] 91 | }, 92 | "license": { "$ref": "mixed-types.json#stringOrNull" }, 93 | "source": { "$ref": "mixed-types.json#stringOrNull" } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /pydatajson/schemas/distribution.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "required": [ 5 | 
"accessURL", 6 | "downloadURL", 7 | "title", 8 | "issued", 9 | "identifier" 10 | ], 11 | "properties": { 12 | "identifier": { "$ref": "mixed-types.json#nonEmptyString" }, 13 | "accessURL": { "type": "string", "format": "uri" }, 14 | "downloadURL": { "type": "string", "format": "uri" }, 15 | "title": { "$ref": "mixed-types.json#nonEmptyString" }, 16 | "issued": { "$ref": "mixed-types.json#dateOrDatetimeString" }, 17 | "description": { "$ref": "mixed-types.json#stringOrNull" }, 18 | "format": { "$ref": "mixed-types.json#stringOrNull" }, 19 | "mediaType": { "$ref": "mixed-types.json#stringOrNull" }, 20 | "license": { "$ref": "mixed-types.json#stringOrNull" }, 21 | "byteSize": { 22 | "anyOf": [ 23 | { "type": "integer" }, 24 | { "$ref": "mixed-types.json#emptyValue" } 25 | ] 26 | }, 27 | "modified": { "$ref": "mixed-types.json#dateOrDatetimeStringOrNull" }, 28 | "rights": { "$ref": "mixed-types.json#stringOrNull" }, 29 | "fileName": { "$ref": "mixed-types.json#stringOrNull" }, 30 | "field": { 31 | "type": "array", 32 | "items": { "$ref": "field.json" } 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /pydatajson/schemas/field.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "properties": { 5 | "title": { "$ref": "mixed-types.json#fieldTitle"}, 6 | "type": { "$ref": "mixed-types.json#stringOrNull" }, 7 | "description": { "$ref": "mixed-types.json#stringOrNull" } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /pydatajson/schemas/mixed-types.json: -------------------------------------------------------------------------------- 1 | { 2 | "superTheme": { 3 | "anyOf": [ 4 | {"type": "string", "pattern": "^AGRI$"}, 5 | {"type": "string", "pattern": "^ECON$"}, 6 | {"type": "string", "pattern": "^EDUC$"}, 7 | {"type": "string", "pattern": "^ENER$"}, 8 | {"type": "string", "pattern": "^ENVI$"}, 9 | {"type": "string", "pattern": "^GOVE$"}, 10 | {"type": "string", "pattern": "^HEAL$"}, 11 | {"type": "string", "pattern": "^INTR$"}, 12 | {"type": "string", "pattern": "^JUST$"}, 13 | {"type": "string", "pattern": "^REGI$"}, 14 | {"type": "string", "pattern": "^SOCI$"}, 15 | {"type": "string", "pattern": "^TECH$"}, 16 | {"type": "string", "pattern": "^TRAN$"}, 17 | {"type": "string", "pattern": "^agri$"}, 18 | {"type": "string", "pattern": "^econ$"}, 19 | {"type": "string", "pattern": "^educ$"}, 20 | {"type": "string", "pattern": "^ener$"}, 21 | {"type": "string", "pattern": "^envi$"}, 22 | {"type": "string", "pattern": "^gove$"}, 23 | {"type": "string", "pattern": "^heal$"}, 24 | {"type": "string", "pattern": "^intr$"}, 25 | {"type": "string", "pattern": "^just$"}, 26 | {"type": "string", "pattern": "^regi$"}, 27 | {"type": "string", "pattern": "^soci$"}, 28 | {"type": "string", "pattern": "^tech$"}, 29 | {"type": "string", "pattern": "^tran$"} 30 | ] 31 | }, 32 | "nonEmptyStringOrNull": { 33 | "anyOf": [ 34 | { "type": "string", "minLength": 1 }, 35 | { "type": "null" } 36 | ] 37 | }, 38 | "nonEmptyString": { "type": "string", "minLength": 1}, 39 | "arrayOrNull": { 40 | "anyOf": [ 41 | { 42 | "type": "array", 43 | "items": {"$ref": "#/nonEmptyString"} 44 | }, 45 | { "type": "null" } 46 | ] 47 | }, 48 | "dateOrDatetimeString": { 49 | "anyOf": [ 50 | { "type": "string", "format": "date" }, 51 | { "type": "string", "format": "date-time" } 52 | ] 53 | }, 54 | 
"dateOrDatetimeStringOrNull": { 55 | "anyOf": [ 56 | { "type": "string", "format": "date" }, 57 | { "type": "string", "format": "date-time" }, 58 | { "type": "null" }, 59 | { "type": "string", "maxLength": 0 } 60 | ] 61 | }, 62 | "stringOrNull": { 63 | "anyOf": [ 64 | { "type": "string" }, 65 | { "type": "null" } 66 | ] 67 | }, 68 | "fieldTitle": { 69 | "anyOf": [ 70 | { "type": "string", "maxLength": 60 }, 71 | { "type": "null" } 72 | ] 73 | }, 74 | "temporalOrNull": { 75 | "anyOf": [ 76 | { "type": "string", "pattern": "^(\\d{4}-\\d\\d-\\d\\d(T\\d\\d:\\d\\d:\\d\\d(\\.\\d+)?)?(([+-]\\d\\d:\\d\\d)|Z)?)\\/(\\d{4}-\\d\\d-\\d\\d(T\\d\\d:\\d\\d:\\d\\d(\\.\\d+)?)?(([+-]\\d\\d:\\d\\d)|Z)?)$" }, 77 | { "type": "string", "pattern": "^(\\d{4}-\\d\\d-\\d\\d(T\\d\\d:\\d\\d:\\d\\d(\\.\\d+)?)?(([+-]\\d\\d:\\d\\d)|Z)?)$" }, 78 | { "type": "null" }, 79 | { "type": "string", "maxLength": 0 } 80 | ] 81 | }, 82 | "emptyValue": { 83 | "anyOf": [ 84 | { "type": "string", "maxLength": 0 }, 85 | { "type": "null" } 86 | ] 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /pydatajson/schemas/required_fields_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "required": [ 4 | "dataset", 5 | "title", 6 | "description", 7 | "publisher", 8 | "superThemeTaxonomy" 9 | ], 10 | "properties": { 11 | "publisher": { 12 | "type": "object", 13 | "required": ["name", "mbox"] 14 | }, 15 | "dataset": { 16 | "type": "array", 17 | "items": { 18 | "type": "object", 19 | "required": [ 20 | "title", 21 | "description", 22 | "publisher", 23 | "superTheme", 24 | "distribution", 25 | "accrualPeriodicity", 26 | "issued", 27 | "identifier" 28 | ], 29 | "properties": { 30 | "publisher": { 31 | "type": "object", 32 | "required": ["name"] 33 | }, 34 | "distribution": { 35 | "type": "array", 36 | "items": { 37 | "type": "object", 38 | "required": [ 39 | "accessURL", 40 | "downloadURL", 41 | "title", 42 | "issued", 43 | "identifier" 44 | ] 45 | } 46 | } 47 | } 48 | } 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /pydatajson/schemas/superThemeTaxonomy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "AGRI", 4 | "label": "Agroganadería, pesca y forestación", 5 | "description": "Por ejemplo: 'Lechería: precio pagado al productor' o 'Superficie forestada'." 6 | }, 7 | { 8 | "id": "ECON", 9 | "label": "Economía y finanzas", 10 | "description": "Por ejemplo: 'Deuda pública'." 11 | }, 12 | { 13 | "id": "EDUC", 14 | "label": "Educación, cultura y deportes", 15 | "description": "Por ejemplo: 'Registro de Establecimientos Educativos'." 16 | }, 17 | { 18 | "id": "ENER", 19 | "label": "Energía", 20 | "description": "Por ejemplo: 'Productos mineros exportados' o 'Precios del GNC'." 21 | }, 22 | { 23 | "id": "ENVI", 24 | "label": "Medio ambiente", 25 | "description": "Por ejemplo: 'Operadores de residuos peligrosos'." 26 | }, 27 | { 28 | "id": "GOVE", 29 | "label": "Gobierno y sector público", 30 | "description": "Por ejemplo: 'Inmuebles del estado Nacional'." 31 | }, 32 | { 33 | "id": "HEAL", 34 | "label": "Salud", 35 | "description": "Por ejemplo: 'Estadísticas nacionales de VIH/SIDA'." 36 | }, 37 | { 38 | "id": "INTR", 39 | "label": "Asuntos internacionales", 40 | "description": "Por ejemplo: 'Representaciones argentinas en el exterior'." 
41 | }, 42 | { 43 | "id": "JUST", 44 | "label": "Justicia, seguridad y legales", 45 | "description": "Por ejemplo:'Censo penitenciario'." 46 | }, 47 | { 48 | "id": "REGI", 49 | "label": "Regiones y ciudades", 50 | "description": "Por ejemplo: 'Departamentos de la provincia de Río Negro'." 51 | }, 52 | { 53 | "id": "SOCI", 54 | "label": "Población y sociedad", 55 | "description": "Por ejemplo: 'Turistas residentes que viajan por Argentina'." 56 | }, 57 | { 58 | "id": "TECH", 59 | "label": "Ciencia y tecnología", 60 | "description": "Por ejemplo: 'Recursos humanos en ciencia y tecnología'." 61 | }, 62 | { 63 | "id": "TRAN", 64 | "label": "Transporte", 65 | "description": "Por ejemplo: 'Estadísticas viales'." 66 | } 67 | ] 68 | -------------------------------------------------------------------------------- /pydatajson/schemas/theme.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "properties": { 5 | "id": { "$ref": "mixed-types.json#stringOrNull" }, 6 | "label": { "$ref": "mixed-types.json#stringOrNull" }, 7 | "description": { "$ref": "mixed-types.json#stringOrNull" } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /pydatajson/status_indicators_generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from pydatajson.readers import read_catalog 4 | from pydatajson.reporting import generate_datasets_summary 5 | from pydatajson.validators\ 6 | .distribution_download_urls_validator \ 7 | import DistributionDownloadUrlsValidator 8 | 9 | 10 | class StatusIndicatorsGenerator(object): 11 | 12 | def __init__(self, catalog, validator=None, verify_ssl=True, 13 | url_check_timeout=1, threads_count=1): 14 | self.download_url_ok = None 15 | self.catalog = read_catalog(catalog) 16 | self.summary = generate_datasets_summary(self.catalog, 17 | validator=validator, 18 | verify_ssl=verify_ssl) 19 | self.verify_url = verify_ssl 20 | self.url_check_timeout = url_check_timeout 21 | self.threads_count = threads_count 22 | 23 | def datasets_cant(self): 24 | return len(self.summary) 25 | 26 | def distribuciones_cant(self): 27 | return sum(ds['cant_distribuciones'] for ds in self.summary) 28 | 29 | def datasets_meta_ok_cant(self): 30 | return sum(ds['estado_metadatos'] == 'OK' for ds in self.summary) 31 | 32 | def datasets_meta_error_cant(self): 33 | return sum(ds['estado_metadatos'] == 'ERROR' for ds in self.summary) 34 | 35 | def datasets_meta_ok_pct(self): 36 | return self._get_dataset_percentage(self.datasets_meta_ok_cant) 37 | 38 | def datasets_con_datos_cant(self): 39 | return sum(ds['tiene_datos'] == 'SI' for ds in self.summary) 40 | 41 | def datasets_sin_datos_cant(self): 42 | return sum(ds['tiene_datos'] == 'NO' for ds in self.summary) 43 | 44 | def datasets_con_datos_pct(self): 45 | return self._get_dataset_percentage(self.datasets_con_datos_cant) 46 | 47 | def distribuciones_download_url_ok_cant(self): 48 | if self.download_url_ok: 49 | return self.download_url_ok 50 | validator = DistributionDownloadUrlsValidator( 51 | self.catalog, self.verify_url, self.url_check_timeout, 52 | self.threads_count) 53 | self.download_url_ok = validator.validate() 54 | return self.download_url_ok 55 | 56 | def distribuciones_download_url_error_cant(self): 57 | return self.distribuciones_cant() - \ 58 | self.distribuciones_download_url_ok_cant() 59 | 60 | def 
distribuciones_download_url_ok_pct(self): 61 | total = self.distribuciones_cant() 62 | if not total: 63 | return None 64 | return \ 65 | round(float(self.distribuciones_download_url_ok_cant()) / total, 4) 66 | 67 | def _get_dataset_percentage(self, indicator): 68 | total = self.datasets_cant() 69 | if not total: 70 | return None 71 | return round(float(indicator()) / total, 4) 72 | -------------------------------------------------------------------------------- /pydatajson/templates/catalog_readme.txt: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: {title} 3 | 4 | ## Información General 5 | 6 | - **Autor**: {publisher_name} 7 | - **Correo Electrónico**: {publisher_mbox} 8 | - **Ruta del catálogo**: {catalog_path_or_url} 9 | - **Nombre del catálogo**: {title} 10 | - **Descripción**: 11 | 12 | > {description} 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: {global_status} 17 | - **Estado metadatos catálogo**: {catalog_status} 18 | - **Cantidad Total de Datasets**: {no_of_datasets} 19 | - **Cantidad Total de Distribuciones**: {no_of_distributions} 20 | 21 | - **Cantidad de Datasets Federados**: {federated_datasets} 22 | - **Cantidad de Datasets NO Federados**: {not_federated_datasets} 23 | - **Porcentaje de Datasets NO Federados**: {not_federated_datasets_pct} 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | {federated_removed_datasets_list} 28 | 29 | ## Datasets no federados 30 | 31 | {not_federated_datasets_list} 32 | 33 | ## Datasets federados 34 | 35 | {federated_datasets_list} 36 | 37 | ## Reporte 38 | 39 | Por favor, consulte el informe [`datasets.csv`](datasets.csv). 40 | -------------------------------------------------------------------------------- /pydatajson/threading_helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from multiprocessing.pool import ThreadPool 4 | 5 | 6 | def apply_threading(l, function, cant_threads, **kwargs): 7 | if cant_threads == 1: 8 | return [function(x, **kwargs) for x in l] 9 | pool = ThreadPool(processes=cant_threads) 10 | # pool.map no acepta kwargs: se propagan a través de un closure 11 | results = pool.map(lambda x: function(x, **kwargs), l) 12 | pool.close() 13 | pool.join() 14 | return results 15 | -------------------------------------------------------------------------------- /pydatajson/time_series.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Módulo `time_series` de pydatajson 5 | 6 | Contiene funciones auxiliares para analizar catálogos con series de tiempo, 7 | definidas según la extensión del perfil de metadatos para series de tiempo. 8 | """ 9 | 10 | from __future__ import unicode_literals 11 | from __future__ import print_function 12 | from __future__ import with_statement 13 | 14 | from . 
import custom_exceptions as ce 15 | 16 | 17 | def field_is_time_series(field, distribution=None): 18 | field_may_be_ts = ( 19 | not field.get("specialType") and 20 | not field.get("specialTypeDetail") and 21 | ( 22 | field.get("type", "").lower() == "number" or 23 | field.get("type", "").lower() == "integer" 24 | ) and 25 | field.get("id") 26 | ) 27 | distribution_may_has_ts = ( 28 | not distribution or distribution_has_time_index(distribution) 29 | ) 30 | return field_may_be_ts and distribution_may_has_ts 31 | 32 | 33 | def get_distribution_time_index(distribution): 34 | for field in distribution.get('field', []): 35 | if field.get('specialType') == 'time_index': 36 | return field.get('title') 37 | 38 | raise ce.DistributionTimeIndexNonExistentError( 39 | distribution.get("title"), 40 | distribution.get("dataset_identifier"), 41 | "no tiene índice de tiempo." 42 | ) 43 | 44 | 45 | def get_distribution_time_index_frequency(distribution): 46 | for field in distribution.get('field', []): 47 | if field.get('specialType') == 'time_index': 48 | return field.get('specialTypeDetail') 49 | 50 | raise ce.DistributionTimeIndexNonExistentError( 51 | distribution.get("title"), 52 | distribution.get("dataset_identifier"), 53 | "no tiene índice de tiempo." 54 | ) 55 | 56 | 57 | def distribution_has_time_index(distribution): 58 | try: 59 | return any([field.get('specialType') == 60 | 'time_index' for field in distribution.get('field', [])]) 61 | except AttributeError: 62 | return False 63 | 64 | 65 | def dataset_has_time_series(dataset): 66 | for distribution in dataset.get('distribution', []): 67 | if distribution_has_time_index(distribution): 68 | return True 69 | return False 70 | -------------------------------------------------------------------------------- /pydatajson/transformation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Funciones auxiliares para realizar transformaciones de metadatos""" 5 | 6 | from __future__ import unicode_literals 7 | from __future__ import print_function 8 | from __future__ import with_statement 9 | import os 10 | 11 | 12 | def generate_distribution_ids(catalog): 13 | """Genera identificadores para las distribuciones que no los tienen. 14 | 15 | Los identificadores de distribuciones se generan concatenando el id del 16 | dataset al que pertenecen con el índice posicional de la distribución en el 17 | dataset: distribution_identifier = "{dataset_identifier}_{index}". 
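    Por ejemplo, la segunda distribución (índice 1) de un dataset cuyo
    identifier es "99db6631" recibe el identifier "99db6631_1", si no
    tenía uno.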
18 | """ 19 | 20 | for dataset in catalog.get("dataset", []): 21 | for distribution_index, distribution in enumerate( 22 | dataset.get("distribution", [])): 23 | if "identifier" not in distribution: 24 | distribution["identifier"] = "{}_{}".format( 25 | dataset["identifier"], distribution_index) 26 | -------------------------------------------------------------------------------- /pydatajson/validators/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /pydatajson/validators/consistent_distribution_fields_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import mimetypes 4 | import os 5 | 6 | try: 7 | from urlparse import urlparse 8 | except ImportError: 9 | from urllib.parse import urlparse 10 | 11 | import pydatajson.custom_exceptions as ce 12 | from pydatajson.validators.simple_validator import SimpleValidator 13 | 14 | EXTENSIONS_EXCEPTIONS = ["zip", "php", "asp", "aspx"] 15 | 16 | 17 | class ConsistentDistributionFieldsValidator(SimpleValidator): 18 | 19 | def validate(self): 20 | for dataset_idx, dataset in enumerate(self.catalog["dataset"]): 21 | for distribution_idx, distribution in enumerate( 22 | dataset["distribution"]): 23 | for attribute in ['downloadURL', 'fileName']: 24 | if not self._format_matches_extension(distribution, 25 | attribute): 26 | yield ce.ExtensionError(dataset_idx, distribution_idx, 27 | distribution, attribute) 28 | 29 | @staticmethod 30 | def _format_matches_extension(distribution, attribute): 31 | """Chequea si una extensión podría corresponder a un formato dado.""" 32 | 33 | if attribute in distribution and "format" in distribution: 34 | if "/" in distribution['format']: 35 | possible_format_extensions = mimetypes.guess_all_extensions( 36 | distribution['format']) 37 | else: 38 | possible_format_extensions = [ 39 | '.' 
+ distribution['format'].lower() 40 | ] 41 | 42 | file_name = urlparse(distribution[attribute]).path 43 | extension = os.path.splitext(file_name)[-1].lower() 44 | 45 | if not extension: 46 | return True 47 | 48 | # hay extensiones exceptuadas porque enmascaran otros formatos 49 | if extension.lower().replace(".", "") in EXTENSIONS_EXCEPTIONS: 50 | return True 51 | 52 | if extension not in possible_format_extensions: 53 | # si no hay extensiones posibles conocidas, no se valida 54 | if not possible_format_extensions: 55 | return True 56 | return False 57 | 58 | return True 59 | -------------------------------------------------------------------------------- /pydatajson/validators/distribution_download_urls_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from pydatajson import threading_helper 4 | from pydatajson.validators.url_validator import UrlValidator 5 | 6 | 7 | class DistributionDownloadUrlsValidator(UrlValidator): 8 | 9 | def validate(self): 10 | async_results = [] 11 | for dataset in self.catalog.get('dataset', []): 12 | distribution_urls = \ 13 | [distribution.get('downloadURL', '') 14 | for distribution in dataset.get('distribution', [])] 15 | async_results += threading_helper \ 16 | .apply_threading(distribution_urls, 17 | self.is_working_url, 18 | self.threads_count) 19 | 20 | result = 0 21 | for res, _ in async_results: 22 | result += res 23 | 24 | return result 25 | -------------------------------------------------------------------------------- /pydatajson/validators/distribution_urls_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import pydatajson.custom_exceptions as ce 4 | from pydatajson import threading_helper 5 | from pydatajson.validators.url_validator import UrlValidator 6 | 7 | 8 | class DistributionUrlsValidator(UrlValidator): 9 | 10 | def validate(self): 11 | datasets = self.catalog.get('dataset') 12 | 13 | metadata = [] 14 | urls = [] 15 | for dataset_idx, dataset in enumerate(datasets): 16 | distributions = dataset.get('distribution') 17 | 18 | for distribution_idx, distribution in enumerate(distributions): 19 | distribution_title = distribution.get('title') 20 | access_url = distribution.get('accessURL') 21 | download_url = distribution.get('downloadURL') 22 | 23 | metadata.append({ 24 | "dataset_idx": dataset_idx, 25 | "dist_idx": distribution_idx, 26 | "dist_title": distribution_title 27 | }) 28 | urls += [access_url, download_url] 29 | 30 | sync_res = threading_helper \ 31 | .apply_threading(urls, 32 | self.is_working_url, 33 | self.threads_count) 34 | 35 | for i in range(len(metadata)): 36 | actual_metadata = metadata[i] 37 | dataset_idx = actual_metadata["dataset_idx"] 38 | distribution_idx = actual_metadata["dist_idx"] 39 | distribution_title = actual_metadata["dist_title"] 40 | 41 | k = i * 2 42 | access_url = urls[k] 43 | download_url = urls[k + 1] 44 | 45 | access_url_is_valid, access_url_status_code = sync_res[k] 46 | download_url_is_valid, download_url_status_code = sync_res[k + 1] 47 | 48 | if not access_url_is_valid: 49 | yield ce.BrokenAccessUrlError(dataset_idx, 50 | distribution_idx, 51 | distribution_title, 52 | access_url, 53 | access_url_status_code) 54 | if not download_url_is_valid: 55 | yield ce.BrokenDownloadUrlError(dataset_idx, 56 | distribution_idx, 57 | distribution_title, 58 | download_url, 59 | download_url_status_code) 60 | 
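# Ejemplo de uso (bosquejo ilustrativo, asumiendo un catálogo ya leído como
# diccionario): el validador chequea en paralelo el accessURL y el downloadURL
# de cada distribución, y genera un error por cada URL que no responde.
#
#     validator = DistributionUrlsValidator(
#         catalog, verify_ssl=True, url_check_timeout=1, threads_count=4)
#     errores = list(validator.validate())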
-------------------------------------------------------------------------------- /pydatajson/validators/landing_pages_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import pydatajson.custom_exceptions as ce 5 | from pydatajson import threading_helper 6 | from pydatajson.validators.url_validator import UrlValidator 7 | 8 | 9 | class LandingPagesValidator(UrlValidator): 10 | 11 | def validate(self): 12 | datasets = self.catalog.get('dataset') 13 | datasets = filter(lambda x: x.get('landingPage'), datasets) 14 | 15 | metadata = [] 16 | urls = [] 17 | 18 | for dataset_idx, dataset in enumerate(datasets): 19 | metadata.append({ 20 | "dataset_idx": dataset_idx, 21 | "dataset_title": dataset.get('title'), 22 | "landing_page": dataset.get('landingPage'), 23 | }) 24 | urls.append(dataset.get('landingPage')) 25 | 26 | sync_res = threading_helper \ 27 | .apply_threading(urls, 28 | self.is_working_url, 29 | self.threads_count) 30 | 31 | for i in range(len(sync_res)): 32 | valid, status_code = sync_res[i] 33 | act_metadata = metadata[i] 34 | dataset_idx = act_metadata["dataset_idx"] 35 | dataset_title = act_metadata["dataset_title"] 36 | landing_page = act_metadata["landing_page"] 37 | 38 | if not valid: 39 | yield ce.BrokenLandingPageError(dataset_idx, dataset_title, 40 | landing_page, status_code) 41 | -------------------------------------------------------------------------------- /pydatajson/validators/simple_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class SimpleValidator(object): 6 | 7 | def __init__(self, catalog): 8 | self.catalog = catalog 9 | 10 | def validate(self): 11 | raise NotImplementedError 12 | -------------------------------------------------------------------------------- /pydatajson/validators/theme_ids_not_repeated_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from collections import Counter 4 | 5 | import pydatajson.custom_exceptions as ce 6 | from pydatajson.validators.simple_validator import SimpleValidator 7 | 8 | 9 | class ThemeIdsNotRepeatedValidator(SimpleValidator): 10 | 11 | def validate(self): 12 | if "themeTaxonomy" in self.catalog: 13 | theme_ids = [theme["id"] 14 | for theme in self.catalog["themeTaxonomy"]] 15 | dups = self._find_dups(theme_ids) 16 | if len(dups) > 0: 17 | yield ce.ThemeIdRepeated(dups) 18 | 19 | @staticmethod 20 | def _find_dups(elements): 21 | return [item for item, count in Counter(elements).items() 22 | if count > 1] 23 | -------------------------------------------------------------------------------- /pydatajson/validators/url_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import re 4 | 5 | import requests 6 | from requests import RequestException, Timeout 7 | 8 | from pydatajson.constants import EXCEPTION_STATUS_CODES, \ 9 | INVALID_STATUS_CODES_REGEX 10 | from pydatajson.validators.simple_validator import SimpleValidator 11 | 12 | 13 | class UrlValidator(SimpleValidator): 14 | 15 | def __init__(self, catalog, verify_ssl, url_check_timeout, threads_count): 16 | super(UrlValidator, self).__init__(catalog) 17 | self.verify_ssl = verify_ssl 18 | self.url_check_timeout = url_check_timeout 19 | self.threads_count = 
threads_count 20 | 21 | def validate(self): 22 | raise NotImplementedError 23 | 24 | def is_working_url(self, url): 25 | try: 26 | response = requests.head(url, 27 | timeout=self.url_check_timeout, 28 | verify=self.verify_ssl) 29 | matches = [] 30 | if response.status_code not in EXCEPTION_STATUS_CODES: 31 | matches = \ 32 | [re.match(pattern, str(response.status_code)) is not None 33 | for pattern in INVALID_STATUS_CODES_REGEX] 34 | return True not in matches, response.status_code 35 | except Timeout: 36 | return False, 408 37 | except (RequestException, Exception): 38 | return False, None 39 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytz 2 | jsonschema==2.6.0 3 | requests 4 | # Para validar fechas y horas acorde a ISO 8601 5 | isodate==0.6.0 6 | # Para validar URIs 7 | rfc3987==1.3.7 8 | # Para exportar CSVs en unicode 9 | unicodecsv==0.14.1 10 | # Para leer y escribir XLSXs 11 | openpyxl>=2.4 12 | # Para consultar programáticamente la API de CKAN 13 | ckanapi==4.0 14 | urllib3 15 | Unidecode==0.4.21 16 | six 17 | python-dateutil==2.8.0 18 | requests-mock 19 | -------------------------------------------------------------------------------- /requirements_2.7.txt: -------------------------------------------------------------------------------- 1 | functools32==3.2.3.post2 -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | bumpversion==0.5.3 2 | watchdog==0.8.3 3 | flake8==2.6.0 4 | coverage==4.1 5 | sphinx==1.7.5 6 | cryptography==2.1.4 7 | PyYAML 8 | nose>=1.3 9 | recommonmark==0.4.0 10 | twine>=1.11 11 | sphinx-rtd-theme==0.4.0 12 | sphinxcontrib-napoleon==0.6.1 13 | mock==2.0.0;python_version<"3.4" 14 | pycallgraph 15 | setuptools>=38.6 16 | wheel>=0.31 17 | vcrpy 18 | -------------------------------------------------------------------------------- /samples/archivos-tests/excel-no-validos/catalogo-justicia-con-error-datasets.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-no-validos/catalogo-justicia-con-error-datasets.xlsx -------------------------------------------------------------------------------- /samples/archivos-tests/excel-validos/catalogo-justicia-06022017.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-validos/catalogo-justicia-06022017.xlsx -------------------------------------------------------------------------------- /samples/archivos-tests/excel-validos/catalogo-justicia-56-distribuciones.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-validos/catalogo-justicia-56-distribuciones.xlsx -------------------------------------------------------------------------------- /samples/archivos-tests/excel-validos/catalogo-justicia.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/samples/archivos-tests/excel-validos/catalogo-justicia.xlsx -------------------------------------------------------------------------------- /samples/series-tiempo/odg-total-millones-pesos-1960-trimestral.csv: -------------------------------------------------------------------------------- 1 | indice_tiempo,oferta_total_60,pib_total_pm_60,importaciones_60,demanda_total_60,consumo_60,inversion_bruta_interna_60,exportaciones_60 2 | 1970-01-01,15729,14354,1375,15729,10455,3530,1744 3 | 1970-04-01,17250,15743,1507,17250,11450,3664,2136 4 | 1970-07-01,16999,15486,1513,16999,11435,3746,1818 5 | 1970-10-01,17048,15555,1493,17048,11699,3894,1455 6 | 1971-01-01,16341,14864,1478,16341,11133,3761,1448 7 | 1971-04-01,18156,16453,1703,18156,12044,4324,1787 8 | 1971-07-01,18240,16546,1694,18240,12541,4162,1538 9 | 1971-10-01,18182,16867,1315,18182,12416,4228,1539 10 | 1972-01-01,17054,15655,1399,17054,11941,3541,1572 11 | 1972-04-01,18339,16893,1446,18339,12815,3981,1543 12 | 1972-07-01,18622,17154,1468,18622,13004,4229,1389 13 | 1972-10-01,18938,17492,1446,18938,12879,4632,1427 14 | 1973-01-01,17839,16538,1301,17839,12349,3797,1693 15 | 1973-04-01,19012,17798,1214,19012,13529,4008,1476 16 | 1973-07-01,18831,17605,1227,18831,13420,3808,1604 17 | 1973-10-01,19988,18417,1571,19988,14089,4384,1516 18 | 1974-01-01,18438,17230,1208,18438,13387,3655,1396 19 | 1974-04-01,20408,19030,1378,20408,14661,4158,1588 20 | 1974-07-01,20312,18803,1510,20312,14785,4096,1432 21 | 1974-10-01,21603,19597,2006,21603,14947,4803,1854 22 | 1975-01-01,19665,17877,1787,19665,14884,3714,1067 23 | 1975-04-01,21002,19275,1727,21002,15526,4121,1355 24 | 1975-07-01,19803,18267,1536,19803,14463,3966,1374 25 | 1975-10-01,19948,18593,1356,19948,14011,4554,1384 26 | 1976-01-01,18426,17284,1143,18426,13465,3659,1302 27 | 1976-04-01,19662,18574,1088,19662,13545,4342,1775 28 | 1976-07-01,19730,18360,1370,19730,13200,4536,1994 29 | 1976-10-01,19754,18531,1223,19754,12882,4661,2212 30 | 1977-01-01,18898,17525,1373,18898,12010,4349,2539 31 | 1977-04-01,21191,19643,1549,21191,12732,5266,3193 32 | 1977-07-01,21528,19900,1628,21528,13128,5506,2894 33 | 1977-10-01,20842,19274,1569,20842,13630,4864,2349 34 | 1978-01-01,18172,16888,1284,18172,11915,3761,2496 35 | 1978-04-01,20070,18785,1285,20070,12027,4790,3253 36 | 1978-07-01,20325,18924,1401,20325,12288,4577,3461 37 | 1978-10-01,20495,19121,1374,20495,13352,4776,2368 38 | 1979-01-01,20500,18855,1645,20500,13262,4570,2668 39 | 1979-04-01,22142,20429,1713,22142,13185,5246,3711 40 | 1979-07-01,22421,20392,2029,22421,14049,5084,3288 41 | 1979-10-01,22455,20215,2240,22455,14469,5415,2570 42 | 1980-01-01,21863,19440,2423,21863,13798,5147,2918 43 | 1980-04-01,22052,19831,2221,22052,13515,5719,2818 44 | 1980-07-01,22980,20400,2580,22980,14799,5275,2905 45 | 1980-10-01,22884,20093,2791,22884,15002,5145,2738 46 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.4.25 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:pydatajson/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | 
[aliases] 21 | test=nosetests 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | 6 | from setuptools import setup 7 | 8 | with open(os.path.abspath('README.md')) as readme_file: 9 | readme = readme_file.read() 10 | 11 | with open(os.path.abspath('HISTORY.md')) as history_file: 12 | history = history_file.read() 13 | 14 | with open(os.path.abspath("requirements.txt")) as f: 15 | requirements = [req.strip() for req in f.readlines()] 16 | 17 | with open(os.path.abspath("requirements_dev.txt")) as f: 18 | test_requirements = [req.strip() for req in f.readlines()] 19 | 20 | with open(os.path.abspath("requirements_2.7.txt")) as f: 21 | backport_requirements = [req.strip() for req in f.readlines()] 22 | 23 | setup( 24 | name='pydatajson', 25 | version='0.4.67', 26 | description="Paquete en python con herramientas para generar y validar metadatos de catálogos de datos en formato data.json.", 27 | long_description=readme + '\n\n' + history, 28 | long_description_content_type='text/markdown', 29 | author="Datos Argentina", 30 | author_email='datosargentina@jefatura.gob.ar', 31 | url='https://github.com/datosgobar/pydatajson', 32 | packages=[ 33 | 'pydatajson', 34 | ], 35 | package_dir={'pydatajson': 36 | 'pydatajson'}, 37 | include_package_data=True, 38 | install_requires=requirements, 39 | license="MIT license", 40 | zip_safe=False, 41 | keywords='pydatajson', 42 | classifiers=[ 43 | 'Development Status :: 2 - Pre-Alpha', 44 | 'Intended Audience :: Developers', 45 | 'License :: OSI Approved :: MIT License', 46 | 'Natural Language :: English', 47 | "Programming Language :: Python :: 2", 48 | 'Programming Language :: Python :: 2.7', 49 | "Programming Language :: Python :: 3", 50 | 'Programming Language :: Python :: 3.6', 51 | ], 52 | test_suite='tests', 53 | tests_require=test_requirements, 54 | extras_require={ 55 | ':python_version=="2.7"': backport_requirements 56 | }, 57 | entry_points={ 58 | 'console_scripts': [ 59 | 'pydatajson = pydatajson.__main__:main' 60 | ] 61 | } 62 | ) 63 | -------------------------------------------------------------------------------- /tests/TEST_CASES.md: -------------------------------------------------------------------------------- 1 | # Estrategia de testeo para `pydatajson` 2 | 3 | ## Tests de `is_valid_catalog` y `validate_catalog` locales 4 | 5 | Estas dos funciones son las principales herramientas de validación de archivos `data.json` (de ahora en más, "datajsons"). Para testearlas, se utilizan datajsons de prueba guardados en [`samples/`](samples/). 6 | 7 | El archivo de prueba fundamental se llama [`full_data.json`](samples/full_data.json), que contiene todas las claves (requeridas y opcionales) descritas en el [Perfil de Metadatos](https://docs.google.com/spreadsheets/d/1PqlkhB1o0u2xKDYuex3UC-UIPubSjxKCSBxfG9QhQaA/edit#gid=1493891225), con valores de ejemplo de tipo y formato correcto. Este archivo *siempre* debe pasar la validación sin errores. 8 | 9 | A partir de este archivo base, se crearon otros 26 datajsons, cada uno con una o (rara vez) unas pocas modificaciones que cubren distintas funcionalidades del validador. Por ejemplo, el archivo [`missing_dataset_title.json`](samples/missing_dataset_title.json) es idéntico a `full_data.json`, salvo que la clave `catalog["dataset"]["title"]` fue eliminada.
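A modo ilustrativo, los tests sobre estos archivos siguen un esquema como el siguiente (boceto hipotético: los nombres de los fixtures, como `self.dj` y `self.SAMPLES_DIR`, pueden variar respecto del archivo real de tests):

```python
def test_validity_of_missing_dataset_title(self):
    """Un catálogo con un dataset sin title debe ser inválido."""
    catalog = os.path.join(self.SAMPLES_DIR, "missing_dataset_title.json")
    self.assertFalse(self.dj.is_valid_catalog(catalog))
```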
10 | 11 | Cada uno de ellos se utiliza en una función de testeo cuyo nombre tiene el formato `test_validity_of_[DATAJSON_FILENAME]` en el archivo [`test_pydatajson.py`](test_pydatajson.py). En caso de que el nombre del datajson no sea lo suficientemente esclarecedor respecto a su intención, la función de testeo tendrá un docstring brevísimo explicándola en cierto detalle. 12 | 13 | ### Casos de testeo **válidos**: 14 | 15 | - `full_data.json`: Ejemplo completo según las especificaciones de `paquete-apertura-datos`. 16 | - `minimum_data.json`: Idéntico a `full_data.json`, pero incluye únicamente los campos obligatorios. 17 | - `null_dataset_theme.json`, `null_field_description.json`: Idénticos a `full_data.json`, con un campo opcional faltante. 18 | 19 | ### Casos de testeo **inválidos**: 20 | 21 | Todos los demás datajsons (23) son casos de testeo inválidos, y los errores que contienen caen en una de tres categorías: 22 | - una clave requerida está ausente del catálogo, 23 | - una clave requerida u opcional está presente, pero el tipo del valor que toma no es el esperado, o 24 | - una clave requerida u opcional está presente y su valor es del tipo esperado, pero el formato no es el correcto. 25 | 26 | ## Tests de `is_valid_catalog` y `validate_catalog` remotos 27 | 28 | Como ambas funciones tienen la capacidad de validar un datajson en una ubicación remota en caso de que se les pase una URL bien formada, también se incluyen tests que ejercitan esta modalidad, como la función `test_correctness_of_accrualPeriodicity_regex`. 29 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | -------------------------------------------------------------------------------- /tests/cassetes/ckan_integration/remove_dataset/test_with_no_parametres.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: '{}' 4 | headers: 5 | Accept: ['*/*'] 6 | Accept-Encoding: ['gzip, deflate'] 7 | Connection: [keep-alive] 8 | Content-Length: ['2'] 9 | Content-Type: [application/json] 10 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 11 | method: POST 12 | uri: http://localhost:8080/api/action/package_list 13 | response: 14 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 15 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 16 | "data3_3"]}'} 17 | headers: 18 | cache-control: [no-cache] 19 | connection: [Keep-Alive] 20 | content-length: ['169'] 21 | content-type: [application/json;charset=utf-8] 22 | date: ['Tue, 06 Mar 2018 17:22:51 GMT'] 23 | keep-alive: ['timeout=5, max=100'] 24 | pragma: [no-cache] 25 | server: [Apache/2.4.7 (Ubuntu)] 26 | status: {code: 200, message: OK} 27 | - request: 28 | body: '{}' 29 | headers: 30 | Accept: ['*/*'] 31 | Accept-Encoding: ['gzip, deflate'] 32 | Connection: [keep-alive] 33 | Content-Length: ['2'] 34 | Content-Type: [application/json] 35 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 36 | method: POST 37 | uri: http://localhost:8080/api/action/package_list 38 | response: 39 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 40 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 41 | "data3_3"]}'} 42 | headers: 43 | cache-control: [no-cache] 44 | connection: [Keep-Alive] 45 | content-length: ['169'] 
46 | content-type: [application/json;charset=utf-8] 47 | date: ['Tue, 06 Mar 2018 17:22:51 GMT'] 48 | keep-alive: ['timeout=5, max=99'] 49 | pragma: [no-cache] 50 | server: [Apache/2.4.7 (Ubuntu)] 51 | status: {code: 200, message: OK} 52 | - request: 53 | body: '{}' 54 | headers: 55 | Accept: ['*/*'] 56 | Accept-Encoding: ['gzip, deflate'] 57 | Connection: [keep-alive] 58 | Content-Length: ['2'] 59 | Content-Type: [application/json] 60 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 61 | method: POST 62 | uri: http://localhost:8080/api/action/package_list 63 | response: 64 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 65 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 66 | "data3_3"]}'} 67 | headers: 68 | Cache-Control: [no-cache] 69 | Connection: [Keep-Alive] 70 | Content-Length: ['169'] 71 | Content-Type: [application/json;charset=utf-8] 72 | Date: ['Tue, 06 Mar 2018 17:24:01 GMT'] 73 | Keep-Alive: ['timeout=5, max=100'] 74 | Pragma: [no-cache] 75 | Server: [Apache/2.4.7 (Ubuntu)] 76 | status: {code: 200, message: OK} 77 | - request: 78 | body: '{}' 79 | headers: 80 | Accept: ['*/*'] 81 | Accept-Encoding: ['gzip, deflate'] 82 | Connection: [keep-alive] 83 | Content-Length: ['2'] 84 | Content-Type: [application/json] 85 | User-Agent: ['ckanapi/4.0 (+https://github.com/ckan/ckanapi)'] 86 | method: POST 87 | uri: http://localhost:8080/api/action/package_list 88 | response: 89 | body: {string: '{"help": "http://localhost:8080/api/3/action/help_show?name=package_list", 90 | "success": true, "result": ["data1_1", "data2_1", "data2_2", "data3_1", "data3_2", 91 | "data3_3"]}'} 92 | headers: 93 | Cache-Control: [no-cache] 94 | Connection: [Keep-Alive] 95 | Content-Length: ['169'] 96 | Content-Type: [application/json;charset=utf-8] 97 | Date: ['Tue, 06 Mar 2018 17:24:01 GMT'] 98 | Keep-Alive: ['timeout=5, max=99'] 99 | Pragma: [no-cache] 100 | Server: [Apache/2.4.7 (Ubuntu)] 101 | status: {code: 200, message: OK} 102 | version: 1 103 | -------------------------------------------------------------------------------- /tests/cassetes/test_generate_datasets_report.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Accept: ['*/*'] 6 | Accept-Encoding: ['gzip, deflate'] 7 | Connection: [keep-alive] 8 | User-Agent: [python-requests/2.11.1] 9 | method: GET 10 | uri: http://181.209.63.71/data.json 11 | response: 12 | body: {string: !!python/unicode "{\n \"title\": \"Andino\", \n \"description\": 13 | \"Portal Andino Demo\", \n \"superThemeTaxonomy\": \"http://datos.gob.ar/superThemeTaxonomy.json\", 14 | \n \"publisher\": {\n \"mbox\": \"\", \n \"name\": \"\"\n }, \n \"themeTaxonomy\": 15 | [\n {\n \"label\": \"Tema.demo\", \n \"id\": \"tema-demo\", \n 16 | \ \"description\": \"Ejemplo de un tema\"\n }\n ], \n \"dataset\": 17 | [\n {\n \"@type\": \"dcat:Dataset\", \n \"title\": \"Dataset 18 | Demo\", \n \"description\": \"Este es un dataset de ejemplo, se incluye 19 | como material DEMO y no contiene ningun valor estadistico.\", \n \"modified\": 20 | \"2016-11-30T22:25:33.503104\", \n \"accessLevel\": \"public\", \n \"identifier\": 21 | \"6897d435-8084-4685-b8ce-304b190755e4\", \n \"issued\": \"2016-11-30T22:22:48.635757\", 22 | \n \"landingPage\": \"https://github.com/datosgobar/portal-andino\", 23 | \n \"license\": \"Creative Commons Attribution\", \n \"publisher\": 24 | {\n \"mbox\": 
\"datosargentina@jefatura.gob.ar\", \n \"name\": \"Andino\"\n 25 | \ }, \n \"contactPoint\": {\n \"@type\": \"vcard:Contact\", 26 | \n \"fn\": \"Andino\", \n \"hasEmail\": \"mailto:datosargentina@jefatura.gob.ar\"\n 27 | \ }, \n \"distribution\": [\n {\n \"@type\": \"dcat:Distribution\", 28 | \n \"format\": \"CSV\", \n \"title\": \"Recurso de Ejemplo\", 29 | \n \"description\": \"Este es un recurso DEMO, se incluye como material 30 | de demostracion y no posee ningun valor estadistico.\", \n \"issued\": 31 | \"2016-11-30T22:24:01.259909\", \n \"modified\": \"2016-11-30T22:24:01.225394\", 32 | \n \"license\": \"cc-by\", \n \"accessURL\": \"http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/archivo/6145bf1c-a2fb-4bb5-b090-bb25f8419198\", 33 | \n \"downloadURL\": \"http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/resource/6145bf1c-a2fb-4bb5-b090-bb25f8419198/download/estructura-organica-3.csv\"\n 34 | \ }\n ], \n \"keyword\": [\n \"andino\", \n \"demo\", 35 | \n \"plan de datos\"\n ], \n \"superTheme\": [\n \"TECH\"\n 36 | \ ], \n \"accrualPeriodicity\": \"eventual\", \n \"language\": 37 | [\n \"spa\"\n ], \n \"theme\": [\n \"Tema.demo\"\n 38 | \ ]\n }\n ]\n}"} 39 | headers: 40 | connection: [keep-alive] 41 | content-length: ['2120'] 42 | content-type: [application/json; charset=UTF-8] 43 | date: ['Wed, 21 Dec 2016 18:39:34 GMT'] 44 | server: [nginx/1.4.6 (Ubuntu)] 45 | status: {code: 200, message: OK} 46 | version: 1 47 | -------------------------------------------------------------------------------- /tests/context.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import sys 5 | 6 | import pydatajson 7 | import pydatajson.search 8 | import pydatajson.backup 9 | 10 | sys.path.insert(0, os.path.abspath(".")) 11 | sys.path.insert(0, os.path.abspath("..")) 12 | -------------------------------------------------------------------------------- /tests/profiling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Decorador auxiliar 5 | 6 | Debe instalarse 'graphviz' en el sistema para que funcione. 
7 | 8 | Ubuntu: sudo apt-get install graphviz 9 | Mac: brew install graphviz 10 | """ 11 | 12 | from __future__ import unicode_literals 13 | from __future__ import print_function 14 | from __future__ import with_statement 15 | import os 16 | import sys 17 | import vcr 18 | 19 | from functools import wraps 20 | from pycallgraph import PyCallGraph 21 | from pycallgraph import Config 22 | from pycallgraph import GlobbingFilter 23 | from pycallgraph.output import GraphvizOutput 24 | 25 | # módulo de ejemplo que se quiere analizar 26 | import pydatajson 27 | 28 | SAMPLES_DIR = os.path.join("tests", "samples") 29 | TEMP_DIR = os.path.join("tests", "temp") 30 | PROFILING_DIR = os.path.join("tests", "profiling") 31 | if not os.path.exists(PROFILING_DIR): os.makedirs(PROFILING_DIR) 32 | 33 | VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'), 34 | cassette_library_dir=os.path.join( 35 | "tests", "cassetes", "profiling"), 36 | record_mode='once') 37 | 38 | 39 | def profile(profiling_result_path): 40 | """Decorador de una función para que se corra haciendo profiling.""" 41 | 42 | def fn_decorator(fn): 43 | """Decora una función con el análisis de profiling.""" 44 | 45 | @wraps(fn) 46 | def fn_decorated(*args, **kwargs): 47 | """Crea la función decorada.""" 48 | 49 | graphviz = GraphvizOutput() 50 | graphviz.output_file = profiling_result_path 51 | 52 | with PyCallGraph(output=graphviz, config=None): 53 | fn(*args, **kwargs) 54 | 55 | return fn_decorated 56 | return fn_decorator 57 | 58 | 59 | @VCR.use_cassette() 60 | @profile("tests/profiling/profiling_test.png") 61 | def main(): 62 | """Hace un profiling de la función para guardar un catálogo en Excel""" 63 | 64 | # ejemplo liviano 65 | # original_catalog = pydatajson.DataJson( 66 | # os.path.join(SAMPLES_DIR, "catalogo_justicia.json")) 67 | 68 | # ejemplo grande 69 | datasets_cant = 200 70 | original_catalog = pydatajson.DataJson( 71 | "http://infra.datos.gob.ar/catalog/sspm/data.json") 72 | original_catalog["dataset"] = original_catalog["dataset"][:datasets_cant] 73 | 74 | tmp_xlsx = os.path.join(TEMP_DIR, "xlsx_catalog.xlsx") 75 | original_catalog.to_xlsx(tmp_xlsx) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /tests/results/catalog_readme.md: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: Cosechando Datos Argentina 3 | 4 | ## Información General 5 | 6 | - **Autor**: Ministerio de Modernización 7 | - **Correo Electrónico**: datosargentina@jefatura.gob.ar 8 | - **Ruta del catálogo**: tests/samples/several_datasets_for_harvest.json 9 | - **Nombre del catálogo**: Cosechando Datos Argentina 10 | - **Descripción**: 11 | 12 | > Datasets para reporte pre cosecha 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: ERROR 17 | - **Estado metadatos catálogo**: OK 18 | - **Cantidad Total de Datasets**: 3 19 | - **Cantidad Total de Distribuciones**: 6 20 | 21 | - **Cantidad de Datasets Federados**: 0 22 | - **Cantidad de Datasets NO Federados**: 3 23 | - **Porcentaje de Datasets NO Federados**: 1.0 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | 28 | 29 | ## Datasets no federados 30 | 31 | - [Sistema de contrataciones electrónicas UNO](None) 32 | - [Sistema de contrataciones electrónicas DOS](None) 33 | - [Sistema de contrataciones electrónicas TRES](None) 34 | 35 | ## Datasets federados 36 | 37 | 38 | 39 | 
-------------------------------------------------------------------------------- /tests/results/catalog_readme.md: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: Cosechando Datos Argentina 3 | 4 | ## Información General 5 | 6 | - **Autor**: Ministerio de Modernización 7 | - **Correo Electrónico**: datosargentina@jefatura.gob.ar 8 | - **Ruta del catálogo**: tests/samples/several_datasets_for_harvest.json 9 | - **Nombre del catálogo**: Cosechando Datos Argentina 10 | - **Descripción**: 11 | 12 | > Datasets para reporte pre cosecha 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: ERROR 17 | - **Estado metadatos catálogo**: OK 18 | - **Cantidad Total de Datasets**: 3 19 | - **Cantidad Total de Distribuciones**: 6 20 | 21 | - **Cantidad de Datasets Federados**: 0 22 | - **Cantidad de Datasets NO Federados**: 3 23 | - **Porcentaje de Datasets NO Federados**: 1.0 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | 28 | 29 | ## Datasets no federados 30 | 31 | - [Sistema de contrataciones electrónicas UNO](None) 32 | - [Sistema de contrataciones electrónicas DOS](None) 33 | - [Sistema de contrataciones electrónicas TRES](None) 34 | 35 | ## Datasets federados 36 | 37 | 38 | 39 | ## Reporte 40 | 41 | Por favor, consulte el informe [`datasets.csv`](datasets.csv). 42 | -------------------------------------------------------------------------------- /tests/results/datasets_filter_out.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "publisher": { 4 | "mbox": "onc@modernizacion.gob.ar", 5 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones" 6 | }, 7 | "landingPage": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra", 8 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra) (sin datos)", 9 | "superTheme": [ 10 | "ECON" 11 | ], 12 | "title": "Sistema de contrataciones electrónicas (sin datos)", 13 | "issued": "2016-04-14T19:48:05.433640-03:00", 14 | "temporal": "2015-01-01/2015-12-31", 15 | "modified": "2016-04-19T19:48:05.433640-03:00", 16 | "language": [ 17 | "spa" 18 | ], 19 | "theme": [ 20 | "contrataciones", 21 | "compras", 22 | "convocatorias" 23 | ], 24 | "keyword": [ 25 | "bienes", 26 | "compras", 27 | "contrataciones", 28 | "bienes y compras" 29 | ], 30 | "accrualPeriodicity": "R/P1Y", 31 | "source": "Ministerio de modernizacion", 32 | "spatial": "ARG", 33 | "license": "Open Data Commons Open Database License 1.0", 34 | "contactPoint": { 35 | "hasEmail": "onc-compraselectronicas@modernizacion.gob.ar", 36 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones. Dirección de Compras Electrónicas." 37 | }, 38 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 39 | "distribution": [ 40 | { 41 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 42 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 43 | "rights": "Derechos especificados en la licencia.", 44 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 45 | "license": "Open Data Commons Open Database License 1.0", 46 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 47 | "title": "Convocatorias abiertas durante el año 2015", 48 | "byteSize": 5120, 49 | "fileName": "convocatoriasabiertasduranteelao.pdf", 50 | "format": "PDF", 51 | "type": "documentation", 52 | "mediaType": "application/pdf", 53 | "modified": "2016-04-19T19:48:05.433640-03:00", 54 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 55 | "issued": "2016-04-14T19:48:05.433640-03:00" 56 | } 57 | ] 58 | } 59 | ] 60 | -------------------------------------------------------------------------------- /tests/results/datasets_meta_field.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Sistema de contrataciones electrónicas", 3 | "Sistema de contrataciones electrónicas (sin datos)" 4 | ] 5 | -------------------------------------------------------------------------------- /tests/results/distributions.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la
licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "title": "Convocatorias abiertas durante el año 2015", 8 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 9 | "byteSize": 5120, 10 | "type": "file", 11 | "format": "CSV", 12 | "mediaType": "text/csv", 13 | "modified": "2016-04-19T19:48:05.433640-03:00", 14 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 15 | "field": [ 16 | { 17 | "description": "Identificador único del procedimiento de contratación", 18 | "type": "integer", 19 | "id": "proc12", 20 | "title": "procedimiento_id" 21 | }, 22 | { 23 | "type": "integer", 24 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 25 | "title": "organismo_unidad_operativa_contrataciones_id" 26 | }, 27 | { 28 | "type": "integer", 29 | "description": "Identificador único de la unidad operativa de contrataciones", 30 | "title": "unidad_operativa_contrataciones_id" 31 | }, 32 | { 33 | "type": "string", 34 | "description": "Organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 35 | "title": "organismo_unidad_operativa_contrataciones_desc" 36 | }, 37 | { 38 | "type": "string", 39 | "description": "Unidad operativa de contrataciones.", 40 | "title": "unidad_operativa_contrataciones_desc" 41 | }, 42 | { 43 | "type": "string", 44 | "description": "Tipo de procedimiento al que se adecua la contratación.", 45 | "title": "tipo_procedimiento_contratacion" 46 | }, 47 | { 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "type": "date", 54 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 55 | "title": "fecha_publicacion_convocatoria" 56 | }, 57 | { 58 | "type": "string", 59 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 60 | "title": "modalidad_convocatoria" 61 | }, 62 | { 63 | "type": "string", 64 | "description": "Clase de la convocatoria.", 65 | "title": "clase_convocatoria" 66 | }, 67 | { 68 | "type": "string", 69 | "description": "Objeto/objetivo de la convocatoria", 70 | "title": "objeto_convocatoria" 71 | } 72 | ], 73 | "issued": "2016-04-14T19:48:05.433640-03:00", 74 | "fileName": "convocatoriasabiertasduranteelao.csv", 75 | "identifier": "1.1" 76 | }, 77 | { 78 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 79 | "rights": "Derechos especificados en la licencia.", 80 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 81 | "license": "Open Data Commons Open Database License 1.0", 82 | "title": "Convocatorias abiertas durante el año 2015", 83 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 84 | "byteSize": 5120, 85 | "type": "documentation", 86 | "format": "PDF", 87 | "mediaType": "application/pdf", 88 | "modified": "2016-04-19T19:48:05.433640-03:00", 89 | "downloadURL": 
"http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 90 | "issued": "2016-04-14T19:48:05.433640-03:00", 91 | "fileName": "convocatoriasabiertasduranteelao.pdf", 92 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 93 | } 94 | ] 95 | -------------------------------------------------------------------------------- /tests/results/distributions_filter_in.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "title": "Convocatorias abiertas durante el año 2015", 8 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 9 | "byteSize": 5120, 10 | "type": "file", 11 | "format": "CSV", 12 | "mediaType": "text/csv", 13 | "modified": "2016-04-19T19:48:05.433640-03:00", 14 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 15 | "field": [ 16 | { 17 | "description": "Identificador único del procedimiento de contratación", 18 | "type": "integer", 19 | "id": "proc12", 20 | "title": "procedimiento_id" 21 | }, 22 | { 23 | "type": "integer", 24 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 25 | "title": "organismo_unidad_operativa_contrataciones_id" 26 | }, 27 | { 28 | "type": "integer", 29 | "description": "Identificador único de la unidad operativa de contrataciones", 30 | "title": "unidad_operativa_contrataciones_id" 31 | }, 32 | { 33 | "type": "string", 34 | "description": "Organismo que realiza la convocatoria. 
Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 35 | "title": "organismo_unidad_operativa_contrataciones_desc" 36 | }, 37 | { 38 | "type": "string", 39 | "description": "Unidad operativa de contrataciones.", 40 | "title": "unidad_operativa_contrataciones_desc" 41 | }, 42 | { 43 | "type": "string", 44 | "description": "Tipo de procedimiento al que se adecua la contratación.", 45 | "title": "tipo_procedimiento_contratacion" 46 | }, 47 | { 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "type": "date", 54 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 55 | "title": "fecha_publicacion_convocatoria" 56 | }, 57 | { 58 | "type": "string", 59 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 60 | "title": "modalidad_convocatoria" 61 | }, 62 | { 63 | "type": "string", 64 | "description": "Clase de la convocatoria.", 65 | "title": "clase_convocatoria" 66 | }, 67 | { 68 | "type": "string", 69 | "description": "Objeto/objetivo de la convocatoria", 70 | "title": "objeto_convocatoria" 71 | } 72 | ], 73 | "issued": "2016-04-14T19:48:05.433640-03:00", 74 | "fileName": "convocatoriasabiertasduranteelao.csv", 75 | "identifier": "1.1" 76 | }, 77 | { 78 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 79 | "rights": "Derechos especificados en la licencia.", 80 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 81 | "license": "Open Data Commons Open Database License 1.0", 82 | "title": "Convocatorias abiertas durante el año 2015", 83 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 84 | "byteSize": 5120, 85 | "type": "documentation", 86 | "format": "PDF", 87 | "mediaType": "application/pdf", 88 | "modified": "2016-04-19T19:48:05.433640-03:00", 89 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 90 | "issued": "2016-04-14T19:48:05.433640-03:00", 91 | "fileName": "convocatoriasabiertasduranteelao.pdf", 92 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 93 | } 94 | ] 95 | -------------------------------------------------------------------------------- /tests/results/distributions_filter_out.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "fileName": "convocatoriasabiertasduranteelao.pdf", 8 | "title": "Convocatorias abiertas durante el año 2015", 9 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 10 | "byteSize": 5120, 11 | "type": "documentation", 12 | "format": "PDF", 13 | "mediaType": "application/pdf", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "downloadURL": 
"http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 16 | "issued": "2016-04-14T19:48:05.433640-03:00", 17 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420" 18 | } 19 | ] 20 | -------------------------------------------------------------------------------- /tests/results/distributions_meta_field.json: -------------------------------------------------------------------------------- 1 | [ 2 | "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 3 | "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a" 4 | ] 5 | -------------------------------------------------------------------------------- /tests/results/distributions_only_time_series.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "https://www.minhacienda.gob.ar/secretarias/politica-economica/programacion-macroeconomica/", 4 | "scrapingFileSheet": "1.2 OyD real s.e.", 5 | "description": "Oferta y Demanda Globales por componente, a precios de comprador, en millones de pesos de 1993 y valores anuales desestacionalizados.", 6 | "format": "CSV", 7 | "dataset_identifier": "1", 8 | "issued": "2017-09-28T00:00:00", 9 | "title": "Oferta y Demanda Global. Precios constantes desestacionalizados. Base 1993. Valores trimestrales.", 10 | "modified": "2017-09-28T00:00:00", 11 | "fileName": "oferta-demanda-globales-datos-desestacionalizados-valores-trimestrales-base-1993.csv", 12 | "field": [ 13 | { 14 | "title": "indice_tiempo", 15 | "scrapingIdentifierCell": "A46", 16 | "specialTypeDetail": "R/P3M", 17 | "specialType": "time_index", 18 | "type": "date", 19 | "id": "1.2_IT_D_1993_T_13", 20 | "scrapingDataStartCell": "A47" 21 | }, 22 | { 23 | "description": "PIB a precios de comprador, en millones de pesos de 1993 y valores anuales.", 24 | "title": "oferta_global_pib", 25 | "scrapingIdentifierCell": "B46", 26 | "units": "Millones de pesos a precios de 1993", 27 | "type": "number", 28 | "id": "1.2_OGP_D_1993_T_17", 29 | "scrapingDataStartCell": "B47" 30 | }, 31 | { 32 | "description": "Importación a precios de comprador, en millones de pesos de 1993 y valores anuales.", 33 | "title": "oferta_global_importacion", 34 | "scrapingIdentifierCell": "C46", 35 | "units": "Millones de pesos a precios de 1993", 36 | "type": "number", 37 | "id": "1.2_OGI_D_1993_T_25", 38 | "scrapingDataStartCell": "C47" 39 | }, 40 | { 41 | "description": "Oferta global total a precios de comprador, en millones de pesos de 1993 y valores anuales.", 42 | "title": "demanda_global_exportacion", 43 | "scrapingIdentifierCell": "D46", 44 | "units": "Millones de pesos a precios de 1993", 45 | "type": "number", 46 | "id": "1.2_DGE_D_1993_T_26", 47 | "scrapingDataStartCell": "D47" 48 | }, 49 | { 50 | "description": "Consumo privado, en millones de pesos de 1993 y valores anuales.", 51 | "title": "demanda_global_ibif", 52 | "scrapingIdentifierCell": "E46", 53 | "units": "Millones de pesos a precios de 1993", 54 | "type": "number", 55 | "id": "1.2_DGI_D_1993_T_19", 56 | "scrapingDataStartCell": "E47" 57 | }, 58 | { 59 | "description": "Consumo publico, en millones de pesos de 1993 y valores anuales.", 60 | "title": "demanda_global_consumo_priv", 61 | "scrapingIdentifierCell": "F46", 62 | "units": "Millones de pesos a precios de 1993", 63 | "type": 
"number", 64 | "scrapingDataStartCell": "F47" 65 | }, 66 | { 67 | "description": "Inversion bruta interna fija, en millones de pesos de 1993 y valores anuales.", 68 | "title": "demanda_global_consumo_publico", 69 | "scrapingIdentifierCell": "G46", 70 | "units": "Millones de pesos a precios de 1993", 71 | "type": "string", 72 | "id": "1.2_DGCP_D_1993_T_30", 73 | "scrapingDataStartCell": "G47" 74 | } 75 | ], 76 | "draft": false, 77 | "units": "Millones de pesos, a precios de 1993", 78 | "identifier": "1.2", 79 | "scrapingFileURL": "https://www.economia.gob.ar/download/infoeco/actividad_ied.xlsx" 80 | } 81 | ] 82 | -------------------------------------------------------------------------------- /tests/results/empty_optional_string.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/expected_datasets_report.csv: -------------------------------------------------------------------------------- 1 | catalog_metadata_url,catalog_title,catalog_description,valid_catalog_metadata,dataset_index,dataset_title,dataset_accrualPeriodicity,valid_dataset_metadata,harvest,dataset_description,dataset_publisher_name,dataset_superTheme,dataset_theme,dataset_landingPage,distributions_list 2 | tests/samples/full_data.json,Datos Argentina,Portal de Datos Abiertos del Gobierno de la República Argentina,1,0,Sistema de contrataciones electrónicas,R/P1Y,1,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,"contrataciones, compras, convocatorias",http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 3 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,0,Sistema de contrataciones electrónicas,R/P1Y,0,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),,"ECON, HEAL",,,"""Convocatorias 2015"": None 4 | ""Convocatorias 2016"": [u'http://186.33.211.253/dataset2.csv'] 5 | ""Convocatorias 2017"": 444444 6 | ""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset3.csv" 7 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,1,Sistema de contrataciones electrónicas,R/P1Y,1,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 8 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,2,Sistema de contrataciones electrónicas,R/P1Y,1,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 9 | http://181.209.63.71/data.json,Andino,Portal Andino Demo,0,0,Dataset Demo,eventual,1,0,"Este es un dataset de ejemplo, se incluye como material DEMO y no contiene ningun valor estadistico.",Andino,TECH,Tema.demo,https://github.com/datosgobar/portal-andino,"""Recurso de Ejemplo"": http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/resource/6145bf1c-a2fb-4bb5-b090-bb25f8419198/download/estructura-organica-3.csv" 10 | -------------------------------------------------------------------------------- /tests/results/expected_harvester_config.csv: -------------------------------------------------------------------------------- 1 | catalog_metadata_url,dataset_title,dataset_accrualPeriodicity,job_name 2 | tests/samples/full_data.json,Sistema de contrataciones electrónicas,R/P1Y,modernizacion 3 | tests/samples/several_datasets_for_harvest.json,Sistema de contrataciones electrónicas,R/P1Y,modernizacion 4 | tests/samples/several_datasets_for_harvest.json,Sistema de contrataciones electrónicas,R/P1Y,modernizacion 5 | -------------------------------------------------------------------------------- /tests/results/fields.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "distribution_identifier": "1.1", 4 | "description": "Identificador único del procedimiento de contratación", 5 | "title": "procedimiento_id", 6 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 7 | "type": "integer", 8 | "id": "proc12" 9 | }, 10 | { 11 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 12 | "distribution_identifier": "1.1", 13 | "type": "integer", 14 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 15 | "title": "organismo_unidad_operativa_contrataciones_id" 16 | }, 17 | { 18 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 19 | "distribution_identifier": "1.1", 20 | "type": "integer", 21 | "description": "Identificador único de la unidad operativa de contrataciones", 22 | "title": "unidad_operativa_contrataciones_id" 23 | }, 24 | { 25 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 26 | "distribution_identifier": "1.1", 27 | "type": "string", 28 | "description": "Organismo que realiza la convocatoria. 
Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 29 | "title": "organismo_unidad_operativa_contrataciones_desc" 30 | }, 31 | { 32 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 33 | "distribution_identifier": "1.1", 34 | "type": "string", 35 | "description": "Unidad operativa de contrataciones.", 36 | "title": "unidad_operativa_contrataciones_desc" 37 | }, 38 | { 39 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 40 | "distribution_identifier": "1.1", 41 | "type": "string", 42 | "description": "Tipo de procedimiento al que se adecua la contratación.", 43 | "title": "tipo_procedimiento_contratacion" 44 | }, 45 | { 46 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 47 | "distribution_identifier": "1.1", 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 54 | "distribution_identifier": "1.1", 55 | "type": "date", 56 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 57 | "title": "fecha_publicacion_convocatoria" 58 | }, 59 | { 60 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 61 | "distribution_identifier": "1.1", 62 | "type": "string", 63 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 64 | "title": "modalidad_convocatoria" 65 | }, 66 | { 67 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 68 | "distribution_identifier": "1.1", 69 | "type": "string", 70 | "description": "Clase de la convocatoria.", 71 | "title": "clase_convocatoria" 72 | }, 73 | { 74 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 75 | "distribution_identifier": "1.1", 76 | "type": "string", 77 | "description": "Objeto/objetivo de la convocatoria", 78 | "title": "objeto_convocatoria" 79 | } 80 | ] 81 | -------------------------------------------------------------------------------- /tests/results/fields_filter_in.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "proc12", 4 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 5 | "distribution_identifier": "1.1", 6 | "type": "integer", 7 | "description": "Identificador único del procedimiento de contratación", 8 | "title": "procedimiento_id" 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /tests/results/fields_filter_out.json: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /tests/results/fields_meta_field.json: -------------------------------------------------------------------------------- 1 | [ 2 | "integer", 3 | "integer", 4 | "integer", 5 | "string", 6 | "string", 7 | "string", 8 | "date", 9 | "date", 10 | "string", 11 | "string", 12 | "string" 13 | ] 14 | -------------------------------------------------------------------------------- /tests/results/full_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de 
contrataciones electrónicas" 16 | }, 17 | { 18 | "status": "OK", 19 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 20 | "list_index": 1, 21 | "errors": [], 22 | "title": "Sistema de contrataciones electrónicas (sin datos)" 23 | } 24 | ] 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /tests/results/get_distribution.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "title": "Convocatorias abiertas durante el año 2015", 8 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 9 | "byteSize": 5120, 10 | "type": "file", 11 | "format": "CSV", 12 | "mediaType": "text/csv", 13 | "modified": "2016-04-19T19:48:05.433640-03:00", 14 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 15 | "field": [ 16 | { 17 | "description": "Identificador único del procedimiento de contratación", 18 | "type": "integer", 19 | "id": "proc12", 20 | "title": "procedimiento_id" 21 | }, 22 | { 23 | "type": "integer", 24 | "description": "Identificador único del organismo que realiza la convocatoria. Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 25 | "title": "organismo_unidad_operativa_contrataciones_id" 26 | }, 27 | { 28 | "type": "integer", 29 | "description": "Identificador único de la unidad operativa de contrataciones", 30 | "title": "unidad_operativa_contrataciones_id" 31 | }, 32 | { 33 | "type": "string", 34 | "description": "Organismo que realiza la convocatoria. 
Organismo de máximo nivel jerárquico al que pertenece la unidad operativa de contrataciones.", 35 | "title": "organismo_unidad_operativa_contrataciones_desc" 36 | }, 37 | { 38 | "type": "string", 39 | "description": "Unidad operativa de contrataciones.", 40 | "title": "unidad_operativa_contrataciones_desc" 41 | }, 42 | { 43 | "type": "string", 44 | "description": "Tipo de procedimiento al que se adecua la contratación.", 45 | "title": "tipo_procedimiento_contratacion" 46 | }, 47 | { 48 | "type": "date", 49 | "description": "Año en el que se inició el proceso de la convocatoria.", 50 | "title": "ejercicio_procedimiento_anio" 51 | }, 52 | { 53 | "type": "date", 54 | "description": "Fecha de publicación de la convocatoria en formato AAAA-MM-DD, ISO 8601.", 55 | "title": "fecha_publicacion_convocatoria" 56 | }, 57 | { 58 | "type": "string", 59 | "description": "Modalidad bajo la cual se realiza la convocatoria.", 60 | "title": "modalidad_convocatoria" 61 | }, 62 | { 63 | "type": "string", 64 | "description": "Clase de la convocatoria.", 65 | "title": "clase_convocatoria" 66 | }, 67 | { 68 | "type": "string", 69 | "description": "Objeto/objetivo de la convocatoria", 70 | "title": "objeto_convocatoria" 71 | } 72 | ], 73 | "issued": "2016-04-14T19:48:05.433640-03:00", 74 | "fileName": "convocatoriasabiertasduranteelao.csv", 75 | "identifier": "1.1" 76 | }, 77 | { 78 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 79 | "rights": "Derechos especificados en la licencia.", 80 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 81 | "license": "Open Data Commons Open Database License 1.0", 82 | "title": "Convocatorias abiertas durante el año 2015", 83 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 84 | "byteSize": 5120, 85 | "type": "documentation", 86 | "format": "PDF", 87 | "mediaType": "application/pdf", 88 | "modified": "2016-04-19T19:48:05.433640-03:00", 89 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 90 | "issued": "2016-04-14T19:48:05.433640-03:00", 91 | "fileName": "convocatoriasabiertasduranteelao.pdf", 92 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 93 | } 94 | ] 95 | -------------------------------------------------------------------------------- /tests/results/get_distribution_of_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 3 | "identifier": "d_7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 4 | "rights": "Derechos especificados en la licencia.", 5 | "description": "Listado de las convocatorias abiertas durante el año 2015 en el sistema de contrataciones electrónicas", 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "fileName": "convocatoriasabiertasduranteelao.pdf", 8 | "title": "Convocatorias abiertas durante el año 2015", 9 | "byteSize": 5120, 10 | "type": "documentation", 11 | "fileName": "convocatoriasabiertasduranteelao.pdf", 12 | "format": "PDF", 13 | "mediaType": "application/pdf", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "downloadURL": 
"http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.pdf", 16 | "issued": "2016-04-14T19:48:05.433640-03:00", 17 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c420" 18 | } 19 | -------------------------------------------------------------------------------- /tests/results/get_field.json: -------------------------------------------------------------------------------- 1 | { 2 | "distribution_identifier": "1.1", 3 | "dataset_identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 4 | "id": "proc12", 5 | "type": "integer", 6 | "description": "Identificador único del procedimiento de contratación", 7 | "title": "procedimiento_id" 8 | } 9 | -------------------------------------------------------------------------------- /tests/results/get_theme.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "adjudicaciones", 3 | "label": "Adjudicaciones", 4 | "description": "Datasets sobre licitaciones adjudicadas." 5 | } 6 | -------------------------------------------------------------------------------- /tests/results/invalid_dataset_theme_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "ERROR", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [ 15 | { 16 | "instance": "contrataciones", 17 | "validator": "anyOf", 18 | "path": [ 19 | "dataset", 20 | 0, 21 | "theme" 22 | ], 23 | "message": "u'contrataciones' is not valid under any of the given schemas", 24 | "error_code": 2, 25 | "validator_value": [ 26 | { 27 | "type": "array" 28 | }, 29 | { 30 | "type": "null" 31 | } 32 | ] 33 | } 34 | ], 35 | "title": "Sistema de contrataciones electrónicas" 36 | } 37 | ] 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /tests/results/invalid_field_description_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "ERROR", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [ 15 | { 16 | "instance": 123, 17 | "validator": "anyOf", 18 | "path": [ 19 | "dataset", 20 | 0, 21 | "distribution", 22 | 0, 23 | "field", 24 | 0, 25 | "description" 26 | ], 27 | "message": "123 is not valid under any of the given schemas", 28 | "error_code": 2, 29 | "validator_value": [ 30 | { 31 | "type": "string" 32 | }, 33 | { 34 | "type": "null" 35 | } 36 | ] 37 | } 38 | ], 39 | "title": "Sistema de contrataciones electrónicas" 40 | } 41 | ] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /tests/results/invalid_multiple_fields_type.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "ERROR", 6 | "errors": [ 7 | { 8 | "instance": [ 9 | "Ministerio de Modernización", 10 | "datosargentina@jefatura.gob.ar" 11 | ], 12 | "validator": "type", 13 | "path": [ 14 | "publisher" 15 | ], 16 | "message": "[u'Ministerio de Modernizaci\\xf3n', u'datosargentina@jefatura.gob.ar'] 
is not of type u'object'", 17 | "error_code": 2, 18 | "validator_value": "object" 19 | } 20 | ], 21 | "title": "Datos Argentina" 22 | }, 23 | "dataset": [ 24 | { 25 | "status": "ERROR", 26 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 27 | "list_index": 0, 28 | "errors": [ 29 | { 30 | "instance": "5120", 31 | "validator": "anyOf", 32 | "path": [ 33 | "dataset", 34 | 0, 35 | "distribution", 36 | 0, 37 | "byteSize" 38 | ], 39 | "message": "u'5120' is not valid under any of the given schemas", 40 | "error_code": 2, 41 | "validator_value": [ 42 | { 43 | "type": "integer" 44 | }, 45 | { 46 | "type": "null" 47 | }, 48 | { 49 | "type": "string", 50 | "maxLength": 0 51 | } 52 | ] 53 | } 54 | ], 55 | "title": "Sistema de contrataciones electrónicas" 56 | } 57 | ] 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /tests/results/minimum_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "1", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/mismatched_downloadURL_and_format.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electr\u00f3nicas" 16 | }, 17 | { 18 | "status": "OK", 19 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 20 | "list_index": 1, 21 | "errors": [], 22 | "title": "Sistema de contrataciones electr\u00f3nicas (sin datos)" 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /tests/results/mismatched_fileName_and_format.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electr\u00f3nicas" 16 | }, 17 | { 18 | "status": "OK", 19 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 20 | "list_index": 1, 21 | "errors": [], 22 | "title": "Sistema de contrataciones electr\u00f3nicas (sin datos)" 23 | } 24 | ] 25 | } 26 | } -------------------------------------------------------------------------------- /tests/results/multiple_missing_descriptions.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "ERROR", 3 | "error": { 4 | "catalog": { 5 | "status": "ERROR", 6 | "errors": [ 7 | { 8 | "instance": null, 9 | "validator": "required", 10 | "path": [], 11 | "message": "'description' is a required property", 12 | "error_code": 1, 13 | "validator_value": [ 14 | "dataset", 15 | "title", 16 | "description", 17 | "publisher", 18 | "superThemeTaxonomy" 19 | ] 20 | } 21 | ], 22 | "title": "Datos Argentina" 23 | }, 24 | 
"dataset": [ 25 | { 26 | "status": "ERROR", 27 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 28 | "list_index": 0, 29 | "errors": [ 30 | { 31 | "instance": null, 32 | "validator": "required", 33 | "path": [ 34 | "dataset", 35 | 0 36 | ], 37 | "message": "'description' is a required property", 38 | "error_code": 1, 39 | "validator_value": [ 40 | "title", 41 | "description", 42 | "publisher", 43 | "superTheme", 44 | "distribution", 45 | "accrualPeriodicity", 46 | "issued", 47 | "identifier" 48 | ] 49 | } 50 | ], 51 | "title": "Sistema de contrataciones electrónicas" 52 | } 53 | ] 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /tests/results/null_dataset_theme.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/null_field_description.json: -------------------------------------------------------------------------------- 1 | { 2 | "status": "OK", 3 | "error": { 4 | "catalog": { 5 | "status": "OK", 6 | "errors": [], 7 | "title": "Datos Argentina" 8 | }, 9 | "dataset": [ 10 | { 11 | "status": "OK", 12 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 13 | "list_index": 0, 14 | "errors": [], 15 | "title": "Sistema de contrataciones electrónicas" 16 | } 17 | ] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /tests/results/null_indicators_readme.md: -------------------------------------------------------------------------------- 1 | 2 | # Catálogo: Cosechando Datos Argentina 3 | 4 | ## Información General 5 | 6 | - **Autor**: Ministerio de Modernización 7 | - **Correo Electrónico**: datosargentina@jefatura.gob.ar 8 | - **Ruta del catálogo**: tests/samples/several_datasets_for_harvest.json 9 | - **Nombre del catálogo**: Cosechando Datos Argentina 10 | - **Descripción**: 11 | 12 | > Datasets para reporte pre cosecha 13 | 14 | ## Estado de los metadatos y cantidad de recursos 15 | 16 | - **Estado metadatos globales**: ERROR 17 | - **Estado metadatos catálogo**: OK 18 | - **Cantidad Total de Datasets**: 3 19 | - **Cantidad Total de Distribuciones**: 6 20 | 21 | - **Cantidad de Datasets Federados**: None 22 | - **Cantidad de Datasets NO Federados**: None 23 | - **Porcentaje de Datasets NO Federados**: None 24 | 25 | ## Datasets federados que fueron eliminados en el nodo original 26 | 27 | 28 | 29 | ## Datasets no federados 30 | 31 | 32 | 33 | ## Datasets federados 34 | 35 | 36 | 37 | ## Reporte 38 | 39 | Por favor, consulte el informe [`datasets.csv`](datasets.csv). 
40 | -------------------------------------------------------------------------------- /tests/results/time_series.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "distribution_identifier": "1.2", 4 | "description": "PIB a precios de comprador, en millones de pesos de 1993 y valores anuales.", 5 | "title": "oferta_global_pib", 6 | "dataset_identifier": "1", 7 | "scrapingIdentifierCell": "B46", 8 | "units": "Millones de pesos a precios de 1993", 9 | "type": "number", 10 | "id": "1.2_OGP_D_1993_T_17", 11 | "scrapingDataStartCell": "B47" 12 | }, 13 | { 14 | "distribution_identifier": "1.2", 15 | "description": "Importación a precios de comprador, en millones de pesos de 1993 y valores anuales.", 16 | "title": "oferta_global_importacion", 17 | "dataset_identifier": "1", 18 | "scrapingIdentifierCell": "C46", 19 | "units": "Millones de pesos a precios de 1993", 20 | "type": "number", 21 | "id": "1.2_OGI_D_1993_T_25", 22 | "scrapingDataStartCell": "C47" 23 | }, 24 | { 25 | "distribution_identifier": "1.2", 26 | "description": "Oferta global total a precios de comprador, en millones de pesos de 1993 y valores anuales.", 27 | "title": "demanda_global_exportacion", 28 | "dataset_identifier": "1", 29 | "scrapingIdentifierCell": "D46", 30 | "units": "Millones de pesos a precios de 1993", 31 | "type": "number", 32 | "id": "1.2_DGE_D_1993_T_26", 33 | "scrapingDataStartCell": "D47" 34 | }, 35 | { 36 | "distribution_identifier": "1.2", 37 | "description": "Consumo privado, en millones de pesos de 1993 y valores anuales.", 38 | "title": "demanda_global_ibif", 39 | "dataset_identifier": "1", 40 | "scrapingIdentifierCell": "E46", 41 | "units": "Millones de pesos a precios de 1993", 42 | "type": "number", 43 | "id": "1.2_DGI_D_1993_T_19", 44 | "scrapingDataStartCell": "E47" 45 | } 46 | ] 47 | -------------------------------------------------------------------------------- /tests/results/write_table.csv: -------------------------------------------------------------------------------- 1 | Plato,Precio,Sabor 2 | Milanesa,Bajo,666 3 | "Thoné, Vitel",Alto,8000 4 | Aceitunas,,15 5 | -------------------------------------------------------------------------------- /tests/results/write_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/results/write_table.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo-justicia-missing-distribution-identifier.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo-justicia-missing-distribution-identifier.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia_extra_columns.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia_extra_columns.xlsx -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia_no_xlsx_suffix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia_no_xlsx_suffix -------------------------------------------------------------------------------- /tests/samples/catalogo_justicia_with_defaults.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/catalogo_justicia_with_defaults.xlsx -------------------------------------------------------------------------------- /tests/samples/invalid_catalog_empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "", 4 | "name": "" 5 | }, 6 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 7 | "description": "Describí el portal. Explicá de qué se trata tu catálogo de datos. Por favor, hacelo en no más de tres líneas.", 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "title": "Ministerio de Desarrollos Social", 10 | "dataset": [], 11 | "version": "1.1", 12 | "themeTaxonomy": [] 13 | } -------------------------------------------------------------------------------- /tests/samples/invalid_multiple_emails.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Datos Argentina", 3 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 4 | "publisher": { 5 | "name": "Ministerio de Modernización", 6 | "mbox": "datosargentina@jefatura.gob.ar" 7 | }, 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "dataset": [ 10 | { 11 | "title": "publisher mail roto", 12 | "identifier": "1", 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "publisher": { 15 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 16 | "mbox": "first@mail.com; second@mail.com" 17 | }, 18 | "contactPoint": { 19 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 20 | "hasEmail": "valid@mail.com" 21 | }, 22 | "superTheme": [ 23 | "ECON" 24 | ], 25 | "accrualPeriodicity": "R/P1Y", 26 | "issued": "2016-04-14T19:48:05.433640-03:00", 27 | "distribution": [ 28 | { 29 | "identifier": "dist_1", 30 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 31 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 32 | "title": "Convocatorias abiertas durante el año 2015", 33 | "issued": "2016-04-14T19:48:05.433640-03:00" 34 | } 35 | ] 36 | }, 37 | { 38 | "title": "contact point falluto", 39 | "identifier": "2", 40 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 41 | "publisher": { 42 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones", 43 | "mbox": "another_valid.mail@address.com.tv" 44 | }, 45 | "contactPoint": { 46 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 47 | "hasEmail": "one@mail.com;two@mail.com;three@mail.com" 48 | }, 49 | "superTheme": [ 50 | "ECON" 51 | ], 52 | "accrualPeriodicity": "R/P1Y", 53 | "issued": "2016-04-14T19:48:05.433640-03:00", 54 | "distribution": [ 55 | { 56 | "identifier": "dist_2", 57 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 58 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 59 | "title": "Convocatorias abiertas durante el año 2015", 60 | "issued": "2016-04-14T19:48:05.433640-03:00" 61 | } 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/samples/lists_extra_commas.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/lists_extra_commas.xlsx -------------------------------------------------------------------------------- /tests/samples/minimum_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Datos Argentina", 3 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 4 | "publisher": { 5 | "name": "Ministerio de Modernización", 6 | "mbox": "datosargentina@jefatura.gob.ar" 7 | }, 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "dataset": [ 10 | { 11 | "title": "Sistema de contrataciones electrónicas", 12 | "identifier": "1", 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "publisher": { 15 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones" 16 | }, 17 | "superTheme": [ 18 | "ECON" 19 | ], 20 | "accrualPeriodicity": "R/P1Y", 21 | "issued": "2016-04-14T19:48:05.433640-03:00", 22 | "distribution": [ 23 | { 24 | "identifier": "dist_1", 25 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 26 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 27 | "title": "Convocatorias abiertas durante el año 2015", 28 | "issued": "2016-04-14T19:48:05.433640-03:00" 29 | } 30 | ] 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /tests/samples/missing_catalog_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 8 | "language": [ 9 | "spa" 10 | ], 11 | "title": "Datos Argentina", 12 | "issued": "2016-04-14T19:48:05.433640-03:00", 13 | "rights": "Derechos especificados en la licencia.", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "themeTaxonomy": [ 16 | { 17 | "label": "Convocatorias", 18 | "description": "Datasets sobre licitaciones en estado de convocatoria.", 19 | "id": "convocatorias" 20 | }, 21 | { 22 | "label": "Compras", 23 | "description": "Datasets sobre compras realizadas.", 24 | "id": "compras" 25 | }, 26 | { 27 | "label": "Contrataciones", 28 | "description": "Datasets sobre contrataciones.", 29 | "id": "contrataciones" 30 | }, 31 | { 32 | "label": "Adjudicaciones", 33 | "description": "Datasets sobre licitaciones adjudicadas.", 34 | "id": "adjudicaciones" 35 | }, 36 | { 37 | "label": "Normativa", 38 | "description": "Datasets sobre normativa para compras y contrataciones.", 39 | "id": "normativa" 40 | }, 41 | { 42 | "label": "Proveedores", 43 | "description": "Datasets sobre proveedores del Estado.", 44 | "id": "proveedores" 45 | } 46 | ], 47 | "version": "1.1", 48 | "spatial": "ARG", 49 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 50 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 51 | "homepage": "http://datos.gob.ar" 52 | } -------------------------------------------------------------------------------- /tests/samples/missing_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "license": "Open Data Commons Open Database License 1.0", 7 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 8 | "language": [ 9 | "spa" 10 | ], 11 | "title": "Datos Argentina", 12 | "issued": "2016-04-14T19:48:05.433640-03:00", 13 | "rights": "Derechos especificados en la licencia.", 14 | "modified": "2016-04-19T19:48:05.433640-03:00", 15 | "themeTaxonomy": [ 16 | { 17 | "label": "Convocatorias", 18 | "description": "Datasets sobre licitaciones en estado de convocatoria.", 19 | "id": "convocatorias" 20 | }, 21 | { 22 | "label": "Compras", 23 | "description": "Datasets sobre compras realizadas.", 24 | "id": "compras" 25 | }, 26 | { 27 | "label": "Contrataciones", 28 | "description": "Datasets sobre 
contrataciones.", 29 | "id": "contrataciones" 30 | }, 31 | { 32 | "label": "Adjudicaciones", 33 | "description": "Datasets sobre licitaciones adjudicadas.", 34 | "id": "adjudicaciones" 35 | }, 36 | { 37 | "label": "Normativa", 38 | "description": "Datasets sobre normativa para compras y contrataciones.", 39 | "id": "normativa" 40 | }, 41 | { 42 | "label": "Proveedores", 43 | "description": "Datasets sobre proveedores del Estado.", 44 | "id": "proveedores" 45 | } 46 | ], 47 | "version": "1.1", 48 | "spatial": "ARG", 49 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 50 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8", 51 | "homepage": "http://datos.gob.ar" 52 | } -------------------------------------------------------------------------------- /tests/samples/missing_periodicity.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 7 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 8 | "title": "Datos Argentina", 9 | "dataset": [ 10 | { 11 | "identifier": "1", 12 | "publisher": { 13 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones" 14 | }, 15 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 16 | "superTheme": [ 17 | "ECON" 18 | ], 19 | "title": "Sistema de contrataciones electrónicas", 20 | "issued": "2016-04-14T19:48:05.433640-03:00", 21 | "source": "Ministerio de modernizacion", 22 | "distribution": [ 23 | { 24 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 25 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 26 | "title": "Convocatorias abiertas durante el año 2015", 27 | "issued": "2016-04-14T19:48:05.433640-03:00" 28 | } 29 | ] 30 | } 31 | ], 32 | "version": "1.1", 33 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 34 | } -------------------------------------------------------------------------------- /tests/samples/processed_datasets_report.csv: -------------------------------------------------------------------------------- 1 | catalog_metadata_url,catalog_title,catalog_description,valid_catalog_metadata,dataset_index,dataset_title,dataset_accrualPeriodicity,valid_dataset_metadata,harvest,dataset_description,dataset_publisher_name,dataset_superTheme,dataset_theme,dataset_landingPage,distributions_list 2 | tests/samples/full_data.json,Datos Argentina,Portal de Datos Abiertos del Gobierno de la República Argentina,1,0,Sistema de contrataciones electrónicas,R/P1Y,1,1,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones,ECON,"contrataciones, compras, convocatorias",http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 3 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,0,Sistema de contrataciones electrónicas,R/P1Y,0,0,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),,"ECON, HEAL",,,"""Convocatorias 2015"": None 4 | ""Convocatorias 2016"": [u'http://186.33.211.253/dataset2.csv'] 5 | ""Convocatorias 2017"": 444444 6 | ""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset3.csv" 7 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,1,Sistema de contrataciones electrónicas,R/P1Y,1,1,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 8 | tests/samples/several_datasets_for_harvest.json,Cosechando Datos Argentina,Datasets para reporte pre cosecha,1,2,Sistema de contrataciones electrónicas,R/P1Y,1,1,Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra),Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones,ECON,,,"""Convocatorias abiertas durante el año 2015"": http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv" 9 | http://181.209.63.71/data.json,Andino,Portal Andino Demo,0,0,Dataset Demo,eventual,1,0,"Este es un dataset de ejemplo, se incluye como material DEMO y no contiene ningun valor estadistico.",Andino,TECH,Tema.demo,https://github.com/datosgobar/portal-andino,"""Recurso de Ejemplo"": http://181.209.63.71/dataset/6897d435-8084-4685-b8ce-304b190755e4/resource/6145bf1c-a2fb-4bb5-b090-bb25f8419198/download/estructura-organica-3.csv" 10 | -------------------------------------------------------------------------------- /tests/samples/prueba_sheet_to_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/prueba_sheet_to_table.xlsx -------------------------------------------------------------------------------- /tests/samples/read_table.csv: -------------------------------------------------------------------------------- 1 | Plato,Precio,Sabor 2 | Milanesa,Bajo,666 3 | "Thoné, Vitel",Alto,8000 4 | Aceitunas,,15 5 | -------------------------------------------------------------------------------- /tests/samples/read_table.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/read_table.xlsx -------------------------------------------------------------------------------- /tests/samples/resource_sample.csv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/samples/resource_sample.csv -------------------------------------------------------------------------------- /tests/samples/several_datasets_for_harvest.json: -------------------------------------------------------------------------------- 1 | { 2 | "publisher": { 3 | "mbox": "datosargentina@jefatura.gob.ar", 4 | "name": "Ministerio de Modernización" 5 | }, 6 | "description": "Datasets para reporte pre cosecha", 7 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 8 | "title": "Cosechando Datos Argentina", 9 | "dataset": [ 10 | { 11 | "identifier": "1", 12 | "publisher": {}, 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "superTheme": [ 15 | "ECON", 16 | "HEAL" 17 | ], 18 | "title": "Sistema de contrataciones electrónicas UNO", 19 | "issued": "2016-04-14T19:48:05.433640-03:00", 20 | "source": "Ministerio de modernizacion", 21 | "accrualPeriodicity": "R/P1Y", 22 | "distribution": [ 23 | { 24 | "identifier": "dist_1", 25 | "accessURL": "http://datos.gob.ar/dataset1.csv", 26 | "issued": "2016-04-14T19:48:05.433640-03:00", 27 | "title": "Convocatorias 2015" 28 | }, 29 | { 30 | "identifier": "dist_2", 31 | "accessURL": "http://datos.gob.ar", 32 | "downloadURL": [ 33 | "http://186.33.211.253/dataset2.csv" 34 | ], 35 | "title": "Convocatorias 2016", 36 | "issued": "2016-04-14T19:48:05.433640-03:00" 37 | }, 38 | { 39 | "identifier": "dist_3", 40 | "accessURL": "http://datos.gob.ar", 41 | "downloadURL": 444444, 42 | "title": "Convocatorias 2017", 43 | "issued": "2016-04-14T19:48:05.433640-03:00" 44 | }, 45 | { 46 | "identifier": "dist_4", 47 | "accessURL": "http://datos.gob.ar", 48 | "downloadURL": "http://186.33.211.253/dataset3.csv", 49 | "title": "Convocatorias abiertas durante el año 2015", 50 | "issued": "2016-04-14T19:48:05.433640-03:00" 51 | } 52 | ] 53 | }, 54 | { 55 | "identifier": "2", 56 | "publisher": { 57 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones" 58 | }, 59 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 60 | "superTheme": [ 61 | "ECON" 62 | ], 63 | "title": "Sistema de contrataciones electrónicas DOS", 64 | "issued": "2016-04-14T19:48:05.433640-03:00", 65 | "source": "Ministerio de modernizacion", 66 | "accrualPeriodicity": "R/P1Y", 67 | "distribution": [ 68 | { 69 | "identifier": "dist_5", 70 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 71 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 72 | "title": "Convocatorias abiertas durante el año 2015", 73 | "issued": "2016-04-14T19:48:05.433640-03:00" 74 | } 75 | ] 76 | }, 77 | { 78 | "identifier": "3", 79 | "publisher": { 80 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones" 81 | }, 82 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 83 | "superTheme": [ 84 | "ECON" 85 | ], 86 | "title": "Sistema de contrataciones electrónicas TRES", 87 | "issued": "2016-04-14T19:48:05.433640-03:00", 88 | "source": "Ministerio de modernizacion", 89 | "accrualPeriodicity": "R/P1Y", 90 | "distribution": [ 91 | { 92 | "identifier": "dist_6", 93 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 94 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 95 | "title": "Convocatorias abiertas durante el año 2015", 96 | "issued": "2016-04-14T19:48:05.433640-03:00" 97 | } 98 | ] 99 | } 100 | ], 101 | "version": "1.1", 102 | "identifier": "7d4d816f-3a40-476e-ab71-d48a3f0eb3c8" 103 | } -------------------------------------------------------------------------------- /tests/samples/valid_whitespace_emails.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Datos Argentina", 3 | "description": "Portal de Datos Abiertos del Gobierno de la República Argentina", 4 | "publisher": { 5 | "name": "Ministerio de Modernización", 6 | "mbox": "datosargentina@jefatura.gob.ar" 7 | }, 8 | "superThemeTaxonomy": "http://datos.gob.ar/superThemeTaxonomy.json", 9 | "dataset": [ 10 | { 11 | "title": "publisher mail roto", 12 | "identifier": "1", 13 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 14 | "publisher": { 15 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 16 | "mbox": " whitespace@mail.com" 17 | }, 18 | "contactPoint": { 19 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 20 | "hasEmail": "valid@mail.com" 21 | }, 22 | "superTheme": [ 23 | "ECON" 24 | ], 25 | "accrualPeriodicity": "R/P1Y", 26 | "issued": "2016-04-14T19:48:05.433640-03:00", 27 | "distribution": [ 28 | { 29 | "identifier": "dist_1", 30 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 31 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 32 | "title": "Convocatorias abiertas durante el año 2015", 33 | "issued": "2016-04-14T19:48:05.433640-03:00" 34 | } 35 | ] 36 | }, 37 | { 38 | "title": "contact point falluto", 39 | "identifier": "2", 40 | "description": "Datos correspondientes al Sistema de Contrataciones Electrónicas (Argentina Compra)", 41 | "publisher": { 42 | "name": "Ministerio de Modernización. Secretaría de Modernización Administrativa. Oficina Nacional de Contrataciones", 43 | "mbox": "another_valid.mail@address.com.tv" 44 | }, 45 | "contactPoint": { 46 | "fn": "Ministerio de Modernización. Secretaría de Modernización Administrativa. 
Oficina Nacional de Contrataciones", 47 | "hasEmail": "trailspace@mail.com " 48 | }, 49 | "superTheme": [ 50 | "ECON" 51 | ], 52 | "accrualPeriodicity": "R/P1Y", 53 | "issued": "2016-04-14T19:48:05.433640-03:00", 54 | "distribution": [ 55 | { 56 | "identifier": "dist_2", 57 | "accessURL": "http://datos.gob.ar/dataset/sistema-de-contrataciones-electronicas-argentina-compra/archivo/fa3603b3-0af7-43cc-9da9-90a512217d8a", 58 | "downloadURL": "http://186.33.211.253/dataset/99db6631-d1c9-470b-a73e-c62daa32c420/resource/4b7447cb-31ff-4352-96c3-589d212e1cc9/download/convocatorias-abiertas-anio-2015.csv", 59 | "title": "Convocatorias abiertas durante el año 2015", 60 | "issued": "2016-04-14T19:48:05.433640-03:00" 61 | } 62 | ] 63 | } 64 | ] 65 | } 66 | -------------------------------------------------------------------------------- /tests/support/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/support/__init__.py -------------------------------------------------------------------------------- /tests/support/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | BAD_DATAJSON_URL = "http://104.131.35.253/data.json" 5 | BAD_DATAJSON_URL2 = "http://181.209.63.71/data.json" 6 | -------------------------------------------------------------------------------- /tests/support/decorators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import io 5 | import json 6 | import os 7 | 8 | from six import wraps 9 | 10 | RESULTS_DIR = os.path.join("tests", "results") 11 | 12 | 13 | def load_expected_result(): 14 | def case_decorator(test): 15 | case_filename = test.__name__.split("test_")[-1] 16 | 17 | @wraps(test) 18 | def decorated_test(*args, **kwargs): 19 | result_path = os.path.join(RESULTS_DIR, case_filename + ".json") 20 | 21 | with io.open(result_path, encoding='utf8') as result_file: 22 | expected_result = json.load(result_file) 23 | 24 | kwargs["expected_result"] = expected_result 25 | test(*args, **kwargs) 26 | 27 | return decorated_test 28 | 29 | return case_decorator 30 | -------------------------------------------------------------------------------- /tests/support/factories/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datosgobar/pydatajson/f26e3d5928ce9d455485e03fa63a8d8741588b7a/tests/support/factories/__init__.py -------------------------------------------------------------------------------- /tests/support/factories/core_files.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import unicode_literals, absolute_import 5 | 6 | from .catalog_errors import missing_catalog_title, \ 7 | missing_catalog_description, \ 8 | missing_catalog_dataset, invalid_catalog_publisher_type, \ 9 | invalid_publisher_mbox_format, null_catalog_publisher, \ 10 | empty_mandatory_string, malformed_date, malformed_datetime, \ 11 | malformed_datetime2, malformed_uri, \ 12 | invalid_theme_taxonomy, missing_dataset 13 | from .dataset_errors import missing_dataset_title, \ 14 | missing_dataset_description, \ 15 | malformed_accrualperiodicity, malformed_temporal, \ 16 | malformed_temporal2, too_long_field_title 17 | 
from .distribution_errors import missing_distribution_title 18 | from .other_errors import multiple_missing_descriptions, \ 19 | invalid_multiple_fields_type 20 | 21 | FULL_DATA_RESPONSE = { 22 | "status": "OK", 23 | "error": { 24 | "catalog": { 25 | "status": "OK", 26 | "errors": [], 27 | "title": "Datos Argentina" 28 | }, 29 | "dataset": [ 30 | { 31 | "status": "OK", 32 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c777", 33 | "list_index": 0, 34 | "errors": [], 35 | "title": "Sistema de contrataciones electrónicas" 36 | }, 37 | { 38 | "status": "OK", 39 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 40 | "list_index": 1, 41 | "errors": [], 42 | "title": "Sistema de contrataciones electrónicas (sin datos)" 43 | } 44 | ] 45 | } 46 | } 47 | 48 | TEST_FROM_RESULT_FILE = { 49 | # Tests for REQUIRED FIELDS 50 | # Tests for valid inputs 51 | 'full_data': FULL_DATA_RESPONSE, 52 | # A data.json with correct values only for the required keys 53 | 'minimum_data': None, 54 | 55 | # Tests for FIELD TYPES 56 | # Tests for valid inputs 57 | 'null_dataset_theme': None, 58 | 'null_field_description': None, 59 | # Tests for invalid inputs 60 | 'invalid_catalog_publisher_type': None, 61 | 'invalid_publisher_mbox_format': None, 62 | # Catalog_publisher and distribution_bytesize fail 63 | 'invalid_field_description_type': None, 64 | # The required key catalog["description"] must NOT be an empty string 65 | 'empty_optional_string': None, 66 | # The format and the fileName extension of a distribution must 67 | # match when both fields are present 68 | 'mismatched_fileName_and_format': None, 69 | # The format and the downloadURL extension of a distribution must 70 | # match when both fields are present 71 | 'mismatched_downloadURL_and_format': None, 72 | } 73 | 74 | TEST_FROM_GENERATED_RESULT = { 75 | 76 | 'multiple_missing_descriptions': multiple_missing_descriptions(), 77 | 'invalid_multiple_fields_type': invalid_multiple_fields_type(), 78 | 79 | 'missing_catalog_title': missing_catalog_title(), 80 | 'missing_catalog_description': missing_catalog_description(), 81 | 'missing_catalog_dataset': missing_catalog_dataset(), 82 | 'null_catalog_publisher': null_catalog_publisher(), 83 | 'empty_mandatory_string': empty_mandatory_string(), 84 | 'malformed_datetime': malformed_datetime(), 85 | 'malformed_datetime2': malformed_datetime2(), 86 | 'malformed_uri': malformed_uri(), 87 | 'invalid_themeTaxonomy': invalid_theme_taxonomy(), 88 | 'missing_dataset': missing_dataset(), 89 | 90 | 'missing_dataset_title': missing_dataset_title(), 91 | 'missing_dataset_description': missing_dataset_description(), 92 | 'malformed_accrualperiodicity': malformed_accrualperiodicity(), 93 | 'malformed_date': malformed_date(), 94 | 'malformed_temporal': malformed_temporal(), 95 | 'malformed_temporal2': malformed_temporal2(), 96 | 'too_long_field_title': too_long_field_title(), 97 | 98 | 'missing_distribution_title': missing_distribution_title(), 99 | 100 | 'invalid_catalog_publisher_type': invalid_catalog_publisher_type(), 101 | 'invalid_publisher_mbox_format': invalid_publisher_mbox_format(), 102 | 103 | # 'repeated_downloadURL': repeated_downloadURL(), 104 | } 105 | 106 | TEST_FILE_RESPONSES = {} 107 | TEST_FILE_RESPONSES.update(TEST_FROM_RESULT_FILE) 108 | TEST_FILE_RESPONSES.update(TEST_FROM_GENERATED_RESULT) 109 | -------------------------------------------------------------------------------- /tests/support/factories/distribution_errors.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import unicode_literals 5 | 6 | from tests.support.utils import jsonschema_str 7 | 8 | 9 | def distribution_error(): 10 | return { 11 | "status": "ERROR", 12 | "error": { 13 | "catalog": { 14 | "status": "OK", 15 | "errors": [], 16 | "title": "Datos Argentina" 17 | }, 18 | "dataset": [ 19 | { 20 | "status": "ERROR", 21 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 22 | "list_index": 0, 23 | "errors": [ 24 | { 25 | "instance": None, 26 | "validator": "required", 27 | "path": [ 28 | "dataset", 29 | 0, 30 | "distribution", 31 | 0 32 | ], 33 | "message": "%s is a required property" 34 | % jsonschema_str('title'), 35 | "error_code": 1, 36 | "validator_value": [ 37 | "accessURL", 38 | "downloadURL", 39 | "title", 40 | "issued", 41 | "identifier" 42 | ] 43 | } 44 | ], 45 | "title": "Sistema de contrataciones electrónicas" 46 | } 47 | ] 48 | } 49 | } 50 | 51 | 52 | def missing_distribution_title(): 53 | return distribution_error() 54 | -------------------------------------------------------------------------------- /tests/support/factories/other_errors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | from __future__ import unicode_literals 5 | 6 | from tests.support.utils import jsonschema_str 7 | 8 | 9 | def gen_error(catalog_error, dataset_error): 10 | return { 11 | "status": "ERROR", 12 | "error": { 13 | "catalog": { 14 | "status": "ERROR", 15 | "errors": [ 16 | catalog_error 17 | ], 18 | "title": "Datos Argentina" 19 | }, 20 | "dataset": [ 21 | { 22 | "status": "ERROR", 23 | "identifier": "99db6631-d1c9-470b-a73e-c62daa32c420", 24 | "list_index": 0, 25 | "errors": [ 26 | dataset_error, 27 | ], 28 | "title": "Sistema de contrataciones electrónicas" 29 | } 30 | ] 31 | } 32 | } 33 | 34 | 35 | def multiple_missing_descriptions(): 36 | return gen_error({ 37 | "instance": None, 38 | "validator": "required", 39 | "path": [], 40 | "message": "%s is a required property" % jsonschema_str('description'), 41 | "error_code": 1, 42 | "validator_value": [ 43 | "dataset", 44 | "title", 45 | "description", 46 | "publisher", 47 | "superThemeTaxonomy" 48 | ] 49 | }, { 50 | "instance": None, 51 | "validator": "required", 52 | "path": [ 53 | "dataset", 54 | 0 55 | ], 56 | "message": "%s is a required property" % jsonschema_str('description'), 57 | "error_code": 1, 58 | "validator_value": [ 59 | "title", 60 | "description", 61 | "publisher", 62 | "superTheme", 63 | "distribution", 64 | "accrualPeriodicity", 65 | "issued", 66 | "identifier" 67 | ] 68 | }) 69 | 70 | 71 | def invalid_multiple_fields_type(): 72 | return gen_error({ 73 | "instance": [ 74 | "Ministerio de Modernización", 75 | "datosargentina@jefatura.gob.ar" 76 | ], 77 | "validator": "type", 78 | "path": [ 79 | "publisher" 80 | ], 81 | "message": "[%s, %s] is not of type %s" % ( 82 | jsonschema_str('Ministerio de Modernización'), 83 | jsonschema_str('datosargentina@jefatura.gob.ar'), 84 | jsonschema_str('object'), 85 | ), 86 | "error_code": 2, 87 | "validator_value": "object" 88 | }, { 89 | "instance": "5120", 90 | "validator": "anyOf", 91 | "path": [ 92 | "dataset", 93 | 0, 94 | "distribution", 95 | 0, 96 | "byteSize" 97 | ], 98 | "message": "%s is not valid under any of the given schemas" 99 | % jsonschema_str('5120'), 100 | "error_code": 2, 101 | "validator_value": [ 102 | { 103 | "type": "integer" 104 | }, 105 | { 106 | '$ref': 
'mixed-types.json#emptyValue' 107 | } 108 | ] 109 | }) 110 | -------------------------------------------------------------------------------- /tests/support/factories/xlsx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import json 4 | from collections import OrderedDict 5 | 6 | import six 7 | 8 | 9 | def to_native_dict(ordered_dict): 10 | return json.loads(json.dumps(ordered_dict)) 11 | 12 | 13 | def to_dict(table_list): 14 | ordered_dict = OrderedDict(table_list) 15 | if six.PY3: 16 | return to_native_dict(ordered_dict) 17 | else: 18 | return ordered_dict 19 | 20 | 21 | CSV_TABLE = [ 22 | to_dict([(u'Plato', u'Milanesa'), 23 | (u'Precio', u'Bajo'), 24 | (u'Sabor', u'666')]), 25 | to_dict([(u'Plato', u'Thoné, Vitel'), 26 | (u'Precio', u'Alto'), 27 | (u'Sabor', u'8000')]), 28 | to_dict([(u'Plato', u'Aceitunas'), 29 | (u'Precio', u''), 30 | (u'Sabor', u'15')]) 31 | ] 32 | 33 | WRITE_XLSX_TABLE = [ 34 | to_dict([(u'Plato', u'Milanesa'), 35 | (u'Precio', u'Bajo'), 36 | (u'Sabor', 666)]), 37 | to_dict([(u'Plato', u'Thoné, Vitel'), 38 | (u'Precio', u'Alto'), 39 | (u'Sabor', 8000)]), 40 | to_dict([(u'Plato', u'Aceitunas'), 41 | (u'Precio', None), 42 | (u'Sabor', 15)]) 43 | ] 44 | 45 | READ_XLSX_TABLE = [ 46 | to_dict([(u'Plato', u'Milanesa'), 47 | (u'Precio', u'Bajo'), 48 | (u'Sabor', 666)]), 49 | to_dict([(u'Plato', u'Thoné, Vitel'), 50 | (u'Precio', u'Alto'), 51 | (u'Sabor', 8000)]), 52 | to_dict([(u'Plato', u'Aceitunas'), 53 | (u'Sabor', 15)]) 54 | ] 55 | -------------------------------------------------------------------------------- /tests/support/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | def jsonschema_str(string): 5 | return repr(string) 6 | -------------------------------------------------------------------------------- /tests/test_backup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Tests for the functionality of the 'backup' module 5 | """ 6 | 7 | from __future__ import unicode_literals 8 | from __future__ import print_function 9 | from __future__ import with_statement 10 | 11 | from contextlib import contextmanager 12 | 13 | import unittest 14 | import nose 15 | import os 16 | import vcr 17 | import tempfile 18 | import shutil 19 | 20 | 21 | from .context import pydatajson 22 | 23 | SAMPLES_DIR = os.path.join("tests", "samples") 24 | RESULTS_DIR = os.path.join("tests", "results") 25 | 26 | VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'), 27 | cassette_library_dir=os.path.join("tests", "cassetes", "backup"), 28 | record_mode='once') 29 | 30 | 31 | @contextmanager 32 | def tempdir(cleanup=True): 33 | tmp = tempfile.mkdtemp(dir='tests/temp') 34 | try: 35 | yield tmp 36 | finally: 37 | cleanup and shutil.rmtree(tmp, ignore_errors=True) 38 | 39 | 40 | class BackupTestCase(unittest.TestCase): 41 | """Tests for backup methods.""" 42 | 43 | @classmethod 44 | def get_sample(cls, sample_filename): 45 | return os.path.join(SAMPLES_DIR, sample_filename) 46 | 47 | @classmethod 48 | def setUp(cls): 49 | cls.catalog_meta = pydatajson.DataJson( 50 | cls.get_sample("full_data.json")) 51 | cls.catalog_data = pydatajson.DataJson( 52 | cls.get_sample("example_time_series.json")) 53 | cls.maxDiff = None 54 | cls.longMessage = True 55 | 56 | @classmethod 57 | def tearDown(cls): 58 | del (cls.catalog_meta) 59
| del (cls.catalog_data) 60 | 61 | def test_make_catalog_backup_metadata(self): 62 | with tempdir() as temp_dir: 63 | json_path = os.path.join( 64 | temp_dir, "catalog", "example", "data.json") 65 | xlsx_path = os.path.join( 66 | temp_dir, "catalog", "example", "catalog.xlsx") 67 | 68 | pydatajson.backup.make_catalog_backup( 69 | self.catalog_meta, 70 | catalog_id="example", local_catalogs_dir=temp_dir, 71 | include_metadata=True, include_data=False) 72 | 73 | self.assertTrue(os.path.exists(json_path)) 74 | self.assertTrue(os.path.exists(xlsx_path)) 75 | 76 | @VCR.use_cassette() 77 | def test_make_catalog_backup_data(self): 78 | with tempdir() as temp_dir: 79 | distribution_path = os.path.abspath( 80 | os.path.join( 81 | temp_dir, 82 | "catalog", 83 | "example_ts", 84 | "dataset", 85 | "1", 86 | "distribution", 87 | "1.2", 88 | "download", 89 | "oferta-demanda-globales-datos-desestacionalizados" 90 | "-valores-trimestrales-base-1993.csv")) 91 | 92 | pydatajson.backup.make_catalog_backup( 93 | self.catalog_data, 94 | catalog_id="example_ts", local_catalogs_dir=temp_dir, 95 | include_metadata=True, include_data=True) 96 | 97 | self.assertTrue(os.path.exists(distribution_path)) 98 | 99 | @VCR.use_cassette() 100 | def test_make_catalog_backup_data_without_file_name(self): 101 | with tempdir() as temp_dir: 102 | distribution_path = os.path.abspath(os.path.join( 103 | temp_dir, "catalog", "example_ts", "dataset", "1", 104 | "distribution", "1.2.b", "download", 105 | "odg-total-millones-pesos-1960-trimestral.csv" 106 | )) 107 | 108 | pydatajson.backup.make_catalog_backup( 109 | self.catalog_data, 110 | catalog_id="example_ts", local_catalogs_dir=temp_dir, 111 | include_metadata=True, include_data=True) 112 | 113 | self.assertTrue(os.path.exists(distribution_path)) 114 | 115 | 116 | if __name__ == '__main__': 117 | nose.run(defaultTest=__name__) 118 | -------------------------------------------------------------------------------- /tests/test_catalog_readme.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Tests for the catalog_readme module.""" 4 | 5 | from __future__ import print_function, unicode_literals, with_statement 6 | 7 | import io 8 | import os.path 9 | 10 | import requests_mock 11 | import vcr 12 | from nose.tools import assert_true, assert_equal 13 | 14 | try: 15 | import mock 16 | except ImportError: 17 | from unittest import mock 18 | import filecmp 19 | 20 | from pydatajson.catalog_readme import generate_readme 21 | from tests.support.decorators import RESULTS_DIR 22 | 23 | 24 | my_vcr = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'), 25 | cassette_library_dir=os.path.join("tests", "cassetes"), 26 | record_mode='once') 27 | 28 | 29 | class TestDataJsonTestCase(object): 30 | SAMPLES_DIR = os.path.join("tests", "samples") 31 | RESULTS_DIR = RESULTS_DIR 32 | TEMP_DIR = os.path.join("tests", "temp") 33 | 34 | @classmethod 35 | def get_sample(cls, sample_filename): 36 | return os.path.join(cls.SAMPLES_DIR, sample_filename) 37 | 38 | @classmethod 39 | def setUp(cls): 40 | cls.catalog = cls.get_sample("several_datasets_for_harvest.json") 41 | cls.requests_mock = requests_mock.Mocker() 42 | cls.requests_mock.start() 43 | cls.requests_mock.get(requests_mock.ANY, real_http=True) 44 | cls.requests_mock.head(requests_mock.ANY, status_code=200) 45 | 46 | @classmethod 47 | def tearDown(cls): 48 | cls.requests_mock.stop() 49 | 50 | @my_vcr.use_cassette() 51 | def test_generate_readme(self): 52 | with
io.open(os.path.join(self.RESULTS_DIR, "catalog_readme.md"), 'r', 53 | encoding='utf-8') as expected_readme_file: 54 | expected_readme = expected_readme_file.read() 55 | readme = generate_readme(self.catalog) 56 | assert_equal(expected_readme, readme) 57 | 58 | @my_vcr.use_cassette() 59 | def test_readme_file_write(self): 60 | actual_filename = os.path.join(self.TEMP_DIR, "catalog_readme.md") 61 | expected_filename = os.path.join(self.RESULTS_DIR, "catalog_readme.md") 62 | generate_readme(self.catalog, export_path=actual_filename) 63 | comparison = filecmp.cmp(actual_filename, expected_filename) 64 | if comparison: 65 | os.remove(actual_filename) 66 | else: 67 | raise AssertionError( 68 | "{} was written, but it is not identical to the expected file. " 69 | "Please review it manually.".format(actual_filename)) 70 | 71 | assert_true(comparison) 72 | 73 | @my_vcr.use_cassette() 74 | @mock.patch('pydatajson.indicators._federation_indicators') 75 | def test_readme_null_indicators(self, mock_indicators): 76 | mock_indicators.return_value = { 77 | 'datasets_federados_cant': None, 78 | 'datasets_federados_pct': None, 79 | 'datasets_no_federados_cant': None, 80 | 'datasets_federados_eliminados_cant': None, 81 | 'distribuciones_federadas_cant': None, 82 | 'datasets_federados_eliminados': [], 83 | 'datasets_no_federados': [], 84 | 'datasets_federados': [], 85 | } 86 | results_path = os.path.join( 87 | self.RESULTS_DIR, "null_indicators_readme.md") 88 | 89 | with io.open(results_path, 'r', encoding='utf-8') \ 90 | as expected_readme_file: 91 | expected_readme = expected_readme_file.read() 92 | readme = generate_readme(self.catalog) 93 | assert_equal(expected_readme, readme) 94 | -------------------------------------------------------------------------------- /tests/test_threading.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from unittest import TestCase 3 | 4 | from pydatajson.threading_helper import apply_threading 5 | 6 | 7 | class ThreadingTests(TestCase): 8 | 9 | def test_threading(self): 10 | elements = [1, 2, 3, 4] 11 | 12 | def function(x): 13 | return x ** 2 14 | 15 | result = apply_threading(elements, function, 3) 16 | 17 | self.assertEqual(result, [1, 4, 9, 16]) 18 | 19 | def test_broken_function(self): 20 | elements = [1, 2, 3, 0] 21 | 22 | def divide(x): 23 | return 6 / x 24 | 25 | with self.assertRaises(ZeroDivisionError): # It's "synchronous"!
26 | apply_threading(elements, divide, 3) 27 | -------------------------------------------------------------------------------- /tests/test_time_series.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | from __future__ import with_statement 6 | 7 | import os.path 8 | import unittest 9 | 10 | from pydatajson.core import DataJson 11 | from pydatajson.custom_exceptions import DistributionTimeIndexNonExistentError 12 | from pydatajson.time_series import get_distribution_time_index, \ 13 | distribution_has_time_index, dataset_has_time_series 14 | 15 | SAMPLES_DIR = os.path.join("tests", "samples") 16 | 17 | 18 | class TimeSeriesTestCase(unittest.TestCase): 19 | 20 | @classmethod 21 | def get_sample(cls, sample_filename): 22 | return os.path.join(SAMPLES_DIR, sample_filename) 23 | 24 | def setUp(self): 25 | ts_catalog = DataJson(self.get_sample('time_series_data.json')) 26 | full_catalog = DataJson(self.get_sample('full_data.json')) 27 | self.ts_dataset = ts_catalog.datasets[0] 28 | self.non_ts_datasets = full_catalog.datasets[0] 29 | self.ts_distribution = ts_catalog.distributions[1] 30 | self.non_ts_distribution = full_catalog.distributions[0] 31 | 32 | def test_get_distribution_time_index(self): 33 | self.assertEqual( 34 | 'indice_tiempo', 35 | get_distribution_time_index( 36 | self.ts_distribution)) 37 | with self.assertRaises(DistributionTimeIndexNonExistentError): 38 | get_distribution_time_index(self.non_ts_distribution) 39 | 40 | def test_distribution_has_time_index(self): 41 | self.assertTrue(distribution_has_time_index(self.ts_distribution)) 42 | self.assertFalse(distribution_has_time_index(self.non_ts_distribution)) 43 | self.ts_distribution['field'] = ['p', 'r', 'o', 'b', 'l', 'e', 'm'] 44 | self.assertFalse(distribution_has_time_index(self.ts_distribution)) 45 | 46 | def test_dataset_has_time_series(self): 47 | self.assertTrue(dataset_has_time_series(self.ts_dataset)) 48 | self.assertFalse(dataset_has_time_series(self.non_ts_datasets)) 49 | -------------------------------------------------------------------------------- /tests/test_urls_validation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | 5 | import requests_mock 6 | from nose.tools import assert_true, assert_false 7 | from requests import Timeout 8 | 9 | import pydatajson 10 | from .support.decorators import RESULTS_DIR 11 | 12 | 13 | class TestDataJsonTestCase(object): 14 | SAMPLES_DIR = os.path.join("tests", "samples") 15 | RESULTS_DIR = RESULTS_DIR 16 | TEMP_DIR = os.path.join("tests", "temp") 17 | 18 | @classmethod 19 | def get_sample(cls, sample_filename): 20 | return os.path.join(cls.SAMPLES_DIR, sample_filename) 21 | 22 | def setUp(self): 23 | self.dj = pydatajson.DataJson(self.get_sample("full_data.json")) 24 | self.catalog = pydatajson.readers.read_catalog( 25 | self.get_sample("full_data.json")) 26 | self.maxDiff = None 27 | self.longMessage = True 28 | self.requests_mock = requests_mock.Mocker() 29 | self.requests_mock.start() 30 | self.requests_mock.get(requests_mock.ANY, real_http=True) 31 | self.requests_mock.head(requests_mock.ANY, status_code=200) 32 | 33 | def tearDown(self): 34 | del self.dj 35 | self.requests_mock.stop() 36 | 37 | def test_urls_with_status_code_200_is_valid(self): 38 | assert_true(self.dj.is_valid_catalog(broken_links=True)) 39 | 
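    # Note: the cases below exercise the broken-links check with mocked HEAD
    # responses: 200, 203 and 302 are expected to be accepted as working
    # links, while a 404 or a Timeout should make the catalog invalid, and
    # the check only runs when broken_links=True is passed.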
40 | def test_urls_with_status_code_203_is_valid(self): 41 | self.requests_mock.head(requests_mock.ANY, status_code=203) 42 | assert_true(self.dj.is_valid_catalog(broken_links=True)) 43 | 44 | def test_urls_with_status_code_302_is_valid(self): 45 | self.requests_mock.head(requests_mock.ANY, status_code=302) 46 | assert_true(self.dj.is_valid_catalog(broken_links=True)) 47 | 48 | def test_urls_with_invalid_status_codes_are_not_valid(self): 49 | self.requests_mock.head(requests_mock.ANY, status_code=404) 50 | assert_false(self.dj.is_valid_catalog(broken_links=True)) 51 | 52 | def test_throws_exception(self): 53 | self.requests_mock.head(requests_mock.ANY, exc=Timeout) 54 | assert_false(self.dj.is_valid_catalog(broken_links=True)) 55 | 56 | def test_validation_without_flag_does_not_validate_urls(self): 57 | assert_true(self.dj.is_valid_catalog()) 58 | 59 | def test_validation_with_flag_does_validate_urls(self): 60 | self.requests_mock.head(requests_mock.ANY, status_code=404) 61 | assert_false(self.dj.is_valid_catalog(broken_links=True)) 62 | -------------------------------------------------------------------------------- /tests/xl_methods.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | xl_methods 6 | 7 | Methods slightly modified from abenassi/xlseries to manipulate files in 8 | XLSX format (https://github.com/abenassi/xlseries). 9 | """ 10 | from six import string_types, text_type 11 | 12 | 13 | def compare_cells(wb1, wb2): 14 | """Compare two excels based on row iteration.""" 15 | 16 | # compare each cell of each worksheet 17 | for ws1, ws2 in zip(wb1.worksheets, wb2.worksheets): 18 | compare_cells_ws(ws1, ws2) 19 | return True 20 | 21 | 22 | def compare_cells_ws(ws1, ws2): 23 | """Compare two worksheets based on row iteration.""" 24 | 25 | # compare each cell of each worksheet 26 | for row1, row2 in zip(ws1.rows, ws2.rows): 27 | for cell1, cell2 in zip(row1, row2): 28 | 29 | msg = "".join([_safe_str(cell1.value), " != ", 30 | _safe_str(cell2.value), "\nrow: ", 31 | _safe_str(cell1.row), 32 | " column: ", _safe_str(cell1.column)]) 33 | 34 | value1 = normalize_value(cell1.value) 35 | value2 = normalize_value(cell2.value) 36 | 37 | assert value1 == value2, msg 38 | 39 | return True 40 | 41 | 42 | def normalize_value(value): 43 | """Strip spaces if the value is a string, convert None to empty string or 44 | let it pass otherwise.""" 45 | 46 | if isinstance(value, string_types): 47 | return value.strip() 48 | elif value is None: 49 | return "" 50 | else: 51 | return value 52 | 53 | 54 | def _safe_str(value): 55 | return text_type(value) 56 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py36 3 | 4 | [testenv] 5 | deps= 6 | -rrequirements.txt 7 | -rrequirements_dev.txt 8 | commands = nosetests 9 | pycodestyle pydatajson tests 10 | 11 | [testenv:flake8] 12 | commands = flake8 pydatajson 13 | 14 | 15 | ; If you want to make tox run the tests with the same versions, create a 16 | ; requirements.txt with the pinned versions and uncomment the following lines: 17 | ; deps = 18 | ; -r{toxinidir}/requirements.txt 19 | -------------------------------------------------------------------------------- /travis_pypi_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 |
# -*- coding: utf-8 -*- 3 | """Update encrypted deploy password in Travis config file 4 | """ 5 | 6 | 7 | from __future__ import print_function 8 | import base64 9 | import json 10 | import os 11 | from getpass import getpass 12 | import yaml 13 | from cryptography.hazmat.primitives.serialization import load_pem_public_key 14 | from cryptography.hazmat.backends import default_backend 15 | from cryptography.hazmat.primitives.asymmetric.padding import PKCS1v15 16 | 17 | 18 | try: 19 | from urllib import urlopen 20 | except ImportError: # Python 3 21 | from urllib.request import urlopen 22 | 23 | 24 | GITHUB_REPO = 'datosgobar/pydatajson' 25 | TRAVIS_CONFIG_FILE = os.path.join( 26 | os.path.dirname(os.path.abspath(__file__)), '.travis.yml') 27 | 28 | 29 | def load_key(pubkey): 30 | """Load public RSA key, with work-around for keys using 31 | incorrect header/footer format. 32 | 33 | Read more about RSA encryption with cryptography: 34 | https://cryptography.io/latest/hazmat/primitives/asymmetric/rsa/ 35 | """ 36 | try: 37 | return load_pem_public_key(pubkey.encode(), default_backend()) 38 | except ValueError: 39 | # workaround for https://github.com/travis-ci/travis-api/issues/196 40 | pubkey = pubkey.replace('BEGIN RSA', 'BEGIN').replace('END RSA', 'END') 41 | return load_pem_public_key(pubkey.encode(), default_backend()) 42 | 43 | 44 | def encrypt(pubkey, password): 45 | """Encrypt password using given RSA public key and encode it with base64. 46 | 47 | The encrypted password can only be decrypted by someone with the 48 | private key (in this case, only Travis). 49 | """ 50 | key = load_key(pubkey) 51 | encrypted_password = key.encrypt(password, PKCS1v15()) 52 | return base64.b64encode(encrypted_password) 53 | 54 | 55 | def fetch_public_key(repo): 56 | """Download RSA public key Travis will use for this repo. 57 | 58 | Travis API docs: http://docs.travis-ci.com/api/#repository-keys 59 | """ 60 | keyurl = 'https://api.travis-ci.org/repos/{0}/key'.format(repo) 61 | data = json.loads(urlopen(keyurl).read().decode()) 62 | if 'key' not in data: 63 | errmsg = "Could not find public key for repo: {}.\n".format(repo) 64 | errmsg += "Have you already added your GitHub repo to Travis?" 65 | raise ValueError(errmsg) 66 | return data['key'] 67 | 68 | 69 | def prepend_line(filepath, line): 70 | """Rewrite a file adding a line to its beginning. 71 | """ 72 | with open(filepath) as f: 73 | lines = f.readlines() 74 | 75 | lines.insert(0, line) 76 | 77 | with open(filepath, 'w') as f: 78 | f.writelines(lines) 79 | 80 | 81 | def load_yaml_config(filepath): 82 | with open(filepath) as f: 83 | return yaml.safe_load(f) 84 | 85 | 86 | def save_yaml_config(filepath, config): 87 | with open(filepath, 'w') as f: 88 | yaml.dump(config, f, default_flow_style=False) 89 | 90 | 91 | def update_travis_deploy_password(encrypted_password): 92 | """Update the deploy section of the .travis.yml file 93 | to use the given encrypted password.
94 | """ 95 | config = load_yaml_config(TRAVIS_CONFIG_FILE) 96 | 97 | config['deploy']['password'] = dict(secure=encrypted_password) 98 | 99 | save_yaml_config(TRAVIS_CONFIG_FILE, config) 100 | 101 | line = ('# This file was autogenerated and will overwrite' 102 | ' each time you run travis_pypi_setup.py\n') 103 | prepend_line(TRAVIS_CONFIG_FILE, line) 104 | 105 | 106 | def main(args): 107 | public_key = fetch_public_key(args.repo) 108 | password = args.password or getpass('PyPI password: ') 109 | update_travis_deploy_password(encrypt(public_key, password.encode())) 110 | print("Wrote encrypted password to .travis.yml -- you're ready to deploy") 111 | 112 | 113 | if '__main__' == __name__: 114 | import argparse 115 | parser = argparse.ArgumentParser(description=__doc__) 116 | parser.add_argument('--repo', default=GITHUB_REPO, 117 | help='GitHub repo (default: %s)' % GITHUB_REPO) 118 | parser.add_argument('--password', 119 | help='PyPI password (will prompt if not provided)') 120 | 121 | args = parser.parse_args() 122 | main(args) 123 | --------------------------------------------------------------------------------